Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use separate config properties for adapter enable/disable lists #256

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions src/adapters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,18 @@ pub struct AdaptInfo {
/// (enabledAdapters, disabledAdapters)
type AdaptersTuple = (Vec<Arc<dyn FileAdapter>>, Vec<Arc<dyn FileAdapter>>);

pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
/// ```
/// # use ripgrep_all::adapters::get_all_adapters;
/// let enable = &[];
/// let disable = &[String::from("ffmpeg")];
/// let (_, disabled) = get_all_adapters(None, enable, disable);
/// assert!(!disabled.is_empty())
/// ```
pub fn get_all_adapters(
custom_adapters: Option<Vec<CustomAdapterConfig>>,
adapters_enable: &[String],
adapters_disable: &[String],
) -> AdaptersTuple {
// order in descending priority
let mut adapters: Vec<Arc<dyn FileAdapter>> = vec![];
if let Some(custom_adapters) = custom_adapters {
Expand All @@ -134,9 +145,10 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
);
adapters.extend(internal_adapters);

adapters
.into_iter()
.partition(|e| !e.metadata().disabled_by_default)
adapters.into_iter().partition(|e| {
!adapters_disable.contains(&e.metadata().name)
&& (adapters_enable.contains(&e.metadata().name) || !e.metadata().disabled_by_default)
})
}

/**
Expand All @@ -149,9 +161,12 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
*/
pub fn get_adapters_filtered<T: AsRef<str>>(
custom_adapters: Option<Vec<CustomAdapterConfig>>,
adapters_enable: &[String],
adapters_disable: &[String],
adapter_names: &[T],
) -> Result<Vec<Arc<dyn FileAdapter>>> {
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
let (def_enabled_adapters, def_disabled_adapters) =
get_all_adapters(custom_adapters, adapters_enable, adapters_disable);
let adapters = if !adapter_names.is_empty() {
let adapters_map: HashMap<_, _> = def_enabled_adapters
.iter()
Expand All @@ -168,9 +183,9 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
name = &name[1..];
adapters = def_enabled_adapters.clone();
} else if i == 0 && (name.starts_with('+')) {
additive = true;
name = &name[1..];
adapters = def_enabled_adapters.clone();
additive = true;
}
if subtractive {
let inx = adapters
Expand Down
44 changes: 30 additions & 14 deletions src/adapters/custom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,32 +25,48 @@ use tokio_util::io::StreamReader;
// mostly the same as AdapterMeta + SpawningFileAdapter
#[derive(Debug, Deserialize, Serialize, JsonSchema, Default, PartialEq, Clone)]
pub struct CustomAdapterConfig {
/// the unique identifier and name of this adapter. Must only include a-z, 0-9, _
/// The unique identifier and name of this adapter.
///
/// Must only include a-z, 0-9, _.
pub name: String,
/// a description of this adapter. shown in help

/// The description of this adapter shown in help.
pub description: String,
/// if true, the adapter will be disabled by default

/// If true, the adapter will be disabled by default.
pub disabled_by_default: Option<bool>,
/// version identifier. used to key cache entries, change if the configuration or program changes

/// Version identifier used to key cache entries.
///
/// Change this if the configuration or program changes.
pub version: i32,
/// the file extensions this adapter supports. For example ["epub", "mobi"]

/// The file extensions this adapter supports, for example `["epub", "mobi"]`.
pub extensions: Vec<String>,
/// if not null and --rga-accurate is enabled, mime type matching is used instead of file name matching

/// If not null and `--rga-accurate` is enabled, mime type matching is used instead of file name matching.
pub mimetypes: Option<Vec<String>>,
/// if --rga-accurate, only match by mime types, ignore extensions completely

/// If `--rga-accurate`, only match by mime types and ignore extensions completely.
pub match_only_by_mime: Option<bool>,
/// the name or path of the binary to run

/// The name or path of the binary to run.
pub binary: String,
/// The arguments to run the program with. Placeholders:
/// - $input_file_extension: the file extension (without dot). e.g. foo.tar.gz -> gz
/// - $input_file_stem, the file name without the last extension. e.g. foo.tar.gz -> foo.tar
/// - $input_virtual_path: the full input file path. Note that this path may not actually exist on disk because it is the result of another adapter

/// The arguments to run the program with.
/// Placeholders:
/// - `$input_file_extension`: the file extension (without dot). e.g. foo.tar.gz -> gz
/// - `$input_file_stem`: the file name without the last extension. e.g. foo.tar.gz -> foo.tar
/// - `$input_virtual_path`: the full input file path.
/// Note that this path may not actually exist on disk because it is the result of another adapter.
///
/// stdin of the program will be connected to the input file, and stdout is assumed to be the converted file
pub args: Vec<String>,
/// The output path hint. The placeholders are the same as for `.args`

/// The output path hint.
/// The placeholders are the same as for `.args`.
///
/// If not set, defaults to "${input_virtual_path}.txt"
/// If not set, defaults to `"${input_virtual_path}.txt"`.
///
/// Setting this is useful if the output format is not plain text (.txt) but instead some other format that should be passed to another adapter
pub output_path_hint: Option<String>,
Expand Down
13 changes: 11 additions & 2 deletions src/bin/rga.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ use std::process::Command;
use std::time::Instant;

fn list_adapters(args: RgaConfig) -> Result<()> {
let (enabled_adapters, disabled_adapters) = get_all_adapters(args.custom_adapters);
let (enabled_adapters, disabled_adapters) = get_all_adapters(
args.custom_adapters,
&args.adapters_enable,
&args.adapters_disable,
);

println!("Adapters:\n");
let print = |adapter: std::sync::Arc<dyn FileAdapter>| {
Expand Down Expand Up @@ -87,7 +91,12 @@ fn main() -> anyhow::Result<()> {
return Ok(());
}

let adapters = get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?;
let adapters = get_adapters_filtered(
config.custom_adapters.clone(),
&config.adapters_enable,
&config.adapters_disable,
&config.adapters,
)?;

let pre_glob = if !config.accurate {
let extensions = adapters
Expand Down
91 changes: 51 additions & 40 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,11 @@ impl FromStr for CacheMaxBlobLen {

/// # rga configuration
///
/// this is kind of a "polyglot" struct, since it serves three functions
/// This is kind of a "polyglot" struct serving multiple purposes:
///
/// 1. describing the command line arguments using structopt+clap and for man page / readme generation
/// 2. describing the config file format (output as JSON schema via schemars)
/// 1. Declare the command line arguments using structopt+clap.
/// 1. Provide information for manpage / readme generation.
/// 1. Describe the config file format (output as JSON schema via schemars).
#[derive(StructOpt, Debug, Deserialize, Serialize, JsonSchema, Default, Clone)]
#[structopt(
name = "ripgrep-all",
Expand All @@ -114,36 +115,47 @@ impl FromStr for CacheMaxBlobLen {
usage = "rga [RGA OPTIONS] [RG OPTIONS] PATTERN [PATH ...]"
)]
pub struct RgaConfig {
/// Use more accurate but slower matching by mime type
/// Use more accurate but slower matching by mime type.
///
/// By default, rga will match files using file extensions.
/// Some programs, such as sqlite3, don't care about the file extension at all,
/// so users sometimes use any or no extension at all. With this flag, rga
/// will try to detect the mime type of input files using the magic bytes
/// (similar to the `file` utility), and use that to choose the adapter.
/// Some programs, such as sqlite3, don't care about the file extension at all, so users sometimes use any or no extension at all.
/// With this flag, rga will try to detect the mime type of input files using the magic bytes (similar to the `file` utility), and use that to choose the adapter.
/// Detection is only done on the first 8KiB of the file, since we can't always seek on the input (in archives).
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(long = "--rga-accurate")]
pub accurate: bool,

/// Change which adapters to use and in which priority order (descending)
/// Change which adapters to use and in which priority order (descending).
///
/// "foo,bar" means use only adapters foo and bar.
/// "-bar,baz" means use all default adapters except for bar and baz.
/// "+bar,baz" means use all default adapters and also bar and baz.
#[serde(default, skip_serializing_if = "is_default")]
/// - "foo,bar" means use only adapters foo and bar.
/// - "-bar,baz" means use all default adapters except for bar and baz.
/// - "+bar,baz" means use all default adapters and also bar and baz.
#[serde(skip)] // CLI only
#[structopt(
long = "--rga-adapters",
require_equals = true,
require_delimiter = true
)]
pub adapters: Vec<String>,

/// Additional adapters to enable in addition to any default adapters.
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(skip)] // config file only
pub adapters_enable: Vec<String>,

/// Adapters to explicitly disable.
///
/// Entries in this list will overrule those in `adapters_enable`;
/// if the same adapter is present in both lists it will be disabled.
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(skip)] // config file only
pub adapters_disable: Vec<String>,

#[serde(default, skip_serializing_if = "is_default")]
#[structopt(flatten)]
pub cache: CacheConfig,

/// Maximum nestedness of archives to recurse into
/// Maximum depth of nested archives to recurse into.
///
/// When searching in archives, rga will recurse into archives inside archives.
/// This option limits the depth.
Expand All @@ -164,63 +176,62 @@ pub struct RgaConfig {
#[structopt(long = "--rga-no-prefix-filenames")]
pub no_prefix_filenames: bool,

//////////////////////////////////////////
//////////////////////////// Config file only
//////////////////////////////////////////
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(skip)]
#[structopt(skip)] // config file only
pub custom_adapters: Option<Vec<CustomAdapterConfig>>,
//////////////////////////////////////////
//////////////////////////// CMD line only
//////////////////////////////////////////

#[serde(skip)]
#[structopt(long = "--rga-config-file", require_equals = true)]
pub config_file_path: Option<String>,

/// same as passing path directly, except if argument is empty
/// kinda hacky, but if no file is found, fzf calls rga with empty string as path, which causes No such file or directory from rg. So filter those cases and return specially
#[serde(skip)]
/// Same as passing path directly, except if argument is empty.
///
/// Kinda hacky, but if no file is found, `fzf` calls `rga` with empty string as path, which causes "No such file or directory" from `rg`.
/// So filter those cases and return specially.
#[serde(skip)] // CLI only
#[structopt(long = "--rga-fzf-path", require_equals = true, hidden = true)]
pub fzf_path: Option<String>,

// these arguments are basically "subcommands" that stop the process, so don't serialize them
#[serde(skip)]
#[serde(skip)] // CLI only
#[structopt(long = "--rga-list-adapters", help = "List all known adapters")]
pub list_adapters: bool,

#[serde(skip)]
#[serde(skip)] // CLI only
#[structopt(
long = "--rga-print-config-schema",
help = "Print the JSON Schema of the configuration file"
)]
pub print_config_schema: bool,

#[serde(skip)]
#[serde(skip)] // CLI only
#[structopt(long, help = "Show help for ripgrep itself")]
pub rg_help: bool,

#[serde(skip)]
#[serde(skip)] // CLI only
#[structopt(long, help = "Show version of ripgrep itself")]
pub rg_version: bool,
}

#[derive(StructOpt, Debug, Deserialize, Serialize, JsonSchema, Default, Clone, PartialEq)]
pub struct CacheConfig {
/// Disable caching of results
/// Disable caching of results.
///
/// By default, rga caches the extracted text, if it is small enough,
/// to a database in ${XDG_CACHE_DIR-~/.cache}/ripgrep-all on Linux,
/// ~/Library/Caches/ripgrep-all on macOS,
/// or C:\Users\username\AppData\Local\ripgrep-all on Windows.
/// By default, rga caches the extracted text, if it is small enough, to a database.
/// This way, repeated searches on the same set of files will be much faster.
/// The location of the DB varies by platform:
/// - `${XDG_CACHE_DIR-~/.cache}/ripgrep-all` on Linux
/// - `~/Library/Caches/ripgrep-all` on macOS
/// - `C:\Users\username\AppData\Local\ripgrep-all` on Windows
///
/// If you pass this flag, all caching will be disabled.
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(long = "--rga-no-cache")]
pub disabled: bool,

/// Max compressed size to cache
/// Max compressed size to cache.
///
/// Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time.
/// Longest byte length (after compression) to store in cache.
/// Longer adapter outputs will not be cached and recomputed every time.
///
/// Allowed suffixes on command line: k M G
#[serde(default, skip_serializing_if = "is_default")]
Expand All @@ -233,9 +244,9 @@ pub struct CacheConfig {
)]
pub max_blob_len: CacheMaxBlobLen,

/// ZSTD compression level to apply to adapter outputs before storing in cache db
/// ZSTD compression level to apply to adapter outputs before storing in cache DB.
///
/// Ranges from 1 - 22
/// Ranges from 1 - 22.
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
default_value,
Expand All @@ -246,7 +257,7 @@ pub struct CacheConfig {
)]
pub compression_level: CacheCompressionLevel,

/// Path to store cache db
/// Path to store cache DB.
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
default_value,
Expand Down Expand Up @@ -380,7 +391,7 @@ where
)
})?;
{
// readd values with [serde(skip)]
// re-add values marked with #[serde(skip)] (they are not part of the config file, so copy them from the CLI arguments)
res.fzf_path = arg_matches.fzf_path;
res.list_adapters = arg_matches.list_adapters;
res.print_config_schema = arg_matches.print_config_schema;
Expand Down
7 changes: 6 additions & 1 deletion src/preproc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@ async fn choose_adapter(
archive_recursion_depth: i32,
inp: &mut (impl AsyncBufRead + Unpin),
) -> Result<Option<(Arc<dyn FileAdapter>, FileMatcher, ActiveAdapters)>> {
let active_adapters = get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?;
let active_adapters = get_adapters_filtered(
config.custom_adapters.clone(),
&config.adapters_enable,
&config.adapters_disable,
&config.adapters,
)?;
let adapters = adapter_matcher(&active_adapters, config.accurate)?;
let filename = filepath_hint
.file_name()
Expand Down