Skip to content

Commit

Permalink
Add an --only="these,words" option
Browse files Browse the repository at this point in the history
  • Loading branch information
havenwood committed Jun 17, 2024
1 parent 404964f commit 2e36b11
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 10 deletions.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ Options:
-c, --case <FORMAT> Case normalization [default: lower] [possible values: original, upper, lower]
-m, --min-chars <COUNT> Exclude words containing fewer than min chars [default: 1]
-M, --min-count <COUNT> Exclude words appearing fewer than min times [default: 1]
-e, --exclude <WORDS> Exclude words from a comma-delimited list
-e, --exclude <WORDS> Exclude any words from a comma-delimited list
-O, --only <WORDS> Only include words from a comma-delimited list
-D, --delimiter <VALUE> Delimiter between keys and values [default: " "]
-o, --output <PATH> Write output to file rather than stdout
-v, --verbose Print verbose details
Expand All @@ -31,13 +32,10 @@ Options:
## Examples

```sh
> word-tally README.md | head -n6
tally 19
> word-tally README.md | head -n3
tally 20
word 17
https 10
default 7
output 5
print 5
https 11
```

```sh
Expand Down
6 changes: 5 additions & 1 deletion src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,14 @@ pub struct Args {
#[arg(short = 'M', long, default_value_t = 1, value_name = "COUNT")]
pub min_count: u64,

/// Exclude words from a comma-delimited list.
/// Exclude any words from a comma-delimited list.
#[arg(short, long, use_value_delimiter = true, value_name = "WORDS")]
pub exclude: Option<Vec<String>>,

/// Only include words from a comma-delimited list.
#[arg(short = 'O', long, use_value_delimiter = true, value_name = "WORDS")]
pub only: Option<Vec<String>>,

/// Delimiter between keys and values.
#[arg(short = 'D', long, default_value = " ", value_name = "VALUE")]
pub delimiter: String,
Expand Down
22 changes: 21 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,24 @@ impl Count {
pub struct Words {
/// A list of words that should not be tallied.
///
/// `None` applies no exclusion; when a list is given, any tallied word found
/// in it is removed from the tally.
pub exclude: Option<Vec<String>>,

/// A list of words to only tally.
///
/// `None` applies no restriction. When a list is given, each entry is
/// case-normalized with the tally's case setting and only words found in the
/// normalized list are kept. If `exclude` is also set, a word has to pass
/// both filters to remain in the tally.
pub only: Option<Vec<String>>,
}

impl Words {
pub const fn exclude(words: Option<Vec<String>>) -> Self {
Self { exclude: words }
Self {
exclude: words,
only: None,
}
}

pub const fn only(words: Option<Vec<String>>) -> Self {
Self {
only: words,
exclude: None,
}
}
}

Expand All @@ -173,6 +186,13 @@ impl WordTally {
.collect();
tally_map.retain(|word, _| !normalized_excludes.contains(word));
}
if let Some(exclusives) = filters.words.only {
let normalized_exclusives: Vec<_> = exclusives
.iter()
.map(|exclusive| Self::normalize_case(exclusive, case))
.collect();
tally_map.retain(|word, _| normalized_exclusives.contains(word));
}
let count = tally_map.values().sum();
let tally = Vec::from_iter(tally_map);
let uniq_count = tally.len();
Expand Down
5 changes: 4 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ fn main() -> Result<()> {
let filters = Filters {
chars: Chars::min(args.min_chars),
count: Count::min(args.min_count),
words: Words::exclude(args.exclude.clone()),
words: Words {
exclude: args.exclude.clone(),
only: args.only.clone(),
},
};
let word_tally = WordTally::new(reader, args.case, args.sort, filters);
let delimiter = unescape(&args.delimiter)?;
Expand Down
19 changes: 19 additions & 0 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,22 @@ fn test_excluding_words() {
assert!(!result.iter().any(|(word, _)| word == "heaven"));
assert!(!result.iter().any(|(word, _)| word == "hell"));
}

/// With an `only` list set, the tally contains just the listed words and
/// their counts, in descending order.
#[test]
fn test_only_words() {
    let text = "One must still have chaos in oneself to be able to give birth to a dancing star. I tell you: you have chaos in yourselves.";

    // Restrict the tally to two words; every other filter keeps its default.
    let words = Words {
        only: Some(vec![String::from("chaos"), String::from("star")]),
        ..Words::default()
    };
    let filters = Filters {
        words,
        ..Filters::default()
    };

    let tally = WordTally::new(text.as_bytes(), Case::Lower, Sort::Desc, filters);
    let result = tally.tally();

    let expected = vec![(String::from("chaos"), 2), (String::from("star"), 1)];
    assert_eq!(result, expected);
}
11 changes: 11 additions & 0 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,14 @@ fn test_exclude_words() {
.success()
.stdout(contains("tree").and(contains("heaven").not().and(contains("hell").not())));
}

/// The `--only` flag limits the printed tally to the listed words.
#[test]
fn test_only_words() {
    let stdin_text = "One must still have chaos in oneself to be able to give birth to a dancing star. I tell you: you have chaos in yourselves.";

    let mut word_tally = Command::cargo_bin("word-tally").unwrap();
    word_tally.arg("--only=chaos,star").write_stdin(stdin_text);

    // Only the two requested words should appear, most frequent first.
    word_tally
        .assert()
        .success()
        .stdout("chaos 2\nstar 1\n");
}

0 comments on commit 2e36b11

Please sign in to comment.