Skip to content

Commit

Permalink
Apply min chars filter after initial tally
Browse files Browse the repository at this point in the history
  • Loading branch information
havenwood committed Sep 30, 2024
1 parent d32ba2f commit 5e0597a
Showing 1 changed file with 12 additions and 24 deletions.
36 changes: 12 additions & 24 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl From<Vec<String>> for WordsOnly {
impl WordTally {
/// Constructs a new `WordTally` from a source that implements `Read`, such as a file or stdin.
pub fn new<T: Read>(input: T, case: Case, order: Sort, filters: Filters) -> Self {
let mut tally_map = Self::tally_map(input, case, filters.min_chars);
let mut tally_map = Self::tally_map(input, case);
Self::filter(&mut tally_map, filters, case);

let count = tally_map.values().sum();
Expand Down Expand Up @@ -242,43 +242,31 @@ impl WordTally {
}

/// Creates a tally of normalized words from an input that implements `Read`.
fn tally_map<T: Read>(
input: T,
case: Case,
min_chars: Option<MinChars>,
) -> IndexMap<Box<str>, u64> {
fn tally_map<T: Read>(input: T, case: Case) -> IndexMap<Box<str>, u64> {
let mut tally = IndexMap::new();
let lines = BufReader::new(input).lines();

match min_chars {
Some(MinChars(count)) => {
for line in lines.map_while(Result::ok) {
line.unicode_words()
.filter(|word| word.graphemes(true).count() >= count)
.for_each(|word| {
*tally.entry(Self::normalize_case(word, case)).or_insert(0) += 1;
});
}
}
None => {
for line in lines.map_while(Result::ok) {
line.unicode_words().for_each(|word| {
*tally.entry(Self::normalize_case(word, case)).or_insert(0) += 1;
});
}
}
for line in lines.map_while(Result::ok) {
line.unicode_words().for_each(|word| {
*tally.entry(Self::normalize_case(word, case)).or_insert(0) += 1;
});
}

tally
}

/// Removes words from the `tally_map` based on any word `Filters`.
fn filter(tally_map: &mut IndexMap<Box<str>, u64>, filters: Filters, case: Case) {
// Remove any words that lack the minimum number of characters.
// Remove any words that lack the minimum count.
if let Some(MinCount(min_count)) = filters.min_count {
tally_map.retain(|_, &mut count| count >= min_count);
}

// Remove any words that lack the minimum number of characters.
if let Some(MinChars(min_chars)) = filters.min_chars {
tally_map.retain(|word, _| word.graphemes(true).count() >= min_chars);
}

// Remove any words on the `exclude` word list.
if let Some(WordsExclude(excludes)) = filters.words_exclude {
let normalized_excludes: Vec<_> = excludes
Expand Down

0 comments on commit 5e0597a

Please sign in to comment.