-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract filters and options into their own mods
- Loading branch information
Showing
3 changed files
with
181 additions
and
163 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
use crate::Case; | ||
use core::fmt::{self, Display, Formatter}; | ||
use indexmap::IndexMap; | ||
use unicode_segmentation::UnicodeSegmentation; | ||
|
||
/// Filters for words to be included in the tally. | ||
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Ord, Hash)] | ||
pub struct Filters { | ||
/// Word chars filters for tallying. | ||
pub min_chars: Option<MinChars>, | ||
|
||
/// Word count filters for tallying. | ||
pub min_count: Option<MinCount>, | ||
|
||
/// List of specific words to exclude for tallying. | ||
pub words_exclude: Option<WordsExclude>, | ||
|
||
/// List of specific words to only include for tallying. | ||
pub words_only: Option<WordsOnly>, | ||
} | ||
|
||
impl Filters { | ||
/// Removes words from the `tally_map` based on any word `Filters`. | ||
pub fn apply(&self, tally_map: &mut IndexMap<Box<str>, u64>, case: Case) { | ||
// Remove any words that lack the minimum count. | ||
if let Some(MinCount(min_count)) = self.min_count { | ||
tally_map.retain(|_, &mut count| count >= min_count); | ||
} | ||
|
||
// Remove any words that lack the minimum numbner of characters. | ||
if let Some(MinChars(min_chars)) = self.min_chars { | ||
tally_map.retain(|word, _| word.graphemes(true).count() >= min_chars); | ||
} | ||
|
||
// Remove any words on the `exclude` word list. | ||
if let Some(WordsExclude(excludes)) = &self.words_exclude { | ||
let normalized_excludes: Vec<_> = | ||
excludes.iter().map(|exclude| case.apply(exclude)).collect(); | ||
tally_map.retain(|word, _| !normalized_excludes.contains(word)); | ||
} | ||
|
||
// Remove any words absent from the `only` word list. | ||
if let Some(WordsOnly(exclusives)) = &self.words_only { | ||
let normalized_exclusives: Vec<_> = exclusives | ||
.iter() | ||
.map(|exclusive| case.apply(exclusive)) | ||
.collect(); | ||
tally_map.retain(|word, _| normalized_exclusives.contains(word)); | ||
} | ||
} | ||
} | ||
|
||
/// Minimum number of characters a word must have to be tallied.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MinChars(pub usize);

impl Display for MinChars {
    /// Writes the wrapped threshold as a plain number.
    ///
    /// The value is written with a fresh `{}` spec, so width or fill flags given
    /// by the caller are not forwarded to the inner number.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self(threshold) = self;
        f.write_fmt(format_args!("{}", threshold))
    }
}

impl From<usize> for MinChars {
    /// Wraps a raw character threshold.
    fn from(raw: usize) -> Self {
        MinChars(raw)
    }
}
|
||
/// Minimum count a word must reach to be tallied.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MinCount(pub u64);

impl Display for MinCount {
    /// Writes the wrapped threshold as a plain number.
    ///
    /// The value is written with a fresh `{}` spec, so width or fill flags given
    /// by the caller are not forwarded to the inner number.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self(threshold) = self;
        f.write_fmt(format_args!("{}", threshold))
    }
}

impl From<u64> for MinCount {
    /// Wraps a raw count threshold.
    fn from(raw: u64) -> Self {
        MinCount(raw)
    }
}
|
||
/// Words that must be left out of the tally.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WordsExclude(pub Vec<String>);

impl From<Vec<String>> for WordsExclude {
    /// Wraps a raw list of words to exclude, taking ownership of the vector.
    fn from(raw: Vec<String>) -> Self {
        WordsExclude(raw)
    }
}
|
||
/// Words that are the only ones allowed in the tally.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WordsOnly(pub Vec<String>);

impl From<Vec<String>> for WordsOnly {
    /// Wraps a raw list of words to keep, taking ownership of the vector.
    fn from(raw: Vec<String>) -> Self {
        WordsOnly(raw)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
use crate::WordTally; | ||
use clap::ValueEnum; | ||
use core::cmp::Reverse; | ||
use core::fmt::{self, Display, Formatter}; | ||
|
||
/// Word case normalization. | ||
#[derive(Clone, Copy, Debug, Default, ValueEnum)] | ||
pub enum Case { | ||
Original, | ||
Upper, | ||
#[default] | ||
Lower, | ||
} | ||
|
||
impl Case { | ||
/// Normalizes word case if a `Case` other than `Case::Original` is provided. | ||
pub fn apply(&self, word: &str) -> Box<str> { | ||
match self { | ||
Self::Lower => word.to_lowercase().into_boxed_str(), | ||
Self::Upper => word.to_uppercase().into_boxed_str(), | ||
Self::Original => Box::from(word), | ||
} | ||
} | ||
} | ||
|
||
impl Display for Case { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
let case = match self { | ||
Self::Lower => "lower", | ||
Self::Upper => "upper", | ||
Self::Original => "original", | ||
}; | ||
|
||
f.write_str(case) | ||
} | ||
} | ||
|
||
/// Sort order by count. | ||
#[derive(Clone, Copy, Debug, Default, ValueEnum)] | ||
pub enum Sort { | ||
#[default] | ||
Desc, | ||
Asc, | ||
Unsorted, | ||
} | ||
|
||
impl Sort { | ||
/// Sorts the `tally` field in place if a sort order other than `Unsorted` is provided. | ||
pub fn apply(&self, w: &mut WordTally) { | ||
match self { | ||
Self::Desc => w.tally.sort_unstable_by_key(|&(_, count)| Reverse(count)), | ||
Self::Asc => w.tally.sort_unstable_by_key(|&(_, count)| count), | ||
Self::Unsorted => (), | ||
} | ||
} | ||
} | ||
|
||
impl Display for Sort { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
let order = match self { | ||
Self::Desc => "desc", | ||
Self::Asc => "asc", | ||
Self::Unsorted => "unsorted", | ||
}; | ||
|
||
f.write_str(order) | ||
} | ||
} |