Skip to content

Commit

Permalink
Remove the trivial but annoying avg field
Browse files Browse the repository at this point in the history
  • Loading branch information
havenwood committed Oct 10, 2024
1 parent 2876633 commit 0218370
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 49 deletions.
30 changes: 2 additions & 28 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
//! `Original` (case sensitive) and `Lower` or `Upper` case normalization. `Sort`
//! order can be `Unsorted` or sorted `Desc` (descending) or `Asc` (ascending).
//! A `tally` can be sorted upon construction or sorted later with the `sort` method.
//! Sorting doesn't impact the `count`, `uniq_count` or `avg` fields. `Filter`s can
//! Sorting doesn't impact the `count` or `uniq_count` fields. `Filter`s can
//! be used to provide lists of words that should or shouldn't be tallied.
//!
//! # Examples
Expand All @@ -29,7 +29,6 @@
//!
//! assert_eq!(words.tally(), expected_tally);
//! ```
use core::hash::{Hash, Hasher};
use indexmap::IndexMap;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
Expand All @@ -43,7 +42,7 @@ pub use filters::{Filters, MinChars, MinCount, WordsExclude, WordsOnly};
pub use options::{Case, Options, Sort};

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, Default, PartialEq)]
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct WordTally {
/// Ordered pairs of words and the count of times they appear.
Expand All @@ -54,18 +53,6 @@ pub struct WordTally {

/// The sum of uniq words tallied.
uniq_count: usize,

/// The mean average count per word, if there are words.
avg: Option<f64>,
}

impl Eq for WordTally {}

/// Since the other fields are derived from it, hash by just the `tally`.
impl Hash for WordTally {
fn hash<H: Hasher>(&self, state: &mut H) {
self.tally.hash(state);
}
}

/// A `tally` supports `iter` and can also be represented as a `Vec`.
Expand All @@ -85,12 +72,10 @@ impl WordTally {
let count = tally_map.values().sum();
let tally: Box<[_]> = tally_map.into_iter().collect();
let uniq_count = tally.len();
let avg = Self::calculate_avg(count, uniq_count);
let mut word_tally = Self {
tally,
count,
uniq_count,
avg,
};
word_tally.sort(options.sort);

Expand All @@ -117,17 +102,6 @@ impl WordTally {
self.count
}

/// Gets the `avg` field.
pub const fn avg(&self) -> Option<f64> {
self.avg
}

/// Calculates an approximate mean average word count if there are words.
/// Note: Casting `u64` to `f64` and floating point arithmatic cause a loss of precision.
fn calculate_avg(count: u64, uniq_count: usize) -> Option<f64> {
(count > 0).then(|| count as f64 / uniq_count as f64)
}

/// Creates a tally of normalized words from an input that implements `Read`.
fn tally_map<T: Read>(input: T, case: Case) -> IndexMap<Box<str>, u64> {
let mut tally = IndexMap::new();
Expand Down
4 changes: 0 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,6 @@ fn log_details(
log_detail(&mut w, "source", delimiter, source)?;
log_detail(&mut w, "total-words", delimiter, word_tally.count())?;
log_detail(&mut w, "unique-words", delimiter, word_tally.uniq_count())?;

if let Some(avg) = word_tally.avg() {
log_detail(&mut w, "average-word-count", delimiter, format!("{avg:.3}"))?;
}
}

if log_config.debug {
Expand Down
18 changes: 2 additions & 16 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ const TEST_WORDS_PATH: &str = "tests/files/words.txt";
struct ExpectedFields<'a> {
count: u64,
uniq_count: usize,
avg: Option<f64>,
tally: Vec<(&'a str, u64)>,
}

Expand All @@ -24,7 +23,6 @@ fn word_tally_test(case: Case, sort: Sort, filters: Filters, fields: &ExpectedFi
let word_tally = word_tally(Options { case, sort }, filters);
assert_eq!(word_tally.count(), fields.count);
assert_eq!(word_tally.uniq_count(), fields.uniq_count);
assert_eq!(word_tally.avg(), fields.avg);

let expected_tally = fields
.tally
Expand All @@ -44,7 +42,6 @@ fn lower_case_desc_order() {
&ExpectedFields {
count: 45,
uniq_count: 5,
avg: Some(9.0),
tally: vec![("c", 15), ("d", 11), ("123", 9), ("b", 7), ("a", 3)],
},
);
Expand All @@ -62,7 +59,6 @@ fn min_char_count_at_max() {
&ExpectedFields {
count: 9,
uniq_count: 1,
avg: Some(9.0),
tally: vec![("123", 9)],
},
);
Expand All @@ -80,7 +76,6 @@ fn min_char_count_above_max() {
&ExpectedFields {
count: 0,
uniq_count: 0,
avg: None,
tally: vec![],
},
);
Expand All @@ -95,7 +90,6 @@ fn min_char_count_at_min() {
&ExpectedFields {
count: 45,
uniq_count: 5,
avg: Some(9.0),
tally: vec![("c", 15), ("d", 11), ("123", 9), ("b", 7), ("a", 3)],
},
);
Expand All @@ -113,7 +107,6 @@ fn min_word_count_at_max() {
&ExpectedFields {
count: 15,
uniq_count: 1,
avg: Some(15.0),
tally: vec![("c", 15)],
},
);
Expand All @@ -128,7 +121,6 @@ fn default_case_unsorted_order() {
&ExpectedFields {
count: 45,
uniq_count: 5,
avg: Some(9.0),
tally: vec![("d", 11), ("123", 9), ("a", 3), ("c", 15), ("b", 7)],
},
);
Expand All @@ -143,7 +135,6 @@ fn upper_case_desc_order() {
&ExpectedFields {
count: 45,
uniq_count: 5,
avg: Some(9.0),
tally: vec![("C", 15), ("D", 11), ("123", 9), ("B", 7), ("A", 3)],
},
);
Expand All @@ -158,7 +149,6 @@ fn lower_case_asc_order() {
&ExpectedFields {
count: 45,
uniq_count: 5,
avg: Some(9.0),
tally: vec![("a", 3), ("b", 7), ("123", 9), ("d", 11), ("c", 15)],
},
);
Expand All @@ -173,7 +163,6 @@ fn upper_case_asc_order() {
&ExpectedFields {
count: 45,
uniq_count: 5,
avg: Some(9.0),
tally: vec![("A", 3), ("B", 7), ("123", 9), ("D", 11), ("C", 15)],
},
);
Expand All @@ -188,7 +177,6 @@ fn original_case_desc_order() {
&ExpectedFields {
count: 45,
uniq_count: 9,
avg: Some(5.0),
tally: vec![
("123", 9),
("C", 8),
Expand All @@ -213,7 +201,6 @@ fn original_case_asc_order() {
&ExpectedFields {
count: 45,
uniq_count: 9,
avg: Some(5.0),
tally: vec![
("a", 1),
("A", 2),
Expand Down Expand Up @@ -391,7 +378,7 @@ fn test_to_json() {
);
let serialized = serde_json::to_string(&expected).unwrap();

let expected_json = r#"{"tally":[["wombat",2],["bat",1]],"count":3,"uniq_count":2,"avg":1.5}"#;
let expected_json = r#"{"tally":[["wombat",2],["bat",1]],"count":3,"uniq_count":2}"#;
assert_eq!(serialized, expected_json);
}

Expand All @@ -407,8 +394,7 @@ fn test_from_json() {
{
"tally": [["wombat", 2], ["bat", 1]],
"count": 3,
"uniq_count": 2,
"avg": 1.5
"uniq_count": 2
}
"#;

Expand Down
2 changes: 1 addition & 1 deletion tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ fn verbose_with_input() {
let assert = word_tally().write_stdin("wombat").arg("-v").assert();
assert
.success()
.stderr("source -\ntotal-words 1\nunique-words 1\naverage-word-count 1.000\n\n")
.stderr("source -\ntotal-words 1\nunique-words 1\n\n")
.stdout("wombat 1\n");
}

Expand Down

0 comments on commit 0218370

Please sign in to comment.