From 6ab3bb1d2262d79d8cda4ebd4aecfd0c2a21c2e9 Mon Sep 17 00:00:00 2001 From: larry-the-table-guy <180724454+larry-the-table-guy@users.noreply.github.com> Date: Sat, 16 Nov 2024 10:11:27 -0500 Subject: [PATCH 1/4] Add fast path for search Load small prefix of file and attempt to use that to reject files --- src/directory.rs | 12 ++++++++++++ src/search/mod.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/directory.rs b/src/directory.rs index 8488e51..7ecd102 100644 --- a/src/directory.rs +++ b/src/directory.rs @@ -12,6 +12,15 @@ use crate::BattleToolsError; /// Anything that wants to parse logs should implement this pub trait LogParser<R> { + /// Optional fast path for log handling. Passes the path instead of the JSON because some + /// usecases don't need the entire file. + /// Returns None if no decision could be made (need to run LogParser::handle_log_file), + /// Some(R) if a result was produced early. + /// Errors are funneled into None so that error handling code isn't duplicated (it will be + /// handled by the later handle_log_file call). + fn fast_handle_log_file(&self, _file_path: &Path) -> Option<R> { + None + } /// Parses an individual log file's JSON fn handle_log_file(&self, raw_json: String, file_path: &Path) -> Result<R, BattleToolsError>; /// Parses the results from an entire directory. @@ -82,6 +91,9 @@ where } let path = entry.path(); + if let Some(res) = self.fast_handle_log_file(&path) { + return Some(res); + }; let raw_json = match fs::read_to_string(entry.path()) { Ok(s) => s, Err(e) => { diff --git a/src/search/mod.rs b/src/search/mod.rs index cf526ab..b4e7c22 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -23,6 +23,34 @@ impl BattleSearcher { } impl LogParser<()> for BattleSearcher { + fn fast_handle_log_file(&self, path: &Path) -> Option<()> { + use std::io::Read; + // We expect the p1, p2 fields to be very early in the file and to almost never match + // self.user_id.
So, we can do a very cheap pass to reject most cases. + // We turn errors into None to avoid duplicating error handling, and it's fine perf-wise + // because these errors won't be common. + let mut buf = [0u8; 256]; + let mut f = std::fs::File::open(path).ok()?; + f.read_exact(&mut buf).ok()?; + // bump ptr back until ascii + let end_idx = buf.iter().rposition(|b| b.is_ascii())?; + let file_snippet = std::str::from_utf8(&buf[..end_idx]).ok()?; + // find last instance of `,"`, replace with '}' to get valid JSON + // we know that the only way to see a comma followed by a quote is right after a value. + let last_field_sep = file_snippet.rfind(",\"")?; + buf[last_field_sep] = b'}'; + let raw_json = std::str::from_utf8(&buf[..last_field_sep + 1]).ok()?; + // now we have valid JSON that probably contains p1, p2. + // Let's check 'em! + let p1id = to_id(gjson::get(&raw_json, "p1").str()); + let p2id = to_id(gjson::get(&raw_json, "p2").str()); + if p1id != self.user_id && p2id != self.user_id { + // Searched user is not a player in the battle. 
+ return Some(()); + } + None + } + fn handle_log_file(&self, raw_json: String, path: &Path) -> Result<(), BattleToolsError> { let date = match path.parent() { Some(p) => p From a0e979259c66654dbff80e39edc46d03d5825014 Mon Sep 17 00:00:00 2001 From: larry-the-table-guy <180724454+larry-the-table-guy@users.noreply.github.com> Date: Sat, 16 Nov 2024 11:21:11 -0500 Subject: [PATCH 2/4] Fix edge case in fast path's JSON fixup Would behave incorrectly if a string ended with a comma --- src/search/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/search/mod.rs b/src/search/mod.rs index b4e7c22..218bd3d 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -35,9 +35,8 @@ impl LogParser<()> for BattleSearcher { // bump ptr back until ascii let end_idx = buf.iter().rposition(|b| b.is_ascii())?; let file_snippet = std::str::from_utf8(&buf[..end_idx]).ok()?; - // find last instance of `,"`, replace with '}' to get valid JSON - // we know that the only way to see a comma followed by a quote is right after a value. - let last_field_sep = file_snippet.rfind(",\"")?; + // Assumes minified JSON, and p1team appearing after p1, p2 + let last_field_sep = file_snippet.rfind(",\"p1team\"")?; buf[last_field_sep] = b'}'; let raw_json = std::str::from_utf8(&buf[..last_field_sep + 1]).ok()?; // now we have valid JSON that probably contains p1, p2. 
From 9a8f0da87e5837e5d6440566d78e58b1395cd831 Mon Sep 17 00:00:00 2001 From: larry-the-table-guy <180724454+larry-the-table-guy@users.noreply.github.com> Date: Sat, 16 Nov 2024 11:37:50 -0500 Subject: [PATCH 3/4] Add bench case for search w/ no matches Also fix 2 clippy warnings for redundant refs --- src/search/mod.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/search/mod.rs b/src/search/mod.rs index 218bd3d..b963cab 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -41,8 +41,8 @@ impl LogParser<()> for BattleSearcher { let raw_json = std::str::from_utf8(&buf[..last_field_sep + 1]).ok()?; // now we have valid JSON that probably contains p1, p2. // Let's check 'em! - let p1id = to_id(gjson::get(&raw_json, "p1").str()); - let p2id = to_id(gjson::get(&raw_json, "p2").str()); + let p1id = to_id(gjson::get(raw_json, "p1").str()); + let p2id = to_id(gjson::get(raw_json, "p2").str()); if p1id != self.user_id && p2id != self.user_id { // Searched user is not a player in the battle. return Some(()); @@ -192,4 +192,16 @@ mod unit_tests { .unwrap() }); } + + #[bench] + fn bench_handle_directory_1k_absent(b: &mut Bencher) { + build_test_dir(1_000).unwrap(); + + let mut searcher = BattleSearcher::new("Nobody", false, false); + b.iter(|| { + searcher + .handle_directories(vec![TEST_ROOT_DIR.clone()], None) + .unwrap() + }); + } } From b80f0401ab4917e342a1f5b463416a2fec2d482d Mon Sep 17 00:00:00 2001 From: larry-the-table-guy <180724454+larry-the-table-guy@users.noreply.github.com> Date: Sat, 16 Nov 2024 11:41:26 -0500 Subject: [PATCH 4/4] Fix clippy lint for needless return In its infinite wisdom, clippy won't let me push my changes until this unrelated lint is fixed. 
--- src/anonymize/anonymizer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/anonymize/anonymizer.rs b/src/anonymize/anonymizer.rs index 500d206..50873ed 100644 --- a/src/anonymize/anonymizer.rs +++ b/src/anonymize/anonymizer.rs @@ -261,7 +261,7 @@ impl Anonymizer { ); } - return Some( + Some( p2regex .replace_all( p1regex @@ -270,7 +270,7 @@ impl Anonymizer { &p2_anon as &str, ) .to_string(), - ); + ) }) .collect::>(), );