diff --git a/src/anonymize/anonymizer.rs b/src/anonymize/anonymizer.rs
index 500d206..50873ed 100644
--- a/src/anonymize/anonymizer.rs
+++ b/src/anonymize/anonymizer.rs
@@ -261,7 +261,7 @@ impl Anonymizer {
                         );
                     }
 
-                    return Some(
+                    Some(
                         p2regex
                             .replace_all(
                                 p1regex
@@ -270,7 +270,7 @@ impl Anonymizer {
                                 &p2_anon as &str,
                             )
                             .to_string(),
-                    );
+                    )
                 })
                 .collect::<Vec<String>>(),
         );
diff --git a/src/directory.rs b/src/directory.rs
index 8488e51..7ecd102 100644
--- a/src/directory.rs
+++ b/src/directory.rs
@@ -12,6 +12,15 @@ use crate::BattleToolsError;
 
 /// Anything that wants to parse logs should implement this
 pub trait LogParser<R> {
+    /// Optional fast path for log handling. Receives the path instead of the JSON because some
+    /// use cases don't need the entire file.
+    /// Returns `None` if no decision could be made (`LogParser::handle_log_file` must then run),
+    /// `Some(R)` if a result was produced early.
+    /// Errors are funneled into `None` so that error handling code isn't duplicated (it will be
+    /// handled by the later `handle_log_file` call).
+    fn fast_handle_log_file(&self, _file_path: &Path) -> Option<R> {
+        None
+    }
     /// Parses an individual log file's JSON
     fn handle_log_file(&self, raw_json: String, file_path: &Path) -> Result<R, BattleToolsError>;
     /// Parses the results from an entire directory.
@@ -82,6 +91,9 @@ where
                 }
 
                 let path = entry.path();
+                if let Some(res) = self.fast_handle_log_file(&path) {
+                    return Some(res);
+                }
                 let raw_json = match fs::read_to_string(entry.path()) {
                     Ok(s) => s,
                     Err(e) => {
diff --git a/src/search/mod.rs b/src/search/mod.rs
index cf526ab..b963cab 100644
--- a/src/search/mod.rs
+++ b/src/search/mod.rs
@@ -23,6 +23,39 @@ impl BattleSearcher {
 }
 
 impl LogParser<()> for BattleSearcher {
+    fn fast_handle_log_file(&self, path: &Path) -> Option<()> {
+        use std::io::Read;
+        // We expect the p1, p2 fields to be very early in the file and to almost never match
+        // self.user_id. So, we can do a very cheap pass to reject most cases.
+        // We turn errors into None to avoid duplicating error handling, and it's fine perf-wise
+        // because these errors won't be common.
+        let mut buf = [0u8; 256];
+        let mut f = std::fs::File::open(path).ok()?;
+        f.read_exact(&mut buf).ok()?;
+        // Trim back to the last ASCII byte (inclusive) so a multi-byte character cut off by
+        // the fixed-size read can't make the snippet invalid UTF-8.
+        let end_idx = buf.iter().rposition(|b| b.is_ascii())?;
+        let file_snippet = std::str::from_utf8(&buf[..=end_idx]).ok()?;
+        // Assumes minified JSON, and p1team appearing after p1, p2
+        let last_field_sep = file_snippet.rfind(",\"p1team\"")?;
+        buf[last_field_sep] = b'}';
+        let raw_json = std::str::from_utf8(&buf[..last_field_sep + 1]).ok()?;
+        // now we have valid JSON that probably contains p1, p2.
+        // Let's check 'em!
+        let p1 = gjson::get(raw_json, "p1");
+        let p2 = gjson::get(raw_json, "p2");
+        if !p1.exists() || !p2.exists() {
+            // The assumption above didn't hold, so no decision can be made here; defer to
+            // handle_log_file rather than wrongly rejecting the battle.
+            return None;
+        }
+        if to_id(p1.str()) != self.user_id && to_id(p2.str()) != self.user_id {
+            // Searched user is not a player in the battle.
+            return Some(());
+        }
+        None
+    }
+
     fn handle_log_file(&self, raw_json: String, path: &Path) -> Result<(), BattleToolsError> {
         let date = match path.parent() {
             Some(p) => p
@@ -165,4 +198,16 @@ mod unit_tests {
                 .unwrap()
         });
     }
+
+    #[bench]
+    fn bench_handle_directory_1k_absent(b: &mut Bencher) {
+        build_test_dir(1_000).unwrap();
+
+        let mut searcher = BattleSearcher::new("Nobody", false, false);
+        b.iter(|| {
+            searcher
+                .handle_directories(vec![TEST_ROOT_DIR.clone()], None)
+                .unwrap()
+        });
+    }
 }