Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof-of-Concept: Fast path for search #9

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/anonymize/anonymizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ impl Anonymizer {
);
}

return Some(
Some(
p2regex
.replace_all(
p1regex
Expand All @@ -270,7 +270,7 @@ impl Anonymizer {
&p2_anon as &str,
)
.to_string(),
);
)
})
.collect::<Vec<_>>(),
);
Expand Down
12 changes: 12 additions & 0 deletions src/directory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ use crate::BattleToolsError;

/// Anything that wants to parse logs should implement this
pub trait LogParser<R> {
/// Optional fast path for log handling.
///
/// Receives the file path instead of the raw JSON because some use cases don't need to read
/// the entire file.
///
/// Returns `None` if no decision could be made (the caller then needs to run
/// [`LogParser::handle_log_file`]), or `Some(R)` if a result was produced early.
///
/// Errors are funneled into `None` so that error handling code isn't duplicated (they will be
/// handled by the later `handle_log_file` call).
///
/// The default implementation always returns `None`, i.e. it never short-circuits.
fn fast_handle_log_file(&self, _file_path: &Path) -> Option<R> {
None
}
/// Parses an individual log file's JSON
fn handle_log_file(&self, raw_json: String, file_path: &Path) -> Result<R, BattleToolsError>;
/// Parses the results from an entire directory.
Expand Down Expand Up @@ -82,6 +91,9 @@ where
}

let path = entry.path();
if let Some(res) = self.fast_handle_log_file(&path) {
return Some(res);
};
let raw_json = match fs::read_to_string(entry.path()) {
Ok(s) => s,
Err(e) => {
Expand Down
39 changes: 39 additions & 0 deletions src/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,33 @@ impl BattleSearcher {
}

impl LogParser<()> for BattleSearcher {
/// Cheap pre-filter that rejects logs in which `self.user_id` is not a player.
///
/// Only the first 256 bytes of the file are read. We expect the `p1` and `p2` fields to be
/// very early in the file and to almost never match `self.user_id`, so most files can be
/// rejected without reading or parsing the whole log.
///
/// Returns `Some(())` when the prefix proves the searched user is neither `p1` nor `p2`.
/// Returns `None` whenever no decision can be made: I/O errors, files shorter than the
/// prefix buffer, an unexpected layout, missing `p1`/`p2` fields, or a possible match. Errors
/// are turned into `None` to avoid duplicating error handling; this is fine perf-wise because
/// these cases won't be common.
fn fast_handle_log_file(&self, path: &Path) -> Option<()> {
    use std::io::Read;

    let mut buf = [0u8; 256];
    let mut f = std::fs::File::open(path).ok()?;
    // `read_exact` fails on files shorter than the buffer; those fall back to the slow path.
    f.read_exact(&mut buf).ok()?;

    // The fixed-size read may have cut a multi-byte UTF-8 sequence in half, so back up to the
    // last ASCII byte. Ending the slice right after an ASCII byte is always a char boundary.
    let end_idx = buf.iter().rposition(|b| b.is_ascii())?;
    let file_snippet = std::str::from_utf8(&buf[..=end_idx]).ok()?;

    // Assumes minified JSON, and p1team appearing after p1, p2.
    let last_field_sep = file_snippet.rfind(",\"p1team\"")?;
    // Overwrite the separating comma with a closing brace to terminate the object early.
    buf[last_field_sep] = b'}';
    let raw_json = std::str::from_utf8(&buf[..=last_field_sep]).ok()?;

    // Now we have valid JSON that probably contains p1 and p2.
    let p1 = gjson::get(raw_json, "p1");
    let p2 = gjson::get(raw_json, "p2");
    if !p1.exists() || !p2.exists() {
        // The layout assumption did not hold. A missing field would read as an empty id and
        // be mistaken for "not the searched user", so defer to the full parser instead.
        return None;
    }

    let p1id = to_id(p1.str());
    let p2id = to_id(p2.str());
    if p1id != self.user_id && p2id != self.user_id {
        // Searched user is not a player in the battle.
        return Some(());
    }
    None
}

fn handle_log_file(&self, raw_json: String, path: &Path) -> Result<(), BattleToolsError> {
let date = match path.parent() {
Some(p) => p
Expand Down Expand Up @@ -165,4 +192,16 @@ mod unit_tests {
.unwrap()
});
}

/// Benchmarks a directory search over a 1,000-entry test directory for the user "Nobody".
/// NOTE(review): presumably "Nobody" never appears in the generated logs, so this measures
/// the no-match case; confirm against `build_test_dir`.
#[bench]
fn bench_handle_directory_1k_absent(b: &mut Bencher) {
    build_test_dir(1_000).unwrap();

    let mut absent_user_searcher = BattleSearcher::new("Nobody", false, false);
    b.iter(|| {
        // The directory list is rebuilt on every iteration, as the call takes it by value.
        let roots = vec![TEST_ROOT_DIR.clone()];
        absent_user_searcher.handle_directories(roots, None).unwrap()
    });
}
}