Skip to content

Commit

Permalink
Construct Tree Sitter queries only once (#18)
Browse files Browse the repository at this point in the history
* refactor: re-add main.rs and release-with-debug profile

This makes it easier to profile the program with a profiler (e.g. cargo
flamegraph), even if the Rust main won't be distributed.

* perf: create TS queries only once

Creating the queries isn't free, and when we're parsing lots of files in
a directory, it becomes a hotspot.
  • Loading branch information
oyarsa authored Jan 16, 2025
1 parent 473202a commit e4f3793
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ streaming-iterator = "0.1.9"

[features]
extension-module = ["pyo3/extension-module"]

[profile.release-with-debug]
inherits = "release"
debug = true
47 changes: 27 additions & 20 deletions src/input.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use anyhow::Result;
use std::fs;
use std::path::PathBuf;
use std::sync::LazyLock;
use walkdir::WalkDir;

#[derive(Debug, Clone, PartialEq)]
Expand All @@ -22,15 +23,34 @@ impl TryFrom<&PathBuf> for FileType {
}
}

/// Tree-sitter query that captures every Python `comment` node as `@comment`.
///
/// The query is built lazily on first access and reused afterwards, so it
/// is constructed at most once per process.
///
/// # Panics
///
/// The first access panics with "Query must be valid" if the query cannot
/// be constructed for the Python grammar.
static TS_QUERY_PYTHON: LazyLock<tree_sitter::Query> = LazyLock::new(|| {
    let language: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
    tree_sitter::Query::new(&language, "(comment) @comment").expect("Query must be valid")
});

/// Tree-sitter query that captures Rust `line_comment` and `block_comment`
/// nodes, both under the single capture name `@comment`.
///
/// The query is built lazily on first access and reused afterwards, so it
/// is constructed at most once per process.
///
/// # Panics
///
/// The first access panics with "Query must be valid" if the query cannot
/// be constructed for the Rust grammar.
static TS_QUERY_RUST: LazyLock<tree_sitter::Query> = LazyLock::new(|| {
tree_sitter::Query::new(
&tree_sitter_rust::LANGUAGE.into(),
// Two patterns in one query source; each tags its node as `@comment`.
"(line_comment) @comment
(block_comment) @comment",
)
.expect("Query must be valid")
});

/// Tree-sitter query that captures every JavaScript `comment` node as
/// `@comment`.
///
/// The query is built lazily on first access and reused afterwards, so it
/// is constructed at most once per process.
///
/// # Panics
///
/// The first access panics with "Query must be valid" if the query cannot
/// be constructed for the JavaScript grammar.
static TS_QUERY_JAVASCRIPT: LazyLock<tree_sitter::Query> = LazyLock::new(|| {
    let language: tree_sitter::Language = tree_sitter_javascript::LANGUAGE.into();
    tree_sitter::Query::new(&language, "(comment) @comment").expect("Query must be valid")
});

impl FileType {
pub fn tree_sitter_query(&self) -> &'static str {
pub fn tree_sitter_query(&self) -> &'static tree_sitter::Query {
match self {
FileType::Python => "(comment) @comment",
FileType::Rust => {
"(line_comment) @comment
(block_comment) @comment"
}
FileType::JavaScript => "(comment) @comment",
FileType::Python => &TS_QUERY_PYTHON,
FileType::Rust => &TS_QUERY_RUST,
FileType::JavaScript => &TS_QUERY_JAVASCRIPT,
}
}

Expand Down Expand Up @@ -88,17 +108,4 @@ mod tests {
);
assert!(determine_file_type(&PathBuf::from("test.txt")).is_err());
}

#[test]
fn test_queries_are_valid() {
for file_type in [FileType::Python, FileType::Rust, FileType::JavaScript] {
let language = file_type.tree_sitter_language();
let query = file_type.tree_sitter_query();
assert!(
tree_sitter::Query::new(&language, query).is_ok(),
"Testing query from {:?}.",
file_type
);
}
}
}
7 changes: 7 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
use anyhow::Result;

/// Binary entry point: collects the process's command-line arguments and
/// hands them to `anot::cli::run`.
///
/// # Errors
///
/// Propagates any error returned by `anot::cli::run`.
fn main() -> Result<()> {
    anot::cli::run(std::env::args().collect::<Vec<String>>())?;
    Ok(())
}
5 changes: 2 additions & 3 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,16 @@ pub fn extract_annotations(
let mut parser = tree_sitter::Parser::new();
let language = file_type.tree_sitter_language();
parser.set_language(&language)?;
let query = file_type.tree_sitter_query();

// Parse the full source code
let source = source_code.as_bytes();
let tree = parser
.parse(source, None)
.ok_or_else(|| anyhow::anyhow!("Failed to parse source code"))?;

let query = tree_sitter::Query::new(&language, query)?;
let query = file_type.tree_sitter_query();
let mut query_cursor = tree_sitter::QueryCursor::new();
let mut matches = query_cursor.matches(&query, tree.root_node(), source);
let mut matches = query_cursor.matches(query, tree.root_node(), source);

let mut annotations = Vec::new();

Expand Down

0 comments on commit e4f3793

Please sign in to comment.