Skip to content

Commit

Permalink
More popular languages to source loader
Browse files Browse the repository at this point in the history
- Support Scala, Java, Kotlin, and CSharp
- Use cpp parser to parse .h file
  • Loading branch information
quangIO committed Jan 10, 2025
1 parent a244a77 commit c1433de
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 19 deletions.
20 changes: 14 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,16 @@ glob = "0.3.1"
strum_macros = "0.26.2"
async-recursion = "1.1.0"
tree-sitter = { version = "0.24", optional = true }
tree-sitter-rust = { version = "0.23", optional = true }
tree-sitter-cpp = { version = "0.23", optional = true }
tree-sitter-javascript = { version = "0.23", optional = true }
tree-sitter-c = { version = "0.23", optional = true }
tree-sitter-c-sharp = { version = "0.23", optional = true }
tree-sitter-cpp = { version = "0.23", optional = true }
tree-sitter-go = { version = "0.23", optional = true }
tree-sitter-java = { version = "0.23", optional = true }
tree-sitter-javascript = { version = "0.23", optional = true }
tree-sitter-kotlin-ng = { version = "1.1", optional = true }
tree-sitter-python = { version = "0.23", optional = true }
tree-sitter-rust = { version = "0.23", optional = true }
tree-sitter-scala = { version = "0.23", optional = true }
tree-sitter-typescript = { version = "0.23", optional = true }
qdrant-client = { version = "1.10.1", optional = true }
ollama-rs = { version = "0.2.0", optional = true, features = [
Expand Down Expand Up @@ -100,12 +104,16 @@ surrealdb = ["dep:surrealdb"]
tree-sitter = [
"cc",
"dep:tree-sitter",
"dep:tree-sitter-rust",
"dep:tree-sitter-cpp",
"dep:tree-sitter-javascript",
"dep:tree-sitter-c",
"dep:tree-sitter-c-sharp",
"dep:tree-sitter-cpp",
"dep:tree-sitter-go",
"dep:tree-sitter-java",
"dep:tree-sitter-javascript",
"dep:tree-sitter-kotlin-ng",
"dep:tree-sitter-python",
"dep:tree-sitter-rust",
"dep:tree-sitter-scala",
"dep:tree-sitter-typescript",
]

Expand Down
35 changes: 22 additions & 13 deletions src/document_loaders/source_code_loader/language_parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@ use tree_sitter::{Parser, Tree};

#[derive(Display, Debug, Clone)]
pub enum Language {
Rust,
C,
CSharp,
Cpp,
Javascript,
Typescript,
Go,
Java,
Javascript,
Kotlin,
Python,
Rust,
Scala,
Typescript,
}

pub enum LanguageContentTypes {
Expand Down Expand Up @@ -72,30 +76,35 @@ impl Clone for LanguageParser {
/// Picks the [`Language`] used to parse a file, based on its extension.
///
/// The extension is the text after the final `.` in `name`, compared
/// case-insensitively. If `name` contains no `.`, the whole name is treated
/// as the extension.
///
/// C/C++ header files (`h`, `hpp`) are parsed with the C++ grammar, as are
/// `cc` and `cpp` sources; plain `c` files use the C grammar.
///
/// # Panics
///
/// Panics with `"Unsupported language"` when the extension is not one of the
/// recognised ones.
pub fn get_language_by_filename(name: &String) -> Language {
    // `rsplit` always yields at least one piece, so the fallback is never
    // actually taken; it just avoids an `unwrap`.
    let extension = name.rsplit('.').next().unwrap_or(name.as_str());
    match extension.to_lowercase().as_str() {
        "c" => Language::C,
        "cs" => Language::CSharp,
        // The extension never includes the dot separator, so headers must be
        // matched as "h" (a ".h" pattern could never match).
        "cc" | "cpp" | "h" | "hpp" => Language::Cpp,
        "go" => Language::Go,
        "java" => Language::Java,
        "js" => Language::Javascript,
        "kt" => Language::Kotlin,
        "py" => Language::Python,
        "rs" => Language::Rust,
        "scala" | "sc" => Language::Scala,
        "ts" | "tsx" => Language::Typescript,
        _ => panic!("Unsupported language"),
    }
}

fn get_language_parser(language: &Language) -> Parser {
let mut parser = Parser::new();
let lang = match language {
Language::Rust => tree_sitter_rust::LANGUAGE,
Language::C => tree_sitter_c::LANGUAGE,
Language::CSharp => tree_sitter_c_sharp::LANGUAGE,
Language::Cpp => tree_sitter_cpp::LANGUAGE,
Language::Javascript => tree_sitter_javascript::LANGUAGE,
Language::Typescript => tree_sitter_typescript::LANGUAGE_TSX,
Language::Go => tree_sitter_go::LANGUAGE,
Language::Java => tree_sitter_java::LANGUAGE,
Language::Javascript => tree_sitter_javascript::LANGUAGE,
Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE,
Language::Python => tree_sitter_python::LANGUAGE,
Language::Rust => tree_sitter_rust::LANGUAGE,
Language::Scala => tree_sitter_scala::LANGUAGE,
Language::Typescript => tree_sitter_typescript::LANGUAGE_TSX,
};
parser
.set_language(&lang.into())
Expand Down

0 comments on commit c1433de

Please sign in to comment.