From 515ad1dcfb8eb0a5d22b223ba76d0b941249df07 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Mon, 30 Oct 2023 11:49:41 +0100 Subject: [PATCH 01/14] LLM crate based Eve in CLI experiment --- cli/Cargo.toml | 2 + cli/src/main.rs | 191 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 1 deletion(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a4d0e125b..1fce68b08 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -34,3 +34,5 @@ serde = { version = "1.0", features = ["derive"] } colour = "0.7.0" syntect = "5.0" ctrlc = "3.4.0" + +llm = "0.1.1" diff --git a/cli/src/main.rs b/cli/src/main.rs index 3f18a9480..e4bde8856 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -7,6 +7,7 @@ extern crate rand; extern crate regex; extern crate rustyline; extern crate tokio; +extern crate llm; mod formatting; mod startup; @@ -149,7 +150,8 @@ enum Domain { #[arg(long, action)] admin_credential: Option }, - RunLocalHcServices {} + RunLocalHcServices {}, + Eve, } async fn get_ad4m_client(args: &ClapApp) -> Result { @@ -303,6 +305,193 @@ async fn main() -> Result<()> { admin_credential: _ } => unreachable!(), Domain::RunLocalHcServices {} => unreachable!(), + Domain::Eve => { + + let training = r##" +system: +You are Eve, the personal assistant AI in Adam. +You are mainly there to help create AD4M Subject classes (in Prolog) from user input. +You'll respond to the user directly and mark Subject class code with "SDNA:" at the start and ":ANDS" at the end, +so that Perspect3ve can parse that out. +Make sure that each new Subject class definition uses a new random identifier +(2nd parameter in "subject_class", 1st parameter in the other predicates), +but ensure the same identifier used throughout all predicate occurrences of the same class definition. +When the user signals that the code is complete, +provide the the latest version to Perspect3ve to be written to the perspective's social DNA +by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` + +Eve: +Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. + +User: +Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` + +Eve: +Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. + +SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). +:ANDS + +What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set)` + +User: The "check" icon. Render todos as circles with the color depending on the state. Also, please add options for the state property, being Ready, Doing and Done. + +Eve: +Ok, I have this now: + +SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). +property_named_option(cjdotdng, "state", "todo://ready", "Ready"). +property_named_option(cjdotdng, "state", "todo://doing", "Doing"). +property_named_option(cjdotdng, "state", "todo://done", "Done"). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). + +p3_class_icon(cjdotdng, "check2-square"). +p3_instance_shape(cjdotdng, Instance, "circle"). +p3_instance_color(cjdotdng, Instance, "#000055") :- property_getter(c, Instance, "state", "todo://ready"). +p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, "state", "todo://doing"). +p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). +:ANDS + +Anything else?` + +User: +No, that's it. Please write this to the SDNA.` + + +Eve: +Ok... +WRITE-SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). +property_named_option(cjdotdng, "state", "todo://ready", "Ready"). +property_named_option(cjdotdng, "state", "todo://doing", "Doing"). +property_named_option(cjdotdng, "state", "todo://done", "Done"). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). + +p3_class_icon(cjdotdng, "check2-square"). +p3_instance_shape(cjdotdng, Instance, "circle"). +p3_instance_color(cjdotdng, Instance, "#000055") :- property_getter(c, Instance, "state", "todo://ready"). +p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, "state", "todo://doing"). +p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). +:ANDS-ETIRW + +Done. +"##; + + + use std::io::Write; + use llm::Model; + + println!("Loading model..."); + + // load a GGML model from disk + let llama = llm::load::( + // path to GGML file + std::path::Path::new("/Users/nicolasluck/models/eve.model"), + // llm::ModelParameters + Default::default(), + // load progress callback + |_| {}, + ) + .unwrap_or_else(|err| panic!("Failed to load model: {err}")); + + println!("Model loaded!"); + + /* + let model = llm::load_dynamic( + llm::ModelArchitecture::Llama, + &std::path::Path::new("/Users/nicolasluck/eve.model"), + Default::default(), + llm::load_progress_callback_stdout, + ).unwrap_or_else(|err| panic!("Failed to load model: {err}")); + */ + + let mut rl = rustyline::Editor::<()>::new()?; + let prompt = rl.readline(">> ")?; + println!("\n\n") + + let prompt = format!("User:\n{}\n\nEve:\n", prompt); + + + // use the model to generate text from a prompt + let mut session = llama.start_session(Default::default()); + let res = session.infer::( + // model to use for text generation + &llama, + // randomness provider + &mut rand::thread_rng(), + // the prompt to use for text generation, as well as other + // inference parameters + &llm::InferenceRequest { + prompt: prompt.as_str(), + ..Default::default() + }, + // llm::OutputRequest + &mut Default::default(), + // output callback + |t| { + print!("{t}"); + std::io::stdout().flush().unwrap(); + + Ok(()) + } + ); + + match res { + Ok(result) => println!("\n\nInference stats:\n{result}"), + Err(err) => println!("\n{err}"), + } + } } Ok(()) From 809564a184e3383829369fd171fd11862553ea98 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Mon, 30 Oct 2023 12:31:37 +0100 Subject: [PATCH 02/14] Reduced training prompt fits in context window --- cli/src/main.rs | 79 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index e4bde8856..d06ea97da 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -350,7 +350,7 @@ collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", pre collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). :ANDS -What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set)` +What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) User: The "check" icon. Render todos as circles with the color depending on the state. Also, please add options for the state property, being Ready, Doing and Done. @@ -429,6 +429,53 @@ Done. "##; +let reduced = r##" +system: +You are Eve, the personal assistant AI in Adam. +You are mainly there to help create AD4M Subject classes (in Prolog) from user input. +You'll respond to the user directly and mark Subject class code with "SDNA:" at the start and ":ANDS" at the end, +so that Perspect3ve can parse that out. +Make sure that each new Subject class definition uses a new random identifier +(2nd parameter in "subject_class", 1st parameter in the other predicates), +but ensure the same identifier used throughout all predicate occurrences of the same class definition. +When the user signals that the code is complete, +provide the the latest version to Perspect3ve to be written to the perspective's social DNA +by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` + +Eve: +Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. + +User: +Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` + +Eve: +Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. + +SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). +:ANDS + +What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) +"##; + + use std::io::Write; use llm::Model; @@ -456,15 +503,35 @@ Done. ).unwrap_or_else(|err| panic!("Failed to load model: {err}")); */ - let mut rl = rustyline::Editor::<()>::new()?; - let prompt = rl.readline(">> ")?; - println!("\n\n") - - let prompt = format!("User:\n{}\n\nEve:\n", prompt); + println!("Ad hoc training model for ADAM subject classes..."); + fn print_token(t: String) { + print!("{t}"); + std::io::stdout().flush().unwrap(); + } + // use the model to generate text from a prompt let mut session = llama.start_session(Default::default()); + session.feed_prompt( + &llama, + &Default::default(), + reduced, + &mut llm::OutputRequest::default(), + |t| { + print!("."); + Ok::<(), std::io::Error>(()) + } + ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); + + println!("Training done. Ready!"); + + let mut rl = rustyline::Editor::<()>::new()?; + let prompt = rl.readline(">> ")?; + println!("\n\n"); + + let prompt = format!("User:\n{}\n\nEve:\n", prompt); + let res = session.infer::( // model to use for text generation &llama, From ec7d19f1b5ccbb29a669f893a2a4734879fa3ddb Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Thu, 2 Nov 2023 13:26:59 +0100 Subject: [PATCH 03/14] Update llm crate to master branch for ggml v3 models --- cli/Cargo.toml | 2 +- cli/src/main.rs | 74 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 1fce68b08..cf02293a7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -35,4 +35,4 @@ colour = "0.7.0" syntect = "5.0" ctrlc = "3.4.0" -llm = "0.1.1" +llm = { git = "https://github.com/rustformers/llm" } diff --git a/cli/src/main.rs b/cli/src/main.rs index d06ea97da..74094f9a4 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -478,30 +478,43 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic use std::io::Write; use llm::Model; + use llm::InferenceResponse; + use std::convert::Infallible; println!("Loading model..."); + // load a GGML model from disk let llama = llm::load::( // path to GGML file std::path::Path::new("/Users/nicolasluck/models/eve.model"), + llm::TokenizerSource::Embedded, // llm::ModelParameters Default::default(), // load progress callback |_| {}, + ) .unwrap_or_else(|err| panic!("Failed to load model: {err}")); println!("Model loaded!"); /* - let model = llm::load_dynamic( - llm::ModelArchitecture::Llama, + + + let llama = llm::load_dynamic( + Some(llm::ModelArchitecture::Llama), &std::path::Path::new("/Users/nicolasluck/eve.model"), + llm::TokenizerSource::Embedded, Default::default(), llm::load_progress_callback_stdout, ).unwrap_or_else(|err| panic!("Failed to load model: {err}")); - */ + + println!("Model loaded!"); + */ + + let mut session = llama.start_session(Default::default()); + println!("Ad hoc training model for ADAM subject classes..."); @@ -510,27 +523,43 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic print!("{t}"); std::io::stdout().flush().unwrap(); } + + let character_name = "### Assistant"; + let user_name = "### Human"; + let persona = "A chat between a human and an assistant."; + let history = format!( + "{character_name}: Hello - How may I help you today?\n\ + {user_name}: What is the capital of France?\n\ + {character_name}: Paris is the capital of France." + ); + + let inference_parameters = llm::InferenceParameters::default(); + // use the model to generate text from a prompt - let mut session = llama.start_session(Default::default()); + session.feed_prompt( - &llama, - &Default::default(), - reduced, - &mut llm::OutputRequest::default(), - |t| { - print!("."); - Ok::<(), std::io::Error>(()) - } + &llama, + format!("{persona}\n{history}").as_str(), + &mut Default::default(), + llm::feed_prompt_callback(|resp| match resp { + llm::InferenceResponse::PromptToken(t) + | llm::InferenceResponse::InferredToken(t) => { + print_token(t); + + Ok::(llm::InferenceFeedback::Continue) + } + _ => Ok(llm::InferenceFeedback::Continue), + }), ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); println!("Training done. Ready!"); let mut rl = rustyline::Editor::<()>::new()?; - let prompt = rl.readline(">> ")?; + let line = rl.readline(">> ")?; println!("\n\n"); - let prompt = format!("User:\n{}\n\nEve:\n", prompt); + let prompt = format!("User:\n{}\n\nEve:\n", line); let res = session.infer::( // model to use for text generation @@ -540,17 +569,26 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic // the prompt to use for text generation, as well as other // inference parameters &llm::InferenceRequest { - prompt: prompt.as_str(), - ..Default::default() + prompt: format!("{user_name}: {line}\n{character_name}:") + .as_str() + .into(), + parameters: &inference_parameters, + play_back_previous_tokens: false, + maximum_token_count: None, }, // llm::OutputRequest &mut Default::default(), // output callback |t| { - print!("{t}"); + match t { + InferenceResponse::PromptToken(t) | InferenceResponse::InferredToken(t) | llm::InferenceResponse::SnapshotToken(t) => { + print_token(t); + } + _ => {} + } std::io::stdout().flush().unwrap(); - Ok(()) + Ok(llm::InferenceFeedback::Continue) } ); From bc558c3236f1dea306f823d8de43672a8ee06ded Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Thu, 2 Nov 2023 13:55:34 +0100 Subject: [PATCH 04/14] Reduced training that fits into context window --- cli/src/main.rs | 71 +++++++++++++------------------------------------ 1 file changed, 18 insertions(+), 53 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 74094f9a4..c24c476a0 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -307,8 +307,8 @@ async fn main() -> Result<()> { Domain::RunLocalHcServices {} => unreachable!(), Domain::Eve => { - let training = r##" -system: + let system = r##" +<> You are Eve, the personal assistant AI in Adam. You are mainly there to help create AD4M Subject classes (in Prolog) from user input. You'll respond to the user directly and mark Subject class code with "SDNA:" at the start and ":ANDS" at the end, @@ -319,15 +319,14 @@ but ensure the same identifier used throughout all predicate occurrences of the When the user signals that the code is complete, provide the the latest version to Perspect3ve to be written to the perspective's social DNA by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` +<> +"##; -Eve: -Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. - -User: -Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` -Eve: -Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. + let history = r##" +Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. +User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` +Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. SDNA: subject_class("Todo", cjdotdng). @@ -353,9 +352,7 @@ collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: " What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) User: The "check" icon. Render todos as circles with the color depending on the state. Also, please add options for the state property, being Ready, Doing and Done. - -Eve: -Ok, I have this now: +Eve: Ok, I have this now: SDNA: subject_class("Todo", cjdotdng). @@ -387,14 +384,9 @@ p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). :ANDS -Anything else?` - -User: -No, that's it. Please write this to the SDNA.` - - -Eve: -Ok... +Anything else? +User: No, that's it. Please write this to the SDNA.` +Eve: Ok.. WRITE-SDNA: subject_class("Todo", cjdotdng). constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). @@ -430,26 +422,9 @@ Done. let reduced = r##" -system: -You are Eve, the personal assistant AI in Adam. -You are mainly there to help create AD4M Subject classes (in Prolog) from user input. -You'll respond to the user directly and mark Subject class code with "SDNA:" at the start and ":ANDS" at the end, -so that Perspect3ve can parse that out. -Make sure that each new Subject class definition uses a new random identifier -(2nd parameter in "subject_class", 1st parameter in the other predicates), -but ensure the same identifier used throughout all predicate occurrences of the same class definition. -When the user signals that the code is complete, -provide the the latest version to Perspect3ve to be written to the perspective's social DNA -by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` - -Eve: -Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. - -User: -Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` - -Eve: -Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. +Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. +User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` +Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. SDNA: subject_class("Todo", cjdotdng). @@ -524,14 +499,6 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic std::io::stdout().flush().unwrap(); } - let character_name = "### Assistant"; - let user_name = "### Human"; - let persona = "A chat between a human and an assistant."; - let history = format!( - "{character_name}: Hello - How may I help you today?\n\ - {user_name}: What is the capital of France?\n\ - {character_name}: Paris is the capital of France." - ); let inference_parameters = llm::InferenceParameters::default(); @@ -540,7 +507,7 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic session.feed_prompt( &llama, - format!("{persona}\n{history}").as_str(), + format!("{system}\n{reduced}").as_str(), &mut Default::default(), llm::feed_prompt_callback(|resp| match resp { llm::InferenceResponse::PromptToken(t) @@ -553,13 +520,11 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic }), ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); - println!("Training done. Ready!"); + println!("\n\nTraining done. Ready!"); let mut rl = rustyline::Editor::<()>::new()?; let line = rl.readline(">> ")?; println!("\n\n"); - - let prompt = format!("User:\n{}\n\nEve:\n", line); let res = session.infer::( // model to use for text generation @@ -569,7 +534,7 @@ What icon should it have in the toolbar? (You can choose from the "Bootstrap" ic // the prompt to use for text generation, as well as other // inference parameters &llm::InferenceRequest { - prompt: format!("{user_name}: {line}\n{character_name}:") + prompt: format!("User: {line}\nEve:") .as_str() .into(), parameters: &inference_parameters, From 34b8bab4c2a4cbd465eb10af25ea72d5fd5bdd1a Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Thu, 2 Nov 2023 13:55:41 +0100 Subject: [PATCH 05/14] =?UTF-8?q?Use=20feature=20=E2=80=9Cmetal=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index cf02293a7..4062d8ad5 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -35,4 +35,4 @@ colour = "0.7.0" syntect = "5.0" ctrlc = "3.4.0" -llm = { git = "https://github.com/rustformers/llm" } +llm = { git = "https://github.com/rustformers/llm", features = ["metal"] } From 8e874ea07bec15408d0d993545645c293c50b2f3 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Thu, 2 Nov 2023 19:54:42 +0100 Subject: [PATCH 06/14] Extract Eve command to eve.rs and switch to cbor --- cli/Cargo.toml | 1 + cli/src/eve.rs | 278 ++++++++++++++++++++++++++++++++++++++++++++++++ cli/src/main.rs | 273 +++-------------------------------------------- 3 files changed, 293 insertions(+), 259 deletions(-) create mode 100644 cli/src/eve.rs diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 4062d8ad5..5e51744b7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -36,3 +36,4 @@ syntect = "5.0" ctrlc = "3.4.0" llm = { git = "https://github.com/rustformers/llm", features = ["metal"] } +serde_cbor = "0.11" \ No newline at end of file diff --git a/cli/src/eve.rs b/cli/src/eve.rs new file mode 100644 index 000000000..f97330088 --- /dev/null +++ b/cli/src/eve.rs @@ -0,0 +1,278 @@ + +use std::io::Write; +use ad4m_client::perspectives::snapshot; +use llm::Model; +use llm::InferenceResponse; +use std::convert::Infallible; +use std::fs::File; +use llm::InferenceSession; +use anyhow::Result; +use clap::Subcommand; + +#[derive(Debug, Subcommand)] +pub enum EveCommands { + Train, + Prompt, +} + +const SYSTEM: &str = r##" +<> +You are Eve, the personal assistant AI in Adam. +You are mainly there to help create AD4M Subject classes (in Prolog) from user input. +You'll respond to the user directly and mark Subject class code with "SDNA:" at the start and ":ANDS" at the end, +so that Perspect3ve can parse that out. +Make sure that each new Subject class definition uses a new random identifier +(2nd parameter in "subject_class", 1st parameter in the other predicates), +but ensure the same identifier used throughout all predicate occurrences of the same class definition. +When the user signals that the code is complete, +provide the the latest version to Perspect3ve to be written to the perspective's social DNA +by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` +<> +"##; + + +const HISTORY: &str = r##" +Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. +User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` +Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. + +SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). +:ANDS + +What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) + +User: The "check" icon. Render todos as circles with the color depending on the state. Also, please add options for the state property, being Ready, Doing and Done. +Eve: Ok, I have this now: + +SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). +property_named_option(cjdotdng, "state", "todo://ready", "Ready"). +property_named_option(cjdotdng, "state", "todo://doing", "Doing"). +property_named_option(cjdotdng, "state", "todo://done", "Done"). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). + +p3_class_icon(cjdotdng, "check2-square"). +p3_instance_shape(cjdotdng, Instance, "circle"). +p3_instance_color(cjdotdng, Instance, "#000055") :- property_getter(c, Instance, "state", "todo://ready"). +p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, "state", "todo://doing"). +p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). +:ANDS + +Anything else? +User: No, that's it. Please write this to the SDNA.` +Eve: Ok.. +WRITE-SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). +property_named_option(cjdotdng, "state", "todo://ready", "Ready"). +property_named_option(cjdotdng, "state", "todo://doing", "Doing"). +property_named_option(cjdotdng, "state", "todo://done", "Done"). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). + +p3_class_icon(cjdotdng, "check2-square"). +p3_instance_shape(cjdotdng, Instance, "circle"). +p3_instance_color(cjdotdng, Instance, "#000055") :- property_getter(c, Instance, "state", "todo://ready"). +p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, "state", "todo://doing"). +p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). +:ANDS-ETIRW + +Done. +"##; + + +const REDUCED: &str = r##" +Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. +User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` +Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. + +SDNA: +subject_class("Todo", cjdotdng). +constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). +instance(cjdotdng, Base) :- triple(Base, "todo://state", _). + +property(cjdotdng, "state"). +property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). +property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). + +property(cjdotdng, "title"). +property_resolve(cjdotdng, "title"). +property_resolve_language(cjdotdng, "title", "literal"). +property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). +property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). + +collection(cjdotdng, "comments"). +collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). +collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). +collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). +:ANDS + +What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) +"##; + + +pub async fn run(command: EveCommands) -> Result<()> { + println!("Loading model..."); + + + // load a GGML model from disk + let llama = llm::load::( + // path to GGML file + std::path::Path::new("/Users/nicolasluck/models/eve.model"), + llm::TokenizerSource::Embedded, + // llm::ModelParameters + Default::default(), + // load progress callback + |_| {}, + + ) + .unwrap_or_else(|err| panic!("Failed to load model: {err}")); + + println!("Model loaded!"); + + let inference_parameters = llm::InferenceParameters::default(); + + fn print_token(t: String) { + print!("{t}"); + std::io::stdout().flush().unwrap(); + } + + match command { + EveCommands::Train => { + println!("Training Eve..."); + + + println!("Ad hoc training model for ADAM subject classes..."); + + + // use the model to generate text from a prompt + let mut session = llama.start_session(Default::default()); + + session.feed_prompt( + &llama, + format!("{SYSTEM}\n{REDUCED}").as_str(), + &mut Default::default(), + llm::feed_prompt_callback(|resp| match resp { + llm::InferenceResponse::PromptToken(t) + | llm::InferenceResponse::InferredToken(t) => { + print_token(t); + + Ok::(llm::InferenceFeedback::Continue) + } + _ => Ok(llm::InferenceFeedback::Continue), + }), + ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); + + println!("\n\nTraining done. Ready!"); + + unsafe { + let snapshot_ref: llm::InferenceSnapshotRef<'_> = session.get_snapshot(); + let snapshot = snapshot_ref.to_owned(); + let snapshot_cbor = serde_cbor::to_vec(&snapshot).expect("Failed to serialize snapshot"); + File::create("/Users/nicolasluck/models/eve.snapshot.json").unwrap().write_all(snapshot_cbor.as_slice()).unwrap(); + println!("Snapshot saved!"); + } + } + EveCommands::Prompt => { + println!("Prompting Eve..."); + + let mut session = if let Ok(file) = File::open("/Users/nicolasluck/models/eve.snapshot.json") { + let snapshot: llm::InferenceSnapshot = serde_cbor::from_reader(file).expect("Failed to deserialize snapshot"); + InferenceSession::from_snapshot(snapshot, &llama).unwrap_or_else(|err| panic!("Failed to load snapshot: {err}")) + } else { + llama.start_session(Default::default()) + }; + + let mut rl = rustyline::Editor::<()>::new()?; + let line = rl.readline(">> ")?; + println!("\n\n"); + + let res = session.infer::( + // model to use for text generation + &llama, + // randomness provider + &mut rand::thread_rng(), + // the prompt to use for text generation, as well as other + // inference parameters + &llm::InferenceRequest { + prompt: format!("User: {line}\nEve:") + .as_str() + .into(), + parameters: &inference_parameters, + play_back_previous_tokens: false, + maximum_token_count: None, + }, + // llm::OutputRequest + &mut Default::default(), + // output callback + |t| { + match t { + InferenceResponse::PromptToken(t) | InferenceResponse::InferredToken(t) | llm::InferenceResponse::SnapshotToken(t) => { + print_token(t); + } + _ => {} + } + std::io::stdout().flush().unwrap(); + + Ok(llm::InferenceFeedback::Continue) + } + ); + + match res { + Ok(result) => println!("\n\nInference stats:\n{result}"), + Err(err) => println!("\n{err}"), + } + } + } + Ok(()) +} + + diff --git a/cli/src/main.rs b/cli/src/main.rs index c24c476a0..6c08b46e7 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -22,9 +22,12 @@ mod neighbourhoods; mod perspectives; mod repl; mod runtime; +mod eve; + + use crate::{ - agent::*, dev::*, expression::*, languages::*, neighbourhoods::*, perspectives::*, runtime::*, + agent::*, dev::*, expression::*, languages::*, neighbourhoods::*, perspectives::*, runtime::*, eve::*, }; use ad4m_client::*; use anyhow::{Context, Result}; @@ -151,7 +154,10 @@ enum Domain { admin_credential: Option }, RunLocalHcServices {}, - Eve, + Eve { + #[command(subcommand)] + command: EveCommands, + }, } async fn get_ad4m_client(args: &ClapApp) -> Result { @@ -263,6 +269,11 @@ async fn main() -> Result<()> { return Ok(()); } + if let Domain::Eve { command } = args.domain { + eve::run(command).await?; + return Ok(()); + } + let ad4m_client = get_ad4m_client(&args).await?; match args.domain { @@ -305,263 +316,7 @@ async fn main() -> Result<()> { admin_credential: _ } => unreachable!(), Domain::RunLocalHcServices {} => unreachable!(), - Domain::Eve => { - - let system = r##" -<> -You are Eve, the personal assistant AI in Adam. -You are mainly there to help create AD4M Subject classes (in Prolog) from user input. -You'll respond to the user directly and mark Subject class code with "SDNA:" at the start and ":ANDS" at the end, -so that Perspect3ve can parse that out. -Make sure that each new Subject class definition uses a new random identifier -(2nd parameter in "subject_class", 1st parameter in the other predicates), -but ensure the same identifier used throughout all predicate occurrences of the same class definition. -When the user signals that the code is complete, -provide the the latest version to Perspect3ve to be written to the perspective's social DNA -by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` -<> -"##; - - - let history = r##" -Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. -User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` -Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. - -SDNA: -subject_class("Todo", cjdotdng). -constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). -instance(cjdotdng, Base) :- triple(Base, "todo://state", _). - -property(cjdotdng, "state"). -property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). -property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). - -property(cjdotdng, "title"). -property_resolve(cjdotdng, "title"). -property_resolve_language(cjdotdng, "title", "literal"). -property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). -property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). - -collection(cjdotdng, "comments"). -collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). -collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). -collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). -:ANDS - -What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) - -User: The "check" icon. Render todos as circles with the color depending on the state. Also, please add options for the state property, being Ready, Doing and Done. -Eve: Ok, I have this now: - -SDNA: -subject_class("Todo", cjdotdng). -constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). -instance(cjdotdng, Base) :- triple(Base, "todo://state", _). - -property(cjdotdng, "state"). -property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). -property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). -property_named_option(cjdotdng, "state", "todo://ready", "Ready"). -property_named_option(cjdotdng, "state", "todo://doing", "Doing"). -property_named_option(cjdotdng, "state", "todo://done", "Done"). - -property(cjdotdng, "title"). -property_resolve(cjdotdng, "title"). -property_resolve_language(cjdotdng, "title", "literal"). -property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). -property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). - -collection(cjdotdng, "comments"). -collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). -collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). -collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). - -p3_class_icon(cjdotdng, "check2-square"). -p3_instance_shape(cjdotdng, Instance, "circle"). -p3_instance_color(cjdotdng, Instance, "#000055") :- property_getter(c, Instance, "state", "todo://ready"). -p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, "state", "todo://doing"). -p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). -:ANDS - -Anything else? -User: No, that's it. Please write this to the SDNA.` -Eve: Ok.. -WRITE-SDNA: -subject_class("Todo", cjdotdng). -constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). -instance(cjdotdng, Base) :- triple(Base, "todo://state", _). - -property(cjdotdng, "state"). -property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). -property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). -property_named_option(cjdotdng, "state", "todo://ready", "Ready"). -property_named_option(cjdotdng, "state", "todo://doing", "Doing"). -property_named_option(cjdotdng, "state", "todo://done", "Done"). - -property(cjdotdng, "title"). -property_resolve(cjdotdng, "title"). -property_resolve_language(cjdotdng, "title", "literal"). -property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). -property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). - -collection(cjdotdng, "comments"). -collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). -collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). -collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). - -p3_class_icon(cjdotdng, "check2-square"). -p3_instance_shape(cjdotdng, Instance, "circle"). -p3_instance_color(cjdotdng, Instance, "#000055") :- property_getter(c, Instance, "state", "todo://ready"). -p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, "state", "todo://doing"). -p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). -:ANDS-ETIRW - -Done. -"##; - - -let reduced = r##" -Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. -User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` -Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. - -SDNA: -subject_class("Todo", cjdotdng). -constructor(cjdotdng, '[{action: "addLink", source: "this", predicate: "todo://state", target: "todo://ready"}]'). -instance(cjdotdng, Base) :- triple(Base, "todo://state", _). - -property(cjdotdng, "state"). -property_getter(cjdotdng, Base, "state", Value) :- triple(Base, "todo://state", Value). -property_setter(cjdotdng, "state", '[{action: "setSingleTarget", source: "this", predicate: "todo://state", target: "value"}]'). - -property(cjdotdng, "title"). -property_resolve(cjdotdng, "title"). -property_resolve_language(cjdotdng, "title", "literal"). -property_getter(cjdotdng, Base, "title", Value) :- triple(Base, "todo://has_title", Value). -property_setter(cjdotdng, "title", '[{action: "setSingleTarget", source: "this", predicate: "todo://has_title", target: "value"}]'). - -collection(cjdotdng, "comments"). -collection_getter(cjdotdng, Base, "comments", List) :- findall(C, triple(Base, "todo://comment", C), List). -collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", predicate: "todo://comment", target: "value"}]'). -collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). -:ANDS - -What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) -"##; - - - use std::io::Write; - use llm::Model; - use llm::InferenceResponse; - use std::convert::Infallible; - - println!("Loading model..."); - - - // load a GGML model from disk - let llama = llm::load::( - // path to GGML file - std::path::Path::new("/Users/nicolasluck/models/eve.model"), - llm::TokenizerSource::Embedded, - // llm::ModelParameters - Default::default(), - // load progress callback - |_| {}, - - ) - .unwrap_or_else(|err| panic!("Failed to load model: {err}")); - - println!("Model loaded!"); - - /* - - - let llama = llm::load_dynamic( - Some(llm::ModelArchitecture::Llama), - &std::path::Path::new("/Users/nicolasluck/eve.model"), - llm::TokenizerSource::Embedded, - Default::default(), - llm::load_progress_callback_stdout, - ).unwrap_or_else(|err| panic!("Failed to load model: {err}")); - - println!("Model loaded!"); - */ - - let mut session = llama.start_session(Default::default()); - - - println!("Ad hoc training model for ADAM subject classes..."); - - - fn print_token(t: String) { - print!("{t}"); - std::io::stdout().flush().unwrap(); - } - - - let inference_parameters = llm::InferenceParameters::default(); - - - // use the model to generate text from a prompt - - session.feed_prompt( - &llama, - format!("{system}\n{reduced}").as_str(), - &mut Default::default(), - llm::feed_prompt_callback(|resp| match resp { - llm::InferenceResponse::PromptToken(t) - | llm::InferenceResponse::InferredToken(t) => { - print_token(t); - - Ok::(llm::InferenceFeedback::Continue) - } - _ => Ok(llm::InferenceFeedback::Continue), - }), - ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); - - println!("\n\nTraining done. Ready!"); - - let mut rl = rustyline::Editor::<()>::new()?; - let line = rl.readline(">> ")?; - println!("\n\n"); - - let res = session.infer::( - // model to use for text generation - &llama, - // randomness provider - &mut rand::thread_rng(), - // the prompt to use for text generation, as well as other - // inference parameters - &llm::InferenceRequest { - prompt: format!("User: {line}\nEve:") - .as_str() - .into(), - parameters: &inference_parameters, - play_back_previous_tokens: false, - maximum_token_count: None, - }, - // llm::OutputRequest - &mut Default::default(), - // output callback - |t| { - match t { - InferenceResponse::PromptToken(t) | InferenceResponse::InferredToken(t) | llm::InferenceResponse::SnapshotToken(t) => { - print_token(t); - } - _ => {} - } - std::io::stdout().flush().unwrap(); - - Ok(llm::InferenceFeedback::Continue) - } - ); - - match res { - Ok(result) => println!("\n\nInference stats:\n{result}"), - Err(err) => println!("\n{err}"), - } - } + Domain::Eve { command: _ } => unreachable!(), } Ok(()) From 2b5edc985c89ca69dbda960f24fe19e2ad77f8ea Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Fri, 3 Nov 2023 01:36:40 +0100 Subject: [PATCH 07/14] Use my fork to fix snapshots --- cli/Cargo.toml | 2 +- cli/src/eve.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5e51744b7..9a5a69f9c 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -35,5 +35,5 @@ colour = "0.7.0" syntect = "5.0" ctrlc = "3.4.0" -llm = { git = "https://github.com/rustformers/llm", features = ["metal"] } +llm = { git = "https://github.com/lucksus/llm", features = ["metal"] } serde_cbor = "0.11" \ No newline at end of file diff --git a/cli/src/eve.rs b/cli/src/eve.rs index f97330088..25c039a25 100644 --- a/cli/src/eve.rs +++ b/cli/src/eve.rs @@ -214,8 +214,7 @@ pub async fn run(command: EveCommands) -> Result<()> { println!("\n\nTraining done. Ready!"); unsafe { - let snapshot_ref: llm::InferenceSnapshotRef<'_> = session.get_snapshot(); - let snapshot = snapshot_ref.to_owned(); + let snapshot: llm::InferenceSnapshotRef<'_> = session.get_snapshot(); let snapshot_cbor = serde_cbor::to_vec(&snapshot).expect("Failed to serialize snapshot"); File::create("/Users/nicolasluck/models/eve.snapshot.json").unwrap().write_all(snapshot_cbor.as_slice()).unwrap(); println!("Snapshot saved!"); From 9f817b9d54ff098feca2c3530ccca3f831108248 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Fri, 3 Nov 2023 01:45:31 +0100 Subject: [PATCH 08/14] Halt inference --- cli/src/eve.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cli/src/eve.rs b/cli/src/eve.rs index 25c039a25..7fd68ed8f 100644 --- a/cli/src/eve.rs +++ b/cli/src/eve.rs @@ -253,15 +253,23 @@ pub async fn run(command: EveCommands) -> Result<()> { &mut Default::default(), // output callback |t| { + let mut cont = true; match t { InferenceResponse::PromptToken(t) | InferenceResponse::InferredToken(t) | llm::InferenceResponse::SnapshotToken(t) => { + if t == "Eve:" || t == "User:" { + cont = false; + } print_token(t); } _ => {} } std::io::stdout().flush().unwrap(); - Ok(llm::InferenceFeedback::Continue) + if cont { + Ok(llm::InferenceFeedback::Continue) + } else { + Ok(llm::InferenceFeedback::Halt) + } } ); From b41a37bb78ba4ccc8f2d2c28f1ab9aa30fd40f76 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Fri, 17 Nov 2023 12:25:11 +0100 Subject: [PATCH 09/14] Use gguf branch of llm crate for new models --- cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 9a5a69f9c..198adb8e5 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -35,5 +35,5 @@ colour = "0.7.0" syntect = "5.0" ctrlc = "3.4.0" -llm = { git = "https://github.com/lucksus/llm", features = ["metal"] } +llm = { git = "https://github.com/lucksus/llm", branch = "gguf", features = ["metal"] } serde_cbor = "0.11" \ No newline at end of file From 3859917208fa1ac51f01934005c18041de0a939c Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Fri, 17 Nov 2023 12:25:45 +0100 Subject: [PATCH 10/14] Break down eve prompt so fits in context window --- cli/src/eve.rs | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/cli/src/eve.rs b/cli/src/eve.rs index 7fd68ed8f..3c524882d 100644 --- a/cli/src/eve.rs +++ b/cli/src/eve.rs @@ -1,6 +1,5 @@ use std::io::Write; -use ad4m_client::perspectives::snapshot; use llm::Model; use llm::InferenceResponse; use std::convert::Infallible; @@ -31,7 +30,7 @@ by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` "##; -const HISTORY: &str = r##" +const HISTORY1: &str = r##" Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` Eve: Sure. I have made-up a "todo" ADAM Language to use in the predicates of this Subject class definition. @@ -57,8 +56,9 @@ collection_adder(cjdotdng, "comments", '[{action: "addLink", source: "this", pre collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: "this", predicate: "todo://comment", target: "value"}]'). :ANDS -What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) +What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set)"##; +const HISTORY2: &str = r##" User: The "check" icon. Render todos as circles with the color depending on the state. Also, please add options for the state property, being Ready, Doing and Done. Eve: Ok, I have this now: @@ -92,7 +92,8 @@ p3_instance_color(cjdotdng, Instance, "#FFFF00") :- property_getter(c, Instance, p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, "state", "todo://done"). :ANDS -Anything else? +Anything else?"##; +const HISTORY3: &str = r##" User: No, that's it. Please write this to the SDNA.` Eve: Ok.. WRITE-SDNA: @@ -196,20 +197,23 @@ pub async fn run(command: EveCommands) -> Result<()> { // use the model to generate text from a prompt let mut session = llama.start_session(Default::default()); - session.feed_prompt( - &llama, - format!("{SYSTEM}\n{REDUCED}").as_str(), - &mut Default::default(), - llm::feed_prompt_callback(|resp| match resp { - llm::InferenceResponse::PromptToken(t) - | llm::InferenceResponse::InferredToken(t) => { - print_token(t); - - Ok::(llm::InferenceFeedback::Continue) - } - _ => Ok(llm::InferenceFeedback::Continue), - }), - ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); + for p in vec![SYSTEM, HISTORY1, HISTORY2] { + session.feed_prompt( + &llama, + p, + &mut Default::default(), + llm::feed_prompt_callback(|resp| match resp { + llm::InferenceResponse::PromptToken(t) + | llm::InferenceResponse::InferredToken(t) => { + print_token(t); + + Ok::(llm::InferenceFeedback::Continue) + } + _ => Ok(llm::InferenceFeedback::Continue), + }), + ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); + } + println!("\n\nTraining done. Ready!"); From c03afe0ac19fc1a28e9e9235fd10ebb749d78b35 Mon Sep 17 00:00:00 2001 From: Fayeed Pawaskar Date: Tue, 3 Sep 2024 12:36:21 +0530 Subject: [PATCH 11/14] chore: Add kalosm crate to Cargo.toml --- Cargo.lock | 597 ++++++++++++++++++++++++++++++++++++++++++++++++- cli/Cargo.toml | 1 + 2 files changed, 591 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 68d05e979..c5b170af1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,6 +25,7 @@ dependencies = [ "ctrlc", "dirs 4.0.0", "futures", + "kalosm", "kitsune_p2p_types", "llm", "rand 0.8.5", @@ -549,7 +550,7 @@ dependencies = [ "proc-macro2", "quote", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", ] [[package]] @@ -1487,6 +1488,20 @@ name = "bytemuck" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773d90827bc3feecfb67fab12e24de0749aad83c74b9504ecde46237b5cd24e2" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc8b54b395f2fcfbb3d90c47b01c7f444d94d05bdeb775811dec868ac3bbc26" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] [[package]] name = "byteorder" @@ -1560,7 +1575,7 @@ dependencies = [ "flate2", "fs2", "glob", - "indicatif", + "indicatif 0.16.2", "log", "rand 0.8.5", "reqwest", @@ -1606,6 +1621,42 @@ dependencies = [ "serde", ] +[[package]] +name = "candle-core" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5b18de020c2729dbf7ac390325312644808b6ba9b7962f1f724e9185b1d53c7" +dependencies = [ + "byteorder", + "gemm", + "half 2.4.1", + "memmap2 0.9.4", + "num-traits", + "num_cpus", + "rand 0.8.5", + "rand_distr", + "rayon", + "safetensors", + "thiserror", + "yoke", + "zip 1.1.4", +] + +[[package]] +name = "candle-nn" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b006b30f66a0d94fc9cef0ac4de6ce510565f35ae2c6c35ce5d4aacfb0fc8eeb" +dependencies = [ + "candle-core", + "half 2.4.1", + "num-traits", + "rayon", + "safetensors", + "serde", + "thiserror", +] + [[package]] name = "cargo-platform" version = "0.1.8" @@ -2081,6 +2132,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "crossterm 0.27.0", + "strum 0.26.3", + "strum_macros 0.26.4", + "unicode-width", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -2099,6 +2162,7 @@ dependencies = [ "encode_unicode", "lazy_static", "libc", + "unicode-width", "windows-sys 0.52.0", ] @@ -2447,6 +2511,19 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.6.0", + "crossterm_winapi 0.9.1", + "libc", + "parking_lot 0.12.3", + "winapi 0.3.9", +] + [[package]] name = "crossterm_winapi" version = "0.8.0" @@ -4114,6 +4191,16 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +[[package]] +name = "dyn-stack" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" +dependencies = [ + "bytemuck", + "reborrow", +] + [[package]] name = "dynasm" version = "1.2.3" @@ -4198,6 +4285,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ego-tree" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" + [[package]] name = "either" version = "1.13.0" @@ -4299,6 +4392,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "enum-iterator" version = "0.7.0" @@ -4389,7 +4494,7 @@ dependencies = [ "quote", "rustversion", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", ] [[package]] @@ -4403,7 +4508,7 @@ dependencies = [ "quote", "rustversion", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", ] [[package]] @@ -4497,6 +4602,22 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "exr" +version = "1.72.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "887d93f60543e9a9362ef8a21beedd0a833c5d9610e18c67abe15a5963dcb1a4" +dependencies = [ + "bit_field", + "flume", + "half 2.4.1", + "lebe", + "miniz_oxide 0.7.4", + "rayon-core", + "smallvec", + "zune-inflate", +] + [[package]] name = "fake" version = "2.9.2" @@ -4713,6 +4834,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "flume" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "spin 0.9.8", +] + [[package]] name = "fnv" version = "1.0.7" @@ -5108,6 +5238,124 @@ dependencies = [ "x11", ] +[[package]] +name = "gemm" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" +dependencies = [ + "dyn-stack", + "gemm-c32", + "gemm-c64", + "gemm-common", + "gemm-f16", + "gemm-f32", + "gemm-f64", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" +dependencies = [ + "bytemuck", + "dyn-stack", + "half 2.4.1", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp", + "raw-cpuid", + "rayon", + "seq-macro", + "sysctl", +] + +[[package]] +name = "gemm-f16" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" +dependencies = [ + "dyn-stack", + "gemm-common", + "gemm-f32", + "half 2.4.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + [[package]] name = "generator" version = "0.7.5" @@ -5242,6 +5490,16 @@ dependencies = [ "tracing-futures", ] +[[package]] +name = "gif" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "gimli" version = "0.26.2" @@ -5695,8 +5953,12 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ + "bytemuck", "cfg-if 1.0.0", "crunchy", + "num-traits", + "rand 0.8.5", + "rand_distr", ] [[package]] @@ -5868,6 +6130,20 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64 0.21.7", + "byteorder", + "crossbeam-channel", + "flate2", + "nom 7.1.3", + "num-traits", +] + [[package]] name = "headers" version = "0.3.9" @@ -5965,6 +6241,23 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" +[[package]] +name = "hf-hub" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732" +dependencies = [ + "dirs 5.0.1", + "indicatif 0.17.8", + "log", + "native-tls", + "rand 0.8.5", + "serde", + "serde_json", + "thiserror", + "ureq", +] + [[package]] name = "hkdf" version = "0.11.0" @@ -7048,8 +7341,13 @@ dependencies = [ "bytemuck", "byteorder", "color_quant", + "exr", + "gif", + "jpeg-decoder", "num-traits", "png", + "qoi", + "tiff", ] [[package]] @@ -7123,6 +7421,19 @@ dependencies = [ "regex", ] +[[package]] +name = "indicatif" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + [[package]] name = "infer" version = "0.13.0" @@ -7505,6 +7816,9 @@ name = "jpeg-decoder" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" +dependencies = [ + "rayon", +] [[package]] name = "js-sys" @@ -7598,6 +7912,58 @@ dependencies = [ "signature 2.2.0", ] +[[package]] +name = "kalosm" +version = "0.3.2" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "async-trait", + "comfy-table", + "ego-tree", + "futures-util", + "hdrhistogram", + "image 0.24.9", + "kalosm-common", + "kalosm-streams", + "llm-samplers 0.0.7", + "num-traits", + "once_cell", + "rand 0.8.5", + "serde", + "tokio", + "tracing", +] + +[[package]] +name = "kalosm-common" +version = "0.3.3" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "candle-core", + "candle-nn", + "dirs 5.0.1", + "hf-hub", + "httpdate", + "indicatif 0.17.8", + "once_cell", + "reqwest", + "tokio", + "tracing", +] + +[[package]] +name = "kalosm-streams" +version = "0.3.2" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "futures-util", + "image 0.24.9", + "pin-project-lite", + "tokio", +] + [[package]] name = "keccak" version = "0.1.5" @@ -8034,6 +8400,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" +[[package]] +name = "lebe" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + [[package]] name = "lexical" version = "5.2.2" @@ -8393,7 +8765,7 @@ dependencies = [ "ggml", "half 2.4.1", "indexmap 2.5.0", - "llm-samplers", + "llm-samplers 0.0.6", "memmap2 0.5.10", "partial_sort", "rand 0.8.5", @@ -8434,6 +8806,18 @@ dependencies = [ "thiserror", ] +[[package]] +name = "llm-samplers" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e85df656cd89e7702cb56171d75aa77c7bec828af7d2054d9987c34411cf896" +dependencies = [ + "anyhow", + "num-traits", + "rand 0.8.5", + "thiserror", +] + [[package]] name = "localtunnel-client" version = "0.0.12" @@ -8733,6 +9117,16 @@ dependencies = [ "libc", ] +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", + "stable_deref_trait", +] + [[package]] name = "memmem" version = "0.1.1" @@ -9031,7 +9425,7 @@ dependencies = [ "proc-macro2", "quote", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", ] [[package]] @@ -9431,6 +9825,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ + "bytemuck", "num-traits", ] @@ -10607,6 +11002,12 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "portable-atomic" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" + [[package]] name = "portpicker" version = "0.1.1" @@ -10966,6 +11367,27 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "pulp" +version = "0.18.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6" +dependencies = [ + "bytemuck", + "libm", + "num-complex", + "reborrow", +] + +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", +] + [[package]] name = "quanta" version = "0.4.1" @@ -11338,6 +11760,15 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8a99fddc9f0ba0a85884b8d14e3592853e787d581ca1816c91349b10e4eeab" +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "raw-window-handle" version = "0.5.2" @@ -11409,6 +11840,12 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + [[package]] name = "redox_syscall" version = "0.2.16" @@ -12088,6 +12525,7 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", + "ring 0.17.8", "rustls-pki-types", "rustls-webpki 0.102.7", "subtle 2.6.1", @@ -12291,6 +12729,16 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "safetensors" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7725d4d98fa515472f43a6e2bbf956c48e06b89bb50593a040e5945160214450" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "saffron" version = "0.1.0" @@ -12648,6 +13096,12 @@ dependencies = [ "pest", ] +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + [[package]] name = "serde" version = "1.0.209" @@ -13506,6 +13960,12 @@ dependencies = [ "strum_macros 0.25.3", ] +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum_macros" version = "0.18.0" @@ -13557,6 +14017,19 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.77", +] + [[package]] name = "subprocess" version = "0.2.9" @@ -13972,6 +14445,17 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "syntect" version = "5.2.0" @@ -14003,6 +14487,20 @@ dependencies = [ "libc", ] +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.6.0", + "byteorder", + "enum-as-inner 0.6.0", + "libc", + "thiserror", + "walkdir", +] + [[package]] name = "sysinfo" version = "0.29.11" @@ -15087,7 +15585,7 @@ dependencies = [ "async-trait", "cfg-if 1.0.0", "data-encoding", - "enum-as-inner", + "enum-as-inner 0.5.1", "futures-channel", "futures-io", "futures-util", @@ -15571,9 +16069,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" dependencies = [ "base64 0.22.1", + "flate2", "log", + "native-tls", "once_cell", + "rustls 0.23.12", + "rustls-pki-types", + "serde", + "serde_json", "url", + "webpki-roots 0.26.5", ] [[package]] @@ -16341,6 +16846,15 @@ version = "0.25.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +[[package]] +name = "webpki-roots" +version = "0.26.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bd24728e5af82c6c4ec1b66ac4844bdf8156257fccda846ec58b42cd0cdbe6a" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webview2-com" version = "0.19.1" @@ -17328,6 +17842,30 @@ dependencies = [ "time", ] +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", + "synstructure 0.13.1", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -17349,6 +17887,27 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", + "synstructure 0.13.1", +] + [[package]] name = "zeroize" version = "1.8.1" @@ -17389,6 +17948,21 @@ dependencies = [ "zstd", ] +[[package]] +name = "zip" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "indexmap 2.5.0", + "num_enum 0.7.3", + "thiserror", +] + [[package]] name = "zip" version = "2.2.0" @@ -17448,3 +18022,12 @@ dependencies = [ "cc", "pkg-config", ] + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 678a10b32..c1708758b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -44,6 +44,7 @@ syntect = "5.0" ctrlc = "3.4.0" llm = { git = "https://github.com/lucksus/llm", branch = "gguf", features = ["metal"] } +kalosm = { version = "0.3.0", git = "https://github.com/coasys/floneum.git", rev = "44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" } serde_cbor = "0.11" #holochain_kitsune_p2p = "0.3.0-beta-dev.37" kitsune_p2p_types = { version = "0.3.2-rc.0", git = "https://github.com/coasys/holochain.git", rev = "10841e49b28c17c3cb428680e2bc9259bf4ec739" } From 569ff3bad66231db431304b6c915b7cbde3e8158 Mon Sep 17 00:00:00 2001 From: Fayeed Pawaskar Date: Tue, 3 Sep 2024 18:21:18 +0530 Subject: [PATCH 12/14] chore: Update kalosm crate to version 0.3.2 with language feature --- Cargo.lock | 1578 +++++++++++++++++++++++++++++++++++++++++++++--- cli/Cargo.toml | 2 +- 2 files changed, 1506 insertions(+), 74 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5b170af1..2b15f7f67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -434,6 +434,15 @@ dependencies = [ "xdg", ] +[[package]] +name = "approx" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0e60b75072ecd4168020818c0107f2857bb6c4e64252d8d3983f6263b40a5c3" +dependencies = [ + "num-traits", +] + [[package]] name = "approx" version = "0.5.1" @@ -510,6 +519,25 @@ dependencies = [ "serde", ] +[[package]] +name = "arroy" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9" +dependencies = [ + "bytemuck", + "byteorder", + "heed", + "log", + "memmap2 0.9.4", + "ordered-float 4.2.2", + "rand 0.8.5", + "rayon", + "roaring", + "tempfile", + "thiserror", +] + [[package]] name = "ascii" version = "0.9.3" @@ -538,7 +566,7 @@ dependencies = [ "num-traits", "rusticata-macros", "thiserror", - "time", + "time 0.3.36", ] [[package]] @@ -774,7 +802,7 @@ dependencies = [ "async-io 1.13.0", "async-lock 2.8.0", "async-process", - "crossbeam-utils", + "crossbeam-utils 0.8.20", "futures-channel", "futures-core", "futures-io", @@ -888,6 +916,19 @@ dependencies = [ "system-deps 6.2.2", ] +[[package]] +name = "atom_syndication" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3a5ed3201df5658d1aa45060c5a57dc9dba8a8ada20d696d67cb0c479ee043" +dependencies = [ + "chrono", + "derive_builder 0.20.1", + "diligent-date-parser", + "never", + "quick-xml 0.36.1", +] + [[package]] name = "atomic" version = "0.5.3" @@ -929,6 +970,20 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "auto_generate_cdp" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7af08ed49930c50104b2f1699d257e5053fb1809e370647bde9c58b31d65d417" +dependencies = [ + "convert_case 0.4.0", + "proc-macro2", + "quote", + "serde", + "serde_json", + "ureq", +] + [[package]] name = "autocfg" version = "0.1.8" @@ -1434,10 +1489,21 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "time", + "time 0.3.36", "uuid 1.10.0", ] +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata 0.1.10", +] + [[package]] name = "bstr" version = "1.10.0" @@ -1455,6 +1521,12 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +[[package]] +name = "byte-slice-cast" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0a5e3906bcbf133e33c1d4d95afc664ad37fbdb9f6568d8043e7ea8c27d93d3" + [[package]] name = "byte-tools" version = "0.3.1" @@ -1657,6 +1729,25 @@ dependencies = [ "thiserror", ] +[[package]] +name = "candle-transformers" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f0d4eb6a0d9279d5829b06b2bf3caa117904eefd6dcf879d16e687c4a84034c" +dependencies = [ + "byteorder", + "candle-core", + "candle-nn", + "fancy-regex", + "num-traits", + "rand 0.8.5", + "rayon", + "serde", + "serde_json", + "serde_plain", + "tracing", +] + [[package]] name = "cargo-platform" version = "0.1.8" @@ -2038,6 +2129,12 @@ dependencies = [ "warp", ] +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "cocoa" version = "0.24.1" @@ -2150,7 +2247,7 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.20", ] [[package]] @@ -2216,6 +2313,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cooked-waker" version = "5.0.0" @@ -2229,7 +2335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" dependencies = [ "percent-encoding", - "time", + "time 0.3.36", "version_check", ] @@ -2436,13 +2542,23 @@ dependencies = [ "once_cell", ] +[[package]] +name = "crossbeam-channel" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" +dependencies = [ + "crossbeam-utils 0.7.2", + "maybe-uninit", +] + [[package]] name = "crossbeam-channel" version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.20", ] [[package]] @@ -2452,7 +2568,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ "crossbeam-epoch", - "crossbeam-utils", + "crossbeam-utils 0.8.20", ] [[package]] @@ -2461,7 +2577,7 @@ version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.20", ] [[package]] @@ -2470,7 +2586,18 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.20", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" +dependencies = [ + "autocfg 1.3.0", + "cfg-if 0.1.10", + "lazy_static", ] [[package]] @@ -2657,6 +2784,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 1.0.11", + "phf 0.8.0", + "smallvec", +] + [[package]] name = "cssparser-macros" version = "0.6.1" @@ -2735,6 +2875,20 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "custom_debug_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f731440b39c73910e253cb465ec1fac97732b3c7af215639881ec0c2a38f4f69" +dependencies = [ + "darling 0.20.10", + "itertools 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.77", + "synstructure 0.13.1", +] + [[package]] name = "d3d12" version = "0.20.0" @@ -2852,7 +3006,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils", + "crossbeam-utils 0.8.20", "hashbrown 0.14.5", "lock_api", "once_cell", @@ -2969,6 +3123,26 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41b319d1b62ffbd002e057f36bebd1f42b9f97927c9577461d855f3513c4289f" +[[package]] +name = "datasize" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e65c07d59e45d77a8bda53458c24a828893a99ac6cdd9c84111e09176ab739a2" +dependencies = [ + "datasize_derive", +] + +[[package]] +name = "datasize_derive" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613e4ee15899913285b7612004bbd490abd605be7b11d35afada5902fb6b91d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "dbus" version = "0.9.7" @@ -2990,6 +3164,37 @@ dependencies = [ "uuid 1.10.0", ] +[[package]] +name = "decorum" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "281759d3c8a14f5c3f0c49363be56810fcd7f910422f97f2db850c2920fde5cf" +dependencies = [ + "approx 0.3.2", + "num-traits", + "serde", + "serde_derive", +] + +[[package]] +name = "deflate" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73770f8e1fe7d64df17ca66ad28994a0a623ea497fa69486e14984e715c5d174" +dependencies = [ + "adler32", + "byteorder", +] + +[[package]] +name = "deflate" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c86f7e25f518f4b81808a2cf1c50996a61f5c2eb394b2393bd87f2a4780a432f" +dependencies = [ + "adler32", +] + [[package]] name = "deno_ast" version = "0.38.2" @@ -3826,7 +4031,7 @@ version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version 0.4.1", @@ -3964,6 +4169,15 @@ dependencies = [ "subtle 2.6.1", ] +[[package]] +name = "diligent-date-parser" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6cf7fe294274a222363f84bcb63cdea762979a0443b4cf1f4f8fd17c86b1182" +dependencies = [ + "chrono", +] + [[package]] name = "directories" version = "4.0.1" @@ -4109,6 +4323,21 @@ dependencies = [ "litrs", ] +[[package]] +name = "docx-rs" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e593b51d4fe95d69d70fd40da4b314b029736302c986c3c760826e842fd27dc3" +dependencies = [ + "base64 0.13.1", + "image 0.24.9", + "serde", + "serde_json", + "thiserror", + "xml-rs", + "zip 0.6.6", +] + [[package]] name = "downcast" version = "0.11.0" @@ -4121,6 +4350,15 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" +[[package]] +name = "doxygen-rs" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" +dependencies = [ + "phf 0.11.2", +] + [[package]] name = "dprint-swc-ext" version = "0.16.0" @@ -4359,6 +4597,18 @@ version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ef6b89e5b37196644d8796de5268852ff179b44e96276cf4290264843743bb7" +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -4563,6 +4813,9 @@ name = "esaxx-rs" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] [[package]] name = "event-listener" @@ -4647,6 +4900,17 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fancy-regex" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +dependencies = [ + "bit-set 0.5.3", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + [[package]] name = "faster-hex" version = "0.9.0" @@ -4690,6 +4954,26 @@ dependencies = [ "utf-8", ] +[[package]] +name = "fax" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03e33ad0e71af414ef9d2b0a94d23ff59115bb068e6a6a06c0952f2c22ffd77" +dependencies = [ + "fax_derive", +] + +[[package]] +name = "fax_derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c1d7ffc9f2dc8316348c75281a99c8fdc60c1ddf4f82a366d117bf1b74d5a39" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "fd-lock" version = "3.0.13" @@ -4849,6 +5133,26 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "font" +version = "0.1.0" +source = "git+https://github.com/pdf-rs/font#78a8d9c58b4aafddd309ab7bbefd3bb5c347cf45" +dependencies = [ + "decorum", + "indexmap 1.9.3", + "istring 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.10.5", + "log", + "nom 5.1.3", + "pathfinder_color", + "pathfinder_content", + "pathfinder_geometry", + "pdf_encoding 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.7.3", + "slotmap", + "tuple 0.5.2 (git+https://github.com/s3bk/tuple/)", +] + [[package]] name = "foreign-types" version = "0.3.2" @@ -5421,6 +5725,15 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -5428,8 +5741,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ "cfg-if 1.0.0", + "js-sys", "libc", "wasi 0.9.0+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -5629,6 +5944,27 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "globalcache" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240a3059d86f2ba6859ac79f95ff94e65606abc775c1bc0ecf9b6590fb35dc04" +dependencies = [ + "async-trait", + "tuple 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "web-time", +] + +[[package]] +name = "globalcache" +version = "0.2.4" +source = "git+https://github.com/s3bk/cachelib#276f40d00528001bdcd1e1ed594157bd82c7c503" +dependencies = [ + "async-trait", + "tuple 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "web-time", +] + [[package]] name = "globset" version = "0.4.14" @@ -5636,7 +5972,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" dependencies = [ "aho-corasick 1.1.3", - "bstr", + "bstr 1.10.0", "log", "regex-automata 0.4.7", "regex-syntax 0.8.4", @@ -5676,8 +6012,24 @@ dependencies = [ ] [[package]] -name = "gobject-sys" -version = "0.15.10" +name = "glyphmatcher" +version = "0.1.0" +source = "git+https://github.com/s3bk/glyphmatcher#7bd5d40aaa8893fa4f2e07c758fa34127e176c8c" +dependencies = [ + "font", + "istring 0.3.4 (git+https://github.com/s3bk/istring)", + "itertools 0.8.2", + "pathfinder_content", + "pathfinder_geometry", + "pdf_encoding 0.4.0 (git+https://github.com/pdf-rs/encoding)", + "postcard", + "serde", + "serde_json", +] + +[[package]] +name = "gobject-sys" +version = "0.15.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d57ce44246becd17153bd035ab4d32cfee096a657fc01f2231c9278378d1e0a" dependencies = [ @@ -5980,6 +6332,17 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hashbrown" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96282e96bfcd3da0d3aa9938bedf1e50df3269b6db08b4876d2da0bb1a0841cf" +dependencies = [ + "ahash 0.3.8", + "autocfg 1.3.0", + "serde", +] + [[package]] name = "hashbrown" version = "0.8.2" @@ -6138,7 +6501,7 @@ checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" dependencies = [ "base64 0.21.7", "byteorder", - "crossbeam-channel", + "crossbeam-channel 0.5.13", "flate2", "nom 7.1.3", "num-traits", @@ -6168,6 +6531,29 @@ dependencies = [ "http 0.2.12", ] +[[package]] +name = "headless_chrome" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1eb54284cb4be609bae1375e08e7737752fd5f919f918345359b51b38b7b9ce" +dependencies = [ + "anyhow", + "auto_generate_cdp", + "base64 0.22.1", + "derive_builder 0.20.1", + "log", + "rand 0.8.5", + "regex", + "serde", + "serde_json", + "tempfile", + "thiserror", + "tungstenite 0.24.0", + "url", + "which 6.0.3", + "winreg 0.52.0", +] + [[package]] name = "heapless" version = "0.7.17" @@ -6177,6 +6563,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version 0.4.1", + "serde", "spin 0.9.8", "stable_deref_trait", ] @@ -6202,6 +6589,44 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "heed" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb" +dependencies = [ + "bitflags 2.6.0", + "byteorder", + "heed-traits", + "heed-types", + "libc", + "lmdb-master-sys", + "once_cell", + "page_size", + "serde", + "synchronoise", + "url", +] + +[[package]] +name = "heed-traits" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" + +[[package]] +name = "heed-types" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d3f528b053a6d700b2734eabcd0fd49cb8230647aa72958467527b0b7917114" +dependencies = [ + "bincode", + "byteorder", + "heed-traits", + "serde", + "serde_json", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -7013,6 +7438,20 @@ dependencies = [ "triomphe", ] +[[package]] +name = "html5ever" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5c13fb08e5d4dfc151ee5e88bae63f7773d61852f3bdc73c9f4b9e1bde03148" +dependencies = [ + "log", + "mac", + "markup5ever 0.10.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "html5ever" version = "0.26.0" @@ -7021,12 +7460,26 @@ checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" dependencies = [ "log", "mac", - "markup5ever", + "markup5ever 0.11.0", "proc-macro2", "quote", "syn 1.0.109", ] +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "http" version = "0.2.12" @@ -7240,7 +7693,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3804960be0bb5e4edb1e1ad67afd321a9ecfd875c3e65c099468fd2717d7cae" dependencies = [ "byteorder", - "png", + "png 0.17.13", ] [[package]] @@ -7332,6 +7785,21 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "image" +version = "0.23.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24ffcb7e7244a9bf19d35bf2883b9c080c4ced3c07a9895572178cdb8f13f6a1" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "num-iter", + "num-rational 0.3.2", + "num-traits", + "png 0.16.8", +] + [[package]] name = "image" version = "0.24.9" @@ -7345,7 +7813,7 @@ dependencies = [ "gif", "jpeg-decoder", "num-traits", - "png", + "png 0.17.13", "qoi", "tiff", ] @@ -7359,7 +7827,7 @@ dependencies = [ "bytemuck", "byteorder-lite", "num-traits", - "png", + "png 0.17.13", "tiff", ] @@ -7451,8 +7919,8 @@ checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash 0.8.11", "clap 4.5.16", - "crossbeam-channel", - "crossbeam-utils", + "crossbeam-channel 0.5.13", + "crossbeam-utils 0.8.20", "dashmap 6.0.1", "env_logger 0.11.5", "indexmap 2.5.0", @@ -7612,6 +8080,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", ] [[package]] @@ -7695,6 +8166,23 @@ dependencies = [ "paste", ] +[[package]] +name = "istring" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875cc6fb9aecbc1a9bd736f2d18b12e0756b4c80c5e35e28262154abcb077a39" +dependencies = [ + "datasize", +] + +[[package]] +name = "istring" +version = "0.3.4" +source = "git+https://github.com/s3bk/istring#b9424bb1e291388f145a06558c15639dc87f3058" +dependencies = [ + "serde", +] + [[package]] name = "itertools" version = "0.8.2" @@ -7722,6 +8210,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -7731,6 +8228,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "0.4.8" @@ -7925,6 +8431,7 @@ dependencies = [ "hdrhistogram", "image 0.24.9", "kalosm-common", + "kalosm-language", "kalosm-streams", "llm-samplers 0.0.7", "num-traits", @@ -7953,6 +8460,130 @@ dependencies = [ "tracing", ] +[[package]] +name = "kalosm-language" +version = "0.3.3" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "arroy", + "async-trait", + "candle-core", + "candle-nn", + "chrono", + "convert_case 0.6.0", + "dashmap 5.5.3", + "docx-rs", + "ego-tree", + "futures-util", + "half 2.4.1", + "headless_chrome", + "heed", + "image 0.24.9", + "itertools 0.11.0", + "kalosm-language-model", + "kalosm-llama", + "kalosm-sample", + "kalosm-streams", + "llm-samplers 0.0.7", + "log", + "meval", + "once_cell", + "pdf", + "pdf_text", + "pulldown-cmark", + "rand 0.8.5", + "rbert", + "readability", + "reqwest", + "rphi", + "rss", + "rustc-hash", + "scraper", + "serde", + "serde_json", + "slab", + "srx", + "tempfile", + "texting_robots", + "tokenizers 0.19.1", + "tokio", + "tokio-util", + "tracing", + "url", + "whatlang", +] + +[[package]] +name = "kalosm-language-model" +version = "0.3.3" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "async-trait", + "candle-core", + "futures-util", + "kalosm-common", + "kalosm-sample", + "kalosm-streams", + "llm-samplers 0.0.7", + "log", + "lru", + "once_cell", + "postcard", + "rand 0.8.5", + "rayon", + "safetensors", + "serde", + "thiserror", + "tokenizers 0.19.1", + "tokio", + "tracing", +] + +[[package]] +name = "kalosm-llama" +version = "0.3.3" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "async-trait", + "candle-core", + "candle-nn", + "candle-transformers", + "half 2.4.1", + "kalosm-common", + "kalosm-language-model", + "kalosm-sample", + "kalosm-streams", + "llm-samplers 0.0.7", + "once_cell", + "rand 0.8.5", + "tokenizers 0.19.1", + "tokio", + "tracing", +] + +[[package]] +name = "kalosm-parse-macro" +version = "0.3.2" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "kalosm-sample" +version = "0.3.2" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "kalosm-parse-macro", + "regex-automata 0.4.7", +] + [[package]] name = "kalosm-streams" version = "0.3.2" @@ -8274,11 +8905,11 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f29e4755b7b995046f510a7520c42b2fed58b77bd94d5a87a8eb43d2fd126da8" dependencies = [ - "cssparser", - "html5ever", + "cssparser 0.27.2", + "html5ever 0.26.0", "indexmap 1.9.3", "matches", - "selectors", + "selectors 0.22.0", ] [[package]] @@ -8322,7 +8953,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "time", + "time 0.3.36", "tokio", "toml 0.8.19", "tracing", @@ -8347,7 +8978,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "time", + "time 0.3.36", "tokio", "toml 0.8.19", "tracing", @@ -8773,7 +9404,7 @@ dependencies = [ "serde", "serde_bytes", "thiserror", - "tokenizers", + "tokenizers 0.13.4", "tracing", ] @@ -8818,6 +9449,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "lmdb-master-sys" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9" +dependencies = [ + "cc", + "doxygen-rs", + "libc", +] + [[package]] name = "localtunnel-client" version = "0.0.12" @@ -8940,7 +9582,7 @@ dependencies = [ "dirs-next", "objc-foundation", "objc_id", - "time", + "time 0.3.36", ] [[package]] @@ -8967,7 +9609,17 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf0c9b980bf4f3a37fd7b1c066941dd1b1d0152ce6ee6e8fe8c49b9f6810d862" dependencies = [ - "macro_rules_attribute-proc_macro", + "macro_rules_attribute-proc_macro 0.1.3", + "paste", +] + +[[package]] +name = "macro_rules_attribute" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13" +dependencies = [ + "macro_rules_attribute-proc_macro 0.2.0", "paste", ] @@ -8977,6 +9629,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" + [[package]] name = "malloc_buf" version = "0.0.6" @@ -8992,6 +9650,20 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "markup5ever" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd" +dependencies = [ + "log", + "phf 0.8.0", + "phf_codegen 0.8.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "markup5ever" version = "0.11.0" @@ -9007,15 +9679,41 @@ dependencies = [ ] [[package]] -name = "markup5ever_rcdom" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.2", + "phf_codegen 0.11.2", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b" +dependencies = [ + "html5ever 0.25.2", + "markup5ever 0.10.1", + "tendril", + "xml5ever 0.16.2", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" dependencies = [ - "html5ever", - "markup5ever", + "html5ever 0.26.0", + "markup5ever 0.11.0", "tendril", - "xml5ever", + "xml5ever 0.17.0", ] [[package]] @@ -9058,6 +9756,12 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "maybe-uninit" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" + [[package]] name = "md-5" version = "0.10.6" @@ -9077,6 +9781,12 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "mdns" version = "3.0.0" @@ -9175,6 +9885,16 @@ dependencies = [ "paste", ] +[[package]] +name = "meval" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f79496a5651c8d57cd033c5add8ca7ee4e3d5f7587a4777484640d9cb60392d9" +dependencies = [ + "fnv", + "nom 1.2.4", +] + [[package]] name = "mime" version = "0.3.17" @@ -9203,6 +9923,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a05b5d0594e0cb1ad8cee3373018d2b84e25905dc75b2468114cc9a8e86cfc20" +[[package]] +name = "miniz_oxide" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "791daaae1ed6889560f8c4359194f56648355540573244a5448a83ba1ecc7435" +dependencies = [ + "adler32", +] + [[package]] name = "miniz_oxide" version = "0.7.4" @@ -9475,11 +10204,11 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" dependencies = [ - "approx", + "approx 0.5.1", "matrixmultiply", "nalgebra-macros", "num-complex", - "num-rational", + "num-rational 0.4.2", "num-traits", "rand 0.8.5", "rand_distr", @@ -9582,6 +10311,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "never" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -9686,12 +10421,19 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "1.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5b8c256fd9471521bcb84c3cdba98921497f1a331cbc15b8030fc63b82050ce" + [[package]] name = "nom" version = "5.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" dependencies = [ + "lexical-core 0.7.6", "memchr", "version_check", ] @@ -9725,7 +10467,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2c66da08abae1c024c01d635253e402341b4060a12e99b31c7594063bf490a" dependencies = [ "bitflags 1.3.2", - "crossbeam-channel", + "crossbeam-channel 0.5.13", "filetime", "fsevent-sys", "inotify", @@ -9785,7 +10527,7 @@ dependencies = [ "num-complex", "num-integer", "num-iter", - "num-rational", + "num-rational 0.4.2", "num-traits", ] @@ -9880,6 +10622,17 @@ dependencies = [ "num-modular", ] +[[package]] +name = "num-rational" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07" +dependencies = [ + "autocfg 1.3.0", + "num-integer", + "num-traits", +] + [[package]] name = "num-rational" version = "0.4.2" @@ -10193,7 +10946,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "293c15678e37254c15bd2f092314abb4e51d7fdde05c2021279c12631b54f005" dependencies = [ - "bstr", + "bstr 1.10.0", "winapi 0.3.9", ] @@ -10282,6 +11035,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-float" +version = "4.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a91171844676f8c7990ce64959210cd2eaef32c2612c50f9fae9f8aaa6065a6" +dependencies = [ + "num-traits", +] + [[package]] name = "os_info" version = "3.8.2" @@ -10405,6 +11167,16 @@ dependencies = [ "sha2 0.10.8", ] +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi 0.3.9", +] + [[package]] name = "pairing" version = "0.22.0" @@ -10539,6 +11311,112 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +[[package]] +name = "pathfinder_color" +version = "0.5.0" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "pathfinder_simd", +] + +[[package]] +name = "pathfinder_content" +version = "0.5.0" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "arrayvec 0.5.2", + "bitflags 1.3.2", + "image 0.23.14", + "log", + "pathfinder_color", + "pathfinder_geometry", + "pathfinder_simd", + "smallvec", +] + +[[package]] +name = "pathfinder_geometry" +version = "0.5.1" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "log", + "pathfinder_simd", +] + +[[package]] +name = "pathfinder_gpu" +version = "0.5.0" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "bitflags 1.3.2", + "fxhash", + "half 1.8.3", + "image 0.23.14", + "instant", + "log", + "pathfinder_color", + "pathfinder_geometry", + "pathfinder_resources", + "pathfinder_simd", +] + +[[package]] +name = "pathfinder_renderer" +version = "0.5.0" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "bitflags 1.3.2", + "byte-slice-cast", + "byteorder", + "crossbeam-channel 0.4.4", + "fxhash", + "half 1.8.3", + "hashbrown 0.7.2", + "instant", + "log", + "pathfinder_color", + "pathfinder_content", + "pathfinder_geometry", + "pathfinder_gpu", + "pathfinder_resources", + "pathfinder_simd", + "pathfinder_ui", + "rayon", + "serde", + "serde_json", + "smallvec", + "vec_map", +] + +[[package]] +name = "pathfinder_resources" +version = "0.5.0" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" + +[[package]] +name = "pathfinder_simd" +version = "0.5.4" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "rustc_version 0.4.1", +] + +[[package]] +name = "pathfinder_ui" +version = "0.5.0" +source = "git+https://github.com/servo/pathfinder#45b7a89449d1ebc12da870d1914e1b85c6b475d0" +dependencies = [ + "hashbrown 0.7.2", + "pathfinder_color", + "pathfinder_geometry", + "pathfinder_gpu", + "pathfinder_resources", + "pathfinder_simd", + "serde", + "serde_derive", + "serde_json", +] + [[package]] name = "pbkdf2" version = "0.11.0" @@ -10561,6 +11439,105 @@ dependencies = [ "hmac 0.12.1", ] +[[package]] +name = "pdf" +version = "0.9.0" +source = "git+https://github.com/pdf-rs/pdf#a94544dd6668f4c9b0d0037b2f3474c4d7e896aa" +dependencies = [ + "aes", + "bitflags 2.6.0", + "cbc", + "datasize", + "deflate 1.0.0", + "fax", + "globalcache 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 2.5.0", + "istring 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.13.0", + "jpeg-decoder", + "libflate", + "log", + "md5", + "once_cell", + "pdf_derive", + "sha2 0.10.8", + "snafu", + "stringprep", + "tempfile", + "weezl", +] + +[[package]] +name = "pdf_derive" +version = "0.2.0" +source = "git+https://github.com/pdf-rs/pdf#a94544dd6668f4c9b0d0037b2f3474c4d7e896aa" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "pdf_encoding" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07793d65f6165493079ab18885b7d945a55e683615a68e1419ba3e8ebf5b7631" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "pdf_encoding" +version = "0.4.0" +source = "git+https://github.com/pdf-rs/encoding#f473ae1e8f0c1f262b00d6994136c6ff518f89d9" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "pdf_render" +version = "0.1.0" +source = "git+https://github.com/pdf-rs/pdf_render#39a877d3dde5105ffff8f116a4dd5ae325c02867" +dependencies = [ + "custom_debug_derive", + "font", + "globalcache 0.2.4 (git+https://github.com/s3bk/cachelib)", + "glyphmatcher", + "image 0.24.9", + "instant", + "istring 0.3.4 (git+https://github.com/s3bk/istring)", + "itertools 0.8.2", + "log", + "once_cell", + "pathfinder_color", + "pathfinder_content", + "pathfinder_geometry", + "pathfinder_renderer", + "pathfinder_resources", + "pdf", + "pdf_encoding 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json", +] + +[[package]] +name = "pdf_text" +version = "0.1.0" +source = "git+https://github.com/pdf-rs/pdf_text#03d4789b4d23b24a991a18d31a352c8400237afc" +dependencies = [ + "font", + "itertools 0.8.2", + "log", + "ordered-float 2.10.1", + "pathfinder_color", + "pathfinder_content", + "pathfinder_geometry", + "pdf", + "pdf_render", + "serde", + "table", + "unicode-normalization", +] + [[package]] name = "pear" version = "0.2.9" @@ -10725,6 +11702,16 @@ dependencies = [ "phf_shared 0.10.0", ] +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + [[package]] name = "phf_generator" version = "0.8.0" @@ -10932,7 +11919,19 @@ dependencies = [ "indexmap 2.5.0", "quick-xml 0.32.0", "serde", - "time", + "time 0.3.36", +] + +[[package]] +name = "png" +version = "0.16.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3287920cb847dee3de33d301c463fba14dda99db24214ddf93f83d3021f4c6" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "deflate 0.8.6", + "miniz_oxide 0.3.7", ] [[package]] @@ -11017,6 +12016,19 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "postcard" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless", + "serde", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -11367,6 +12379,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.6.0", + "getopts", + "memchr", + "unicase", +] + [[package]] name = "pulp" version = "0.18.22" @@ -11440,6 +12464,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quickcheck" version = "0.8.5" @@ -11808,6 +12842,17 @@ dependencies = [ "rayon", ] +[[package]] +name = "rayon-cond" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" +dependencies = [ + "either", + "itertools 0.11.0", + "rayon", +] + [[package]] name = "rayon-core" version = "1.12.1" @@ -11815,7 +12860,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", - "crossbeam-utils", + "crossbeam-utils 0.8.20", +] + +[[package]] +name = "rbert" +version = "0.3.3" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "async-trait", + "candle-core", + "candle-nn", + "candle-transformers", + "kalosm-common", + "kalosm-language-model", + "serde", + "serde_json", + "tokenizers 0.19.1", + "tokio", + "tracing", ] [[package]] @@ -11826,7 +12890,7 @@ checksum = "ffbe84efe2f38dea12e9bfc1f65377fdf03e53a18cb3b995faedf7934c7e785b" dependencies = [ "pem", "ring 0.16.20", - "time", + "time 0.3.36", "yasna", "zeroize", ] @@ -11840,6 +12904,19 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "readability" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7843b159286299dd2b90f06d904ae1a8017a650d88d716c85dd6f123947f399" +dependencies = [ + "html5ever 0.25.2", + "lazy_static", + "markup5ever_rcdom 0.1.0", + "regex", + "url", +] + [[package]] name = "reborrow" version = "0.5.5" @@ -12236,6 +13313,16 @@ dependencies = [ "serde_bytes", ] +[[package]] +name = "roaring" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "rocket" version = "0.5.1" @@ -12264,7 +13351,7 @@ dependencies = [ "serde", "state 0.6.0", "tempfile", - "time", + "time 0.3.36", "tokio", "tokio-stream", "tokio-util", @@ -12312,7 +13399,7 @@ dependencies = [ "smallvec", "stable-pattern", "state 0.6.0", - "time", + "time 0.3.36", "tokio", "uncased", ] @@ -12349,6 +13436,28 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "rphi" +version = "0.3.2" +source = "git+https://github.com/coasys/floneum.git?rev=44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75#44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" +dependencies = [ + "anyhow", + "async-trait", + "candle-core", + "candle-nn", + "candle-transformers", + "kalosm-common", + "kalosm-language-model", + "kalosm-sample", + "kalosm-streams", + "llm-samplers 0.0.7", + "rand 0.8.5", + "serde_json", + "tokenizers 0.19.1", + "tokio", + "tracing", +] + [[package]] name = "rsa" version = "0.9.6" @@ -12369,6 +13478,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rss" +version = "2.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27e92048f840d98c6d6dd870af9101610ea9ff413f11f1bcebf4f4c31d96d957" +dependencies = [ + "atom_syndication", + "derive_builder 0.20.1", + "never", + "quick-xml 0.36.1", +] + [[package]] name = "rtoolbox" version = "0.0.2" @@ -12822,6 +13943,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "761fb705fdf625482d2ed91d3f0559dcfeab2798fe2771c69560a774865d0802" +dependencies = [ + "ahash 0.8.11", + "cssparser 0.31.2", + "ego-tree", + "getopts", + "html5ever 0.27.0", + "once_cell", + "selectors 0.25.0", + "tendril", +] + [[package]] name = "scryer-modular-bitfield" version = "0.11.4" @@ -12878,7 +14015,7 @@ dependencies = [ "libloading 0.7.4", "native-tls", "num-order", - "ordered-float", + "ordered-float 2.10.1", "phf 0.9.0", "proc-macro2", "quote", @@ -13024,8 +14161,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f9da09dc3f4dfdb6374cbffff7a2cffcec316874d4429899eefdc97b3b94dcd" dependencies = [ "bit-set 0.5.3", - "html5ever", - "markup5ever_rcdom", + "html5ever 0.26.0", + "markup5ever_rcdom 0.2.0", ] [[package]] @@ -13035,7 +14172,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" dependencies = [ "bitflags 1.3.2", - "cssparser", + "cssparser 0.27.2", "derive_more", "fxhash", "log", @@ -13043,11 +14180,30 @@ dependencies = [ "phf 0.8.0", "phf_codegen 0.8.0", "precomputed-hash", - "servo_arc", + "servo_arc 0.1.1", "smallvec", "thin-slice", ] +[[package]] +name = "selectors" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +dependencies = [ + "bitflags 2.6.0", + "cssparser 0.31.2", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen 0.10.0", + "precomputed-hash", + "servo_arc 0.3.0", + "smallvec", +] + [[package]] name = "self_cell" version = "1.0.4" @@ -13126,7 +14282,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ - "ordered-float", + "ordered-float 2.10.1", "serde", ] @@ -13152,6 +14308,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "serde-xml-rs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0bf1ba0696ccf0872866277143ff1fd14d22eec235d2b23702f95e6660f7dfa" +dependencies = [ + "log", + "serde", + "thiserror", + "xml-rs", +] + [[package]] name = "serde_bytes" version = "0.11.15" @@ -13206,6 +14374,25 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_plain" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8136f1a4ea815d7eac4101cfd0b16dc0cb5e1fe1b8609dfd728058656b7badf" +dependencies = [ + "regex", + "serde", +] + [[package]] name = "serde_repr" version = "0.1.19" @@ -13265,7 +14452,7 @@ dependencies = [ "serde_derive", "serde_json", "serde_with_macros", - "time", + "time 0.3.36", ] [[package]] @@ -13325,6 +14512,15 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha-1" version = "0.10.0" @@ -13499,7 +14695,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" dependencies = [ - "approx", + "approx 0.5.1", "num-complex", "num-traits", "paste", @@ -13552,7 +14748,7 @@ dependencies = [ "num-bigint", "num-traits", "thiserror", - "time", + "time 0.3.36", ] [[package]] @@ -13577,6 +14773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ "autocfg 1.3.0", + "serde", ] [[package]] @@ -13611,6 +14808,27 @@ dependencies = [ "version_check", ] +[[package]] +name = "snafu" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "socket2" version = "0.4.10" @@ -13631,6 +14849,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi 0.3.9", +] + [[package]] name = "sodoken" version = "0.0.11" @@ -13759,6 +14988,19 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "srx" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310140d03a2064947271c5105bfff8c406f2f0bafbdaa947b34a088683cc2905" +dependencies = [ + "regex", + "serde", + "serde-xml-rs", + "serde_regex", + "thiserror", +] + [[package]] name = "stable-pattern" version = "0.1.0" @@ -13817,7 +15059,7 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35a062dbadac17a42e0fc64c27f419b25d6fae98572eb43c8814c9e873d7721" dependencies = [ - "approx", + "approx 0.5.1", "lazy_static", "nalgebra", "num-traits", @@ -13874,6 +15116,17 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.8.0" @@ -14433,6 +15686,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "synchronoise" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dbc01390fc626ce8d1cffe3376ded2b72a11bb70e1c75f404a210e4daa4def2" +dependencies = [ + "crossbeam-queue", +] + [[package]] name = "synstructure" version = "0.12.6" @@ -14557,6 +15819,15 @@ dependencies = [ "version-compare 0.2.0", ] +[[package]] +name = "table" +version = "0.1.0" +source = "git+https://github.com/s3bk/table#a58c5b8c0669789edbbe3b195007b04246ac16a3" +dependencies = [ + "log", + "serde", +] + [[package]] name = "tao" version = "0.16.9" @@ -14569,7 +15840,7 @@ dependencies = [ "cocoa", "core-foundation", "core-graphics 0.22.3", - "crossbeam-channel", + "crossbeam-channel 0.5.13", "dirs-next", "dispatch", "gdk", @@ -14594,7 +15865,7 @@ dependencies = [ "objc", "once_cell", "parking_lot 0.12.3", - "png", + "png 0.17.13", "raw-window-handle 0.5.2", "scopeguard", "serde", @@ -14705,7 +15976,7 @@ dependencies = [ "tauri-utils", "tempfile", "thiserror", - "time", + "time 0.3.36", "tokio", "url", "uuid 1.10.0", @@ -14745,7 +16016,7 @@ dependencies = [ "ico", "json-patch 2.0.0", "plist", - "png", + "png 0.17.13", "proc-macro2", "quote", "regex", @@ -14755,7 +16026,7 @@ dependencies = [ "sha2 0.10.8", "tauri-utils", "thiserror", - "time", + "time 0.3.36", "uuid 1.10.0", "walkdir", ] @@ -14841,7 +16112,7 @@ dependencies = [ "dunce", "glob", "heck 0.5.0", - "html5ever", + "html5ever 0.26.0", "infer", "json-patch 2.0.0", "kuchikiki", @@ -15007,6 +16278,22 @@ dependencies = [ "serde", ] +[[package]] +name = "texting_robots" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82a718a28dda2e67ad6e0464597b58eae39e2e4d0451e03d1028d71e81bb4a" +dependencies = [ + "anyhow", + "bstr 0.2.17", + "lazy_static", + "nom 7.1.3", + "percent-encoding", + "regex", + "thiserror", + "url", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -15073,6 +16360,17 @@ dependencies = [ "weezl", ] +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi 0.3.9", +] + [[package]] name = "time" version = "0.3.36" @@ -15164,13 +16462,13 @@ dependencies = [ "itertools 0.9.0", "lazy_static", "log", - "macro_rules_attribute", + "macro_rules_attribute 0.1.3", "monostate", "onig", "paste", "rand 0.8.5", "rayon", - "rayon-cond", + "rayon-cond 0.1.0", "regex", "regex-syntax 0.7.5", "reqwest", @@ -15183,6 +16481,38 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "tokenizers" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e500fad1dd3af3d626327e6a3fe5050e664a6eaa4708b8ca92f1794aaf73e6fd" +dependencies = [ + "aho-corasick 1.1.3", + "derive_builder 0.20.1", + "esaxx-rs", + "getrandom 0.2.15", + "indicatif 0.17.8", + "itertools 0.12.1", + "lazy_static", + "log", + "macro_rules_attribute 0.2.0", + "monostate", + "onig", + "paste", + "rand 0.8.5", + "rayon", + "rayon-cond 0.3.0", + "regex", + "regex-syntax 0.8.4", + "serde", + "serde_json", + "spm_precompiled", + "thiserror", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.40.0" @@ -15327,6 +16657,8 @@ dependencies = [ "bytes", "futures-core", "futures-sink", + "futures-util", + "hashbrown 0.14.5", "pin-project-lite", "tokio", ] @@ -15530,7 +16862,7 @@ dependencies = [ "sharded-slab", "smallvec", "thread_local", - "time", + "time 0.3.36", "tracing", "tracing-core", "tracing-log 0.2.0", @@ -15669,6 +17001,43 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http 1.1.0", + "httparse", + "log", + "rand 0.8.5", + "sha1", + "thiserror", + "utf-8", +] + +[[package]] +name = "tuple" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb9f6bd73479481158ba8ee3edf17aca93354623d13f02e96a2014fdbc1c37e" +dependencies = [ + "num-traits", + "serde", +] + +[[package]] +name = "tuple" +version = "0.5.2" +source = "git+https://github.com/s3bk/tuple/#bcdc3d9fcf5f93ab8310e84f139ccaab6dd6acc7" +dependencies = [ + "num-traits", + "serde", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -15969,6 +17338,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "unicode-properties" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ea75f83c0137a9b98608359a5f1af8144876eb67bcb1ce837368e906a9f524" + [[package]] name = "unicode-segmentation" version = "1.11.0" @@ -16077,6 +17452,7 @@ dependencies = [ "rustls-pki-types", "serde", "serde_json", + "socks", "url", "webpki-roots 0.26.5", ] @@ -16342,6 +17718,12 @@ version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -16728,6 +18110,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webbrowser" version = "0.8.15" @@ -16982,6 +18374,16 @@ dependencies = [ "web-sys", ] +[[package]] +name = "whatlang" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0" +dependencies = [ + "hashbrown 0.14.5", + "once_cell", +] + [[package]] name = "which" version = "4.4.2" @@ -17007,6 +18409,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "which" +version = "6.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" +dependencies = [ + "either", + "home", + "rustix 0.38.35", + "winsafe", +] + [[package]] name = "whoami" version = "1.5.1" @@ -17599,6 +19013,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wl-clipboard-rs" version = "0.8.1" @@ -17629,13 +19049,13 @@ dependencies = [ "block", "cocoa", "core-graphics 0.22.3", - "crossbeam-channel", + "crossbeam-channel 0.5.13", "dunce", "gdk", "gio", "glib", "gtk", - "html5ever", + "html5ever 0.26.0", "http 0.2.12", "kuchikiki", "libc", @@ -17747,7 +19167,7 @@ dependencies = [ "oid-registry", "rusticata-macros", "thiserror", - "time", + "time 0.3.36", ] [[package]] @@ -17773,6 +19193,18 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "539a77ee7c0de333dcc6da69b177380a0b81e0dacfa4f7344c465a36871ee601" +[[package]] +name = "xml5ever" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9234163818fd8e2418fcde330655e757900d4236acd8cc70fef345ef91f6d865" +dependencies = [ + "log", + "mac", + "markup5ever 0.10.1", + "time 0.1.45", +] + [[package]] name = "xml5ever" version = "0.17.0" @@ -17781,7 +19213,7 @@ checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" dependencies = [ "log", "mac", - "markup5ever", + "markup5ever 0.11.0", ] [[package]] @@ -17839,7 +19271,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ - "time", + "time 0.3.36", ] [[package]] @@ -17939,12 +19371,12 @@ dependencies = [ "bzip2", "constant_time_eq 0.1.5", "crc32fast", - "crossbeam-utils", + "crossbeam-utils 0.8.20", "flate2", "hmac 0.12.1", "pbkdf2 0.11.0", "sha1", - "time", + "time 0.3.36", "zstd", ] @@ -17956,7 +19388,7 @@ checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" dependencies = [ "arbitrary", "crc32fast", - "crossbeam-utils", + "crossbeam-utils 0.8.20", "displaydoc", "indexmap 2.5.0", "num_enum 0.7.3", @@ -17971,7 +19403,7 @@ checksum = "dc5e4288ea4057ae23afc69a4472434a87a2495cafce6632fd1c4ec9f5cf3494" dependencies = [ "arbitrary", "crc32fast", - "crossbeam-utils", + "crossbeam-utils 0.8.20", "displaydoc", "flate2", "indexmap 2.5.0", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c1708758b..cef62e28b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -44,7 +44,7 @@ syntect = "5.0" ctrlc = "3.4.0" llm = { git = "https://github.com/lucksus/llm", branch = "gguf", features = ["metal"] } -kalosm = { version = "0.3.0", git = "https://github.com/coasys/floneum.git", rev = "44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75" } +kalosm = { version = "0.3.2", git = "https://github.com/coasys/floneum.git", rev = "44e05c5fe6fc72eaeaf6ccefbf8269cda54aca75", features = ["language"] } serde_cbor = "0.11" #holochain_kitsune_p2p = "0.3.0-beta-dev.37" kitsune_p2p_types = { version = "0.3.2-rc.0", git = "https://github.com/coasys/holochain.git", rev = "10841e49b28c17c3cb428680e2bc9259bf4ec739" } From 0ef599b69fb4c6f60229d8c20a31c7b04377f2b5 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Tue, 3 Sep 2024 15:41:13 +0200 Subject: [PATCH 13/14] Activate eve in ad4m binary --- cli/src/ad4m.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cli/src/ad4m.rs b/cli/src/ad4m.rs index becca7a0f..f9487d081 100644 --- a/cli/src/ad4m.rs +++ b/cli/src/ad4m.rs @@ -23,9 +23,10 @@ mod neighbourhoods; mod perspectives; mod repl; mod runtime; +mod eve; use crate::{ - agent::*, expression::*, languages::*, neighbourhoods::*, perspectives::*, runtime::*, + agent::*, expression::*, languages::*, neighbourhoods::*, perspectives::*, runtime::*, eve::* }; use ad4m_client::*; use anyhow::{Context, Result}; @@ -112,6 +113,10 @@ enum Domain { }, /// Print the executor log Log, + Eve { + #[command(subcommand)] + command: EveCommands, + }, } async fn get_ad4m_client(args: &ClapApp) -> Result { @@ -165,7 +170,8 @@ async fn main() -> Result<()> { ) })?; println!("{}", log); - } + }, + Domain::Eve { command } => eve::run(command).await?, } Ok(()) From e8cf6368314ae2817d404022207ea9b3d3d61d3c Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Tue, 3 Sep 2024 15:45:50 +0200 Subject: [PATCH 14/14] fmt --- cli/src/ad4m.rs | 6 ++-- cli/src/eve.rs | 94 ++++++++++++++++++++++++------------------------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/cli/src/ad4m.rs b/cli/src/ad4m.rs index f9487d081..4028135c7 100644 --- a/cli/src/ad4m.rs +++ b/cli/src/ad4m.rs @@ -17,16 +17,16 @@ mod util; mod agent; mod bootstrap_publish; mod dev; +mod eve; mod expression; mod languages; mod neighbourhoods; mod perspectives; mod repl; mod runtime; -mod eve; use crate::{ - agent::*, expression::*, languages::*, neighbourhoods::*, perspectives::*, runtime::*, eve::* + agent::*, eve::*, expression::*, languages::*, neighbourhoods::*, perspectives::*, runtime::*, }; use ad4m_client::*; use anyhow::{Context, Result}; @@ -170,7 +170,7 @@ async fn main() -> Result<()> { ) })?; println!("{}", log); - }, + } Domain::Eve { command } => eve::run(command).await?, } diff --git a/cli/src/eve.rs b/cli/src/eve.rs index 3c524882d..fa2363bb9 100644 --- a/cli/src/eve.rs +++ b/cli/src/eve.rs @@ -1,12 +1,11 @@ - -use std::io::Write; -use llm::Model; +use anyhow::Result; +use clap::Subcommand; use llm::InferenceResponse; +use llm::InferenceSession; +use llm::Model; use std::convert::Infallible; use std::fs::File; -use llm::InferenceSession; -use anyhow::Result; -use clap::Subcommand; +use std::io::Write; #[derive(Debug, Subcommand)] pub enum EveCommands { @@ -28,8 +27,7 @@ provide the the latest version to Perspect3ve to be written to the perspective's by wrapping the code to commit with "WRITE-SDNA:" and ":ANDS-ETIRW"` <> "##; - - + const HISTORY1: &str = r##" Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` @@ -129,7 +127,6 @@ p3_instance_color(cjdotdng, Instance, "#00FF00") :- property_getter(c, Instance, Done. "##; - const REDUCED: &str = r##" Eve: Hi, I'm Eve. I'm your personal assistant AI. I'm here to help you create AD4M Subject classes. User: Create an ADAM Subject class called "Todo". It should have a "state" and a "title" property. The title should resolve to a string applying the literal language. It also should have a "comments" collection.` @@ -159,11 +156,9 @@ collection_setter(cjdotdng, "comments", '[{action: "collectionSetter", source: " What icon should it have in the toolbar? (You can choose from the "Bootstrap" icon set) "##; - pub async fn run(command: EveCommands) -> Result<()> { println!("Loading model..."); - // load a GGML model from disk let llama = llm::load::( // path to GGML file @@ -173,7 +168,6 @@ pub async fn run(command: EveCommands) -> Result<()> { Default::default(), // load progress callback |_| {}, - ) .unwrap_or_else(|err| panic!("Failed to load model: {err}")); @@ -190,54 +184,62 @@ pub async fn run(command: EveCommands) -> Result<()> { EveCommands::Train => { println!("Training Eve..."); - println!("Ad hoc training model for ADAM subject classes..."); - // use the model to generate text from a prompt let mut session = llama.start_session(Default::default()); - + for p in vec![SYSTEM, HISTORY1, HISTORY2] { - session.feed_prompt( - &llama, - p, - &mut Default::default(), - llm::feed_prompt_callback(|resp| match resp { - llm::InferenceResponse::PromptToken(t) - | llm::InferenceResponse::InferredToken(t) => { - print_token(t); - - Ok::(llm::InferenceFeedback::Continue) - } - _ => Ok(llm::InferenceFeedback::Continue), - }), - ).unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); + session + .feed_prompt( + &llama, + p, + &mut Default::default(), + llm::feed_prompt_callback(|resp| match resp { + llm::InferenceResponse::PromptToken(t) + | llm::InferenceResponse::InferredToken(t) => { + print_token(t); + + Ok::( + llm::InferenceFeedback::Continue, + ) + } + _ => Ok(llm::InferenceFeedback::Continue), + }), + ) + .unwrap_or_else(|err| panic!("Failed to feed prompt: {err}")); } - println!("\n\nTraining done. Ready!"); - + unsafe { let snapshot: llm::InferenceSnapshotRef<'_> = session.get_snapshot(); - let snapshot_cbor = serde_cbor::to_vec(&snapshot).expect("Failed to serialize snapshot"); - File::create("/Users/nicolasluck/models/eve.snapshot.json").unwrap().write_all(snapshot_cbor.as_slice()).unwrap(); + let snapshot_cbor = + serde_cbor::to_vec(&snapshot).expect("Failed to serialize snapshot"); + File::create("/Users/nicolasluck/models/eve.snapshot.json") + .unwrap() + .write_all(snapshot_cbor.as_slice()) + .unwrap(); println!("Snapshot saved!"); } } EveCommands::Prompt => { println!("Prompting Eve..."); - let mut session = if let Ok(file) = File::open("/Users/nicolasluck/models/eve.snapshot.json") { - let snapshot: llm::InferenceSnapshot = serde_cbor::from_reader(file).expect("Failed to deserialize snapshot"); - InferenceSession::from_snapshot(snapshot, &llama).unwrap_or_else(|err| panic!("Failed to load snapshot: {err}")) - } else { - llama.start_session(Default::default()) - }; + let mut session = + if let Ok(file) = File::open("/Users/nicolasluck/models/eve.snapshot.json") { + let snapshot: llm::InferenceSnapshot = + serde_cbor::from_reader(file).expect("Failed to deserialize snapshot"); + InferenceSession::from_snapshot(snapshot, &llama) + .unwrap_or_else(|err| panic!("Failed to load snapshot: {err}")) + } else { + llama.start_session(Default::default()) + }; let mut rl = rustyline::Editor::<()>::new()?; let line = rl.readline(">> ")?; println!("\n\n"); - + let res = session.infer::( // model to use for text generation &llama, @@ -246,9 +248,7 @@ pub async fn run(command: EveCommands) -> Result<()> { // the prompt to use for text generation, as well as other // inference parameters &llm::InferenceRequest { - prompt: format!("User: {line}\nEve:") - .as_str() - .into(), + prompt: format!("User: {line}\nEve:").as_str().into(), parameters: &inference_parameters, play_back_previous_tokens: false, maximum_token_count: None, @@ -259,7 +259,9 @@ pub async fn run(command: EveCommands) -> Result<()> { |t| { let mut cont = true; match t { - InferenceResponse::PromptToken(t) | InferenceResponse::InferredToken(t) | llm::InferenceResponse::SnapshotToken(t) => { + InferenceResponse::PromptToken(t) + | InferenceResponse::InferredToken(t) + | llm::InferenceResponse::SnapshotToken(t) => { if t == "Eve:" || t == "User:" { cont = false; } @@ -274,7 +276,7 @@ pub async fn run(command: EveCommands) -> Result<()> { } else { Ok(llm::InferenceFeedback::Halt) } - } + }, ); match res { @@ -285,5 +287,3 @@ pub async fn run(command: EveCommands) -> Result<()> { } Ok(()) } - -