From 7eb27b3443ce8994408c0254395a40fd907bbbd3 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Wed, 6 Dec 2023 10:03:45 -0500
Subject: [PATCH] now it is letting the llm control the output

---
 README.md              |  7 +++++++
 embedding.py           |  8 ++++++--
 examples/main/main.cpp | 16 ++++++++--------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index d42b8a1bfc708..57f2e0c1af1dc 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,10 @@
+# llama.cpp python hack
+
+`./bin/main -m ~/.ollama/models/mistral --interactive -r STOP -p 'What is a tensor?'`
+
+This will call embedding.py; if the plugin's output ends in STOP, the result is fed back in as a new prompt for the LLM.
+
+
 # llama.cpp
 
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
diff --git a/embedding.py b/embedding.py
index d6ff4fc2bf346..5d95f663f62dd 100644
--- a/embedding.py
+++ b/embedding.py
@@ -1,2 +1,6 @@
-print("hello llama.cpp" + llm_input)
-llm_output = "Is it because of your mother that " + llm_input + "?";
+print("hello llama.cpp, got input:\n" + llm_input + "\n")
+
+if len(llm_input) > 20:
+    llm_output = "Reinterpret with emojis " + llm_input + "?\nSTOP"
+else:
+    llm_output = llm_input
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 6e69658176213..4cd63650c3fd7 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -497,7 +497,7 @@ int main(int argc, char ** argv) {
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
     //print_fields(*ctx_sampling);
-
+    std::string last_output; // the output from python at any time
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (!embd.empty()) {
@@ -706,7 +706,7 @@ int main(int argc, char ** argv) {
             // just print the whole thing
             const std::string last_output1 = output_ss.str();
             printf("%s",last_output1.c_str());
-            const std::string last_output = process_output_plugin(last_output1);
+            last_output = process_output_plugin(last_output1);
             printf("%s",last_output.c_str());
 
             // if not currently processing queued inputs;
@@ -716,7 +716,7 @@ int main(int argc, char ** argv) {
                 const int n_prev = 32;
                 const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
                 // now plugin the python :
-                const std::string last_output = process_output_plugin(last_output1);
+                const std::string partial_output = process_output_plugin(last_output1);
                 is_antiprompt = false;
 
                 // Check if each of the reverse prompts appears at the end of the output.
@@ -783,11 +783,11 @@ int main(int argc, char ** argv) {
                 console::set_display(console::user_input);
 
                 std::string line;
-                bool another_line = true;
-                do {
-                    another_line = console::readline(line, params.multiline_input);
-                    buffer += line;
-                } while (another_line);
+                //bool another_line = true;
+                //do {
+                //    another_line = console::readline(line, params.multiline_input);
+                buffer += last_output;
+                //} while (another_line);
 
                 // done taking input, reset color
                 console::set_display(console::reset);
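
Reviewer note: the patch's embedding.py reads a variable `llm_input` it never defines and assigns `llm_output`, which suggests process_output_plugin executes the script with the model's accumulated text bound to `llm_input` and then reads `llm_output` back. Under that assumption, a plugin against this interface might look like the sketch below; the `rewrite` helper and its prompt text are illustrative and not part of the patch.

```python
# Sketch of an embedding.py plugin, assuming the C++ side (process_output_plugin)
# binds the model's accumulated text to the global `llm_input` before running this
# file and reads the global `llm_output` back afterwards. Run standalone it would
# raise NameError, because `llm_input` only exists inside the host process.

def rewrite(text: str) -> str:
    """Turn the model's last output into the next prompt."""
    text = text.strip()
    if not text:
        # Nothing generated yet: hand the text back unchanged, without STOP,
        # so the main loop does not start a new prompt.
        return text
    # Illustrative transformation: ask the model to condense its own answer.
    # Ending with STOP (the -r antiprompt) is what cues the patched main loop
    # to use this string as the next prompt.
    return "Summarize the previous answer in one sentence: " + text + "\nSTOP"

llm_output = rewrite(llm_input)
```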