diff --git a/README.md b/README.md
index d42b8a1bfc708..57f2e0c1af1dc 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,10 @@
+# llama.cpp python hack
+
+`./bin/main -m ~/.ollama/models/mistral --interactive -r STOP -p 'What is a tensor?'`
+
+This will call `embedding.py`; if the plugin's output ends with `STOP`, the result is fed back in as a new prompt for the LLM.
+
+
 # llama.cpp
 
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
diff --git a/embedding.py b/embedding.py
index d6ff4fc2bf346..5d95f663f62dd 100644
--- a/embedding.py
+++ b/embedding.py
@@ -1,2 +1,6 @@
-print("hello llama.cpp" + llm_input)
-llm_output = "Is it because of your mother that " + llm_input + "?";
+print("hello llama.cpp, got input:\n" + llm_input  + "\n")
+
+if len(llm_input) > 20:
+    llm_output = "Reinterpret with emojis " + llm_input + "?\nSTOP";
+else:
+    llm_output =  llm_input
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 6e69658176213..4cd63650c3fd7 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -497,7 +497,7 @@ int main(int argc, char ** argv) {
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
     //print_fields(*ctx_sampling);
-    
+    std::string last_output; // the output from python at any time    
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (!embd.empty()) {
@@ -706,7 +706,7 @@ int main(int argc, char ** argv) {
 	// just print the whole thing       	
 	const std::string last_output1 = output_ss.str();
 	printf("%s",last_output1.c_str());
-	const std::string last_output = process_output_plugin(last_output1);
+	last_output = process_output_plugin(last_output1);
 	printf("%s",last_output.c_str());
 		    
         // if not currently processing queued inputs;
@@ -716,7 +716,7 @@ int main(int argc, char ** argv) {
                 const int n_prev = 32;
                 const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
 		// now plugin the python :
-		const std::string last_output = process_output_plugin(last_output1);
+		const std::string partial_output = process_output_plugin(last_output1);
 
                 is_antiprompt = false;
                 // Check if each of the reverse prompts appears at the end of the output.
@@ -783,11 +783,11 @@ int main(int argc, char ** argv) {
                 console::set_display(console::user_input);
 
                 std::string line;
-                bool another_line = true;
-                do {
-                    another_line = console::readline(line, params.multiline_input);
-                    buffer += line;
-                } while (another_line);
+                //bool another_line = true;
+                //do {
+		//  another_line = console::readline(line, params.multiline_input);
+		buffer += last_output;
+		//} while (another_line);
 
                 // done taking input, reset color
                 console::set_display(console::reset);