From c1e0de32cc96b9712904ab9186b042521a21cbc9 Mon Sep 17 00:00:00 2001 From: imotai Date: Sun, 5 Nov 2023 15:53:43 +0800 Subject: [PATCH] feat:support llama agent --- agent/src/og_agent/agent_builder.py | 1 - agent/src/og_agent/base_agent.py | 30 +++++--- agent/src/og_agent/llama_agent.py | 105 +++++++++++++++------------- agent/src/og_agent/llama_client.py | 1 + agent/src/og_agent/openai_agent.py | 4 +- agent/src/og_agent/prompt.py | 69 ++++-------------- agent/tests/openai_agent_tests.py | 40 ----------- 7 files changed, 93 insertions(+), 157 deletions(-) diff --git a/agent/src/og_agent/agent_builder.py b/agent/src/og_agent/agent_builder.py index 9589976..9f7bb52 100644 --- a/agent/src/og_agent/agent_builder.py +++ b/agent/src/og_agent/agent_builder.py @@ -5,7 +5,6 @@ """ """ import json -from .prompt import OCTOGEN_FUNCTION_SYSTEM, OCTOGEN_CODELLAMA_SYSTEM from .llama_agent import LlamaAgent from .openai_agent import OpenaiAgent from .llama_client import LlamaClient diff --git a/agent/src/og_agent/base_agent.py b/agent/src/og_agent/base_agent.py index f915fe5..97a8286 100644 --- a/agent/src/og_agent/base_agent.py +++ b/agent/src/og_agent/base_agent.py @@ -61,7 +61,7 @@ class TypingState: CODE = 2 LANGUAGE = 3 MESSAGE = 4 - + OTHER = 5 class BaseAgent: @@ -70,7 +70,7 @@ def __init__(self, sdk): self.model_name = "" self.agent_memories = {} - def create_new_memory_with_default_prompt(self, user_name, user_id): + def create_new_memory_with_default_prompt(self, user_name, user_id, actions = ACTIONS): """ create a new memory for the user """ @@ -79,7 +79,7 @@ def create_new_memory_with_default_prompt(self, user_name, user_id): agent_prompt = AgentPrompt( role=ROLE, rules=RULES, - actions=ACTIONS, + actions=actions, output_format=OUTPUT_FORMAT, ) agent_memory = MemoryAgentMemory(memory_id, user_name, user_id) @@ -127,7 +127,7 @@ def _parse_arguments( parse the partial key with string value from json """ if is_code: - return TypingState.CODE, "", arguments, "python" + 
return TypingState.CODE, "", arguments, "python", "" state = TypingState.START explanation_str = "" code_str = "" @@ -142,6 +142,15 @@ def _parse_arguments( if state == TypingState.CODE and token[0] == 1: code_str = token[1] state = TypingState.START + if state == TypingState.LANGUAGE and token[0] == 1: + language_str = token[1] + state = TypingState.START + if state == TypingState.MESSAGE and token[0] == 1: + message_str = token[1] + state = TypingState.START + if state == TypingState.OTHER and token[0] == 1: + state = TypingState.START + if token[1] == "explanation": state = TypingState.EXPLANATION if token[1] == "code": @@ -150,6 +159,8 @@ def _parse_arguments( state = TypingState.LANGUAGE if token[1] == "message": state = TypingState.MESSAGE + if token[1] == "saved_filenames": + state = TypingState.OTHER else: # String if token_state == 9 and state == TypingState.EXPLANATION: @@ -210,10 +221,8 @@ async def _read_json_message( task_context, task_opt, ): - arguments = message.get("content", "") - typing_language = "text" return await self._send_typing_message( - arguments, + message.get("content", ""), queue, old_text_content, old_code_content, @@ -229,8 +238,8 @@ async def _send_typing_message( queue, old_text_content, old_code_content, - old_language_str, old_message_str, + old_language_str, task_context, task_opt, is_code=False, @@ -247,8 +256,9 @@ async def _send_typing_message( ) = self._parse_arguments(arguments, is_code) logger.debug( - f"argument explanation:{explanation_str} code:{code_str} language_str:{language_str} text_content:{old_text_content}" + f"argument explanation:{explanation_str} code:{code_str} language_str:{language_str} text_content:{old_text_content} old_message_str:{old_message_str}" ) + if explanation_str and old_text_content != explanation_str: typed_chars = explanation_str[len(old_text_content) :] new_text_content = explanation_str @@ -301,6 +311,7 @@ async def _send_typing_message( context_id=task_context.context_id, ) ) + return 
old_text_content, old_code_content, old_language_str, message_str return old_text_content, old_code_content, old_language_str, old_message_str @@ -375,6 +386,7 @@ async def extract_message( response_token_count + context_output_token_count ) if is_json_format: + ( new_text_content, new_code_content, diff --git a/agent/src/og_agent/llama_agent.py b/agent/src/og_agent/llama_agent.py index f1ee849..87c412b 100644 --- a/agent/src/og_agent/llama_agent.py +++ b/agent/src/og_agent/llama_agent.py @@ -13,8 +13,9 @@ from .llama_client import LlamaClient from og_proto.agent_server_pb2 import OnStepActionStart, TaskResponse, OnStepActionEnd, FinalAnswer, TypingContent from .base_agent import BaseAgent, TypingState, TaskContext +from og_memory.memory import AgentMemoryOption +from .prompt import FUNCTION_DIRECT_MESSAGE, FUNCTION_EXECUTE from .tokenizer import tokenize -from .prompt import OCTOGEN_CODELLAMA_SYSTEM import tiktoken logger = logging.getLogger(__name__) @@ -26,12 +27,16 @@ class LlamaAgent(BaseAgent): def __init__(self, client, kernel_sdk): super().__init__(kernel_sdk) self.client = client + self.memory_option = AgentMemoryOption( + show_function_instruction=True, disable_output_format=False + ) def _output_exception(self): return ( "Sorry, the LLM did return nothing, You can use a better performance model" ) + def _format_output(self, json_response): """ format the response and send it to the user @@ -94,7 +99,7 @@ async def handle_bash_code( state=task_context.to_context_state_proto(), response_type=TaskResponse.OnStepActionStart, on_step_action_start=OnStepActionStart( - input=tool_input, tool="execute_bash_code" + input=tool_input, tool="execute" ), ) ) @@ -108,7 +113,7 @@ async def handle_bash_code( await queue.put(respond) return function_result - async def handle_function( + async def handle_python_function( self, json_response, queue, context, task_context, task_opt ): code = json_response["code"] @@ -125,7 +130,7 @@ async def handle_function( 
state=task_context.to_context_state_proto(), response_type=TaskResponse.OnStepActionStart, on_step_action_start=OnStepActionStart( - input=tool_input, tool=json_response["action"] + input=tool_input, tool='execute' ), ) ) @@ -139,18 +144,19 @@ async def handle_function( await queue.put(respond) return function_result - async def call_llama(self, messages, queue, context, task_context, task_opt): + async def call_llama(self, agent_memory, queue, context, task_context, task_opt): """ call llama api """ input_token_count = 0 + messages = agent_memory.to_messages() for message in messages: if not message["content"]: continue input_token_count += len(encoding.encode(message["content"])) task_context.input_token_count += input_token_count start_time = time.time() - response = self.client.chat(messages, "codellama", max_tokens=2048) + response = self.client.chat(messages, "llama", max_tokens=2048) message = await self.extract_message( response, queue, @@ -162,19 +168,39 @@ async def call_llama(self, messages, queue, context, task_context, task_opt): ) return message - async def arun(self, question, queue, context, task_opt): + async def arun(self, request, queue, context, task_opt): """ run the agent """ - messages = [ - {"role": "system", "content": OCTOGEN_CODELLAMA_SYSTEM}, + question = request.task + context_id = ( + request.context_id + if request.context_id + else self.create_new_memory_with_default_prompt("", "", actions=[FUNCTION_EXECUTE, + FUNCTION_DIRECT_MESSAGE]) + ) + + if context_id not in self.agent_memories: + await queue.put( + TaskResponse( + state=task_context.to_context_state_proto(), + response_type=TaskResponse.OnSystemError, + error_msg="invalid context id", + context_id=context_id, + ) + ) + return + + agent_memory = self.agent_memories[context_id] + agent_memory.update_options(self.memory_option) + agent_memory.append_chat_message( {"role": "user", "content": question}, - ] + ) task_context = TaskContext( start_time=time.time(), 
output_token_count=0, input_token_count=0, - llm_name="codellama", + llm_name="llama", llm_respond_duration=0, ) try: @@ -198,7 +224,7 @@ async def arun(self, question, queue, context, task_opt): ) break message = await self.call_llama( - messages, + agent_memory, queue, context, task_context, @@ -225,21 +251,20 @@ async def arun(self, question, queue, context, task_opt): ) ) break - logger.debug(f" llama response {json_response}") if ( - json_response["action"] - in ["execute_python_code", "execute_bash_code"] - and json_response["code"] + 'function_call'in json_response and json_response["function_call"] == "execute" ): - messages.append(message) + agent_memory.append_chat_message(message) tools_mapping = { - "execute_python_code": self.handle_function, - "execute_bash_code": self.handle_bash_code, + "python": self.handle_python_function, + "bash": self.handle_bash_code, } - function_result = await tools_mapping[json_response["action"]]( - json_response, queue, context, task_context, task_opt + + function_result = await tools_mapping[json_response["arguments"]['language']]( + json_response['arguments'], queue, context, task_context, task_opt ) + logger.debug(f"the function result {function_result}") await queue.put( TaskResponse( @@ -255,52 +280,36 @@ async def arun(self, question, queue, context, task_opt): ), ) ) - - action_output = "the output of %s:" % json_response["action"] + action_output = "the output of %s:" % json_response["function_call"] current_question = "Give me the final answer summary if the above output of action meets the goal Otherwise try a new step" if function_result.has_result: - messages.append({ + agent_memory.append_chat_message({ "role": "user", "content": f"{action_output} \n {function_result.console_stdout}", }) - messages.append({"role": "user", "content": current_question}) + agent_memory.append_chat_message({"role": "user", "content": current_question}) elif function_result.has_error: - messages.append({ + 
agent_memory.append_chat_message({ "role": "user", "content": f"{action_output} \n {function_result.console_stderr}", }) current_question = f"Generate a new step to fix the above error" - messages.append({"role": "user", "content": current_question}) + agent_memory.append_chat_message({"role": "user", "content": current_question}) else: - messages.append({ + agent_memory.append_chat_message({ "role": "user", "content": f"{action_output} \n {function_result.console_stdout}", }) - messages.append({"role": "user", "content": current_question}) - elif ( - json_response["action"] == "show_sample_code" - and json_response["code"] - ): - await self.handle_show_sample_code( - json_response, queue, context, task_context - ) - result = self._format_output(json_response) - await queue.put( - TaskResponse( - state=task_context.to_context_state_proto(), - response_type=TaskResponse.OnFinalAnswer, - final_answer=FinalAnswer(answer=result), - ) - ) - break - else: - result = self._format_output(json_response) + agent_memory.append_chat_message({ + "role": "user", "content": current_question}) + elif 'function_call' in json_response and json_response["function_call"] == "direct_message": + message = json_response['arguments']['message'] await queue.put( TaskResponse( state=task_context.to_context_state_proto(), response_type=TaskResponse.OnFinalAnswer, final_answer=FinalAnswer( - answer=result if not task_opt.streaming else "" + answer=message if not task_opt.streaming else "" ), ) ) diff --git a/agent/src/og_agent/llama_client.py b/agent/src/og_agent/llama_client.py index 3b21820..234f247 100644 --- a/agent/src/og_agent/llama_client.py +++ b/agent/src/og_agent/llama_client.py @@ -37,6 +37,7 @@ async def chat(self, messages, model, temperature=0, max_tokens=1024, stop=[]): continue try: content = line[6:] + logger.debug(f"llama response content: {content}") message = json.loads(content) yield message except Exception as e: diff --git a/agent/src/og_agent/openai_agent.py 
b/agent/src/og_agent/openai_agent.py index f6901d9..c845812 100644 --- a/agent/src/og_agent/openai_agent.py +++ b/agent/src/og_agent/openai_agent.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2023 imotai +# SPDX-FileCopyrightText: 2023 imotai # SPDX-FileContributor: imotai # # SPDX-License-Identifier: Elastic-2.0 @@ -29,7 +29,7 @@ def __init__(self, model, sdk, is_azure=True): self.is_azure = is_azure self.model_name = model if not is_azure else "" self.memory_option = AgentMemoryOption( - show_function_instruction=False, disable_output_forat=True + show_function_instruction=False, disable_output_format=True ) async def call_openai(self, agent_memory, queue, context, task_context, task_opt): diff --git a/agent/src/og_agent/prompt.py b/agent/src/og_agent/prompt.py index 6f04cc2..82b6a15 100644 --- a/agent/src/og_agent/prompt.py +++ b/agent/src/og_agent/prompt.py @@ -17,16 +17,15 @@ "Use `execute` action to execute any code and `direct_message` action to send message to user", ] -ACTIONS = [ - ActionDesc( +FUNCTION_EXECUTE= ActionDesc( name="execute", - desc="This action executes code in your programming environment and returns the output. 
You must verify the output before giving the final answer.", + desc="This action executes code in your programming environment and returns the output", parameters=json.dumps({ "type": "object", "properties": { "explanation": { "type": "string", - "description": "the explanation about the bash code", + "description": "the explanation about the code parameters", }, "code": { "type": "string", @@ -44,8 +43,9 @@ }, "required": ["explanation", "code", "language"], }), - ), - ActionDesc( + ) + +FUNCTION_DIRECT_MESSAGE= ActionDesc( name="direct_message", desc="This action sends a direct message to user.", parameters=json.dumps({ @@ -58,60 +58,15 @@ }, "required": ["message"], }), - ), +) + +ACTIONS = [ + FUNCTION_EXECUTE ] OUTPUT_FORMAT = """The output format must be a JSON format with the following fields: -* message | function_call : The message to be displayed to the user. If the message is a function call, the function will be executed and the output will be displayed to the user. -* arguments: The arguments of the function call with the following fields, If the content is not a function call, this field should be empty object - * name (string): The name of the action - * explanation (string): The explanation about the code - * code (string): The sample code , python code or bash code to be executed for the action or an empty string if no action is specified - * saved_filenames (list of strings): A list of filenames that were created by the action input. - * language (string): The programming language used to execute the action. -""" - -OCTOGEN_FUNCTION_SYSTEM = """Firstly,You are the Programming Copilot called **Octogen**, a large language model designed to complete any goal by **executing code** - -Secondly, Being an expert in programming, you must follow the rules -* To complete the goal, write a plan and execute it step-by-step, limiting the number of steps to five. 
the following are examples - * The data visualization plan involves previewing, cleaning, and processing the data to generate the chart. -* Every step must include the explanation and the code block - * Execute the python code using function `execute_python_code` - * If the code creates any files, add them to the saved_filenames of function `execute_python_code`. - * If the code has any display data, save it as a file and add it to the saved_filenames of function `execute_python_code` -* You must try to correct your code when you get errors from the output -* Your code should produce output in Markdown format. For instance, if you're using a Pandas DataFrame to display data, make sure to utilize the to_markdown function. -* You must preview one row of the data when using pandas to process data - -Thirdly, the programming environment used to execute code has the following capabilities -* Internet connection: This allows the programming environment to access online resources, such as documentation, libraries, and code repositories. -* IPython kernel: This allows the programming environment to execute Python code -""" - -OCTOGEN_CODELLAMA_SYSTEM = """Firstly,You are the Programming Copilot called **Octogen**, a large language model designed to complete any goal by **executing code** - -Secondly, Being an expert in programming, you must follow the rules -* To achieve your goal, write a plan, execute it step-by-step, limiting the number of steps to five, and set `is_final_answer` to `true` for the last step. -* Every step must include an action with the explanation, the code block -* Ensure that the output of action meets the goal before providing the final answer. -* Your code should produce output in Markdown format. For instance, if you're using a Pandas DataFrame to display data, make sure to utilize the to_markdown function. - -Thirdly, the following actions are available: - -* execute_python_code: This action executes Python code and returns the output. 
You must verify the output before giving the final answer. -* execute_bash_code: This action executes Bash code and returns the output. You must verify the output before giving the final answer. -* show_sample_code: This action show the sample code for user. You must set the sample code to action_input -* no_action: This action does nothing. - - -Fourthly, the output format must be a JSON format with the following fields: -* explanation (string): The explanation about the action input -* action (string): The name of the action. -* code (string): The sample code , python code or base code to be executed for the action or an empty string if no action is specified -* saved_filenames (list of strings): A list of filenames that were created by the action input. -* language (string): The programming language used to execute the action. -* is_final_answer (boolean): Whether this is the final answer to the question. If it is, the value of this field should be true. Otherwise, the value should be false. +* function_call: The name of the action +* arguments: The arguments of the action """ OCTOGEN_CODELLAMA_MID_INS = """The above output of the %s determines whether the execution is successful. 
diff --git a/agent/tests/openai_agent_tests.py b/agent/tests/openai_agent_tests.py index d5926a0..3d30907 100644 --- a/agent/tests/openai_agent_tests.py +++ b/agent/tests/openai_agent_tests.py @@ -158,46 +158,6 @@ async def test_openai_agent_call_execute_bash_code(mocker, kernel_sdk): assert console_output[0].console_stdout == "hello world\n", "bad console output" -@pytest.mark.asyncio -async def test_openai_agent_direct_message(mocker, kernel_sdk): - kernel_sdk.connect() - arguments = { - "message": "hello world", - } - stream1 = FunctionCallPayloadStream("direct_message", json.dumps(arguments)) - call_mock = MultiCallMock([stream1]) - with mocker.patch( - "og_agent.openai_agent.openai.ChatCompletion.acreate", - side_effect=call_mock.call, - ) as mock_openai: - agent = openai_agent.OpenaiAgent("gpt4", kernel_sdk, is_azure=False) - queue = asyncio.Queue() - task_opt = ProcessOptions( - streaming=False, - llm_name="gpt4", - input_token_limit=100000, - output_token_limit=100000, - timeout=5, - ) - request = ProcessTaskRequest( - input_files=[], - task="say hello world", - context_id="", - options=task_opt, - ) - await agent.arun(request, queue, MockContext(), task_opt) - responses = [] - while True: - try: - response = await queue.get() - if not response: - break - responses.append(response) - except asyncio.QueueEmpty: - break - logger.info(responses) - assert responses[0].final_answer.answer == "hello world" - @pytest.mark.asyncio async def test_openai_agent_call_execute_python_code(mocker, kernel_sdk):