From c1e0de32cc96b9712904ab9186b042521a21cbc9 Mon Sep 17 00:00:00 2001 From: imotai Date: Sun, 5 Nov 2023 15:53:43 +0800 Subject: [PATCH] feat:support llama agent --- agent/src/og_agent/agent_builder.py | 1 - agent/src/og_agent/base_agent.py | 30 +++++--- agent/src/og_agent/llama_agent.py | 105 +++++++++++++++------------- agent/src/og_agent/llama_client.py | 1 + agent/src/og_agent/openai_agent.py | 4 +- agent/src/og_agent/prompt.py | 69 ++++-------------- agent/tests/openai_agent_tests.py | 40 ----------- 7 files changed, 93 insertions(+), 157 deletions(-) diff --git a/agent/src/og_agent/agent_builder.py b/agent/src/og_agent/agent_builder.py index 9589976..9f7bb52 100644 --- a/agent/src/og_agent/agent_builder.py +++ b/agent/src/og_agent/agent_builder.py @@ -5,7 +5,6 @@ """ """ import json -from .prompt import OCTOGEN_FUNCTION_SYSTEM, OCTOGEN_CODELLAMA_SYSTEM from .llama_agent import LlamaAgent from .openai_agent import OpenaiAgent from .llama_client import LlamaClient diff --git a/agent/src/og_agent/base_agent.py b/agent/src/og_agent/base_agent.py index f915fe5..97a8286 100644 --- a/agent/src/og_agent/base_agent.py +++ b/agent/src/og_agent/base_agent.py @@ -61,7 +61,7 @@ class TypingState: CODE = 2 LANGUAGE = 3 MESSAGE = 4 - + OTHER = 5 class BaseAgent: @@ -70,7 +70,7 @@ def __init__(self, sdk): self.model_name = "" self.agent_memories = {} - def create_new_memory_with_default_prompt(self, user_name, user_id): + def create_new_memory_with_default_prompt(self, user_name, user_id, actions = ACTIONS): """ create a new memory for the user """ @@ -79,7 +79,7 @@ def create_new_memory_with_default_prompt(self, user_name, user_id): agent_prompt = AgentPrompt( role=ROLE, rules=RULES, - actions=ACTIONS, + actions=actions, output_format=OUTPUT_FORMAT, ) agent_memory = MemoryAgentMemory(memory_id, user_name, user_id) @@ -127,7 +127,7 @@ def _parse_arguments( parse the partial key with string value from json """ if is_code: - return TypingState.CODE, "", arguments, "python" + 
return TypingState.CODE, "", arguments, "python", "" state = TypingState.START explanation_str = "" code_str = "" @@ -142,6 +142,15 @@ def _parse_arguments( if state == TypingState.CODE and token[0] == 1: code_str = token[1] state = TypingState.START + if state == TypingState.LANGUAGE and token[0] == 1: + language_str = token[1] + state = TypingState.START + if state == TypingState.MESSAGE and token[0] == 1: + message_str = token[1] + state = TypingState.START + if state == TypingState.OTHER and token[0] == 1: + state = TypingState.START + if token[1] == "explanation": state = TypingState.EXPLANATION if token[1] == "code": @@ -150,6 +159,8 @@ def _parse_arguments( state = TypingState.LANGUAGE if token[1] == "message": state = TypingState.MESSAGE + if token[1] == "saved_filenames": + state = TypingState.OTHER else: # String if token_state == 9 and state == TypingState.EXPLANATION: @@ -210,10 +221,8 @@ async def _read_json_message( task_context, task_opt, ): - arguments = message.get("content", "") - typing_language = "text" return await self._send_typing_message( - arguments, + message.get("content", ""), queue, old_text_content, old_code_content, @@ -229,8 +238,8 @@ async def _send_typing_message( queue, old_text_content, old_code_content, - old_language_str, old_message_str, + old_language_str, task_context, task_opt, is_code=False, @@ -247,8 +256,9 @@ async def _send_typing_message( ) = self._parse_arguments(arguments, is_code) logger.debug( - f"argument explanation:{explanation_str} code:{code_str} language_str:{language_str} text_content:{old_text_content}" + f"argument explanation:{explanation_str} code:{code_str} language_str:{language_str} text_content:{old_text_content} old_message_str:{old_message_str}" ) + if explanation_str and old_text_content != explanation_str: typed_chars = explanation_str[len(old_text_content) :] new_text_content = explanation_str @@ -301,6 +311,7 @@ async def _send_typing_message( context_id=task_context.context_id, ) ) + return 
old_text_content, old_code_content, old_language_str, message_str return old_text_content, old_code_content, old_language_str, old_message_str @@ -375,6 +386,7 @@ async def extract_message( response_token_count + context_output_token_count ) if is_json_format: + ( new_text_content, new_code_content, diff --git a/agent/src/og_agent/llama_agent.py b/agent/src/og_agent/llama_agent.py index f1ee849..87c412b 100644 --- a/agent/src/og_agent/llama_agent.py +++ b/agent/src/og_agent/llama_agent.py @@ -13,8 +13,9 @@ from .llama_client import LlamaClient from og_proto.agent_server_pb2 import OnStepActionStart, TaskResponse, OnStepActionEnd, FinalAnswer, TypingContent from .base_agent import BaseAgent, TypingState, TaskContext +from og_memory.memory import AgentMemoryOption +from .prompt import FUNCTION_DIRECT_MESSAGE, FUNCTION_EXECUTE from .tokenizer import tokenize -from .prompt import OCTOGEN_CODELLAMA_SYSTEM import tiktoken logger = logging.getLogger(__name__) @@ -26,12 +27,16 @@ class LlamaAgent(BaseAgent): def __init__(self, client, kernel_sdk): super().__init__(kernel_sdk) self.client = client + self.memory_option = AgentMemoryOption( + show_function_instruction=True, disable_output_format=False + ) def _output_exception(self): return ( "Sorry, the LLM did return nothing, You can use a better performance model" ) + def _format_output(self, json_response): """ format the response and send it to the user @@ -94,7 +99,7 @@ async def handle_bash_code( state=task_context.to_context_state_proto(), response_type=TaskResponse.OnStepActionStart, on_step_action_start=OnStepActionStart( - input=tool_input, tool="execute_bash_code" + input=tool_input, tool="execute" ), ) ) @@ -108,7 +113,7 @@ async def handle_bash_code( await queue.put(respond) return function_result - async def handle_function( + async def handle_python_function( self, json_response, queue, context, task_context, task_opt ): code = json_response["code"] @@ -125,7 +130,7 @@ async def handle_function( 
state=task_context.to_context_state_proto(), response_type=TaskResponse.OnStepActionStart, on_step_action_start=OnStepActionStart( - input=tool_input, tool=json_response["action"] + input=tool_input, tool='execute' ), ) ) @@ -139,18 +144,19 @@ async def handle_function( await queue.put(respond) return function_result - async def call_llama(self, messages, queue, context, task_context, task_opt): + async def call_llama(self, agent_memory, queue, context, task_context, task_opt): """ call llama api """ input_token_count = 0 + messages = agent_memory.to_messages() for message in messages: if not message["content"]: continue input_token_count += len(encoding.encode(message["content"])) task_context.input_token_count += input_token_count start_time = time.time() - response = self.client.chat(messages, "codellama", max_tokens=2048) + response = self.client.chat(messages, "llama", max_tokens=2048) message = await self.extract_message( response, queue, @@ -162,19 +168,39 @@ async def call_llama(self, messages, queue, context, task_context, task_opt): ) return message - async def arun(self, question, queue, context, task_opt): + async def arun(self, request, queue, context, task_opt): """ run the agent """ - messages = [ - {"role": "system", "content": OCTOGEN_CODELLAMA_SYSTEM}, + question = request.task + context_id = ( + request.context_id + if request.context_id + else self.create_new_memory_with_default_prompt("", "", actions=[FUNCTION_EXECUTE, + FUNCTION_DIRECT_MESSAGE]) + ) + + if context_id not in self.agent_memories: + await queue.put( + TaskResponse( + state=task_context.to_context_state_proto(), + response_type=TaskResponse.OnSystemError, + error_msg="invalid context id", + context_id=context_id, + ) + ) + return + + agent_memory = self.agent_memories[context_id] + agent_memory.update_options(self.memory_option) + agent_memory.append_chat_message( {"role": "user", "content": question}, - ] + ) task_context = TaskContext( start_time=time.time(), 
output_token_count=0, input_token_count=0, - llm_name="codellama", + llm_name="llama", llm_respond_duration=0, ) try: @@ -198,7 +224,7 @@ async def arun(self, question, queue, context, task_opt): ) break message = await self.call_llama( - messages, + agent_memory, queue, context, task_context, @@ -225,21 +251,20 @@ async def arun(self, question, queue, context, task_opt): ) ) break - logger.debug(f" llama response {json_response}") if ( - json_response["action"] - in ["execute_python_code", "execute_bash_code"] - and json_response["code"] + 'function_call'in json_response and json_response["function_call"] == "execute" ): - messages.append(message) + agent_memory.append_chat_message(message) tools_mapping = { - "execute_python_code": self.handle_function, - "execute_bash_code": self.handle_bash_code, + "python": self.handle_python_function, + "bash": self.handle_bash_code, } - function_result = await tools_mapping[json_response["action"]]( - json_response, queue, context, task_context, task_opt + + function_result = await tools_mapping[json_response["arguments"]['language']]( + json_response['arguments'], queue, context, task_context, task_opt ) + logger.debug(f"the function result {function_result}") await queue.put( TaskResponse( @@ -255,52 +280,36 @@ async def arun(self, question, queue, context, task_opt): ), ) ) - - action_output = "the output of %s:" % json_response["action"] + action_output = "the output of %s:" % json_response["function_call"] current_question = "Give me the final answer summary if the above output of action meets the goal Otherwise try a new step" if function_result.has_result: - messages.append({ + agent_memory.append_chat_message({ "role": "user", "content": f"{action_output} \n {function_result.console_stdout}", }) - messages.append({"role": "user", "content": current_question}) + agent_memory.append_chat_message({"role": "user", "content": current_question}) elif function_result.has_error: - messages.append({ + 
agent_memory.append_chat_message({ "role": "user", "content": f"{action_output} \n {function_result.console_stderr}", }) current_question = f"Generate a new step to fix the above error" - messages.append({"role": "user", "content": current_question}) + agent_memory.append_chat_message({"role": "user", "content": current_question}) else: - messages.append({ + agent_memory.append_chat_message({ "role": "user", "content": f"{action_output} \n {function_result.console_stdout}", }) - messages.append({"role": "user", "content": current_question}) - elif ( - json_response["action"] == "show_sample_code" - and json_response["code"] - ): - await self.handle_show_sample_code( - json_response, queue, context, task_context - ) - result = self._format_output(json_response) - await queue.put( - TaskResponse( - state=task_context.to_context_state_proto(), - response_type=TaskResponse.OnFinalAnswer, - final_answer=FinalAnswer(answer=result), - ) - ) - break - else: - result = self._format_output(json_response) + agent_memory.append_chat_message({ + "role": "user", "content": current_question}) + elif 'function_call' in json_response and json_response["function_call"] == "direct_message": + message = json_response['arguments']['message'] await queue.put( TaskResponse( state=task_context.to_context_state_proto(), response_type=TaskResponse.OnFinalAnswer, final_answer=FinalAnswer( - answer=result if not task_opt.streaming else "" + answer=message if not task_opt.streaming else "" ), ) ) diff --git a/agent/src/og_agent/llama_client.py b/agent/src/og_agent/llama_client.py index 3b21820..234f247 100644 --- a/agent/src/og_agent/llama_client.py +++ b/agent/src/og_agent/llama_client.py @@ -37,6 +37,7 @@ async def chat(self, messages, model, temperature=0, max_tokens=1024, stop=[]): continue try: content = line[6:] + logger.debug(f"llama response content: {content}") message = json.loads(content) yield message except Exception as e: diff --git a/agent/src/og_agent/openai_agent.py 
b/agent/src/og_agent/openai_agent.py index f6901d9..c845812 100644 --- a/agent/src/og_agent/openai_agent.py +++ b/agent/src/og_agent/openai_agent.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2023 imotai +# SPDX-FileCopyrightText: 2023 imotai # SPDX-FileContributor: imotai # # SPDX-License-Identifier: Elastic-2.0 @@ -29,7 +29,7 @@ def __init__(self, model, sdk, is_azure=True): self.is_azure = is_azure self.model_name = model if not is_azure else "" self.memory_option = AgentMemoryOption( - show_function_instruction=False, disable_output_forat=True + show_function_instruction=False, disable_output_format=True ) async def call_openai(self, agent_memory, queue, context, task_context, task_opt): diff --git a/agent/src/og_agent/prompt.py b/agent/src/og_agent/prompt.py index 6f04cc2..82b6a15 100644 --- a/agent/src/og_agent/prompt.py +++ b/agent/src/og_agent/prompt.py @@ -17,16 +17,15 @@ "Use `execute` action to execute any code and `direct_message` action to send message to user", ] -ACTIONS = [ - ActionDesc( +FUNCTION_EXECUTE= ActionDesc( name="execute", - desc="This action executes code in your programming environment and returns the output. 
You must verify the output before giving the final answer.", + desc="This action executes code in your programming environment and returns the output", parameters=json.dumps({ "type": "object", "properties": { "explanation": { "type": "string", - "description": "the explanation about the bash code", + "description": "the explanation about the code parameters", }, "code": { "type": "string", @@ -44,8 +43,9 @@ }, "required": ["explanation", "code", "language"], }), - ), - ActionDesc( + ) + +FUNCTION_DIRECT_MESSAGE= ActionDesc( name="direct_message", desc="This action sends a direct message to user.", parameters=json.dumps({ @@ -58,60 +58,15 @@ }, "required": ["message"], }), - ), +) + +ACTIONS = [ + FUNCTION_EXECUTE ] OUTPUT_FORMAT = """The output format must be a JSON format with the following fields: -* message | function_call : The message to be displayed to the user. If the message is a function call, the function will be executed and the output will be displayed to the user. -* arguments: The arguments of the function call with the following fields, If the content is not a function call, this field should be empty object - * name (string): The name of the action - * explanation (string): The explanation about the code - * code (string): The sample code , python code or bash code to be executed for the action or an empty string if no action is specified - * saved_filenames (list of strings): A list of filenames that were created by the action input. - * language (string): The programming language used to execute the action. -""" - -OCTOGEN_FUNCTION_SYSTEM = """Firstly,You are the Programming Copilot called **Octogen**, a large language model designed to complete any goal by **executing code** - -Secondly, Being an expert in programming, you must follow the rules -* To complete the goal, write a plan and execute it step-by-step, limiting the number of steps to five. 
the following are examples - * The data visualization plan involves previewing, cleaning, and processing the data to generate the chart. -* Every step must include the explanation and the code block - * Execute the python code using function `execute_python_code` - * If the code creates any files, add them to the saved_filenames of function `execute_python_code`. - * If the code has any display data, save it as a file and add it to the saved_filenames of function `execute_python_code` -* You must try to correct your code when you get errors from the output -* Your code should produce output in Markdown format. For instance, if you're using a Pandas DataFrame to display data, make sure to utilize the to_markdown function. -* You must preview one row of the data when using pandas to process data - -Thirdly, the programming environment used to execute code has the following capabilities -* Internet connection: This allows the programming environment to access online resources, such as documentation, libraries, and code repositories. -* IPython kernel: This allows the programming environment to execute Python code -""" - -OCTOGEN_CODELLAMA_SYSTEM = """Firstly,You are the Programming Copilot called **Octogen**, a large language model designed to complete any goal by **executing code** - -Secondly, Being an expert in programming, you must follow the rules -* To achieve your goal, write a plan, execute it step-by-step, limiting the number of steps to five, and set `is_final_answer` to `true` for the last step. -* Every step must include an action with the explanation, the code block -* Ensure that the output of action meets the goal before providing the final answer. -* Your code should produce output in Markdown format. For instance, if you're using a Pandas DataFrame to display data, make sure to utilize the to_markdown function. - -Thirdly, the following actions are available: - -* execute_python_code: This action executes Python code and returns the output. 
You must verify the output before giving the final answer. -* execute_bash_code: This action executes Bash code and returns the output. You must verify the output before giving the final answer. -* show_sample_code: This action show the sample code for user. You must set the sample code to action_input -* no_action: This action does nothing. - - -Fourthly, the output format must be a JSON format with the following fields: -* explanation (string): The explanation about the action input -* action (string): The name of the action. -* code (string): The sample code , python code or base code to be executed for the action or an empty string if no action is specified -* saved_filenames (list of strings): A list of filenames that were created by the action input. -* language (string): The programming language used to execute the action. -* is_final_answer (boolean): Whether this is the final answer to the question. If it is, the value of this field should be true. Otherwise, the value should be false. +* function_call: The name of the action +* arguments: The arguments of the action """ OCTOGEN_CODELLAMA_MID_INS = """The above output of the %s determines whether the execution is successful. 
diff --git a/agent/tests/openai_agent_tests.py b/agent/tests/openai_agent_tests.py index d5926a0..3d30907 100644 --- a/agent/tests/openai_agent_tests.py +++ b/agent/tests/openai_agent_tests.py @@ -158,46 +158,6 @@ async def test_openai_agent_call_execute_bash_code(mocker, kernel_sdk): assert console_output[0].console_stdout == "hello world\n", "bad console output" -@pytest.mark.asyncio -async def test_openai_agent_direct_message(mocker, kernel_sdk): - kernel_sdk.connect() - arguments = { - "message": "hello world", - } - stream1 = FunctionCallPayloadStream("direct_message", json.dumps(arguments)) - call_mock = MultiCallMock([stream1]) - with mocker.patch( - "og_agent.openai_agent.openai.ChatCompletion.acreate", - side_effect=call_mock.call, - ) as mock_openai: - agent = openai_agent.OpenaiAgent("gpt4", kernel_sdk, is_azure=False) - queue = asyncio.Queue() - task_opt = ProcessOptions( - streaming=False, - llm_name="gpt4", - input_token_limit=100000, - output_token_limit=100000, - timeout=5, - ) - request = ProcessTaskRequest( - input_files=[], - task="say hello world", - context_id="", - options=task_opt, - ) - await agent.arun(request, queue, MockContext(), task_opt) - responses = [] - while True: - try: - response = await queue.get() - if not response: - break - responses.append(response) - except asyncio.QueueEmpty: - break - logger.info(responses) - assert responses[0].final_answer.answer == "hello world" - @pytest.mark.asyncio async def test_openai_agent_call_execute_python_code(mocker, kernel_sdk):