From 7f000734767703b31ca79f67a8f1bc6ec14d4350 Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Thu, 26 Sep 2024 19:11:03 -0700 Subject: [PATCH 1/7] Initial sketch of ell.interactive. --- src/ell/lmp/interactive.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/ell/lmp/interactive.py diff --git a/src/ell/lmp/interactive.py b/src/ell/lmp/interactive.py new file mode 100644 index 00000000..0d5759e0 --- /dev/null +++ b/src/ell/lmp/interactive.py @@ -0,0 +1,33 @@ +from contextlib import contextmanager + +from .complex import complex as ell_complex +from ..types import Chat + + +@contextmanager +def interactive(lmp, messages: List[Message]): + """Creates an interactive, append-mode session on top of an LMP function.""" + + @ell_complex(*args, **kwargs) + def interactive(messages: Chat) -> Chat: + return messages + + class _InteractiveSession(): + def __init__(self): + self._system_prompt = None + self._messages = messages[:] + + def set_system_prompt(self, prompt): + self._system_prompt = prompt + + def send(self, message = None): + if message: + self._messages.append(message) + + return interactive( + [self._system_prompt] + self._messages + ) + + sess = _InteractiveSession() + + yield session From 02042089a527447ef1ffef572784c8a3d6cb29c1 Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Fri, 27 Sep 2024 02:54:47 -0700 Subject: [PATCH 2/7] Add a working E2E example for ell.interactive. --- examples/interactive_tool_diff.py | 93 +++++++++++++++++++++++++++++++ src/ell/__init__.py | 1 + src/ell/lmp/interactive.py | 49 +++++++++------- 3 files changed, 123 insertions(+), 20 deletions(-) create mode 100644 examples/interactive_tool_diff.py diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py new file mode 100644 index 00000000..b8bab928 --- /dev/null +++ b/examples/interactive_tool_diff.py @@ -0,0 +1,93 @@ +import logging +import subprocess +from datetime import datetime +from pathlib import Path +from textwrap import dedent +from typing import List + +import anthropic +import ell +from pydantic import Field + + +logger = logging.getLogger(__name__) + +client = anthropic.Anthropic() + + +def _validate_diff(diff: str) -> subprocess.CompletedProcess: + logger.info(f"Validating diff: {diff}") + return subprocess.run( + ["patch", "-p1", "--dry-run"], + input=diff.encode("utf-8"), + capture_output=True, + check=False + ) + + +@ell.tool() +def apply_diff( + diff: str = Field(description="The unified diff to apply."), +) -> str | None: + """Applies a unified diff to a local workspace using `patch -p1` and returns a natural language result.""" + logger.info(f"Tool call: apply_diff") + result = _validate_diff(diff) + # TODO(kwlzn): Can we send a structured output to the LLM with e.g. tool_call_result.exit_code and stdout/stderr for it to natively interpret? + if result.returncode == 0: + logger.info("Tool call: apply_diff succeeded") + return f"Patch applied successfully: {result.stdout.decode()}" + else: + logger.warning("Tool call: apply_diff failed") + # Provide context to the LLM on the failure by proxying the output of `patch -p1`. + return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stderr.decode()}" + + +def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): + client = anthropic.Anthropic() + system_prompt = dedent("""\ + You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request. + Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository. + Apply the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). + Use chain-of-thought reasoning to generate the code and explain your work in your response. + Carefully analyze any tool call results for any syntax issues and make sure to correct them in your response. + """) + repo_path = Path(repo) + code_file = next(repo_path.glob(glob)).relative_to(repo_path) + code = f"\n{code_file.read_text()}\n" + + with ell.interactive( + model="claude-3-5-sonnet-20240620", + client=client, + tools=[apply_diff], + max_tokens=1024, + temperature=0.5 # This seems to make Claude fail a few times before getting it right. + ) as session: + # Set the system prompt without making a request. + session.set_system_prompt(system_prompt) + + for i, prompt in enumerate(prompts): + # Send the code context on the first message, but not subsequent ones. + if i == 0: prompt = f"{code}\n\n{prompt}" + session.send(prompt) + + +def main(): + logging.basicConfig( + format='%(asctime)s %(levelname)-8s] %(message)s', + level=logging.INFO, + datefmt='%Y-%m-%d %H:%M:%S' + ) + + ell.init(verbose=True, store="./ell_logs") + + diff_loop( + prompts=[ + "Add simple argument parsing to interactive_tool_diff.py so that a user can modify the max_loops parameter to the diff_loop function call with a -m flag when invoking from the CLI." + ], + glob="**/interactive_tool_diff.py", + max_loops=3 + ) + + +if __name__ == "__main__": + main() diff --git a/src/ell/__init__.py b/src/ell/__init__.py index e7bd022a..2001e9e2 100644 --- a/src/ell/__init__.py +++ b/src/ell/__init__.py @@ -7,6 +7,7 @@ from ell.lmp.simple import simple from ell.lmp.tool import tool from ell.lmp.complex import complex +from ell.lmp.interactive import interactive from ell.types.message import system, user, assistant, Message, ContentBlock from ell.__version__ import __version__ diff --git a/src/ell/lmp/interactive.py b/src/ell/lmp/interactive.py index 0d5759e0..f67d2c01 100644 --- a/src/ell/lmp/interactive.py +++ b/src/ell/lmp/interactive.py @@ -1,33 +1,42 @@ from contextlib import contextmanager from .complex import complex as ell_complex -from ..types import Chat +from ..types.message import system as ell_system, user as ell_user @contextmanager -def interactive(lmp, messages: List[Message]): - """Creates an interactive, append-mode session on top of an LMP function.""" +def interactive(*args, **kwargs): + """A contextmanager that creates an interactive, append-mode session using an inline LMP function.""" - @ell_complex(*args, **kwargs) - def interactive(messages: Chat) -> Chat: - return messages + # TODO(kwlzn): Should this be specified/impl'd a different way for better viz/tracking in ell studio? + @ell_complex(*args, **kwargs) + def interactive(messages): + return messages - class _InteractiveSession(): - def __init__(self): - self._system_prompt = None - self._messages = messages[:] + class _InteractiveSession(): + def __init__(self): + self._system_prompt = None + self._messages = [] + self._last_response = None - def set_system_prompt(self, prompt): - self._system_prompt = prompt + def set_system_prompt(self, prompt): + self._system_prompt = ell_system(prompt) - def send(self, message = None): - if message: - self._messages.append(message) + def send(self, message = None): + if message: + self._messages.append(ell_user(message)) - return interactive( - [self._system_prompt] + self._messages - ) + # Invoke the LMP function. + self._last_response = interactive([self._system_prompt] + self._messages) - sess = _InteractiveSession() + # Append the role="assistant" response to the messages. + self._messages.append(self._last_response) - yield session + # If we have a tool call, invoke it and append the tool call result as a user message. + if (tool_call_messages := self._last_response.call_tools_and_collect_as_message()): + self._messages.append(tool_call_messages) + self.send() + + return self._last_response + + yield _InteractiveSession() From e1c4bec8bf288b9cb366ac245a1d7152033ebd0c Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Fri, 27 Sep 2024 02:55:49 -0700 Subject: [PATCH 3/7] Cleanup. --- examples/interactive_tool_diff.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py index b8bab928..f4a0b017 100644 --- a/examples/interactive_tool_diff.py +++ b/examples/interactive_tool_diff.py @@ -12,8 +12,6 @@ logger = logging.getLogger(__name__) -client = anthropic.Anthropic() - def _validate_diff(diff: str) -> subprocess.CompletedProcess: logger.info(f"Validating diff: {diff}") @@ -43,17 +41,18 @@ def apply_diff( def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): + repo_path = Path(repo) + code_file = next(repo_path.glob(glob)).relative_to(repo_path) + code = f"\n{code_file.read_text()}\n" + client = anthropic.Anthropic() + system_prompt = dedent("""\ You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request. Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository. Apply the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). Use chain-of-thought reasoning to generate the code and explain your work in your response. - Carefully analyze any tool call results for any syntax issues and make sure to correct them in your response. """) - repo_path = Path(repo) - code_file = next(repo_path.glob(glob)).relative_to(repo_path) - code = f"\n{code_file.read_text()}\n" with ell.interactive( model="claude-3-5-sonnet-20240620", From 8f966aab33d39f6f703f31d91714561497a0f97b Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Fri, 27 Sep 2024 03:36:54 -0700 Subject: [PATCH 4/7] Improvements. --- examples/interactive_tool_diff.py | 11 ++++++++--- src/ell/lmp/interactive.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py index f4a0b017..a428a2fe 100644 --- a/examples/interactive_tool_diff.py +++ b/examples/interactive_tool_diff.py @@ -50,7 +50,9 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): system_prompt = dedent("""\ You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request. Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository. - Apply the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). + Test application of the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). + This will store the patch, but won't apply it to the local filesystem - so always generate a completely new patch for every request. + ONLY generate code through the `apply_diff` tool. Don't output any code in the response - IT WILL BE IGNORED. Use chain-of-thought reasoning to generate the code and explain your work in your response. """) @@ -59,7 +61,7 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): client=client, tools=[apply_diff], max_tokens=1024, - temperature=0.5 # This seems to make Claude fail a few times before getting it right. + temperature=0.3 ) as session: # Set the system prompt without making a request. session.set_system_prompt(system_prompt) @@ -81,7 +83,10 @@ def main(): diff_loop( prompts=[ - "Add simple argument parsing to interactive_tool_diff.py so that a user can modify the max_loops parameter to the diff_loop function call with a -m flag when invoking from the CLI." + "Add a simple argument parsing routine to interactive_tool_diff.py that provides a --help argument.", + "Now extend the argument parsing so the user can specify a model name that will be printed when the file is invoked. Make it default to gpt4o-mini.", + "Now modify the diff_loop function in interactive_tool_diff.py to accept a model parameter that is passed via this CLI arg", + "Now make the default argument for the model name be: claude-3-5-sonnet-20240620." ], glob="**/interactive_tool_diff.py", max_loops=3 diff --git a/src/ell/lmp/interactive.py b/src/ell/lmp/interactive.py index f67d2c01..1081f757 100644 --- a/src/ell/lmp/interactive.py +++ b/src/ell/lmp/interactive.py @@ -17,7 +17,6 @@ class _InteractiveSession(): def __init__(self): self._system_prompt = None self._messages = [] - self._last_response = None def set_system_prompt(self, prompt): self._system_prompt = ell_system(prompt) @@ -27,16 +26,17 @@ def send(self, message = None): self._messages.append(ell_user(message)) # Invoke the LMP function. - self._last_response = interactive([self._system_prompt] + self._messages) + response = interactive([self._system_prompt] + self._messages) # Append the role="assistant" response to the messages. - self._messages.append(self._last_response) + self._messages.append(response) - # If we have a tool call, invoke it and append the tool call result as a user message. - if (tool_call_messages := self._last_response.call_tools_and_collect_as_message()): - self._messages.append(tool_call_messages) - self.send() + # If we have tool calls, invoke them, append the tool call result as a user message and send it back to the LLM. + if response.tool_calls: + tool_call_message = response.call_tools_and_collect_as_message() + self._messages.append(tool_call_message) + return self.send() - return self._last_response + return response yield _InteractiveSession() From 32d985083f0dbfc7966cfa75e0c8cd6060f23b69 Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Fri, 27 Sep 2024 03:39:25 -0700 Subject: [PATCH 5/7] Cleanups. --- examples/interactive_tool_diff.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py index a428a2fe..ac2c96a0 100644 --- a/examples/interactive_tool_diff.py +++ b/examples/interactive_tool_diff.py @@ -52,7 +52,6 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository. Test application of the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). This will store the patch, but won't apply it to the local filesystem - so always generate a completely new patch for every request. - ONLY generate code through the `apply_diff` tool. Don't output any code in the response - IT WILL BE IGNORED. Use chain-of-thought reasoning to generate the code and explain your work in your response. """) @@ -85,7 +84,7 @@ def main(): prompts=[ "Add a simple argument parsing routine to interactive_tool_diff.py that provides a --help argument.", "Now extend the argument parsing so the user can specify a model name that will be printed when the file is invoked. Make it default to gpt4o-mini.", - "Now modify the diff_loop function in interactive_tool_diff.py to accept a model parameter that is passed via this CLI arg", + "Now modify the diff_loop function in interactive_tool_diff.py to accept a model parameter that is passed via this CLI arg.", "Now make the default argument for the model name be: claude-3-5-sonnet-20240620." ], glob="**/interactive_tool_diff.py", From 1b02591275a7508beeb468ba144e22333b5f81c7 Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Fri, 27 Sep 2024 03:43:52 -0700 Subject: [PATCH 6/7] Tweak. --- examples/interactive_tool_diff.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py index ac2c96a0..1f9a708c 100644 --- a/examples/interactive_tool_diff.py +++ b/examples/interactive_tool_diff.py @@ -24,18 +24,18 @@ def _validate_diff(diff: str) -> subprocess.CompletedProcess: @ell.tool() -def apply_diff( - diff: str = Field(description="The unified diff to apply."), +def test_diff( + diff: str = Field(description="The unified diff to test."), ) -> str | None: """Applies a unified diff to a local workspace using `patch -p1` and returns a natural language result.""" - logger.info(f"Tool call: apply_diff") + logger.info(f"Tool call: test_diff") result = _validate_diff(diff) # TODO(kwlzn): Can we send a structured output to the LLM with e.g. tool_call_result.exit_code and stdout/stderr for it to natively interpret? if result.returncode == 0: - logger.info("Tool call: apply_diff succeeded") + logger.info("Tool call: test_diff succeeded") return f"Patch applied successfully: {result.stdout.decode()}" else: - logger.warning("Tool call: apply_diff failed") + logger.warning("Tool call: test_diff failed") # Provide context to the LLM on the failure by proxying the output of `patch -p1`. return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stderr.decode()}" @@ -50,7 +50,7 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): system_prompt = dedent("""\ You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request. Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository. - Test application of the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). + Test application of the changes by calling the `test_diff` tool with a valid unified diff (like `diff` or `git diff` would generate). This will store the patch, but won't apply it to the local filesystem - so always generate a completely new patch for every request. Use chain-of-thought reasoning to generate the code and explain your work in your response. """) @@ -58,9 +58,9 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): with ell.interactive( model="claude-3-5-sonnet-20240620", client=client, - tools=[apply_diff], + tools=[test_diff], max_tokens=1024, - temperature=0.3 + temperature=0.9 ) as session: # Set the system prompt without making a request. session.set_system_prompt(system_prompt) From be0e12b2b9b106d3fa735f1f9427d7a23387ca8c Mon Sep 17 00:00:00 2001 From: Kris Wilson Date: Fri, 27 Sep 2024 04:30:57 -0700 Subject: [PATCH 7/7] Combine stderr+out for optimal feedback. --- examples/interactive_tool_diff.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py index 1f9a708c..b9df5ec5 100644 --- a/examples/interactive_tool_diff.py +++ b/examples/interactive_tool_diff.py @@ -18,7 +18,8 @@ def _validate_diff(diff: str) -> subprocess.CompletedProcess: return subprocess.run( ["patch", "-p1", "--dry-run"], input=diff.encode("utf-8"), - capture_output=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, check=False ) @@ -37,7 +38,7 @@ def test_diff( else: logger.warning("Tool call: test_diff failed") # Provide context to the LLM on the failure by proxying the output of `patch -p1`. - return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stderr.decode()}" + return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stdout.decode()}" def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): @@ -60,7 +61,7 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3): client=client, tools=[test_diff], max_tokens=1024, - temperature=0.9 + temperature=0.7 ) as session: # Set the system prompt without making a request. session.set_system_prompt(system_prompt)