From 7f000734767703b31ca79f67a8f1bc6ec14d4350 Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Thu, 26 Sep 2024 19:11:03 -0700
Subject: [PATCH 1/7] Initial sketch of ell.interactive.

---
 src/ell/lmp/interactive.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 src/ell/lmp/interactive.py

diff --git a/src/ell/lmp/interactive.py b/src/ell/lmp/interactive.py
new file mode 100644
index 00000000..0d5759e0
--- /dev/null
+++ b/src/ell/lmp/interactive.py
@@ -0,0 +1,33 @@
+from contextlib import contextmanager
+
+from .complex import complex as ell_complex
+from ..types import Chat
+
+
+@contextmanager
+def interactive(lmp, messages: List[Message]):
+  """Creates an interactive, append-mode session on top of an LMP function."""
+
+  @ell_complex(*args, **kwargs)
+  def interactive(messages: Chat) -> Chat:
+    return messages
+
+  class _InteractiveSession():
+    def __init__(self):
+      self._system_prompt = None
+      self._messages = messages[:]
+
+    def set_system_prompt(self, prompt):
+      self._system_prompt = prompt
+
+    def send(self, message = None):
+      if message:
+        self._messages.append(message)
+
+      return interactive(
+        [self._system_prompt] + self._messages
+      )
+
+  sess = _InteractiveSession()
+
+  yield session

From 02042089a527447ef1ffef572784c8a3d6cb29c1 Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Fri, 27 Sep 2024 02:54:47 -0700
Subject: [PATCH 2/7] Add a working E2E example for ell.interactive.

---
 examples/interactive_tool_diff.py | 93 +++++++++++++++++++++++++++++++
 src/ell/__init__.py               |  1 +
 src/ell/lmp/interactive.py        | 49 +++++++++-------
 3 files changed, 123 insertions(+), 20 deletions(-)
 create mode 100644 examples/interactive_tool_diff.py

diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py
new file mode 100644
index 00000000..b8bab928
--- /dev/null
+++ b/examples/interactive_tool_diff.py
@@ -0,0 +1,93 @@
+import logging
+import subprocess
+from datetime import datetime
+from pathlib import Path
+from textwrap import dedent
+from typing import List
+
+import anthropic
+import ell
+from pydantic import Field
+
+
+logger = logging.getLogger(__name__)
+
+client = anthropic.Anthropic()
+
+
+def _validate_diff(diff: str) -> subprocess.CompletedProcess:
+  logger.info(f"Validating diff: {diff}")
+  return subprocess.run(
+    ["patch", "-p1", "--dry-run"],
+    input=diff.encode("utf-8"),
+    capture_output=True,
+    check=False
+  )
+
+
+@ell.tool()
+def apply_diff(
+  diff: str = Field(description="The unified diff to apply."),
+) -> str | None:
+  """Applies a unified diff to a local workspace using `patch -p1` and returns a natural language result."""
+  logger.info(f"Tool call: apply_diff")
+  result = _validate_diff(diff)
+  # TODO(kwlzn): Can we send a structured output to the LLM with e.g. tool_call_result.exit_code and stdout/stderr for it to natively interpret?
+  if result.returncode == 0:
+    logger.info("Tool call: apply_diff succeeded")
+    return f"Patch applied successfully: {result.stdout.decode()}"
+  else:
+    logger.warning("Tool call: apply_diff failed")
+    # Provide context to the LLM on the failure by proxying the output of `patch -p1`.
+    return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stderr.decode()}"
+
+
+def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
+  client = anthropic.Anthropic()
+  system_prompt = dedent("""\
+  You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request.
+  Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository.
+  Apply the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
+  Use chain-of-thought reasoning to generate the code and explain your work in your response.
+  Carefully analyze any tool call results for any syntax issues and make sure to correct them in your response.
+  """)
+  repo_path = Path(repo)
+  code_file = next(repo_path.glob(glob)).relative_to(repo_path)
+  code = f"<file:{code_file}>\n{code_file.read_text()}\n</file:{code_file}>"
+
+  with ell.interactive(
+    model="claude-3-5-sonnet-20240620",
+    client=client,
+    tools=[apply_diff],
+    max_tokens=1024,
+    temperature=0.5       # This seems to make Claude fail a few times before getting it right.
+  ) as session:
+    # Set the system prompt without making a request.
+    session.set_system_prompt(system_prompt)
+
+    for i, prompt in enumerate(prompts):
+      # Send the code context on the first message, but not subsequent ones.
+      if i == 0: prompt = f"{code}\n\n{prompt}"
+      session.send(prompt)
+
+
+def main():
+  logging.basicConfig(
+    format='%(asctime)s %(levelname)-8s] %(message)s',
+    level=logging.INFO,
+    datefmt='%Y-%m-%d %H:%M:%S'
+  )
+
+  ell.init(verbose=True, store="./ell_logs")
+
+  diff_loop(
+    prompts=[
+      "Add simple argument parsing to interactive_tool_diff.py so that a user can modify the max_loops parameter to the diff_loop function call with a -m flag when invoking from the CLI."
+    ],
+    glob="**/interactive_tool_diff.py",
+    max_loops=3
+  )
+
+
+if __name__ == "__main__":
+  main()
diff --git a/src/ell/__init__.py b/src/ell/__init__.py
index e7bd022a..2001e9e2 100644
--- a/src/ell/__init__.py
+++ b/src/ell/__init__.py
@@ -7,6 +7,7 @@
 from ell.lmp.simple import simple
 from ell.lmp.tool import tool
 from ell.lmp.complex import complex
+from ell.lmp.interactive import interactive
 from ell.types.message import system, user, assistant, Message, ContentBlock
 from ell.__version__ import __version__
 
diff --git a/src/ell/lmp/interactive.py b/src/ell/lmp/interactive.py
index 0d5759e0..f67d2c01 100644
--- a/src/ell/lmp/interactive.py
+++ b/src/ell/lmp/interactive.py
@@ -1,33 +1,42 @@
 from contextlib import contextmanager
 
 from .complex import complex as ell_complex
-from ..types import Chat
+from ..types.message import system as ell_system, user as ell_user
 
 
 @contextmanager
-def interactive(lmp, messages: List[Message]):
-  """Creates an interactive, append-mode session on top of an LMP function."""
+def interactive(*args, **kwargs):
+    """A contextmanager that creates an interactive, append-mode session using an inline LMP function."""
 
-  @ell_complex(*args, **kwargs)
-  def interactive(messages: Chat) -> Chat:
-    return messages
+    # TODO(kwlzn): Should this be specified/impl'd a different way for better viz/tracking in ell studio?
+    @ell_complex(*args, **kwargs)
+    def interactive(messages):
+        return messages
 
-  class _InteractiveSession():
-    def __init__(self):
-      self._system_prompt = None
-      self._messages = messages[:]
+    class _InteractiveSession():
+        def __init__(self):
+            self._system_prompt = None
+            self._messages = []
+            self._last_response = None
 
-    def set_system_prompt(self, prompt):
-      self._system_prompt = prompt
+        def set_system_prompt(self, prompt):
+            self._system_prompt = ell_system(prompt)
 
-    def send(self, message = None):
-      if message:
-        self._messages.append(message)
+        def send(self, message = None):
+            if message:
+                self._messages.append(ell_user(message))
 
-      return interactive(
-        [self._system_prompt] + self._messages
-      )
+            # Invoke the LMP function.
+            self._last_response = interactive([self._system_prompt] + self._messages)
 
-  sess = _InteractiveSession()
+            # Append the role="assistant" response to the messages.
+            self._messages.append(self._last_response)
 
-  yield session
+            # If we have a tool call, invoke it and append the tool call result as a user message.
+            if (tool_call_messages := self._last_response.call_tools_and_collect_as_message()):
+              self._messages.append(tool_call_messages)
+              self.send()
+
+            return self._last_response
+
+    yield _InteractiveSession()

From e1c4bec8bf288b9cb366ac245a1d7152033ebd0c Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Fri, 27 Sep 2024 02:55:49 -0700
Subject: [PATCH 3/7] Cleanup: drop the redundant module-level client and reorder diff_loop setup.

---
 examples/interactive_tool_diff.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py
index b8bab928..f4a0b017 100644
--- a/examples/interactive_tool_diff.py
+++ b/examples/interactive_tool_diff.py
@@ -12,8 +12,6 @@
 
 logger = logging.getLogger(__name__)
 
-client = anthropic.Anthropic()
-
 
 def _validate_diff(diff: str) -> subprocess.CompletedProcess:
   logger.info(f"Validating diff: {diff}")
@@ -43,17 +41,18 @@ def apply_diff(
 
 
 def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
+  repo_path = Path(repo)
+  code_file = next(repo_path.glob(glob)).relative_to(repo_path)
+  code = f"<file:{code_file}>\n{code_file.read_text()}\n</file:{code_file}>"
+
   client = anthropic.Anthropic()
+
   system_prompt = dedent("""\
   You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request.
   Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository.
   Apply the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
   Use chain-of-thought reasoning to generate the code and explain your work in your response.
-  Carefully analyze any tool call results for any syntax issues and make sure to correct them in your response.
   """)
-  repo_path = Path(repo)
-  code_file = next(repo_path.glob(glob)).relative_to(repo_path)
-  code = f"<file:{code_file}>\n{code_file.read_text()}\n</file:{code_file}>"
 
   with ell.interactive(
     model="claude-3-5-sonnet-20240620",

From 8f966aab33d39f6f703f31d91714561497a0f97b Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Fri, 27 Sep 2024 03:36:54 -0700
Subject: [PATCH 4/7] Improvements: return the post-tool-call response from send(); use multi-step example prompts.

---
 examples/interactive_tool_diff.py | 11 ++++++++---
 src/ell/lmp/interactive.py        | 16 ++++++++--------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py
index f4a0b017..a428a2fe 100644
--- a/examples/interactive_tool_diff.py
+++ b/examples/interactive_tool_diff.py
@@ -50,7 +50,9 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
   system_prompt = dedent("""\
   You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request.
   Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository.
-  Apply the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
+  Test application of the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
+  This will store the patch, but won't apply it to the local filesystem - so always generate a completely new patch for every request.
+  ONLY generate code through the `apply_diff` tool. Don't output any code in the response - IT WILL BE IGNORED.
   Use chain-of-thought reasoning to generate the code and explain your work in your response.
   """)
 
@@ -59,7 +61,7 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
     client=client,
     tools=[apply_diff],
     max_tokens=1024,
-    temperature=0.5       # This seems to make Claude fail a few times before getting it right.
+    temperature=0.3
   ) as session:
     # Set the system prompt without making a request.
     session.set_system_prompt(system_prompt)
@@ -81,7 +83,10 @@ def main():
 
   diff_loop(
     prompts=[
-      "Add simple argument parsing to interactive_tool_diff.py so that a user can modify the max_loops parameter to the diff_loop function call with a -m flag when invoking from the CLI."
+      "Add a simple argument parsing routine to interactive_tool_diff.py that provides a --help argument.",
+      "Now extend the argument parsing so the user can specify a model name that will be printed when the file is invoked. Make it default to gpt4o-mini.",
+      "Now modify the diff_loop function in interactive_tool_diff.py to accept a model parameter that is passed via this CLI arg",
+      "Now make the default argument for the model name be: claude-3-5-sonnet-20240620."
     ],
     glob="**/interactive_tool_diff.py",
     max_loops=3
diff --git a/src/ell/lmp/interactive.py b/src/ell/lmp/interactive.py
index f67d2c01..1081f757 100644
--- a/src/ell/lmp/interactive.py
+++ b/src/ell/lmp/interactive.py
@@ -17,7 +17,6 @@ class _InteractiveSession():
         def __init__(self):
             self._system_prompt = None
             self._messages = []
-            self._last_response = None
 
         def set_system_prompt(self, prompt):
             self._system_prompt = ell_system(prompt)
@@ -27,16 +26,17 @@ def send(self, message = None):
                 self._messages.append(ell_user(message))
 
             # Invoke the LMP function.
-            self._last_response = interactive([self._system_prompt] + self._messages)
+            response = interactive([self._system_prompt] + self._messages)
 
             # Append the role="assistant" response to the messages.
-            self._messages.append(self._last_response)
+            self._messages.append(response)
 
-            # If we have a tool call, invoke it and append the tool call result as a user message.
-            if (tool_call_messages := self._last_response.call_tools_and_collect_as_message()):
-              self._messages.append(tool_call_messages)
-              self.send()
+            # If we have tool calls, invoke them, append the tool call result as a user message and send it back to the LLM.
+            if response.tool_calls:
+              tool_call_message = response.call_tools_and_collect_as_message()
+              self._messages.append(tool_call_message)
+              return self.send()
 
-            return self._last_response
+            return response
 
     yield _InteractiveSession()

From 32d985083f0dbfc7966cfa75e0c8cd6060f23b69 Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Fri, 27 Sep 2024 03:39:25 -0700
Subject: [PATCH 5/7] Cleanups: drop a system prompt line and fix prompt punctuation in the example.

---
 examples/interactive_tool_diff.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py
index a428a2fe..ac2c96a0 100644
--- a/examples/interactive_tool_diff.py
+++ b/examples/interactive_tool_diff.py
@@ -52,7 +52,6 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
   Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository.
   Test application of the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
   This will store the patch, but won't apply it to the local filesystem - so always generate a completely new patch for every request.
-  ONLY generate code through the `apply_diff` tool. Don't output any code in the response - IT WILL BE IGNORED.
   Use chain-of-thought reasoning to generate the code and explain your work in your response.
   """)
 
@@ -85,7 +84,7 @@ def main():
     prompts=[
       "Add a simple argument parsing routine to interactive_tool_diff.py that provides a --help argument.",
       "Now extend the argument parsing so the user can specify a model name that will be printed when the file is invoked. Make it default to gpt4o-mini.",
-      "Now modify the diff_loop function in interactive_tool_diff.py to accept a model parameter that is passed via this CLI arg",
+      "Now modify the diff_loop function in interactive_tool_diff.py to accept a model parameter that is passed via this CLI arg.",
       "Now make the default argument for the model name be: claude-3-5-sonnet-20240620."
     ],
     glob="**/interactive_tool_diff.py",

From 1b02591275a7508beeb468ba144e22333b5f81c7 Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Fri, 27 Sep 2024 03:43:52 -0700
Subject: [PATCH 6/7] Tweak: rename the apply_diff tool to test_diff and raise temperature to 0.9.

---
 examples/interactive_tool_diff.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py
index ac2c96a0..1f9a708c 100644
--- a/examples/interactive_tool_diff.py
+++ b/examples/interactive_tool_diff.py
@@ -24,18 +24,18 @@ def _validate_diff(diff: str) -> subprocess.CompletedProcess:
 
 
 @ell.tool()
-def apply_diff(
-  diff: str = Field(description="The unified diff to apply."),
+def test_diff(
+  diff: str = Field(description="The unified diff to test."),
 ) -> str | None:
   """Applies a unified diff to a local workspace using `patch -p1` and returns a natural language result."""
-  logger.info(f"Tool call: apply_diff")
+  logger.info(f"Tool call: test_diff")
   result = _validate_diff(diff)
   # TODO(kwlzn): Can we send a structured output to the LLM with e.g. tool_call_result.exit_code and stdout/stderr for it to natively interpret?
   if result.returncode == 0:
-    logger.info("Tool call: apply_diff succeeded")
+    logger.info("Tool call: test_diff succeeded")
     return f"Patch applied successfully: {result.stdout.decode()}"
   else:
-    logger.warning("Tool call: apply_diff failed")
+    logger.warning("Tool call: test_diff failed")
     # Provide context to the LLM on the failure by proxying the output of `patch -p1`.
     return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stderr.decode()}"
 
@@ -50,7 +50,7 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
   system_prompt = dedent("""\
   You are a helpful, expert-level programmer that generates Python code changes to an existing codebase given a request.
   Your changes will be written to the filesystem using relative paths. You are in the root directory of the repository.
-  Test application of the changes by calling the `apply_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
+  Test application of the changes by calling the `test_diff` tool with a valid unified diff (like `diff` or `git diff` would generate).
   This will store the patch, but won't apply it to the local filesystem - so always generate a completely new patch for every request.
   Use chain-of-thought reasoning to generate the code and explain your work in your response.
   """)
@@ -58,9 +58,9 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
   with ell.interactive(
     model="claude-3-5-sonnet-20240620",
     client=client,
-    tools=[apply_diff],
+    tools=[test_diff],
     max_tokens=1024,
-    temperature=0.3
+    temperature=0.9
   ) as session:
     # Set the system prompt without making a request.
     session.set_system_prompt(system_prompt)

From be0e12b2b9b106d3fa735f1f9427d7a23387ca8c Mon Sep 17 00:00:00 2001
From: Kris Wilson <kw@onoku.com>
Date: Fri, 27 Sep 2024 04:30:57 -0700
Subject: [PATCH 7/7] Combine stderr+out for optimal feedback.

---
 examples/interactive_tool_diff.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/interactive_tool_diff.py b/examples/interactive_tool_diff.py
index 1f9a708c..b9df5ec5 100644
--- a/examples/interactive_tool_diff.py
+++ b/examples/interactive_tool_diff.py
@@ -18,7 +18,8 @@ def _validate_diff(diff: str) -> subprocess.CompletedProcess:
   return subprocess.run(
     ["patch", "-p1", "--dry-run"],
     input=diff.encode("utf-8"),
-    capture_output=True,
+    stdout=subprocess.PIPE,
+    stderr=subprocess.STDOUT,
     check=False
   )
 
@@ -37,7 +38,7 @@ def test_diff(
   else:
     logger.warning("Tool call: test_diff failed")
     # Provide context to the LLM on the failure by proxying the output of `patch -p1`.
-    return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stderr.decode()}"
+    return f"That patch is invalid, can you try again with the correct diff syntax? Here's the output of `patch -p1`:\n{result.stdout.decode()}"
 
 
 def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
@@ -60,7 +61,7 @@ def diff_loop(prompts: str, glob: str, repo: str = ".", max_loops: int = 3):
     client=client,
     tools=[test_diff],
     max_tokens=1024,
-    temperature=0.9
+    temperature=0.7
   ) as session:
     # Set the system prompt without making a request.
     session.set_system_prompt(system_prompt)