Merge pull request #89 from commit-0/features/trace

add option for recording eval result after each commit
commit-0 · Oct 30, 2024 · 9f5bbca · 9f5bbca
2 parents 152a327 + 1439727
commit 9f5bbca
Show file tree

Hide file tree

Showing 8 changed files with 102 additions and 50 deletions.
diff --git a/agent/README.md b/agent/README.md
@@ -38,6 +38,8 @@ Available options include:
 `--max-lint-info-length: int`: Maximum length of the lint information to use. [Default: `10000`]
 `--pre-commit-config-path: str`: Path to the pre-commit config file. This is needed for running `lint`. [Default: `.pre-commit-config.yaml`]
 `--agent-config-file: str`: Path to write the agent config. [Default: `.agent.yaml`]
+`--add-import-module-to-context: bool`: Add import module to context. [Default: `False`]
+`--record-test-for-each-commit: bool`: Record test results for each commit; when set to `True`, the results are saved to `experiment_log_dir/eval_results.json`. [Default: `False`]
 
 ## Running Agent
 Use `agent run [OPTIONS] BRANCH` to execute an agent on a specific branch.

diff --git a/agent/class_types.py b/agent/class_types.py
@@ -21,3 +21,4 @@ class AgentConfig:
     pre_commit_config_path: str
     run_tests: bool
     max_iteration: int
+    record_test_for_each_commit: bool
diff --git a/agent/cli.py b/agent/cli.py
@@ -135,6 +135,10 @@ def config(
         False,
         help="Run the lint on the entire directory",
     ),
+    record_test_for_each_commit: bool = typer.Option(
+        False,
+        help="Record the test for each commit",
+    ),
     pre_commit_config_path: str = typer.Option(
         ".pre-commit-config.yaml",
         help="Path to the pre-commit config file",
@@ -170,6 +174,7 @@ def config(
         "max_lint_info_length": max_lint_info_length,
         "run_entire_dir_lint": run_entire_dir_lint,
         "pre_commit_config_path": pre_commit_config_path,
+        "record_test_for_each_commit": record_test_for_each_commit,
     }
 
     write_agent_config(agent_config_file, agent_config)

diff --git a/agent/display.py b/agent/display.py
@@ -443,4 +443,6 @@ def __exit__(
         ) as json_file:
             json.dump(summary_data, json_file, indent=4)
 
-        print("\nSummary has been written to processing_summary.json")
+        print(
+            f"\nSummary has been written to processing_summary_{self.branch_name}.json"
+        )
diff --git a/agent/run_agent.py b/agent/run_agent.py
@@ -12,6 +12,7 @@
     get_lint_cmd,
     read_yaml_config,
 )
+import json
 import subprocess
 from agent.agents import AiderAgents
 from typing import Optional, Type, cast
@@ -20,7 +21,7 @@
 from commit0.harness.constants import SPLIT
 from commit0.harness.get_pytest_ids import main as get_tests
 from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance
-from commit0.cli import read_commit0_dot_file
+from commit0.cli import read_commit0_config_file
 from pathlib import Path
 from datetime import datetime
 from agent.display import TerminalDisplay
@@ -45,6 +46,21 @@ def __exit__(
         os.chdir(self.cwd)
 
 
+def run_eval_after_each_commit(
+    branch: str, backend: str, commit0_config_file: str
+) -> str:
+    """Run the eval command after each commit."""
+    eval_cmd = f"python -m commit0 evaluate --branch {branch} --backend {backend} --commit0-config-file {commit0_config_file} --timeout 100"
+    try:
+        result = subprocess.run(
+            eval_cmd, shell=True, capture_output=True, text=True, check=True
+        )
+        return result.stdout
+    except subprocess.CalledProcessError as e:
+        print(f"Error running eval command: {e}")
+        return e.stdout if e.stdout else str(e)
+
+
 def run_agent_for_repo(
     repo_base_dir: str,
     agent_config: AgentConfig,
@@ -58,7 +74,7 @@ def run_agent_for_repo(
 ) -> None:
     """Run Aider for a given repository."""
     # get repo info
-    commit0_config = read_commit0_dot_file(commit0_config_file)
+    commit0_config = read_commit0_config_file(commit0_config_file)
 
     assert "commit0" in commit0_config["dataset_name"]
     _, repo_name = example["repo"].split("/")
@@ -130,6 +146,7 @@ def run_agent_for_repo(
     )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
 
+    eval_results = {}
     # write agent_config to .agent.yaml in the log_dir for record
     agent_config_log_file = experiment_log_dir / ".agent.yaml"
     with open(agent_config_log_file, "w") as agent_config_file:
@@ -161,6 +178,12 @@ def run_agent_for_repo(
                     test_log_dir,
                     test_first=True,
                 )
+                if agent_config.record_test_for_each_commit:
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
+                    )
+
                 # after running the agent, update the money display
                 update_queue.put(
                     (
@@ -188,6 +211,12 @@ def run_agent_for_repo(
                     lint_log_dir,
                     lint_first=True,
                 )
+                if agent_config.record_test_for_each_commit:
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
+                    )
+
                 # after running the agent, update the money display
                 update_queue.put(
                     (
@@ -211,12 +240,22 @@ def run_agent_for_repo(
                     repo_name, agent_config.use_lint_info, commit0_config_file
                 )
                 agent_return = agent.run(message, "", lint_cmd, [f], file_log_dir)
+                if agent_config.record_test_for_each_commit:
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
+                    )
+
                 update_queue.put(
                     (
                         "update_money_display",
                         (repo_name, file_name, agent_return.last_cost),
                     )
                 )
+    if agent_config.record_test_for_each_commit:
+        with open(experiment_log_dir / "eval_results.json", "w") as f:
+            json.dump(eval_results, f)
+
     update_queue.put(("finish_repo", repo_name))
 
 
@@ -236,7 +275,7 @@ def run_agent(
     agent_config = AgentConfig(**config)
 
     commit0_config_file = os.path.abspath(commit0_config_file)
-    commit0_config = read_commit0_dot_file(commit0_config_file)
+    commit0_config = read_commit0_config_file(commit0_config_file)
 
     dataset = load_dataset(
         commit0_config["dataset_name"], split=commit0_config["dataset_split"]

diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
@@ -14,33 +14,17 @@
     read_yaml_config,
 )
 import subprocess
+import json
 from agent.agents import AiderAgents
-from typing import Optional, Type, cast
-from types import TracebackType
+from typing import cast
 from agent.class_types import AgentConfig
 from commit0.harness.constants import SPLIT
 from commit0.harness.get_pytest_ids import main as get_tests
 from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance
-from commit0.cli import read_commit0_dot_file
+from commit0.cli import read_commit0_config_file
 from pathlib import Path
 from datetime import datetime
-
-
-class DirContext:
-    def __init__(self, d: str):
-        self.dir = d
-        self.cwd = os.getcwd()
-
-    def __enter__(self):
-        os.chdir(self.dir)
-
-    def __exit__(
-        self,
-        exctype: Optional[Type[BaseException]],
-        excinst: Optional[BaseException],
-        exctb: Optional[TracebackType],
-    ) -> None:
-        os.chdir(self.cwd)
+from agent.run_agent import DirContext, run_eval_after_each_commit
 
 
 def run_agent_for_repo(
@@ -55,7 +39,7 @@ def run_agent_for_repo(
 ) -> None:
     """Run Aider for a given repository."""
     # get repo info
-    commit0_config = read_commit0_dot_file(commit0_config_file)
+    commit0_config = read_commit0_config_file(commit0_config_file)
 
     assert "commit0" in commit0_config["dataset_name"]
     _, repo_name = example["repo"].split("/")
@@ -123,6 +107,7 @@ def run_agent_for_repo(
         / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
+    eval_results = {}
 
     # write agent_config to .agent.yaml in the log_dir for record
     agent_config_log_file = experiment_log_dir / ".agent.yaml"
@@ -153,6 +138,11 @@ def run_agent_for_repo(
                     test_log_dir,
                     test_first=True,
                 )
+                if agent_config.record_test_for_each_commit:
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
+                    )
         elif agent_config.run_entire_dir_lint:
             # when unit test feedback is available, iterate over test files
             for lint_file in lint_files:
@@ -171,6 +161,11 @@ def run_agent_for_repo(
                     lint_log_dir,
                     lint_first=True,
                 )
+                if agent_config.record_test_for_each_commit:
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
+                    )
         else:
             # when unit test feedback is not available, iterate over target files to edit
             message = get_message(agent_config, repo_path, test_files=test_files)
@@ -185,6 +180,14 @@ def run_agent_for_repo(
                     repo_name, agent_config.use_lint_info, commit0_config_file
                 )
                 _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
+                if agent_config.record_test_for_each_commit:
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
+                    )
+    if agent_config.record_test_for_each_commit:
+        with open(experiment_log_dir / "eval_results.json", "w") as f:
+            json.dump(eval_results, f)
 
 
 def run_agent(
@@ -205,7 +208,7 @@ def run_agent(
     agent_config = AgentConfig(**config)
 
     commit0_config_file = os.path.abspath(commit0_config_file)
-    commit0_config = read_commit0_dot_file(commit0_config_file)
+    commit0_config = read_commit0_config_file(commit0_config_file)
 
     dataset = load_dataset(
         commit0_config["dataset_name"], split=commit0_config["dataset_split"]

diff --git a/commit0/cli.py b/commit0/cli.py
@@ -86,12 +86,12 @@ def check_valid(one: str, total: Union[list[str], dict[str, list[str]]]) -> None
         )
 
 
-def write_commit0_dot_file(dot_file_path: str, config: dict) -> None:
+def write_commit0_config_file(dot_file_path: str, config: dict) -> None:
     with open(dot_file_path, "w") as f:
         yaml.dump(config, f, default_flow_style=False)
 
 
-def read_commit0_dot_file(dot_file_path: str) -> dict:
+def read_commit0_config_file(dot_file_path: str) -> dict:
     # Check if the file exists before attempting to read it
     if not os.path.exists(dot_file_path):
         raise FileNotFoundError(
@@ -112,7 +112,7 @@ def setup(
     ),
     dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"),
     base_dir: str = typer.Option("repos/", help="Base directory to clone repos to"),
-    commit0_dot_file_path: str = typer.Option(
+    commit0_config_file: str = typer.Option(
         ".commit0.yaml", help="Storing path for stateful commit0 configs"
     ),
 ) -> None:
@@ -127,7 +127,7 @@ def setup(
     typer.echo(f"Dataset split: {highlight(dataset_split, Colors.ORANGE)}")
     typer.echo(f"Base directory: {highlight(base_dir, Colors.ORANGE)}")
     typer.echo(
-        f"Commit0 dot file path: {highlight(commit0_dot_file_path, Colors.ORANGE)}"
+        f"Commit0 dot file path: {highlight(commit0_config_file, Colors.ORANGE)}"
     )
 
     commit0.harness.setup.main(
@@ -138,8 +138,8 @@ def setup(
     )
 
     # after successfully setup, write the commit0 dot file
-    write_commit0_dot_file(
-        commit0_dot_file_path,
+    write_commit0_config_file(
+        commit0_config_file,
         {
             "dataset_name": dataset_name,
             "dataset_split": dataset_split,
@@ -152,7 +152,7 @@ def setup(
 @commit0_app.command()
 def build(
     num_workers: int = typer.Option(8, help="Number of workers"),
-    commit0_dot_file_path: str = typer.Option(
+    commit0_config_file: str = typer.Option(
         ".commit0.yaml",
         help="Path to the commit0 dot file, where the setup config is stored",
     ),
@@ -167,7 +167,7 @@ def build(
     """Build Commit0 split you choose in Setup Stage."""
     check_commit0_path()
 
-    commit0_config = read_commit0_dot_file(commit0_dot_file_path)
+    commit0_config = read_commit0_config_file(commit0_config_file)
     check_valid(commit0_config["repo_split"], SPLIT)
 
     typer.echo(
@@ -228,7 +228,7 @@ def test(
     rebuild: bool = typer.Option(
         False, "--rebuild", help="Whether to rebuild an image"
     ),
-    commit0_dot_file_path: str = typer.Option(
+    commit0_config_file: str = typer.Option(
         ".commit0.yaml",
         help="Path to the commit0 dot file, where the setup config is stored",
     ),
@@ -251,7 +251,7 @@ def test(
         repo_or_repo_path = repo_or_repo_path[:-1]
     check_valid(repo_or_repo_path.split("/")[-1], SPLIT_ALL)
 
-    commit0_config = read_commit0_dot_file(commit0_dot_file_path)
+    commit0_config = read_commit0_config_file(commit0_config_file)
 
     if reference:
         branch = "reference"
@@ -304,7 +304,7 @@ def evaluate(
     coverage: Annotated[
         bool, typer.Option("--coverage", help="Whether to get coverage information")
     ] = False,
-    commit0_dot_file_path: str = typer.Option(
+    commit0_config_file: str = typer.Option(
         ".commit0.yaml",
         help="Path to the commit0 dot file, where the setup config is stored",
     ),
@@ -315,7 +315,7 @@ def evaluate(
     if reference:
         branch = "reference"
 
-    commit0_config = read_commit0_dot_file(commit0_dot_file_path)
+    commit0_config = read_commit0_config_file(commit0_config_file)
     check_valid(commit0_config["repo_split"], SPLIT)
 
     typer.echo(f"Evaluating repository split: {commit0_config['repo_split']}")
@@ -344,7 +344,7 @@ def lint(
     files: Union[List[Path], None] = typer.Option(
         None, help="Files to lint. If not provided, all files will be linted."
     ),
-    commit0_dot_file_path: str = typer.Option(
+    commit0_config_file: str = typer.Option(
         ".commit0.yaml",
         help="Path to the commit0 dot file, where the setup config is stored",
     ),
@@ -358,7 +358,7 @@ def lint(
 ) -> None:
     """Lint given files if provided, otherwise lint all files in the base directory."""
     check_commit0_path()
-    commit0_config = read_commit0_dot_file(commit0_dot_file_path)
+    commit0_config = read_commit0_config_file(commit0_config_file)
     appended_files = None
     if files is not None:
         appended_files = []
@@ -383,14 +383,14 @@ def save(
     owner: str = typer.Argument(..., help="Owner of the repository"),
     branch: str = typer.Argument(..., help="Branch to save"),
     github_token: str = typer.Option(None, help="GitHub token for authentication"),
-    commit0_dot_file_path: str = typer.Option(
+    commit0_config_file: str = typer.Option(
         ".commit0.yaml",
         help="Path to the commit0 dot file, where the setup config is stored",
     ),
 ) -> None:
     """Save Commit0 split you choose in Setup Stage to GitHub."""
     check_commit0_path()
-    commit0_config = read_commit0_dot_file(commit0_dot_file_path)
+    commit0_config = read_commit0_config_file(commit0_config_file)
     check_valid(commit0_config["repo_split"], SPLIT)
 
     typer.echo(f"Saving repository split: {commit0_config['repo_split']}")