Agent enhancement 3 (#580)
Support cloud experiments:
1. Upload local pickle files for `agent` and `result_history`
2. In cloud build: Install dependencies and run `new_result = agent.execute(result_history)` (see the sketch after this list)
3. Download the pickle file of `new_result`
4. Save cloud exp logs
5. If cloud build fails, return a default `new_result` representing
build failure.
6. Convert results into `status/**/result.json` for report generation.
7. Upload local OFG repo to cloud build, instead of asking cloud build
to pull from repo.
8. Make new `Result` class compatible with report generation.
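The flow above round-trips `agent` and `result_history` through dill files. A minimal, self-contained sketch of that round trip (an editor's illustration, not the actual `cloud_builder.py`; the real upload/download and Cloud Build invocation are not part of this diff, and the helpers below only assume the behaviour of their `utils` counterparts):

```python
import dill  # `utils.serialize_to_dill`/`deserialize_from_dill` presumably wrap dill.


def serialize_to_dill(obj, path: str) -> None:
    # Assumed behaviour of utils.serialize_to_dill.
    with open(path, 'wb') as f:
        dill.dump(obj, f)


def deserialize_from_dill(path: str):
    # Assumed behaviour of utils.deserialize_from_dill.
    with open(path, 'rb') as f:
        return dill.load(f)


def simulate_cloud_round_trip(agent, result_history: list, workspace: str):
    """Requester-side view of steps 1-3 above, run locally for illustration."""
    agent_path = f'{workspace}/agent.pkl'
    history_path = f'{workspace}/result_history.pkl'
    new_result_path = f'{workspace}/new_result.pkl'

    # 1. Upload: serialize the local agent and result history for the cloud build.
    serialize_to_dill(agent, agent_path)
    serialize_to_dill(result_history, history_path)

    # 2. Cloud build (simulated here): deserialize both files and run the agent,
    #    which is what BaseAgent.cloud_main() in this commit does.
    new_result = deserialize_from_dill(agent_path).execute(
        deserialize_from_dill(history_path))
    serialize_to_dill(new_result, new_result_path)

    # 3. Download: the requester reads back the serialized new result.
    return deserialize_from_dill(new_result_path)
```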

TODOs:
1. More exception handling.
2. More intermediate files (e.g., the local agent dialog).
DonggeLiu authored Sep 17, 2024
1 parent a440f81 commit 1c8d358
Showing 19 changed files with 722 additions and 130 deletions.
38 changes: 38 additions & 0 deletions Dockerfile.cloudbuild-agent
@@ -0,0 +1,38 @@
# TODO(dongge): Automatically build and push this to registry daily:
# us-central1-docker.pkg.dev/oss-fuzz/oss-fuzz-gen/agent-image
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3.11 and pip
RUN apt-get update && \
apt-get install -y software-properties-common curl && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.11 python3.11-dev python3.11-venv \
python3.11-distutils && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11

# Install Docker
RUN apt-get install -y ca-certificates gnupg lsb-release && \
mkdir -p /etc/apt/keyrings && \
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \
gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \
echo "deb [arch=$(dpkg --print-architecture) \
signed-by=/etc/apt/keyrings/docker.gpg] \
https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \
tee /etc/apt/sources.list.d/docker.list > /dev/null && \
apt-get update && \
apt-get install -y docker-ce docker-ce-cli containerd.io \
docker-buildx-plugin docker-compose-plugin

ENV DEBIAN_FRONTEND=dialog

# Set the working directory
WORKDIR /workspace/ofg

# Copy the requirements file
COPY requirements.txt /workspace/ofg/

# Install Python dependencies
RUN pip3.11 install --ignore-installed -r /workspace/ofg/requirements.txt
67 changes: 51 additions & 16 deletions agent/base_agent.py
@@ -1,11 +1,15 @@
"""The abstract base class for LLM agents in stages."""
import argparse
import logging
import random
import re
import subprocess as sp
import time
from abc import ABC, abstractmethod
from typing import Optional

import logger
import utils
from llm_toolkit.models import LLM
from llm_toolkit.prompt_builder import DefaultTemplateBuilder
from llm_toolkit.prompts import Prompt
@@ -29,18 +33,6 @@ def __init__(self,
self.name: str = name or self.__class__.__name__
self.dialog: str = '' # Communication history between LLM and tool.

# TODO(dongge): Replace this with google-cloud-log in a module.
logging.basicConfig(level=logging.DEBUG,
format=('%(asctime)s [Trial: %02d] %(levelname)s '
'[%(module)s.%(funcName)s]: %(message)s'))

self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG)

def write_to_file(self, file_path: str, file_content: str):
with open(file_path, 'w') as file:
file.writelines(file_content)

def get_tool(self, tool_name: str) -> Optional[BaseTool]:
"""Gets a tool of the agent by name."""
for tool in self.tools:
@@ -77,19 +69,62 @@ def _container_handle_bash_command(self, cur_round: int, response: str,
if command:
prompt_text = self._format_bash_execution_result(tool.execute(command))
else:
self.logger.warning('ROUND %d No BASH command from LLM response: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.warning(
f'ROUND {cur_round} No BASH command from LLM response: {response}',
logging.WARNING)
prompt_text = ('No bash command received, Please follow the '
'interaction protocols:\n'
f'{tool.tutorial()}')
return DefaultTemplateBuilder(self.llm, None, initial=prompt_text).build([])

def _sleep_random_duration(self, min_sec: int = 1, max_sec: int = 60) -> None:
"""Sleeps for a random duration between min_sec and max_sec. Agents uses
this to avoid exceeding quota limit (e.g., LLM query frequency)."""
duration = random.randint(min_sec, max_sec)
logger.debug('Sleeping for %d seconds before the next query', duration)
time.sleep(duration)

@classmethod
def _parse_args(cls) -> argparse.Namespace:
"""Parses command line args."""
parser = argparse.ArgumentParser(
description='Execute agent in cloud with dill files.')
parser.add_argument('-a',
'--agent',
help='The dill file path for the agent to execute.')
parser.add_argument(
'-rh',
'--result-history',
help='The dill file path for the agent input result history.')
parser.add_argument(
'-rn',
'--result-new',
help='The dill file path to store the agent output new result.')
return parser.parse_args()

@classmethod
def cloud_main(cls) -> None:
"""Executes agent using dill files. This is for cloud experiments launched
by cloud_builder.py. It runs `new_result = agent.execute(result_history)` in
the same way as local experiments, except `agent` and `result_history` are
deserialized from dill files and new_result will be serialized to share data
with the cloud experiment requester."""
args = cls._parse_args()

agent = utils.deserialize_from_dill(args.agent)
result_history = utils.deserialize_from_dill(args.result_history)
result = agent.execute(result_history)
utils.serialize_to_dill(result, args.result_new)

@abstractmethod
def _initial_prompt(self, results: list[Result]) -> Prompt:
"""The initial prompt of the agent."""

@abstractmethod
def execute(self, result_history: list[Result]) -> Result:
"""Executes the agent based on previous result."""


if __name__ == "__main__":
# For cloud experiments.
BaseAgent.cloud_main()
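For reference, a hedged sketch of how the requester side (`cloud_builder.py`, not included in this diff) might launch this entry point inside the agent image; the flag names match `_parse_args()` above, while the module-style invocation, the working directory, and the use of `subprocess` are assumptions:

```python
import subprocess


def launch_agent_step(agent_dill: str, history_dill: str, new_result_dill: str) -> None:
    # Hypothetical Cloud Build step: run BaseAgent.cloud_main() in the agent image,
    # assuming the OFG repo and the dill files have been staged under /workspace/ofg.
    subprocess.run(
        [
            'python3.11', '-m', 'agent.base_agent',
            '--agent', agent_dill,
            '--result-history', history_dill,
            '--result-new', new_result_dill,
        ],
        cwd='/workspace/ofg',
        check=True,  # A non-zero exit lets the caller fall back to a default new_result.
    )
```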
83 changes: 24 additions & 59 deletions agent/prototyper.py
@@ -1,10 +1,10 @@
"""An LLM agent to generate a simple fuzz target prototype that can build.
Use it as a usual module locally, or as script in cloud builds.
"""
import os
import subprocess as sp
from typing import Optional

import logger
from agent.base_agent import BaseAgent
from llm_toolkit.prompt_builder import DefaultTemplateBuilder
from llm_toolkit.prompts import Prompt
@@ -24,8 +24,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
default_prompt_builder = DefaultTemplateBuilder(model=self.llm,
benchmark=benchmark)
prompt = default_prompt_builder.build([])
work_dirs = results[-1].work_dirs
prompt.save(work_dirs.prompt)
# TODO(dongge): Find a way to save prompt and log for agents
return prompt

def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
@@ -36,29 +35,21 @@ def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
self._parse_tag(response, 'fuzz target'))
build_result.fuzz_target_source = fuzz_target_source
if fuzz_target_source:
self.logger.debug('ROUND %d Parsed fuzz target from LLM: %s',
cur_round,
fuzz_target_source,
extra={'trial': self.trial})
logger.debug('ROUND %02d Parsed fuzz target from LLM: %s', cur_round,
fuzz_target_source)
else:
self.logger.error('ROUND %d No fuzz target source code in conclusion: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.error('ROUND %02d No fuzz target source code in conclusion: %s',
cur_round, response)

build_script_source = self._filter_code(
self._parse_tag(response, 'build script'))
build_result.build_script_source = build_script_source
if build_script_source:
self.logger.debug('ROUND %d Parsed build script from LLM: %s',
cur_round,
build_script_source,
extra={'trial': self.trial})
logger.debug('ROUND %02d Parsed build script from LLM: %s', cur_round,
build_script_source)
else:
self.logger.debug('ROUND %d No build script in conclusion: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.debug('ROUND %02d No build script in conclusion: %s', cur_round,
response)

def _update_build_result(self, buid_result: BuildResult,
compile_process: sp.CompletedProcess,
@@ -89,24 +80,18 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int,
file_content=build_result.build_script_source))

# Recompile.
self.logger.info('===== ROUND %d Recompile =====',
cur_round,
extra={'trial': self.trial})
logger.info('===== ROUND %02d Recompile =====', cur_round)
compile_command = 'compile > /dev/null'
compile_process = compilation_tool.execute(compile_command)
compile_succeed = compile_process.returncode == 0
self.logger.debug('ROUND %d Fuzz target compiles successfully: %s',
cur_round,
compile_succeed,
extra={'trial': self.trial})
logger.debug('ROUND %02d Fuzz target compiles successfully: %s', cur_round,
compile_succeed)

# Double-check binary.
ls_result = compilation_tool.execute(f'ls /out/{benchmark.target_name}')
binary_exists = ls_result.returncode == 0
self.logger.debug('ROUND %d Final fuzz target binary exists: %s',
cur_round,
binary_exists,
extra={'trial': self.trial})
logger.debug('ROUND %02d Final fuzz target binary exists: %s', cur_round,
binary_exists)
compilation_tool.terminate()

self._update_build_result(build_result,
@@ -118,30 +103,16 @@ def _container_handle_conclusion(
build_result: BuildResult) -> Optional[Prompt]:
"""Runs a compilation tool to validate the new fuzz target and build script
from LLM."""
self.logger.info('----- ROUND %d Received conclusion -----',
cur_round,
extra={'trial': self.trial})
logger.info('----- ROUND %02d Received conclusion -----', cur_round)

self._update_fuzz_target_and_build_script(cur_round, response, build_result)

self._validate_fuzz_target_and_build_script(cur_round, build_result)
if build_result.status:
self.logger.info('***** Prototyper succeeded in %d rounds *****',
cur_round,
extra={'trial': self.trial})
self.write_to_file(
os.path.join(build_result.work_dirs.fixed_targets,
f'{build_result.trial}.fuzz_target'),
build_result.fuzz_target_source)
self.write_to_file(
os.path.join(build_result.work_dirs.fixed_targets,
f'{build_result.trial}.build_script'),
build_result.build_script_source)
logger.info('***** Prototyper succeeded in %02d rounds *****', cur_round)
return None

self.logger.info('***** Failed to recompile in %d rounds *****',
cur_round,
extra={'trial': self.trial})
logger.info('***** Failed to recompile in %02d rounds *****', cur_round)
prompt_text = ('Failed to build fuzz target. Here is the fuzz target, build'
' script, compilation command, and other compilation runtime'
' output.\n<fuzz target>\n'
@@ -163,7 +134,7 @@ def _container_tool_reaction(self, cur_round: int, response: str,

def execute(self, result_history: list[Result]) -> BuildResult:
"""Executes the agent based on previous result."""
self.logger.info('Executing Prototyper', extra={'trial': self.trial})
logger.info('Executing Prototyper')
last_result = result_history[-1]
prompt = self._initial_prompt(result_history)
benchmark = last_result.benchmark
@@ -179,22 +150,16 @@ def execute(self, result_history: list[Result]) -> BuildResult:
try:
client = self.llm.get_chat_client(model=self.llm.get_model())
while prompt and cur_round < MAX_ROUND:
self.logger.debug('ROUND %d agent prompt: %s',
cur_round,
prompt.get(),
extra={'trial': self.trial})
logger.info('ROUND %02d agent prompt: %s', cur_round, prompt.get())
response = self.llm.chat_llm(client=client, prompt=prompt)
self.logger.debug('ROUND %d LLM response: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.debug('ROUND %02d LLM response: %s', cur_round, response)
prompt = self._container_tool_reaction(cur_round, response,
build_result)
cur_round += 1
self._sleep_random_duration()
finally:
# Cleanup: stop and remove the container
self.logger.debug('Stopping and removing the inspect container %s...',
self.inspect_tool.container_id,
extra={'trial': self.trial})
logger.debug('Stopping and removing the inspect container %s',
self.inspect_tool.container_id)
self.inspect_tool.terminate()
return build_result
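The `_parse_tag()` and `_filter_code()` helpers used in `_update_fuzz_target_and_build_script()` above are defined outside this diff; a minimal stand-in, assuming the LLM wraps sources in the `<fuzz target>`/`<build script>` tags referenced by the prompt text:

```python
import re


def parse_tag(response: str, tag: str) -> str:
    # Hypothetical stand-in for BaseAgent._parse_tag: return the body of the first
    # <tag>...</tag> block in the LLM response, or '' if the tag is absent.
    match = re.search(rf'<{tag}>(.*?)</{tag}>', response, re.DOTALL)
    return match.group(1).strip() if match else ''


def filter_code(text: str) -> str:
    # Hypothetical stand-in for BaseAgent._filter_code: drop any markdown code
    # fences the LLM wraps around the source it returns.
    fence = '`' * 3
    return '\n'.join(
        line for line in text.splitlines() if not line.strip().startswith(fence))
```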
2 changes: 1 addition & 1 deletion ci/k8s/pr-exp.yaml
@@ -31,7 +31,7 @@ spec:
# Modify the follow command to customize one-off experiments.
# For benchmark sets that need more disk, increase the results volume
# size too.
command: ["/bin/bash", "report/docker_run.sh", "${GKE_EXP_BENCHMARK}", "${GKE_EXP_NAME}", "${GKE_EXP_FUZZING_TIMEOUT}", "ofg-pr", "${GKE_EXP_LLM}", "${GKE_EXP_DELAY}", "${GKE_EXP_LOCAL_INTROSPECTOR}", "${GKE_EXP_NUM_SAMPLES}", "${GKE_EXP_LLM_FIX_LIMIT}", "${GKE_EXP_VARY_TEMPERATURE}"]
command: ["/bin/bash", "report/docker_run.sh", "${GKE_EXP_BENCHMARK}", "${GKE_EXP_NAME}", "${GKE_EXP_FUZZING_TIMEOUT}", "ofg-pr", "${GKE_EXP_LLM}", "${GKE_EXP_DELAY}", "${GKE_EXP_LOCAL_INTROSPECTOR}", "${GKE_EXP_NUM_SAMPLES}", "${GKE_EXP_LLM_FIX_LIMIT}", "${GKE_EXP_VARY_TEMPERATURE}", "${GKE_EXP_AGENT}"]
resources:
requests:
cpu: ${GKE_EXP_REQ_CPU}
11 changes: 11 additions & 0 deletions ci/request_pr_exp.py
@@ -37,6 +37,7 @@
TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), 'k8s', 'pr-exp.yaml')
BENCHMARK_SET = 'comparison'
LLM_NAME = 'vertex_ai_gemini-1-5'
LLM_CHAT_NAME = 'vertex_ai_gemini-1-5-chat'
EXP_DELAY = 0
FUZZING_TIMEOUT = 300
REQUEST_CPU = 6
@@ -152,6 +153,11 @@ def _parse_args(cmd) -> argparse.Namespace:
default=VARY_TEMPERATURE,
help=('Use different temperatures for each sample, default: '
f'{VARY_TEMPERATURE}'))
parser.add_argument('-ag',
'--agent',
action='store_true',
default=False,
help='Enables agent enhancement.')
args = parser.parse_args(cmd)

assert os.path.isfile(
@@ -162,6 +168,10 @@ def _parse_args(cmd) -> argparse.Namespace:
if args.name_suffix:
args.experiment_name = f'{args.experiment_name}-{args.name_suffix}'

# Use Chat model by default in agent-enhance experiments.
if args.agent and args.llm == LLM_NAME:
args.llm = LLM_CHAT_NAME

return args


@@ -276,6 +286,7 @@ def _fill_template(args: argparse.Namespace) -> str:
exp_env_vars['GKE_EXP_NUM_SAMPLES'] = f'{args.num_samples}'
exp_env_vars['GKE_EXP_LLM_FIX_LIMIT'] = f'{args.llm_fix_limit}'
exp_env_vars['GKE_EXP_VARY_TEMPERATURE'] = f'{args.vary_temperature}'.lower()
exp_env_vars['GKE_EXP_AGENT'] = f'{args.agent}'.lower()

with open(args.gke_template, 'r') as file:
yaml_template = file.read()
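The hunk ends before the substitution itself; a minimal sketch of how the `${GKE_EXP_*}` placeholders (including the new `GKE_EXP_AGENT`) might be expanded into the final Kubernetes YAML, assuming plain string replacement rather than whatever `_fill_template()` actually does past this point:

```python
def fill_placeholders(yaml_template: str, exp_env_vars: dict[str, str]) -> str:
    # Hypothetical helper: replace every ${NAME} token with its value, e.g.
    # '${GKE_EXP_AGENT}' -> 'true' when --agent is passed to ci/request_pr_exp.py.
    filled = yaml_template
    for name, value in exp_env_vars.items():
        filled = filled.replace('${' + name + '}', value)
    return filled
```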