Agent enhancement 3 (#580)
Support cloud experiments:
1. Upload local pickle files for `agent` and `result_history`
2. In cloud build: Install dependencies and run `new_result = agent.execute(result_history)` (see the sketch after this list)
3. Download the pickle file of `new_result`
4. Save cloud exp logs
5. If cloud build fails, return a default `new_result` representing
build failure.
6. Convert results into `status/**/result.json` for report generation.
7. Upload local OFG repo to cloud build, instead of asking cloud build
to pull from repo.
8. Make new `Result` class compatible with report generation.
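The flow above round-trips `agent` and `result_history` through dill files. A minimal, self-contained sketch of that round trip (an editor's illustration, not the actual `cloud_builder.py`; the real upload/download and Cloud Build invocation are not part of this diff, and the helpers below only assume the behaviour of their `utils` counterparts):

```python
import dill  # `utils.serialize_to_dill`/`deserialize_from_dill` presumably wrap dill.


def serialize_to_dill(obj, path: str) -> None:
    # Assumed behaviour of utils.serialize_to_dill.
    with open(path, 'wb') as f:
        dill.dump(obj, f)


def deserialize_from_dill(path: str):
    # Assumed behaviour of utils.deserialize_from_dill.
    with open(path, 'rb') as f:
        return dill.load(f)


def simulate_cloud_round_trip(agent, result_history: list, workspace: str):
    """Requester-side view of steps 1-3 above, run locally for illustration."""
    agent_path = f'{workspace}/agent.pkl'
    history_path = f'{workspace}/result_history.pkl'
    new_result_path = f'{workspace}/new_result.pkl'

    # 1. Upload: serialize the local agent and result history for the cloud build.
    serialize_to_dill(agent, agent_path)
    serialize_to_dill(result_history, history_path)

    # 2. Cloud build (simulated here): deserialize both files and run the agent,
    #    which is what BaseAgent.cloud_main() in this commit does.
    new_result = deserialize_from_dill(agent_path).execute(
        deserialize_from_dill(history_path))
    serialize_to_dill(new_result, new_result_path)

    # 3. Download: the requester reads back the serialized new result.
    return deserialize_from_dill(new_result_path)
```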

TODOs:
1. More exception handling.
2. More intermediate files (e.g., the local agent dialog).
DonggeLiu authored Sep 17, 2024
1 parent a440f81 commit 1c8d358
Showing 19 changed files with 722 additions and 130 deletions.
38 changes: 38 additions & 0 deletions Dockerfile.cloudbuild-agent
@@ -0,0 +1,38 @@
# TODO(dongge): Automatically build and push this to registry daily:
# us-central1-docker.pkg.dev/oss-fuzz/oss-fuzz-gen/agent-image
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3.11 and pip
RUN apt-get update && \
apt-get install -y software-properties-common curl && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.11 python3.11-dev python3.11-venv \
python3.11-distutils && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11

# Install Docker
RUN apt-get install -y ca-certificates gnupg lsb-release && \
mkdir -p /etc/apt/keyrings && \
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \
gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \
echo "deb [arch=$(dpkg --print-architecture) \
signed-by=/etc/apt/keyrings/docker.gpg] \
https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \
tee /etc/apt/sources.list.d/docker.list > /dev/null && \
apt-get update && \
apt-get install -y docker-ce docker-ce-cli containerd.io \
docker-buildx-plugin docker-compose-plugin

ENV DEBIAN_FRONTEND=dialog

# Set the working directory
WORKDIR /workspace/ofg

# Copy the requirements file
COPY requirements.txt /workspace/ofg/

# Install Python dependencies
RUN pip3.11 install --ignore-installed -r /workspace/ofg/requirements.txt
67 changes: 51 additions & 16 deletions agent/base_agent.py
@@ -1,11 +1,15 @@
"""The abstract base class for LLM agents in stages."""
import argparse
import logging
import random
import re
import subprocess as sp
import time
from abc import ABC, abstractmethod
from typing import Optional

import logger
import utils
from llm_toolkit.models import LLM
from llm_toolkit.prompt_builder import DefaultTemplateBuilder
from llm_toolkit.prompts import Prompt
@@ -29,18 +33,6 @@ def __init__(self,
self.name: str = name or self.__class__.__name__
self.dialog: str = '' # Communication history between LLM and tool.

# TODO(dongge): Replace this with google-cloud-log in a module.
logging.basicConfig(level=logging.DEBUG,
format=('%(asctime)s [Trial: %02d] %(levelname)s '
'[%(module)s.%(funcName)s]: %(message)s'))

self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG)

def write_to_file(self, file_path: str, file_content: str):
with open(file_path, 'w') as file:
file.writelines(file_content)

def get_tool(self, tool_name: str) -> Optional[BaseTool]:
"""Gets a tool of the agent by name."""
for tool in self.tools:
@@ -77,19 +69,62 @@ def _container_handle_bash_command(self, cur_round: int, response: str,
if command:
prompt_text = self._format_bash_execution_result(tool.execute(command))
else:
self.logger.warning('ROUND %d No BASH command from LLM response: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.warning(
f'ROUND {cur_round} No BASH command from LLM response: {response}',
logging.WARNING)
prompt_text = ('No bash command received, Please follow the '
'interaction protocols:\n'
f'{tool.tutorial()}')
return DefaultTemplateBuilder(self.llm, None, initial=prompt_text).build([])

def _sleep_random_duration(self, min_sec: int = 1, max_sec: int = 60) -> None:
"""Sleeps for a random duration between min_sec and max_sec. Agents uses
this to avoid exceeding quota limit (e.g., LLM query frequency)."""
duration = random.randint(min_sec, max_sec)
logger.debug('Sleeping for %d seconds before the next query', duration)
time.sleep(duration)

@classmethod
def _parse_args(cls) -> argparse.Namespace:
"""Parses command line args."""
parser = argparse.ArgumentParser(
description='Execute agent in cloud with dill files.')
parser.add_argument('-a',
'--agent',
help='The dill file path for the agent to execute.')
parser.add_argument(
'-rh',
'--result-history',
help='The dill file path for the agent input result history.')
parser.add_argument(
'-rn',
'--result-new',
help='The dill file path to store the agent output new result.')
return parser.parse_args()

@classmethod
def cloud_main(cls) -> None:
"""Executes agent using dill files. This is for cloud experiments launched
by cloud_builder.py. It runs `new_result = agent.execute(result_history)` in
the same way as local experiments, except `agent` and `result_history` are
deserialized from dill files and new_result will be serialized to share data
with the cloud experiment requester."""
args = cls._parse_args()

agent = utils.deserialize_from_dill(args.agent)
result_history = utils.deserialize_from_dill(args.result_history)
result = agent.execute(result_history)
utils.serialize_to_dill(result, args.result_new)

@abstractmethod
def _initial_prompt(self, results: list[Result]) -> Prompt:
"""The initial prompt of the agent."""

@abstractmethod
def execute(self, result_history: list[Result]) -> Result:
"""Executes the agent based on previous result."""


if __name__ == "__main__":
# For cloud experiments.
BaseAgent.cloud_main()
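For reference, a hedged sketch of how the requester side (`cloud_builder.py`, not included in this diff) might launch this entry point inside the agent image; the flag names match `_parse_args()` above, while the module-style invocation, the working directory, and the use of `subprocess` are assumptions:

```python
import subprocess


def launch_agent_step(agent_dill: str, history_dill: str, new_result_dill: str) -> None:
    # Hypothetical Cloud Build step: run BaseAgent.cloud_main() in the agent image,
    # assuming the OFG repo and the dill files have been staged under /workspace/ofg.
    subprocess.run(
        [
            'python3.11', '-m', 'agent.base_agent',
            '--agent', agent_dill,
            '--result-history', history_dill,
            '--result-new', new_result_dill,
        ],
        cwd='/workspace/ofg',
        check=True,  # A non-zero exit lets the caller fall back to a default new_result.
    )
```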
83 changes: 24 additions & 59 deletions agent/prototyper.py
@@ -1,10 +1,10 @@
"""An LLM agent to generate a simple fuzz target prototype that can build.
Use it as a usual module locally, or as script in cloud builds.
"""
import os
import subprocess as sp
from typing import Optional

import logger
from agent.base_agent import BaseAgent
from llm_toolkit.prompt_builder import DefaultTemplateBuilder
from llm_toolkit.prompts import Prompt
@@ -24,8 +24,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
default_prompt_builder = DefaultTemplateBuilder(model=self.llm,
benchmark=benchmark)
prompt = default_prompt_builder.build([])
work_dirs = results[-1].work_dirs
prompt.save(work_dirs.prompt)
# TODO(dongge): Find a way to save prompt and log for agents
return prompt

def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
@@ -36,29 +35,21 @@ def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
self._parse_tag(response, 'fuzz target'))
build_result.fuzz_target_source = fuzz_target_source
if fuzz_target_source:
self.logger.debug('ROUND %d Parsed fuzz target from LLM: %s',
cur_round,
fuzz_target_source,
extra={'trial': self.trial})
logger.debug('ROUND %02d Parsed fuzz target from LLM: %s', cur_round,
fuzz_target_source)
else:
self.logger.error('ROUND %d No fuzz target source code in conclusion: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.error('ROUND %02d No fuzz target source code in conclusion: %s',
cur_round, response)

build_script_source = self._filter_code(
self._parse_tag(response, 'build script'))
build_result.build_script_source = build_script_source
if build_script_source:
self.logger.debug('ROUND %d Parsed build script from LLM: %s',
cur_round,
build_script_source,
extra={'trial': self.trial})
logger.debug('ROUND %02d Parsed build script from LLM: %s', cur_round,
build_script_source)
else:
self.logger.debug('ROUND %d No build script in conclusion: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.debug('ROUND %02d No build script in conclusion: %s', cur_round,
response)

def _update_build_result(self, buid_result: BuildResult,
compile_process: sp.CompletedProcess,
@@ -89,24 +80,18 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int,
file_content=build_result.build_script_source))

# Recompile.
self.logger.info('===== ROUND %d Recompile =====',
cur_round,
extra={'trial': self.trial})
logger.info('===== ROUND %02d Recompile =====', cur_round)
compile_command = 'compile > /dev/null'
compile_process = compilation_tool.execute(compile_command)
compile_succeed = compile_process.returncode == 0
self.logger.debug('ROUND %d Fuzz target compiles successfully: %s',
cur_round,
compile_succeed,
extra={'trial': self.trial})
logger.debug('ROUND %02d Fuzz target compiles successfully: %s', cur_round,
compile_succeed)

# Double-check binary.
ls_result = compilation_tool.execute(f'ls /out/{benchmark.target_name}')
binary_exists = ls_result.returncode == 0
self.logger.debug('ROUND %d Final fuzz target binary exists: %s',
cur_round,
binary_exists,
extra={'trial': self.trial})
logger.debug('ROUND %02d Final fuzz target binary exists: %s', cur_round,
binary_exists)
compilation_tool.terminate()

self._update_build_result(build_result,
@@ -118,30 +103,16 @@ def _container_handle_conclusion(
build_result: BuildResult) -> Optional[Prompt]:
"""Runs a compilation tool to validate the new fuzz target and build script
from LLM."""
self.logger.info('----- ROUND %d Received conclusion -----',
cur_round,
extra={'trial': self.trial})
logger.info('----- ROUND %02d Received conclusion -----', cur_round)

self._update_fuzz_target_and_build_script(cur_round, response, build_result)

self._validate_fuzz_target_and_build_script(cur_round, build_result)
if build_result.status:
self.logger.info('***** Prototyper succeeded in %d rounds *****',
cur_round,
extra={'trial': self.trial})
self.write_to_file(
os.path.join(build_result.work_dirs.fixed_targets,
f'{build_result.trial}.fuzz_target'),
build_result.fuzz_target_source)
self.write_to_file(
os.path.join(build_result.work_dirs.fixed_targets,
f'{build_result.trial}.build_script'),
build_result.build_script_source)
logger.info('***** Prototyper succeeded in %02d rounds *****', cur_round)
return None

self.logger.info('***** Failed to recompile in %d rounds *****',
cur_round,
extra={'trial': self.trial})
logger.info('***** Failed to recompile in %02d rounds *****', cur_round)
prompt_text = ('Failed to build fuzz target. Here is the fuzz target, build'
' script, compilation command, and other compilation runtime'
' output.\n<fuzz target>\n'
@@ -163,7 +134,7 @@ def _container_tool_reaction(self, cur_round: int, response: str,

def execute(self, result_history: list[Result]) -> BuildResult:
"""Executes the agent based on previous result."""
self.logger.info('Executing Prototyper', extra={'trial': self.trial})
logger.info('Executing Prototyper')
last_result = result_history[-1]
prompt = self._initial_prompt(result_history)
benchmark = last_result.benchmark
@@ -179,22 +150,16 @@ def execute(self, result_history: list[Result]) -> BuildResult:
try:
client = self.llm.get_chat_client(model=self.llm.get_model())
while prompt and cur_round < MAX_ROUND:
self.logger.debug('ROUND %d agent prompt: %s',
cur_round,
prompt.get(),
extra={'trial': self.trial})
logger.info('ROUND %02d agent prompt: %s', cur_round, prompt.get())
response = self.llm.chat_llm(client=client, prompt=prompt)
self.logger.debug('ROUND %d LLM response: %s',
cur_round,
response,
extra={'trial': self.trial})
logger.debug('ROUND %02d LLM response: %s', cur_round, response)
prompt = self._container_tool_reaction(cur_round, response,
build_result)
cur_round += 1
self._sleep_random_duration()
finally:
# Cleanup: stop and remove the container
self.logger.debug('Stopping and removing the inspect container %s...',
self.inspect_tool.container_id,
extra={'trial': self.trial})
logger.debug('Stopping and removing the inspect container %s',
self.inspect_tool.container_id)
self.inspect_tool.terminate()
return build_result
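The `_parse_tag()` and `_filter_code()` helpers used in `_update_fuzz_target_and_build_script()` above are defined outside this diff; a minimal stand-in, assuming the LLM wraps sources in the `<fuzz target>`/`<build script>` tags referenced by the prompt text:

```python
import re


def parse_tag(response: str, tag: str) -> str:
    # Hypothetical stand-in for BaseAgent._parse_tag: return the body of the first
    # <tag>...</tag> block in the LLM response, or '' if the tag is absent.
    match = re.search(rf'<{tag}>(.*?)</{tag}>', response, re.DOTALL)
    return match.group(1).strip() if match else ''


def filter_code(text: str) -> str:
    # Hypothetical stand-in for BaseAgent._filter_code: drop any markdown code
    # fences the LLM wraps around the source it returns.
    fence = '`' * 3
    return '\n'.join(
        line for line in text.splitlines() if not line.strip().startswith(fence))
```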
2 changes: 1 addition & 1 deletion ci/k8s/pr-exp.yaml
@@ -31,7 +31,7 @@ spec:
# Modify the follow command to customize one-off experiments.
# For benchmark sets that need more disk, increase the results volume
# size too.
command: ["/bin/bash", "report/docker_run.sh", "${GKE_EXP_BENCHMARK}", "${GKE_EXP_NAME}", "${GKE_EXP_FUZZING_TIMEOUT}", "ofg-pr", "${GKE_EXP_LLM}", "${GKE_EXP_DELAY}", "${GKE_EXP_LOCAL_INTROSPECTOR}", "${GKE_EXP_NUM_SAMPLES}", "${GKE_EXP_LLM_FIX_LIMIT}", "${GKE_EXP_VARY_TEMPERATURE}"]
command: ["/bin/bash", "report/docker_run.sh", "${GKE_EXP_BENCHMARK}", "${GKE_EXP_NAME}", "${GKE_EXP_FUZZING_TIMEOUT}", "ofg-pr", "${GKE_EXP_LLM}", "${GKE_EXP_DELAY}", "${GKE_EXP_LOCAL_INTROSPECTOR}", "${GKE_EXP_NUM_SAMPLES}", "${GKE_EXP_LLM_FIX_LIMIT}", "${GKE_EXP_VARY_TEMPERATURE}", "${GKE_EXP_AGENT}"]
resources:
requests:
cpu: ${GKE_EXP_REQ_CPU}
11 changes: 11 additions & 0 deletions ci/request_pr_exp.py
@@ -37,6 +37,7 @@
TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), 'k8s', 'pr-exp.yaml')
BENCHMARK_SET = 'comparison'
LLM_NAME = 'vertex_ai_gemini-1-5'
LLM_CHAT_NAME = 'vertex_ai_gemini-1-5-chat'
EXP_DELAY = 0
FUZZING_TIMEOUT = 300
REQUEST_CPU = 6
@@ -152,6 +153,11 @@ def _parse_args(cmd) -> argparse.Namespace:
default=VARY_TEMPERATURE,
help=('Use different temperatures for each sample, default: '
f'{VARY_TEMPERATURE}'))
parser.add_argument('-ag',
'--agent',
action='store_true',
default=False,
help='Enables agent enhancement.')
args = parser.parse_args(cmd)

assert os.path.isfile(
@@ -162,6 +168,10 @@ def _parse_args(cmd) -> argparse.Namespace:
if args.name_suffix:
args.experiment_name = f'{args.experiment_name}-{args.name_suffix}'

# Use Chat model by default in agent-enhance experiments.
if args.agent and args.llm == LLM_NAME:
args.llm = LLM_CHAT_NAME

return args


@@ -276,6 +286,7 @@ def _fill_template(args: argparse.Namespace) -> str:
exp_env_vars['GKE_EXP_NUM_SAMPLES'] = f'{args.num_samples}'
exp_env_vars['GKE_EXP_LLM_FIX_LIMIT'] = f'{args.llm_fix_limit}'
exp_env_vars['GKE_EXP_VARY_TEMPERATURE'] = f'{args.vary_temperature}'.lower()
exp_env_vars['GKE_EXP_AGENT'] = f'{args.agent}'.lower()

with open(args.gke_template, 'r') as file:
yaml_template = file.read()
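The hunk ends before the substitution itself; a minimal sketch of how the `${GKE_EXP_*}` placeholders (including the new `GKE_EXP_AGENT`) might be expanded into the final Kubernetes YAML, assuming plain string replacement rather than whatever `_fill_template()` actually does past this point:

```python
def fill_placeholders(yaml_template: str, exp_env_vars: dict[str, str]) -> str:
    # Hypothetical helper: replace every ${NAME} token with its value, e.g.
    # '${GKE_EXP_AGENT}' -> 'true' when --agent is passed to ci/request_pr_exp.py.
    filled = yaml_template
    for name, value in exp_env_vars.items():
        filled = filled.replace('${' + name + '}', value)
    return filled
```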