Skip to content

Commit

Permalink
Save crash reproducer found in experiments. (#172)
Browse files Browse the repository at this point in the history
Fixes #156.
Related: google/oss-fuzz#11700.

Save GS bucket links to the reproducer and other statuses into a JSON
file.
  • Loading branch information
DonggeLiu authored Mar 21, 2024
1 parent 48c26b5 commit 6aa874f
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 19 deletions.
6 changes: 6 additions & 0 deletions experiment/builder_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class RunResult:
log_path: str = ''
corpus_path: str = ''
coverage_report_path: str = ''
reproducer_path: str = ''

  def dict(self):
    """Returns this run result's dataclass fields as a plain dict."""
    return dataclasses.asdict(self)
Expand Down Expand Up @@ -395,6 +396,9 @@ def build_and_run(self, generated_project: str, target_path: str,
coverage_name = f'{uid}.coverage'
coverage_path = f'gs://{self.experiment_bucket}/{coverage_name}'

reproducer_name = f'{uid}.reproducer'
reproducer_path = f'gs://{self.experiment_bucket}/{reproducer_name}'

if not self._run_with_retry_control(
os.path.realpath(target_path),
[
Expand All @@ -406,6 +410,7 @@ def build_and_run(self, generated_project: str, target_path: str,
f'--upload_output_log={run_log_path}',
f'--upload_corpus={corpus_path}',
f'--upload_coverage={coverage_path}',
f'--upload_reproducer={reproducer_path}',
f'--experiment_name={self.experiment_name}', '--'
] + self._libfuzzer_args(),
cwd=oss_fuzz_checkout.OSS_FUZZ_DIR):
Expand Down Expand Up @@ -460,6 +465,7 @@ def build_and_run(self, generated_project: str, target_path: str,

run_result = RunResult(corpus_path=corpus_path,
coverage_report_path=coverage_path,
reproducer_path=reproducer_path,
log_path=run_log_path)
blob = bucket.blob(f'{coverage_name}/report/linux/summary.json')
if blob.exists():
Expand Down
3 changes: 2 additions & 1 deletion experiment/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Result:
coverage: float = 0.0
line_coverage_diff: float = 0.0
coverage_report_path: str = ''
reproducer_path: str = ''

  def dict(self):
    """Returns this evaluation result's dataclass fields as a plain dict."""
    return dataclasses.asdict(self)
Expand Down Expand Up @@ -302,7 +303,7 @@ def do_check_target(self, ai_binary: str, target_path: str) -> Result:
f'({run_result.coverage.covered_lines}/{total_lines})')
return logger.return_result(
Result(True, crashes, coverage_percent, coverage_diff,
run_result.coverage_report_path))
run_result.coverage_report_path, run_result.reproducer_path))

def _load_existing_coverage_summary(self) -> dict:
"""Load existing summary.json."""
Expand Down
2 changes: 1 addition & 1 deletion report/templates/benchmark.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ <h1>{{ benchmark }}</h1>
</tr>
{% for sample in samples %}
<tr>
<td><a href="../sample/{{ benchmark|urlencode }}/{{ sample.id }}">{{ sample.id }}</a></li></td>
<td><a href="../../sample/{{ benchmark|urlencode }}/{{ sample.id }}">{{ sample.id }}</a></li></td>
<td>{{ sample.status }}</td>
{% if sample.result %}
<td>{{ sample.result.compiles}}</td>
Expand Down
22 changes: 22 additions & 0 deletions report/templates/benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{# Emits crash-reproducing info for every sample of one benchmark as JSON.
   Rendered by the /benchmark/<benchmark>/crash.json route in report/web.py.
   `source_code` has no surrounding quotes because
   get_benchmark_final_target_code already returns a JSON-encoded string.
   NOTE(review): the other values are interpolated without JSON escaping; a
   quote or backslash in e.g. `status` or the stacktrace link would produce
   invalid JSON — consider Jinja's |tojson filter. Verify field contents. #}
{
"samples": [
{% for sample in samples %}
{
"benchmark": "{{ benchmark }}",
"sample": "{{ sample.id }}",
"status": "{{ sample.status }}",
"compiles": "{{ sample.result.compiles }}",
"crashes": "{{ sample.result.crashes }}",
"coverage": "{{ sample.result.coverage | percent }}",
"coverage_diff": "{{ sample.result.line_coverage_diff }}",
"coverage_report": "{{ sample.result.coverage_report_path | cov_report_link }}",
"stacktrace": "{{ sample.stacktrace }}",
"target_binary": "{{ sample.target_binary }}",
"reproducer": "{{ sample.reproducer }}",
"run_log": "{{ sample.run_log }}",
"source_code": {{ get_benchmark_final_target_code(sample.id) }},
"model": "{{ model }}"
}{% if not loop.last %},{% endif %}
{% endfor %}
]
}
4 changes: 2 additions & 2 deletions report/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
</tr>
{% for benchmark in benchmarks %}
<tr>
<td><a href="benchmark/{{ benchmark.id|urlencode }}">{{ benchmark.signature }}</a></li></td>
<td><a href="benchmark/{{ benchmark.id|urlencode }}/index.html">{{ benchmark.signature }}</a></li></td>
<td>{{ benchmark.status }}</td>
<td>{{ benchmark.result.build_success_rate|percent}}</td>
<td>{{ benchmark.result.crash_rate|percent }} </td>
<td><a href="benchmark/{{ benchmark.id|urlencode }}/crash.json"> {{ benchmark.result.crash_rate|percent }} </a></td>
<td>{{ benchmark.result.max_coverage |percent }}</td>
<td><a href="{{ benchmark.result.max_coverage_diff_report | cov_report_link }}">{{ benchmark.result.max_line_coverage_diff|percent }}</a></td>
</tr>
Expand Down
8 changes: 6 additions & 2 deletions report/upload_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,12 @@ while true; do
echo "Download results from localhost."
wget2 --quiet --inet4-only --no-host-directories --http2-request-window 10 --recursive localhost:${WEB_PORT:?}/ 2>&1

# Also fetch the sorted line cov diff report.
wget2 --quiet --inet4-only localhost:${WEB_PORT:?}/sort -O sort.html 2>&1
# Also fetch the sorted reports.
wget2 --quiet --inet4-only localhost:${WEB_PORT:?}/sort/build -O sort/build 2>&1
wget2 --quiet --inet4-only localhost:${WEB_PORT:?}/sort/cov -O sort/cov 2>&1
wget2 --quiet --inet4-only localhost:${WEB_PORT:?}/sort/cov_diff -O sort/cov_diff 2>&1
wget2 --quiet --inet4-only localhost:${WEB_PORT:?}/sort/crash -O sort/crash 2>&1
wget2 --quiet --inet4-only localhost:${WEB_PORT:?}/sort/status -O sort/status 2>&1

# Stop the server.
kill -9 "$pid_web"
Expand Down
134 changes: 121 additions & 13 deletions report/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import re
import sys
import urllib.parse
from functools import partial
from typing import List, Optional

import yaml
Expand Down Expand Up @@ -50,21 +51,26 @@ class Benchmark:
signature: str = ''

def __post_init__(self):
self.signature = self._find_signature() or self.id

def _find_signature(self) -> str:
"""Finds the function signature by searching for its id in BENCHMARK_DIR."""
self.signature = self.find_signature(self.id) or self.id

@staticmethod
def find_signature(benchmark_id: str) -> str:
"""
Finds the function signature by searching for its |benchmark_id| in
BENCHMARK_DIR.
"""
if not BENCHMARK_DIR:
return ''

for project_yaml in os.listdir(BENCHMARK_DIR):
yaml_project_name = project_yaml.removesuffix(".yaml")
with open(os.path.join(BENCHMARK_DIR, project_yaml)) as project_yaml_file:
if yaml_project_name not in self.id:
if yaml_project_name not in benchmark_id:
continue
functions = yaml.safe_load(project_yaml_file).get('functions', [])
for function in functions:
function_name = self.id.removeprefix(f'output-{yaml_project_name}-')
function_name = benchmark_id.removeprefix(
f'output-{yaml_project_name}-')
if function.get('name', '').lower().startswith(function_name):
return f'{yaml_project_name}-{function.get("signature", "")}'

Expand All @@ -73,10 +79,39 @@ def _find_signature(self) -> str:

@dataclasses.dataclass
class Sample:
  """Result of a fuzz target sample of a benchmark."""
  id: str
  status: str
  result: Optional[evaluator.Result] = None

  @property
  def stacktrace(self) -> str:
    """GS link to the stacktrace of this sample's crash, '' without a result."""
    if not self.result:
      return ''
    return f'{self.result.reproducer_path}/stacktrace'

  @property
  def target_binary(self) -> str:
    """GS link to the fuzz target binary that produced the crash."""
    if not self.result:
      return ''
    return f'{self.result.reproducer_path}/target_binary'

  @property
  def reproducer(self) -> str:
    """GS link to the crash-reproducing artifacts."""
    if not self.result:
      return ''
    return f'{self.result.reproducer_path}/artifacts'

  @property
  def run_log(self) -> str:
    """GS link to the run log, sitting next to the reproducer directory."""
    if not self.result:
      return ''
    return self.result.reproducer_path.removesuffix('reproducer') + 'run.log'


@dataclasses.dataclass
class Target:
Expand Down Expand Up @@ -176,11 +211,17 @@ def list_benchmarks() -> List[Benchmark]:
return benchmarks


def sort_benchmarks(benchmarks: List[Benchmark],
                    sort_by: str = 'cov_diff') -> List[Benchmark]:
  """Sorts |benchmarks| in descending order by the |sort_by| criterion.

  Supported criteria: 'build' (build success rate), 'crash' (crash rate),
  'cov' (max coverage), 'status' (experiment status), and 'cov_diff'
  (max line coverage diff, the default). Raises KeyError for an unknown
  criterion.
  """
  # Dispatch table instead of an if/elif chain; one key func per criterion.
  sort_key = {
      'build': lambda b: b.result.build_success_rate,
      'crash': lambda b: b.result.crash_rate,
      'cov': lambda b: b.result.max_coverage,
      'status': lambda b: b.status,
      'cov_diff': lambda b: b.result.max_line_coverage_diff,
  }[sort_by]
  return sorted(benchmarks, key=sort_key, reverse=True)


Expand Down Expand Up @@ -264,6 +305,20 @@ def get_targets(benchmark: str, sample: str) -> list[Target]:
return targets


def get_final_target_code(benchmark: str,
                          sample: str,
                          results_dir: Optional[str] = None) -> str:
  """Returns the JSON-encoded source of |sample|'s final fixed target.

  Searches the 'fixed_targets' directory of |benchmark| under |results_dir|
  (defaults to the global RESULTS_DIR) and returns the contents of the file
  belonging to |sample|, JSON-escaped so the template can embed it verbatim
  as a JSON value. Returns '' when no such target exists.
  """
  # Lazy `or` keeps RESULTS_DIR untouched when a directory is passed in.
  targets_dir = os.path.join(results_dir or RESULTS_DIR, benchmark,
                             'fixed_targets')
  if not os.path.isdir(targets_dir):
    # A benchmark that never produced fixed targets has no directory.
    return ''

  for name in sorted(os.listdir(targets_dir)):
    path = os.path.join(targets_dir, name)
    # Fixed targets are named '<sample_id>.<extension>'.
    if os.path.isfile(path) and name.startswith(sample + '.'):
      with open(path) as f:
        # json.dumps escapes quotes/newlines for direct JSON embedding.
        return json.dumps(f.read())
  return ''


@app.route('/')
def index():
return render_template('index.html',
Expand All @@ -278,14 +333,67 @@ def index_json():
model=model)


# NOTE(review): the scraped diff left the removed '/sort' route decorator
# stacked above this view; only the new '/sort/build' route is kept.
@app.route('/sort/build')
def index_sort_build():
  """Renders the index page with benchmarks sorted by build success rate."""
  return render_template('index.html',
                         benchmarks=sort_benchmarks(list_benchmarks(),
                                                    sort_by='build'),
                         model=model)


@app.route('/sort/cov')
def index_sort_cov():
  """Renders the index page with benchmarks sorted by max coverage."""
  by_coverage = sort_benchmarks(list_benchmarks(), sort_by='cov')
  return render_template('index.html', benchmarks=by_coverage, model=model)


# NOTE(review): the scraped diff duplicated the old sort_benchmarks() call
# next to the new one; this is the clean post-commit version.
@app.route('/sort/cov_diff')
def index_sort():
  """Renders the index page with benchmarks sorted by max line coverage diff."""
  return render_template('index.html',
                         benchmarks=sort_benchmarks(list_benchmarks(),
                                                    sort_by='cov_diff'),
                         model=model)


@app.route('/sort/crash')
def index_sort_crash():
  """Renders the index page with benchmarks sorted by crash rate."""
  by_crash_rate = sort_benchmarks(list_benchmarks(), sort_by='crash')
  return render_template('index.html', benchmarks=by_crash_rate, model=model)


# NOTE(review): dropped the stale '/benchmark/<benchmark>' decorator that the
# scraped diff left above this view (it lacks the <benchmark> parameter and
# belongs to benchmark_page), and fixed the 'stauts' typo in the view name.
@app.route('/sort/status')
def index_sort_status():
  """Renders the index page with benchmarks sorted by experiment status."""
  return render_template('index.html',
                         benchmarks=sort_benchmarks(list_benchmarks(),
                                                    sort_by='status'),
                         model=model)


@app.route('/benchmark/<benchmark>/crash.json')
def benchmark_json(benchmark):
  """Generates a JSON containing crash reproducing info."""
  if not _is_valid_benchmark_dir(benchmark):
    # TODO(dongge): This won't be needed after resolving the `lost+found` issue.
    abort(404)

  try:
    signature = Benchmark.find_signature(benchmark)
    crash_samples = get_samples(benchmark)
    # Pre-bind the benchmark so the template only supplies the sample ID.
    code_getter = partial(get_final_target_code, benchmark)
    return render_template('benchmark.json',
                           benchmark=signature,
                           samples=crash_samples,
                           get_benchmark_final_target_code=code_getter,
                           model=model)
  except Exception as e:
    # Best-effort endpoint: log and serve an empty body instead of a 500.
    logging.warning('Failed to render benchmark crash JSON: %s\n %s',
                    benchmark, e)
    return ''


@app.route('/benchmark/<benchmark>/index.html')
def benchmark_page(benchmark):
if _is_valid_benchmark_dir(benchmark):
return render_template('benchmark.html',
Expand Down

0 comments on commit 6aa874f

Please sign in to comment.