Pass code block to fixer prompt, not the raw LLM response (#259) (#261)

Passing the raw response can confuse the LLM when it contains more text than just the code block, which occurred with `Gemini 1.5`. Also, `Gemini 1.5`'s response structure seems to differ from that of `code-bison-32k`. This PR does three things: 1. Makes the response parser compatible with both models. 2. Passes the code block from the response to the LLM fixer, not the raw response. 3. Adds more instructions to the code-fixing prompt to avoid common mistakes.
google · May 10, 2024 · f1a66c1 · f1a66c1
1 parent 7dbfc82
commit f1a66c1
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 16 deletions.
diff --git a/llm_toolkit/code_fixer.py b/llm_toolkit/code_fixer.py
@@ -278,8 +278,7 @@ def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
             llm_fix_id: int, error_desc: Optional[str], errors: list[str],
             fixer_model_name: str) -> None:
   """Reads and fixes |target_path| in place with LLM based on |error_log|."""
-  with open(target_path) as target_file:
-    raw_code = target_file.read()
+  fuzz_target_source_code = parser.parse_code(target_path)
 
   _, target_ext = os.path.splitext(os.path.basename(target_path))
   response_dir = f'{os.path.splitext(target_path)[0]}-F{llm_fix_id}'
@@ -288,7 +287,7 @@ def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
 
   apply_llm_fix(ai_binary,
                 benchmark,
-                raw_code,
+                fuzz_target_source_code,
                 error_desc,
                 errors,
                 prompt_path,
@@ -328,7 +327,7 @@ def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
 
 def apply_llm_fix(ai_binary: str,
                   benchmark: benchmarklib.Benchmark,
-                  raw_code: str,
+                  fuzz_target_source_code: str,
                   error_desc: Optional[str],
                   errors: list[str],
                   prompt_path: str,
@@ -344,7 +343,8 @@ def apply_llm_fix(ai_binary: str,
   )
 
   builder = prompt_builder.DefaultTemplateBuilder(fixer_model)
-  prompt = builder.build_fixer_prompt(benchmark, raw_code, error_desc, errors)
+  prompt = builder.build_fixer_prompt(benchmark, fuzz_target_source_code,
+                                      error_desc, errors)
   prompt.save(prompt_path)
 
   fixer_model.generate_code(prompt, response_dir)

diff --git a/llm_toolkit/output_parser.py b/llm_toolkit/output_parser.py
@@ -44,24 +44,41 @@ def parse_args() -> argparse.Namespace:
   return args
 
 
+def _parse_code_block_by_marker(lines: list[str], start_marker: str,
+                                end_marker: str) -> list[str]:
+  """Parses code block lines based on markers."""
+  block = []
+  in_block = False
+  contains_api = False
+
+  for line in lines:
+    if not in_block and start_marker in line.lower():
+      in_block = True  # Start a code block.
+      if not contains_api:
+        block = []  # Ignore previous block because it does not contain API.
+    elif in_block and end_marker in line:
+      in_block = False  # Finish a code block.
+      if contains_api:
+        break  # Found fuzz target.
+    elif in_block:
+      block.append(line)
+      contains_api = contains_api or 'LLVMFuzzerTestOneInput' in line
+  return block if block else lines
+
+
 def parse_code(response_path: str) -> str:
   """Parses the expected output from the |response_path|."""
   with open(response_path) as file:
     response = file.read()
   solution = response.split('</solution>')[0]
-  solution = solution.replace('<code>', '').replace('</code>', '')
-
   lines = solution.splitlines()
+  lines = _parse_code_block_by_marker(lines, '```c', '```')
+  lines = _parse_code_block_by_marker(lines, '<code>', '</code>')
 
-  def should_remove(line):
-    line = line.strip()
-    return not line or line.startswith('```')
-
-  # Remove leading empty lines or lines starting with ```.
-  while lines and should_remove(lines[0]):
+  # Remove leading and trailing empty lines.
+  while lines and not lines[0].strip():
     lines.pop(0)
-  # Remove trailing empty lines or lines starting with ```.
-  while lines and should_remove(lines[-1]):
+  while lines and not lines[-1].strip():
     lines.pop()
 
   return '\n'.join(lines)

diff --git a/prompts/template_xml/fixer_priming.txt b/prompts/template_xml/fixer_priming.txt
@@ -2,4 +2,7 @@ Given the following C++ fuzz harness and its build error message, fix the code t
 
 If there is undeclared identifier or unknown type name error, fix it by finding and including the related libraries.
 
-Note that some code may need to be wrapped with <code>extern "C"</code> as their source is C program.
+Note that some code may need to be wrapped with <code>extern "C"</code> as their source is C program.
+
+MUST RETURN THE FULL CODE, INCLUDING UNCHANGED PARTS.
+EXTREMELY IMPORTANT: AVOID USING <code>goto</code>. If you have to write code using <code>goto</code>, you MUST MUST also declare all variables BEFORE the <code>goto</code>. Never introduce new variables after the <code>goto</code>.