Skip to content

Commit

Permalink
Add context to fixer message (#397)
Browse files Browse the repository at this point in the history
Some code and its fixes failed because the LLM does not know the exact
definitions of the types involved, e.g.:
```
/src/fuzz_utils.c:55:8: error: no member named 'conn_socket' in 'struct conn_rec'
   55 |     c->conn_socket = sock; // conn_socket is renamed to client_socket
      |     ~  ^
```

This PR adds context of these types to the fixer message when this error
shows up.
  • Loading branch information
DonggeLiu authored Jun 30, 2024
1 parent 46b8099 commit 73c949f
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 18 deletions.
58 changes: 47 additions & 11 deletions data_prep/project_context/context_introspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from data_prep import introspector
from experiment import benchmark as benchmarklib

# Keywords that _clean_type removes from the tokens of a type name; also the
# set of info['type'] values for which get_type_def queues a nested type name.
COMPLEX_TYPES = ['const', 'enum', 'struct', 'union', 'volatile']


class ContextRetriever:
"""Class to retrieve context from introspector for
Expand Down Expand Up @@ -116,17 +118,9 @@ def _clean_type(self, type_name: str) -> str:
if '' in type_tokens:
type_tokens.remove('')

if 'struct' in type_tokens:
type_tokens.remove('struct')

if 'enum' in type_tokens:
type_tokens.remove('enum')

if 'const' in type_tokens:
type_tokens.remove('const')

if 'volatile' in type_tokens:
type_tokens.remove('volatile')
for complex_type in COMPLEX_TYPES:
if complex_type in type_tokens:
type_tokens.remove(complex_type)

# If there is more than a single token
# we probably do not care about querying for the type (?)
Expand Down Expand Up @@ -181,3 +175,45 @@ def get_context_info(self) -> dict:
logging.debug('Context: %s', context_info)

return context_info

def _concat_info_lines(self, info: dict) -> str:
  """Returns the source text covering |info| and all of its elements.

  The requested range starts at the smallest source line reported for |info|
  or any entry of info['elements'], and its end argument is the largest such
  line plus one.
  """
  source_file = self._get_source_file(info)
  line_numbers = [self._get_source_line(info)]
  line_numbers.extend(
      self._get_source_line(member) for member in info.get('elements', []))
  first_line = min(line_numbers)
  last_line = max(line_numbers)

  # Pass last_line + 1 so the line following the last element is requested too.
  return introspector.query_introspector_source_code(
      self._benchmark.project, source_file, first_line, last_line + 1)

def get_type_def(self, type_name: str) -> str:
  """Retrieves the source code definitions for the given |type_name|.

  Starting from the cleaned |type_name|, queries the introspector for type
  info, collects the source of every returned entry, and queues the 'name'
  of each entry whose 'type' is in COMPLEX_TYPES for a later query.

  Args:
    type_name: The type name to look up; it is passed through _clean_type
      before the first query.

  Returns:
    The collected definitions, each followed by a newline, or an empty
    string when the introspector returns no info.
  """
  type_names = [self._clean_type(type_name)]
  # Every name ever queued. Recording names at enqueue time (rather than
  # after processing) guarantees a type is queried and emitted at most once,
  # even when several entries reference it or when it has no info.
  queued_types = set(type_names)
  type_defs = []

  while type_names:
    # Breadth-first is more suitable for prompting.
    current_type = type_names.pop(0)
    info_list = introspector.query_introspector_type_info(
        self._benchmark.project, current_type)
    if not info_list:
      logging.warning('Could not find type info for project: %s type: %s',
                      self._benchmark.project, current_type)
      continue

    for info in info_list:
      type_defs.append(self._concat_info_lines(info) + '\n')

      # Retrieve nested unseen types.
      new_type_type = info.get('type')
      new_type_name = info.get('name')
      if (new_type_type and new_type_type in COMPLEX_TYPES and
          new_type_name and new_type_name not in queued_types):
        queued_types.add(new_type_name)
        type_names.append(new_type_name)

  return ''.join(type_defs)
2 changes: 1 addition & 1 deletion helper/result_string_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def main():
search_lines = search_string.split('\n')

# Iterates through all output-*/
for output_dir in os.listdir(result_dir):
for output_dir in sorted(os.listdir(result_dir)):
if not os.path.isdir(os.path.join(result_dir, output_dir)):
continue

Expand Down
30 changes: 29 additions & 1 deletion llm_toolkit/code_fixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
import sys
from typing import Callable, Optional

from data_prep.project_context import context_introspector
from experiment import benchmark as benchmarklib
from llm_toolkit import models
from llm_toolkit import output_parser as parser
from llm_toolkit import prompt_builder

# NOTE(review): not referenced in the visible part of this file; presumably
# the number of error lines kept for the fixer prompt — confirm.
ERROR_LINES = 20
# Matches a compiler "no member named '<member>' in '<type>'" error. Group 1
# captures the text after the opening quote of <type> up to the first "'" or
# ":" character.
NO_MEMBER_ERROR_REGEX = r"error: no member named '.*' in '([^':]*):?.*'"


def parse_args():
Expand Down Expand Up @@ -406,13 +408,39 @@ def apply_llm_fix(ai_binary: str,
)

builder = prompt_builder.DefaultTemplateBuilder(fixer_model)

context = _collect_context(benchmark, errors)
prompt = builder.build_fixer_prompt(benchmark, fuzz_target_source_code,
error_desc, errors)
error_desc, errors, context)
prompt.save(prompt_path)

fixer_model.generate_code(prompt, response_dir)


def _collect_context(benchmark: benchmarklib.Benchmark,
                     errors: list[str]) -> str:
  """Collects the useful context to fix the errors.

  Args:
    benchmark: The benchmark whose project is queried for context.
    errors: The build error messages to collect context for.

  Returns:
    The context snippets gathered for |errors|, concatenated in order of
    first appearance, or an empty string when there are no errors or no
    snippet was found.
  """
  if not errors:
    return ''

  # Several errors often point at the same type. dict keys keep insertion
  # order and are unique, so each distinct snippet is included only once
  # instead of being repeated in the prompt.
  snippets = dict.fromkeys(
      _collect_context_no_member(benchmark, error) for error in errors)
  return ''.join(snippets)


def _collect_context_no_member(benchmark: benchmarklib.Benchmark,
                               error: str) -> str:
  """Returns type definitions relevant to a 'no member named' error.

  Yields an empty string when |error| does not match NO_MEMBER_ERROR_REGEX;
  otherwise looks up the definition of the type captured by the regex.
  """
  if (match := re.search(NO_MEMBER_ERROR_REGEX, error)) is None:
    return ''
  type_in_error = match.group(1)
  retriever = context_introspector.ContextRetriever(benchmark)
  return retriever.get_type_def(type_in_error)


def main():
args = parse_args()
fix_all_targets(args.target_dir, args.project)
Expand Down
20 changes: 16 additions & 4 deletions llm_toolkit/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ def __init__(self,
template_dir, 'fixer_priming.txt')
self.fixer_problem_template_file = self._find_template(
template_dir, 'fixer_problem.txt')
self.fixer_context_template_file = self._find_template(
template_dir, 'fixer_context.txt')
self.triager_priming_template_file = self._find_template(
template_dir, 'triager_priming.txt')
self.triager_problem_template_file = self._find_template(
Expand Down Expand Up @@ -295,13 +297,16 @@ def build(self,
project_example_content)
return self._prompt

def build_fixer_prompt(self, benchmark: Benchmark, raw_code: str,
def build_fixer_prompt(self,
benchmark: Benchmark,
raw_code: str,
error_desc: Optional[str],
errors: list[str]) -> prompts.Prompt:
errors: list[str],
context: str = '') -> prompts.Prompt:
"""Prepares the code-fixing prompt."""
priming, priming_weight = self._format_fixer_priming(benchmark)
problem = self._format_fixer_problem(raw_code, error_desc, errors,
priming_weight)
priming_weight, context)

self._prepare_prompt(priming, problem)
return self._prompt
Expand All @@ -322,7 +327,8 @@ def _format_fixer_priming(self, benchmark: Benchmark) -> Tuple[str, int]:
return priming, priming_weight

def _format_fixer_problem(self, raw_code: str, error_desc: Optional[str],
errors: list[str], priming_weight: int) -> str:
errors: list[str], priming_weight: int,
context: str) -> str:
"""Formats a problem for code fixer based on the template."""
with open(self.fixer_problem_template_file) as f:
problem = f.read().strip()
Expand All @@ -334,6 +340,12 @@ def _format_fixer_problem(self, raw_code: str, error_desc: Optional[str],
error_summary = BUILD_ERROR_SUMMARY
problem = problem.replace('{ERROR_SUMMARY}', error_summary)

if context:
with open(self.fixer_context_template_file) as f:
context_template = f.read().strip()
context = context_template.replace('{CONTEXT_SOURCE_CODE}', context)
problem = problem.replace('{CONTEXT}', context)

problem_prompt = self._prompt.create_prompt_piece(problem, 'user')
template_piece = self._prompt.create_prompt_piece('{ERROR_MESSAGES}',
'user')
Expand Down
4 changes: 4 additions & 0 deletions prompts/template_xml/fixer_context.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Below is the project source code to assist you in fixing the error.
<code>
{CONTEXT_SOURCE_CODE}
</code>
5 changes: 4 additions & 1 deletion prompts/template_xml/fixer_problem.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@ Below is the code needs to be built:
{CODE_TO_BE_FIXED}
</code>

Below is the error to fix:
{ERROR_SUMMARY}
<error>
{ERROR_MESSAGES}
</error>

{CONTEXT}

Fix code:
1. Consider possible solutions for the issues listed above.
2. Choose a solution that maximizes the fuzzing result, i.e., one that utilizes the function under test and feeds it non-null input.
3. Apply the solutions to the original code.
It's important to show the complete code, not only the fixed line.
<solution>
<solution>

0 comments on commit 73c949f

Please sign in to comment.