Skip to content

Commit

Permalink
Add extern only when a c project has c++ fuzz targets (#393)
Browse files Browse the repository at this point in the history
Almost all regressions in
#382 (comment)
are due to C projects using C++ fuzz targets.
This PR resolves that.
  • Loading branch information
DonggeLiu authored Jun 28, 2024
1 parent cae20de commit 46b8099
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 15 deletions.
6 changes: 3 additions & 3 deletions data_prep/project_context/context_introspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ def __init__(self, benchmark: benchmarklib.Benchmark):
self._benchmark = benchmark

def _get_embeddable_declaration(self) -> str:
"""Retrieves declaration by language."""
"""Retrieves declaration by language. Attach extern C if needed."""
lang = self._benchmark.language.lower()
sig = self._benchmark.function_signature + ';'

if lang == 'c':
return sig
if self._benchmark.needs_extern:
return 'extern "C" ' + sig

if lang != 'c++':
logging.warning('Unsupported decl - Lang: %s Project: %s', lang,
Expand Down
12 changes: 8 additions & 4 deletions data_prep/project_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,21 @@ def generate_data(project_name: str,

def _remove_header_comments(code: str) -> str:
"""Removes comments and empty lines in the code."""
# Remove single-line comments.
single_line_comment = re.compile(r'//.*?\n')
code = re.sub(single_line_comment, '\n', code)

# Remove multi-line comments.
multi_line_comment = re.compile(r'/\*.*?\*/', re.DOTALL)
code = re.sub(multi_line_comment, '', code)

# Remove single-line comments.
single_line_comment = re.compile(r'(?:^|\s+)//.*\n')
code = re.sub(single_line_comment, '\n', code)

# Remove empty lines.
empty_line = re.compile(r'\n+\s*\n+')
code = re.sub(empty_line, '\n', code)

# Trim all newlines and spaces.
code.lstrip('\n ')
code.rstrip('\n ')
return code


Expand Down
6 changes: 6 additions & 0 deletions experiment/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ def file_type(self) -> FileType:
"""Returns the file type of the benchmark."""
return get_file_type(self.target_path)

@property
def needs_extern(self) -> bool:
  """Tells whether `extern` is needed for this benchmark.

  That is the case when the fuzz target file type is C++ while the project
  language is C. Both comparisons are case-insensitive.
  """
  target_is_cpp = self.file_type.value.lower() == 'c++'
  if not target_is_cpp:
    # Same short-circuit as an `and`: the language is not consulted.
    return False
  return self.language.lower() == 'c'


def get_file_type(file_path: str) -> FileType:
"""Returns the file type based on the extension of |file_name|."""
Expand Down
27 changes: 20 additions & 7 deletions llm_toolkit/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def build(self,
target_file_type: FileType,
example_pair: list[list[str]],
project_example_content: Optional[list[list[str]]] = None,
project_context_content: Optional[dict] = None) -> prompts.Prompt:
project_context_content: Optional[dict] = None,
needs_extern: bool = False) -> prompts.Prompt:
"""Builds a prompt."""

@abstractmethod
Expand Down Expand Up @@ -141,10 +142,15 @@ def __init__(self,
self.triager_problem_template_file = self._find_template(
template_dir, 'triager_problem.txt')

def _format_priming(self, target_file_type: FileType) -> str:
def _format_priming(self, target_file_type: FileType,
needs_extern: bool) -> str:
"""Formats a priming based on the prompt template."""
priming = self._get_template(self.priming_template_file)
priming = priming.replace('{LANGUAGE}', target_file_type.value)
if needs_extern:
priming += ('\nNote that some code may need to be wrapped with '
'<code>extern "C"</code> because the project under test is '
'written in C but the fuzz target is in C++.\n')
if target_file_type == FileType.CPP:
type_specific_priming = self._get_template(self.cpp_priming_filler_file)
else:
Expand Down Expand Up @@ -274,9 +280,10 @@ def build(self,
target_file_type: FileType,
example_pair: list[list[str]],
project_example_content: Optional[list[list[str]]] = None,
project_context_content: Optional[dict] = None) -> prompts.Prompt:
project_context_content: Optional[dict] = None,
needs_extern: bool = False) -> prompts.Prompt:
"""Constructs a prompt using the templates in |self| and saves it."""
priming = self._format_priming(target_file_type)
priming = self._format_priming(target_file_type, needs_extern)
final_problem = self.format_problem(function_signature)
final_problem += (f'You MUST call <code>\n'
f'{function_signature}\n'
Expand All @@ -303,7 +310,11 @@ def _format_fixer_priming(self, benchmark: Benchmark) -> Tuple[str, int]:
"""Formats a priming for code fixer based on the template."""
with open(self.fixer_priming_template_file) as f:
priming = f.read().strip() + '\n'
priming = priming.replace('{LANGUAGE}', benchmark.language)
priming = priming.replace('{LANGUAGE}', benchmark.file_type.value)
if benchmark.needs_extern:
priming += ('\nNote that some code may need to be wrapped with '
'<code>extern "C"</code> because the project under test is '
'written in C but the fuzz target is in C++.\n')
priming_prompt = self._prompt.create_prompt_piece(priming, 'system')
priming_weight = self._model.estimate_token_num(priming_prompt)
# NOTE: We need to return the priming _as text_ and the weight. Otherwise,
Expand Down Expand Up @@ -733,7 +744,8 @@ def build(self,
target_file_type: FileType,
example_pair: list[list[str]],
project_example_content: Optional[list[list[str]]] = None,
project_context_content: Optional[dict] = None) -> prompts.Prompt:
project_context_content: Optional[dict] = None,
needs_extern: bool = False) -> prompts.Prompt:
"""Constructs a prompt using the templates in |self| and saves it.
Ignore target_file_type, project_example_content
and project_context_content parameters.
Expand Down Expand Up @@ -817,7 +829,8 @@ def build(self,
target_file_type: FileType,
example_pair: list[list[str]],
project_example_content: Optional[list[list[str]]] = None,
project_context_content: Optional[dict] = None) -> prompts.Prompt:
project_context_content: Optional[dict] = None,
needs_extern: bool = False) -> prompts.Prompt:
"""Constructs a prompt using the templates in |self| and saves it."""

with open(self.priming_template_file, 'r') as f:
Expand Down
3 changes: 2 additions & 1 deletion run_one_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ def run(benchmark: Benchmark,
benchmark.file_type,
example_pair,
project_examples,
project_context_content=context_info)
project_context_content=context_info,
needs_extern=benchmark.needs_extern)
prompt.save(work_dirs.prompt)

if dry_run:
Expand Down

0 comments on commit 46b8099

Please sign in to comment.