From 6e6cec501b2a50cd95c88070821b85aef0e28793 Mon Sep 17 00:00:00 2001
From: Thomas Wolf
Date: Wed, 1 Nov 2023 13:19:03 +0000
Subject: [PATCH] add more access to args

---
 .gitignore                                   |  2 ++
 bigcode_eval/tasks/apps.py                   |  9 +++++----
 bigcode_eval/tasks/codexglue_code_to_text.py |  4 ++--
 bigcode_eval/tasks/codexglue_text_to_text.py | 10 ++++++----
 bigcode_eval/tasks/conala.py                 |  6 ++++--
 bigcode_eval/tasks/concode.py                |  6 ++++--
 bigcode_eval/tasks/ds1000.py                 |  4 ++--
 bigcode_eval/tasks/gsm.py                    |  4 ++--
 bigcode_eval/tasks/humaneval.py              | 13 +++++++++----
 9 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3f271e61b..a9b74ccf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,3 +166,5 @@ cython_debug/
 # Script outputs
 evaluation*.json
 generations*.json
+
+playground/
diff --git a/bigcode_eval/tasks/apps.py b/bigcode_eval/tasks/apps.py
index 1b1c7a207..9be6d242b 100644
--- a/bigcode_eval/tasks/apps.py
+++ b/bigcode_eval/tasks/apps.py
@@ -36,8 +36,8 @@ def create_all_tasks():
 
 def create_task(level):
     class APPS(GeneralAPPS):
-        def __init__(self):
-            super().__init__(level)
+        def __init__(self, **kwargs):
+            super().__init__(level, **kwargs)
 
     return APPS
 
@@ -50,12 +50,13 @@ class GeneralAPPS(Task):
     DATASET_PATH = "codeparrot/apps"
     DATASET_NAME = None
 
-    def __init__(self, level):
+    def __init__(self, level, k_list=[1, 10, 100]):
         self.DATASET_NAME = level
         super().__init__(
            stop_words=["\nQUESTION", "\n---", "\nANSWER"],
            requires_execution=True,
        )
+        self.k_list = k_list
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -115,7 +116,7 @@ def process_results(self, generations, references):
         list of str containing refrences (not needed for APPS Task)
         """
         code_metric = load("codeparrot/apps_metric")
         results = code_metric.compute(
-            predictions=generations, k_list=[1, 10, 100], level=self.DATASET_NAME
+            predictions=generations, k_list=self.k_list, level=self.DATASET_NAME
         )
         return results
diff --git a/bigcode_eval/tasks/codexglue_code_to_text.py b/bigcode_eval/tasks/codexglue_code_to_text.py
index e5182c088..e39af1a2b 100644
--- a/bigcode_eval/tasks/codexglue_code_to_text.py
+++ b/bigcode_eval/tasks/codexglue_code_to_text.py
@@ -46,8 +46,8 @@ def create_all_tasks():
 
 def create_task(language):
     class CodeToText(GeneralCodeToText):
-        def __init__(self):
-            super().__init__(language)
+        def __init__(self, **kwargs):
+            super().__init__(language, **kwargs)
 
     return CodeToText
 
diff --git a/bigcode_eval/tasks/codexglue_text_to_text.py b/bigcode_eval/tasks/codexglue_text_to_text.py
index ab3eabf42..9b0731ffa 100644
--- a/bigcode_eval/tasks/codexglue_text_to_text.py
+++ b/bigcode_eval/tasks/codexglue_text_to_text.py
@@ -40,8 +40,8 @@ def create_all_tasks():
 
 def create_task(translation_task):
     class CodexglueTextToTextTask(CodexglueTextToText):
-        def __init__(self):
-            super().__init__(translation_task)
+        def __init__(self, **kwargs):
+            super().__init__(translation_task, **kwargs)
 
     return CodexglueTextToTextTask
 
@@ -51,11 +51,13 @@ class CodexglueTextToText(Task):
     DATASET_PATH = "code_x_glue_tt_text_to_text"
     DATASET_NAME = None
 
-    def __init__(self, translation_task):
+    def __init__(self, translation_task, max_order=4, smooth=True):
         self.DATASET_NAME = translation_task
         stop_words = ["\n"]
         requires_execution = False
         super().__init__(stop_words, requires_execution)
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -117,6 +119,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/conala.py b/bigcode_eval/tasks/conala.py
index 80387db9a..5c8ab1ad9 100644
--- a/bigcode_eval/tasks/conala.py
+++ b/bigcode_eval/tasks/conala.py
@@ -34,11 +34,13 @@ class Conala(Task):
 
     DATASET_PATH = "neulab/conala"
 
-    def __init__(self):
+    def __init__(self, max_order=4, smooth=True):
         super().__init__(
             stop_words=["\n"],
             requires_execution=False,
         )
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -101,6 +103,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/concode.py b/bigcode_eval/tasks/concode.py
index 7be43497c..c772d2d92 100644
--- a/bigcode_eval/tasks/concode.py
+++ b/bigcode_eval/tasks/concode.py
@@ -33,11 +33,13 @@ class Concode(Task):
 
     DATASET_PATH = "code_x_glue_tc_text_to_code"
 
-    def __init__(self):
+    def __init__(self, max_order=4, smooth=True):
         super().__init__(
             stop_words=["\n"],
             requires_execution=False,
         )
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -102,6 +104,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/ds1000.py b/bigcode_eval/tasks/ds1000.py
index 6bc1c6af7..b15258d45 100644
--- a/bigcode_eval/tasks/ds1000.py
+++ b/bigcode_eval/tasks/ds1000.py
@@ -35,8 +35,8 @@ def create_all_tasks():
 
 def create_task(key, mode):
     class DS1000(GeneralDS1000):
-        def __init__(self):
-            super().__init__(key, mode)
+        def __init__(self, **kwargs):
+            super().__init__(key, mode, **kwargs)
 
     return DS1000
 
diff --git a/bigcode_eval/tasks/gsm.py b/bigcode_eval/tasks/gsm.py
index 478c2080f..13e557786 100644
--- a/bigcode_eval/tasks/gsm.py
+++ b/bigcode_eval/tasks/gsm.py
@@ -65,8 +65,8 @@ def create_all_tasks():
 
 def create_task(cls, evaluation_type):
     class Gsm(cls):
-        def __init__(self):
-            super().__init__(evaluation_type)
+        def __init__(self, **kwargs):
+            super().__init__(evaluation_type, **kwargs)
 
     return Gsm
 
diff --git a/bigcode_eval/tasks/humaneval.py b/bigcode_eval/tasks/humaneval.py
index 22612089a..786ec0dd4 100644
--- a/bigcode_eval/tasks/humaneval.py
+++ b/bigcode_eval/tasks/humaneval.py
@@ -37,8 +37,8 @@ def create_all_tasks():
 
 def create_task(strip_prompt):
     class HumanEval(GeneralHumanEval):
-        def __init__(self):
-            super().__init__(strip_prompt)
+        def __init__(self, **kwargs):
+            super().__init__(strip_prompt, **kwargs)
 
     return HumanEval
 
@@ -50,12 +50,15 @@ class GeneralHumanEval(Task):
 
     DATASET_PATH = "openai_humaneval"
 
-    def __init__(self, strip_prompt):
+    def __init__(self, strip_prompt, k=[1, 10, 100], num_workers=16, timeout=3.0):
         super().__init__(
             stop_words=["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```"],
             requires_execution=True,
         )
         self.strip_prompt = strip_prompt
+        self.k = k
+        self.num_workers = num_workers
+        self.timeout = timeout
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -112,6 +115,8 @@ def process_results(self, generations, references):
         results, _ = compute_code_eval(
             references=references,
             predictions=generations,
-            num_workers=4,
+            k=self.k,
+            num_workers=self.num_workers,
+            timeout=self.timeout,
         )
         return results
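
Usage sketch (reviewer note, not part of the patch): the factory functions, class names and keyword
arguments below come from the hunks above; the concrete values and the direct construction of the
task objects are only illustrative. With these changes the evaluation parameters can be overridden
when a task is instantiated, for example:

    # Illustrative only; assumes bigcode-evaluation-harness is installed. Note that
    # constructing a task may also load its dataset via the base Task class.
    from bigcode_eval.tasks.humaneval import create_task
    from bigcode_eval.tasks.conala import Conala

    # HumanEval: the pass@k list, number of execution workers and per-problem timeout
    # are now constructor arguments instead of hard-coded values in process_results.
    HumanEval = create_task(strip_prompt=True)
    humaneval_task = HumanEval(k=[1, 10], num_workers=8, timeout=10.0)

    # CoNaLa: the BLEU max_order/smooth settings used in process_results can be
    # overridden the same way.
    conala_task = Conala(max_order=2, smooth=False)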