Add more access to task args (expose metric parameters as constructor kwargs)

bigcode-project · Nov 1, 2023 · 6e6cec5 · 6e6cec5
1 parent 9987354
commit 6e6cec5
Show file tree

Hide file tree

Showing 9 changed files with 38 additions and 22 deletions.
diff --git a/.gitignore b/.gitignore
@@ -166,3 +166,5 @@ cython_debug/
 # Script outputs
 evaluation*.json
 generations*.json
+
+playground/
diff --git a/bigcode_eval/tasks/apps.py b/bigcode_eval/tasks/apps.py
@@ -36,8 +36,8 @@ def create_all_tasks():
 
 def create_task(level):
     class APPS(GeneralAPPS):
-        def __init__(self):
-            super().__init__(level)
+        def __init__(self, **kwargs):
+            super().__init__(level, **kwargs)
 
     return APPS
 
@@ -50,12 +50,13 @@ class GeneralAPPS(Task):
     DATASET_PATH = "codeparrot/apps"
     DATASET_NAME = None
 
-    def __init__(self, level):
+    def __init__(self, level, k_list=[1, 10, 100]):
         self.DATASET_NAME = level
         super().__init__(
             stop_words=["\nQUESTION", "\n---", "\nANSWER"],
             requires_execution=True,
         )
+        self.k_list = k_list
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -115,7 +116,9 @@ def process_results(self, generations, references):
             list of str containing references (not needed for APPS Task)
         """
         code_metric = load("codeparrot/apps_metric")
+        if level is None:
+            level = self.DATASET_NAME
         results = code_metric.compute(
-            predictions=generations, k_list=[1, 10, 100], level=self.DATASET_NAME
+            predictions=generations, k_list=self.k_list, level=self.DATASET_NAME
         )
         return results
diff --git a/bigcode_eval/tasks/codexglue_code_to_text.py b/bigcode_eval/tasks/codexglue_code_to_text.py
@@ -46,8 +46,8 @@ def create_all_tasks():
 
 def create_task(language):
     class CodeToText(GeneralCodeToText):
-        def __init__(self):
-            super().__init__(language)
+        def __init__(self, **kwargs):
+            super().__init__(language, **kwargs)
 
     return CodeToText
 

diff --git a/bigcode_eval/tasks/codexglue_text_to_text.py b/bigcode_eval/tasks/codexglue_text_to_text.py
@@ -40,8 +40,8 @@ def create_all_tasks():
 
 def create_task(translation_task):
     class CodexglueTextToTextTask(CodexglueTextToText):
-        def __init__(self):
-            super().__init__(translation_task)
+        def __init__(self, **kwargs):
+            super().__init__(translation_task, **kwargs)
 
     return CodexglueTextToTextTask
 
@@ -51,11 +51,13 @@ class CodexglueTextToText(Task):
     DATASET_PATH = "code_x_glue_tt_text_to_text"
     DATASET_NAME = None
 
-    def __init__(self, translation_task):
+    def __init__(self, translation_task, max_order=4, smooth=True):
         self.DATASET_NAME = translation_task
         stop_words = ["\n"]
         requires_execution = False
         super().__init__(stop_words, requires_execution)
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -117,6 +119,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/conala.py b/bigcode_eval/tasks/conala.py
@@ -34,11 +34,13 @@ class Conala(Task):
 
     DATASET_PATH = "neulab/conala"
 
-    def __init__(self):
+    def __init__(self, max_order=4, smooth=True):
         super().__init__(
             stop_words=["\n"],
             requires_execution=False,
         )
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -101,6 +103,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/concode.py b/bigcode_eval/tasks/concode.py
@@ -33,11 +33,13 @@ class Concode(Task):
 
     DATASET_PATH = "code_x_glue_tc_text_to_code"
 
-    def __init__(self):
+    def __init__(self, max_order=4, smooth=True):
         super().__init__(
             stop_words=["\n"],
             requires_execution=False,
         )
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -102,6 +104,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/ds1000.py b/bigcode_eval/tasks/ds1000.py
@@ -35,8 +35,8 @@
 def create_all_tasks():
     def create_task(key, mode):
         class DS1000(GeneralDS1000):
-            def __init__(self):
-                super().__init__(key, mode)
+            def __init__(self, **kwargs):
+                super().__init__(key, mode, **kwargs)
 
         return DS1000
 

diff --git a/bigcode_eval/tasks/gsm.py b/bigcode_eval/tasks/gsm.py
@@ -65,8 +65,8 @@ def create_all_tasks():
 
 def create_task(cls, evaluation_type):
     class Gsm(cls):
-        def __init__(self):
-            super().__init__(evaluation_type)
+        def __init__(self, **kwargs):
+            super().__init__(evaluation_type, **kwargs)
 
     return Gsm
 

diff --git a/bigcode_eval/tasks/humaneval.py b/bigcode_eval/tasks/humaneval.py
@@ -37,8 +37,8 @@ def create_all_tasks():
 
 def create_task(strip_prompt):
     class HumanEval(GeneralHumanEval):
-        def __init__(self):
-            super().__init__(strip_prompt)
+        def __init__(self, **kwargs):
+            super().__init__(strip_prompt, **kwargs)
 
     return HumanEval
 
@@ -50,12 +50,15 @@ class GeneralHumanEval(Task):
 
     DATASET_PATH = "openai_humaneval"
 
-    def __init__(self, strip_prompt):
+    def __init__(self, strip_prompt, k=[1, 10, 100], num_workers=16, timeout=3.0):
         super().__init__(
             stop_words=["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```"],
             requires_execution=True,
         )
         self.strip_prompt = strip_prompt
+        self.k = k
+        self.num_workers = num_workers
+        self.timeout = timeout
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -112,6 +115,8 @@ def process_results(self, generations, references):
         results, _ = compute_code_eval(
             references=references,
             predictions=generations,
-            num_workers=4,
+            k=self.k,
+            num_workers=self.num_workers,
+            timeout=self.timeout,
         )
         return results