From 6e6cec501b2a50cd95c88070821b85aef0e28793 Mon Sep 17 00:00:00 2001
From: Thomas Wolf
Date: Wed, 1 Nov 2023 13:19:03 +0000
Subject: [PATCH] add more access to args

---
 .gitignore                                   |  2 ++
 bigcode_eval/tasks/apps.py                   |  9 +++++----
 bigcode_eval/tasks/codexglue_code_to_text.py |  4 ++--
 bigcode_eval/tasks/codexglue_text_to_text.py | 10 ++++++----
 bigcode_eval/tasks/conala.py                 |  6 ++++--
 bigcode_eval/tasks/concode.py                |  6 ++++--
 bigcode_eval/tasks/ds1000.py                 |  4 ++--
 bigcode_eval/tasks/gsm.py                    |  4 ++--
 bigcode_eval/tasks/humaneval.py              | 13 +++++++++----
 9 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3f271e61b..a9b74ccf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,3 +166,5 @@ cython_debug/
 # Script outputs
 evaluation*.json
 generations*.json
+
+playground/
diff --git a/bigcode_eval/tasks/apps.py b/bigcode_eval/tasks/apps.py
index 1b1c7a207..9be6d242b 100644
--- a/bigcode_eval/tasks/apps.py
+++ b/bigcode_eval/tasks/apps.py
@@ -36,8 +36,8 @@ def create_all_tasks():
 
 def create_task(level):
     class APPS(GeneralAPPS):
-        def __init__(self):
-            super().__init__(level)
+        def __init__(self, **kwargs):
+            super().__init__(level, **kwargs)
 
     return APPS
 
@@ -50,12 +50,13 @@ class GeneralAPPS(Task):
     DATASET_PATH = "codeparrot/apps"
     DATASET_NAME = None
 
-    def __init__(self, level):
+    def __init__(self, level, k_list=[1, 10, 100]):
         self.DATASET_NAME = level
         super().__init__(
            stop_words=["\nQUESTION", "\n---", "\nANSWER"],
            requires_execution=True,
        )
+        self.k_list = k_list
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -115,7 +116,7 @@ def process_results(self, generations, references):
         list of str containing refrences (not needed for APPS Task)
         """
         code_metric = load("codeparrot/apps_metric")
         results = code_metric.compute(
-            predictions=generations, k_list=[1, 10, 100], level=self.DATASET_NAME
+            predictions=generations, k_list=self.k_list, level=self.DATASET_NAME
         )
         return results
diff --git a/bigcode_eval/tasks/codexglue_code_to_text.py b/bigcode_eval/tasks/codexglue_code_to_text.py
index e5182c088..e39af1a2b 100644
--- a/bigcode_eval/tasks/codexglue_code_to_text.py
+++ b/bigcode_eval/tasks/codexglue_code_to_text.py
@@ -46,8 +46,8 @@ def create_all_tasks():
 
 def create_task(language):
     class CodeToText(GeneralCodeToText):
-        def __init__(self):
-            super().__init__(language)
+        def __init__(self, **kwargs):
+            super().__init__(language, **kwargs)
 
     return CodeToText
 
diff --git a/bigcode_eval/tasks/codexglue_text_to_text.py b/bigcode_eval/tasks/codexglue_text_to_text.py
index ab3eabf42..9b0731ffa 100644
--- a/bigcode_eval/tasks/codexglue_text_to_text.py
+++ b/bigcode_eval/tasks/codexglue_text_to_text.py
@@ -40,8 +40,8 @@ def create_all_tasks():
 
 def create_task(translation_task):
     class CodexglueTextToTextTask(CodexglueTextToText):
-        def __init__(self):
-            super().__init__(translation_task)
+        def __init__(self, **kwargs):
+            super().__init__(translation_task, **kwargs)
 
     return CodexglueTextToTextTask
 
@@ -51,11 +51,13 @@ class CodexglueTextToText(Task):
     DATASET_PATH = "code_x_glue_tt_text_to_text"
     DATASET_NAME = None
 
-    def __init__(self, translation_task):
+    def __init__(self, translation_task, max_order=4, smooth=True):
         self.DATASET_NAME = translation_task
         stop_words = ["\n"]
         requires_execution = False
         super().__init__(stop_words, requires_execution)
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -117,6 +119,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/conala.py b/bigcode_eval/tasks/conala.py
index 80387db9a..5c8ab1ad9 100644
--- a/bigcode_eval/tasks/conala.py
+++ b/bigcode_eval/tasks/conala.py
@@ -34,11 +34,13 @@ class Conala(Task):
 
     DATASET_PATH = "neulab/conala"
 
-    def __init__(self):
+    def __init__(self, max_order=4, smooth=True):
         super().__init__(
             stop_words=["\n"],
             requires_execution=False,
         )
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -101,6 +103,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/concode.py b/bigcode_eval/tasks/concode.py
index 7be43497c..c772d2d92 100644
--- a/bigcode_eval/tasks/concode.py
+++ b/bigcode_eval/tasks/concode.py
@@ -33,11 +33,13 @@ class Concode(Task):
 
     DATASET_PATH = "code_x_glue_tc_text_to_code"
 
-    def __init__(self):
+    def __init__(self, max_order=4, smooth=True):
         super().__init__(
             stop_words=["\n"],
             requires_execution=False,
         )
+        self.max_order = max_order
+        self.smooth = smooth
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -102,6 +104,6 @@ def process_results(self, generations, references):
         bleu = load("bleu")
         gens = [gen[0] for gen in generations]
         results = bleu.compute(
-            references=references, predictions=gens, max_order=4, smooth=True
+            references=references, predictions=gens, max_order=self.max_order, smooth=self.smooth
         )
         return results
diff --git a/bigcode_eval/tasks/ds1000.py b/bigcode_eval/tasks/ds1000.py
index 6bc1c6af7..b15258d45 100644
--- a/bigcode_eval/tasks/ds1000.py
+++ b/bigcode_eval/tasks/ds1000.py
@@ -35,8 +35,8 @@ def create_all_tasks():
 
 def create_task(key, mode):
     class DS1000(GeneralDS1000):
-        def __init__(self):
-            super().__init__(key, mode)
+        def __init__(self, **kwargs):
+            super().__init__(key, mode, **kwargs)
 
     return DS1000
 
diff --git a/bigcode_eval/tasks/gsm.py b/bigcode_eval/tasks/gsm.py
index 478c2080f..13e557786 100644
--- a/bigcode_eval/tasks/gsm.py
+++ b/bigcode_eval/tasks/gsm.py
@@ -65,8 +65,8 @@ def create_all_tasks():
 
 def create_task(cls, evaluation_type):
     class Gsm(cls):
-        def __init__(self):
-            super().__init__(evaluation_type)
+        def __init__(self, **kwargs):
+            super().__init__(evaluation_type, **kwargs)
 
     return Gsm
 
diff --git a/bigcode_eval/tasks/humaneval.py b/bigcode_eval/tasks/humaneval.py
index 22612089a..786ec0dd4 100644
--- a/bigcode_eval/tasks/humaneval.py
+++ b/bigcode_eval/tasks/humaneval.py
@@ -37,8 +37,8 @@ def create_all_tasks():
 
 def create_task(strip_prompt):
     class HumanEval(GeneralHumanEval):
-        def __init__(self):
-            super().__init__(strip_prompt)
+        def __init__(self, **kwargs):
+            super().__init__(strip_prompt, **kwargs)
 
     return HumanEval
 
@@ -50,12 +50,15 @@ class GeneralHumanEval(Task):
 
     DATASET_PATH = "openai_humaneval"
 
-    def __init__(self, strip_prompt):
+    def __init__(self, strip_prompt, k=[1, 10, 100], num_workers=16, timeout=3.0):
         super().__init__(
             stop_words=["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```"],
             requires_execution=True,
         )
         self.strip_prompt = strip_prompt
+        self.k = k
+        self.num_workers = num_workers
+        self.timeout = timeout
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -112,6 +115,8 @@ def process_results(self, generations, references):
         results, _ = compute_code_eval(
             references=references,
             predictions=generations,
-            num_workers=4,
+            k=self.k,
+            num_workers=self.num_workers,
+            timeout=self.timeout,
         )
         return results
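
Usage sketch (reviewer note, not part of the patch): the factory functions, class names and keyword
arguments below come from the hunks above; the concrete values and the direct construction of the
task objects are only illustrative. With these changes the evaluation parameters can be overridden
when a task is instantiated, for example:

    # Illustrative only; assumes bigcode-evaluation-harness is installed. Note that
    # constructing a task may also load its dataset via the base Task class.
    from bigcode_eval.tasks.humaneval import create_task
    from bigcode_eval.tasks.conala import Conala

    # HumanEval: the pass@k list, number of execution workers and per-problem timeout
    # are now constructor arguments instead of hard-coded values in process_results.
    HumanEval = create_task(strip_prompt=True)
    humaneval_task = HumanEval(k=[1, 10], num_workers=8, timeout=10.0)

    # CoNaLa: the BLEU max_order/smooth settings used in process_results can be
    # overridden the same way.
    conala_task = Conala(max_order=2, smooth=False)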