diff --git a/bigcode_eval/tasks/mbppplus.py b/bigcode_eval/tasks/mbppplus.py index 22fec8291..c9a1dfacf 100644 --- a/bigcode_eval/tasks/mbppplus.py +++ b/bigcode_eval/tasks/mbppplus.py @@ -48,7 +48,7 @@ def get_prompt(self, doc): # is different from HumanEval(+) which further requires a `check` func def get_reference(self, doc): """Builds the reference solution for the doc (sample from the test dataset).""" - use_mbpp_tests = os.getenv("MBBPPLUS_USE_MBPP_TESTS", "0") + use_mbpp_tests = os.getenv("MBPPPLUS_USE_MBPP_TESTS", "0") if use_mbpp_tests == "1": return "\n".join(doc["test_list"]) return "\n" + doc["test"] diff --git a/docs/README.md b/docs/README.md index 903c6a122..f3413c354 100644 --- a/docs/README.md +++ b/docs/README.md @@ -227,10 +227,10 @@ accelerate launch main.py \ --allow_code_execution ``` -By setting `MBBPPLUS_USE_MBPP_TESTS=1` when running MBPP+, one can run the 399 MBPP+ tasks (a subset of the 500 MBPP evaluation tasks) with the original MBPP base tests: +By setting `MBPPPLUS_USE_MBPP_TESTS=1` when running MBPP+, one can run the 399 MBPP+ tasks (a subset of the 500 MBPP evaluation tasks) with the original MBPP base tests: ```bash -MBBPPLUS_USE_MBPP_TESTS=1 accelerate launch main.py \ +MBPPPLUS_USE_MBPP_TESTS=1 accelerate launch main.py \ --tasks mbppplus \ --allow_code_execution \ --load_generations_path generations_mbppplus.json \