Skip to content

Commit

Permalink
rename lm_eval => bigcode_eval
Browse files Browse the repository at this point in the history
  • Loading branch information
thomwolf committed Oct 24, 2023
1 parent a8fb63d commit 92c81a0
Show file tree
Hide file tree
Showing 73 changed files with 35 additions and 35 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions lm_eval/evaluator.py → bigcode_eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import os
import warnings

from lm_eval import tasks
from lm_eval.generation import parallel_generations
from bigcode_eval import tasks
from bigcode_eval.generation import parallel_generations

_WARNING = """
################################################################################
Expand Down
2 changes: 1 addition & 1 deletion lm_eval/generation.py → bigcode_eval/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from torch.utils.data.dataloader import DataLoader
from transformers import StoppingCriteria, StoppingCriteriaList

from lm_eval.utils import TokenizedDataset, complete_code
from bigcode_eval.utils import TokenizedDataset, complete_code


class EndOfFunctionCriteria(StoppingCriteria):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion lm_eval/tasks/apps.py → bigcode_eval/tasks/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{hendrycksapps2021,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re
import typing

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{husain2019codesearchnet,
Expand Down Expand Up @@ -63,7 +63,7 @@ def compute_codexglue_code_to_text_bleu(
Taken from: https://github.com/dpfried/lm-evaluation-harness/blob/5d9a6aaaaa929bcad95bb73d85e78fe75eb64b4e/lm_eval/tasks/codexglue_summarization.py#L102
"""
from lm_eval.tasks.custom_metrics import codexglue_code_to_text_bleu
from bigcode_eval.tasks.custom_metrics import codexglue_code_to_text_bleu

predicted_map = {}
gold_map = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{CodeXGLUE,
Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/conala.py → bigcode_eval/tasks/conala.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@inproceedings{yin2018learning,
Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/concode.py → bigcode_eval/tasks/concode.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{iyer2018mapping,
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed

from lm_eval.tasks.custom_metrics.pal_metric.python_executor import run_program
from bigcode_eval.tasks.custom_metrics.pal_metric.python_executor import run_program

# adapted from https://github.com/huggingface/evaluate/blob/main/metrics/code_eval/code_eval.py

Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/ds1000.py → bigcode_eval/tasks/ds1000.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import requests
import tqdm

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{Lai2022DS1000,
Expand Down
4 changes: 2 additions & 2 deletions lm_eval/tasks/gsm.py → bigcode_eval/tasks/gsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

from evaluate import load

from lm_eval.base import Task
from lm_eval.tasks.custom_metrics.pal_metric.pal_code_exec import compute
from bigcode_eval.base import Task
from bigcode_eval.tasks.custom_metrics.pal_metric.pal_code_exec import compute

_CITATION = """
@article{gao2022pal,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@misc{chen2021evaluating,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re

from evaluate import load
from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{muennighoff2023octopack,
Expand Down Expand Up @@ -357,7 +357,7 @@ def process_results(self, generations, references):
fixed_code = ""
gen[i] = fixed_code
elif self.prompt == "diff-carper":
from lm_eval.tasks.custom_metrics.diff_eval import apply_diff
from bigcode_eval.tasks.custom_metrics.diff_eval import apply_diff
ds = self.get_dataset().select(range(len(generations)))
for gen, doc in zip(generations, ds):
prompt_base = self.get_prompt_base(doc)
Expand Down Expand Up @@ -522,7 +522,7 @@ def postprocess_generation(self, generation, idx):
# Only remove final stopwords like <MSG>
generation = self.remove_last_block(generation[len(prompt):].rstrip())
generation = prompt + generation
from lm_eval.tasks.custom_metrics.diff_eval import split_diff
from bigcode_eval.tasks.custom_metrics.diff_eval import split_diff
# From https://github.com/CarperAI/OpenELM/blob/e6402a0696096011572152334ccbe049f89c332e/src/openelm/benchmarks/benchmark_bugs.py#L93
end_of_diff = re.compile("\n[^ +-@]+")
parsed: dict = split_diff(generation)
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"""

from evaluate import load
from lm_eval.base import Task
from lm_eval.utils import remove_after_return
from bigcode_eval.base import Task
from bigcode_eval.utils import remove_after_return

_CITATION = ""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@misc{chen2021evaluating,
Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/mbpp.py → bigcode_eval/tasks/mbpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from evaluate import load

from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@article{austin2021program,
Expand Down
6 changes: 3 additions & 3 deletions lm_eval/tasks/multiple.py → bigcode_eval/tasks/multiple.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
from datasets import load_dataset
from tqdm import tqdm

from lm_eval.base import Task
from lm_eval.tasks.custom_metrics.multiple_metrics.evaluation import \
from bigcode_eval.base import Task
from bigcode_eval.tasks.custom_metrics.multiple_metrics.evaluation import \
evaluate_problem
from lm_eval.tasks.custom_metrics.multiple_metrics.single_experiment_pass_k import \
from bigcode_eval.tasks.custom_metrics.multiple_metrics.single_experiment_pass_k import \
for_file

_CITATION = """
Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/parity.py → bigcode_eval/tasks/parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re

from evaluate import load
from lm_eval.base import Task
from bigcode_eval.base import Task
import tqdm

def mutate_code(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re

from evaluate import load
from lm_eval.base import Task
from bigcode_eval.base import Task
import tqdm

_CITATION = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re

from evaluate import load
from lm_eval.base import Task
from bigcode_eval.base import Task

_CITATION = """
@inproceedings{lin2017quixbugs,
Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/recode.py → bigcode_eval/tasks/recode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
This task runs the released perturbed HumanEval benchmark and computes the robust-pass-at-k metric.
"""
from collections import defaultdict
from lm_eval.base import Task
from bigcode_eval.base import Task

from evaluate import load

Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
HfArgumentParser,
)

from lm_eval.arguments import EvalArguments
from lm_eval.evaluator import Evaluator
from lm_eval.tasks import ALL_TASKS
from bigcode_eval.arguments import EvalArguments
from bigcode_eval.evaluator import Evaluator
from bigcode_eval.tasks import ALL_TASKS


class MultiChoice:
Expand Down
2 changes: 1 addition & 1 deletion templates/new_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
TODO: Write a Short Description of the task.
Homepage: TODO: Add the URL to the task's Homepage here.
"""
from lm_eval.base import Task
from bigcode_eval.base import Task

# TODO: Add the BibTeX citation for the task.
_CITATION = """
Expand Down
4 changes: 2 additions & 2 deletions tests/test_generation_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from accelerate.utils import write_basic_config
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

from lm_eval.arguments import EvalArguments
from lm_eval.evaluator import Evaluator
from bigcode_eval.arguments import EvalArguments
from bigcode_eval.evaluator import Evaluator

# TODO add more tasks

Expand Down
2 changes: 1 addition & 1 deletion tests/test_prompts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from lm_eval import tasks
from bigcode_eval import tasks

TASKS = ["pal-gsm8k-greedy"]

Expand Down

0 comments on commit 92c81a0

Please sign in to comment.