diff --git a/Dockerfile-multiple b/Dockerfile-multiple index dec9a8a6f..77a6a9e5a 100644 --- a/Dockerfile-multiple +++ b/Dockerfile-multiple @@ -1,4 +1,4 @@ -FROM ghcr.io/nuprl/multipl-e-evaluation:2cb4fcd74d89d149f7a3feb177f7d8169cef390a7ea7951c9b84e7ff59ea26cc +FROM ghcr.io/nuprl/multipl-e-evaluation@sha256:11864ca95774df16c34b4cd1eac231f9e5466c7ea38dac98e5b5b053e18479de # Standard requirements COPY . /app diff --git a/bigcode_eval/tasks/multiple.py b/bigcode_eval/tasks/multiple.py index d0166b683..92acbee02 100644 --- a/bigcode_eval/tasks/multiple.py +++ b/bigcode_eval/tasks/multiple.py @@ -27,24 +27,33 @@ _CITATION = """ @article{cassano2022scalable, - title={A Scalable and Extensible Approach to Benchmarking NL2Code for 18 Programming Languages}, - author={Cassano, Federico and Gouwar, John and Nguyen, Daniel and Nguyen, Sydney and Phipps-Costin, Luna and Pinckney, Donald and Yee, Ming Ho and Zi, Yangtian and Anderson, Carolyn Jane and Feldman, Molly Q and others}, - journal={arXiv preprint arXiv:2208.08227}, - year={2022} + author={Cassano, Federico and Gouwar, John and Nguyen, Daniel and Nguyen, Sydney and Phipps-Costin, Luna and Pinckney, Donald and Yee, Ming-Ho and Zi, Yangtian and Anderson, Carolyn Jane and Feldman, Molly Q and Guha, Arjun and Greenberg, Michael and Jangda, Abhinav}, + journal={IEEE Transactions on Software Engineering}, + title={MultiPL-E: A Scalable and Polyglot Approach to Benchmarking Neural Code Generation}, + year={2023}, + volume={49}, + number={7}, + pages={3675-3691}, + doi={10.1109/TSE.2023.3267446} } """ LANGUAGES = [ "py", "sh", + "clj", "cpp", "cs", "d", + "dart", + "elixir", "go", + "hs", "java", "js", "jl", "lua", + "ml", "pl", "php", "r",