diff --git a/bigcode_eval/generation.py b/bigcode_eval/generation.py
index 98e15a7be..520b49ae0 100644
--- a/bigcode_eval/generation.py
+++ b/bigcode_eval/generation.py
@@ -76,7 +76,9 @@ def parallel_generations(
     # The input_length / start_length set to 0 for now will be adjusted later
     # Check if the task has a custom check_fn method for the stopping criteria
     if task.stop_words and tokenizer.eos_token:
-        task.stop_words.append(tokenizer.eos_token)
+        # if padding token is same as eos it cannot be a delimiter as hpu output is padded
+        if not (accelerator.device.type == "hpu" and tokenizer.eos_token == tokenizer.pad_token):
+            task.stop_words.append(tokenizer.eos_token)
     if hasattr(task, "check_fn"):
         stopping_criteria.append(
             EndOfFunctionCriteria(0, task.stop_words, tokenizer, task.check_fn)
diff --git a/main.py b/main.py
index 5d030909c..29bab9d09 100644
--- a/main.py
+++ b/main.py
@@ -229,6 +229,14 @@ def get_gpus_max_memory(max_memory, num_gpus):
     return max_memory
 
 
+def use_hpu():
+    import importlib
+    if importlib.util.find_spec("habana_frameworks") is not None:
+        import habana_frameworks.torch.hpu as hthpu
+        if hthpu.is_available():
+            return True
+    return False
+
 def main():
     args = parse_args()
     transformers.logging.set_verbosity_error()
@@ -239,10 +247,19 @@ def main():
     else:
         task_names = pattern_match(args.tasks.split(","), ALL_TASKS)
 
-    accelerator = Accelerator()
+    if use_hpu():
+        from optimum.habana.accelerate import GaudiAccelerator
+        accelerator = GaudiAccelerator()
+    else:
+        accelerator = Accelerator()
+
     if accelerator.is_main_process:
         print(f"Selected Tasks: {task_names}")
 
+    if accelerator.device.type == "hpu":
+        from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+        adapt_transformers_to_gaudi()
+
     results = {}
     if args.load_generations_path:
         # here we don't generate code but only evaluate previously computed generations
@@ -310,6 +327,10 @@ def main():
                 f"Non valid modeltype {args.modeltype}, choose from: causal, seq2seq"
             )
 
+        if accelerator.device.type == "hpu":
+            from habana_frameworks.torch.hpu import wrap_in_hpu_graph
+            model = wrap_in_hpu_graph(model)
+
         if args.peft_model:
            from peft import PeftModel  # dynamic import to avoid dependency on peft
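
Below is a minimal standalone sketch of the HPU path this patch wires into `main.py`: detect Habana hardware, pick `GaudiAccelerator` vs. `Accelerator`, patch transformers for Gaudi, and wrap the model in an HPU graph. It assumes `optimum-habana` and `habana_frameworks` are installed when running on Gaudi; the `gpt2` checkpoint is a placeholder for illustration only and is not part of the patch.

```python
import importlib.util

from transformers import AutoModelForCausalLM


def use_hpu():
    # Same detection the patch adds: only import Habana modules if they are installed.
    if importlib.util.find_spec("habana_frameworks") is not None:
        import habana_frameworks.torch.hpu as hthpu
        if hthpu.is_available():
            return True
    return False


if use_hpu():
    from optimum.habana.accelerate import GaudiAccelerator
    from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

    accelerator = GaudiAccelerator()
    adapt_transformers_to_gaudi()  # patch transformers for Gaudi before loading the model
else:
    from accelerate import Accelerator

    accelerator = Accelerator()

model = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder checkpoint
model = model.to(accelerator.device)

if accelerator.device.type == "hpu":
    from habana_frameworks.torch.hpu import wrap_in_hpu_graph

    model = wrap_in_hpu_graph(model)  # capture the forward pass as an HPU graph for faster replay
```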