diff --git a/pyproject.toml b/pyproject.toml
index 8e9e65957..c969db169 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,4 +38,9 @@ exclude = [
 packages = ["src/llama_recipes"]
 
 [tool.hatch.metadata.hooks.requirements_txt]
-files = ["requirements.txt"]
\ No newline at end of file
+files = ["requirements.txt"]
+
+[tool.pytest.ini_options]
+markers = [
+    "skip_missing_tokenizer: skip tests when we can not access meta-llama/Llama-2-7b-hf on huggingface hub (Log in with `huggingface-cli login` to unskip).",
+]
diff --git a/tests/conftest.py b/tests/conftest.py
index a441defb3..652edbe28 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,14 +5,24 @@
 
 from transformers import LlamaTokenizer
 
 
+@pytest.fixture(scope="module")
+def llama_tokenizer():
+    try:
+        return LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+    except OSError:
+        return None
+
 @pytest.fixture
-def setup_tokenizer():
-    def _helper(tokenizer):
+def setup_tokenizer(llama_tokenizer):
+    def _helper(tokenizer_mock):
         #Align with Llama 2 tokenizer
-        tokenizer.from_pretrained.return_value = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
-        tokenizer.from_pretrained.return_value.add_special_tokens({'bos_token': '<s>', 'eos_token': '</s>'})
-        tokenizer.from_pretrained.return_value.bos_token_id = 1
-        tokenizer.from_pretrained.return_value.eos_token_id = 2
+        tokenizer_mock.from_pretrained.return_value = llama_tokenizer
 
     return _helper
+
+@pytest.fixture(autouse=True)
+def skip_if_tokenizer_is_missing(request, llama_tokenizer):
+    if request.node.get_closest_marker("skip_missing_tokenizer"):
+        if llama_tokenizer is None:
+            pytest.skip("Llama tokenizer could not be accessed. Did you log into huggingface hub and provide the correct token?")
diff --git a/tests/datasets/test_custom_dataset.py b/tests/datasets/test_custom_dataset.py
index 6f830e76e..db67fe516 100644
--- a/tests/datasets/test_custom_dataset.py
+++ b/tests/datasets/test_custom_dataset.py
@@ -17,6 +17,7 @@ def check_padded_entry(batch):
     assert batch["input_ids"][0][-1] == 2
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -29,7 +30,7 @@ def test_custom_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
 
     kwargs = {
         "dataset": "custom_dataset",
-        "model_name": "decapoda-research/llama-7b-hf", # We use the tokenizer as a surrogate for llama2 tokenizer here
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "custom_dataset.file": "examples/custom_dataset.py",
         "custom_dataset.train_split": "validation",
         "batch_size_training": 2,
diff --git a/tests/datasets/test_grammar_datasets.py b/tests/datasets/test_grammar_datasets.py
index 418cc4d93..13a0271ea 100644
--- a/tests/datasets/test_grammar_datasets.py
+++ b/tests/datasets/test_grammar_datasets.py
@@ -1,11 +1,13 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
+import pytest
 from unittest.mock import patch
 
 from transformers import LlamaTokenizer
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -18,7 +20,7 @@ def test_grammar_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker
 
     BATCH_SIZE = 8
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": BATCH_SIZE,
         "val_batch_size": 1,
         "use_peft": False,
@@ -46,8 +48,8 @@ def test_grammar_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker
     assert "input_ids" in batch.keys()
     assert "attention_mask" in batch.keys()
 
-    assert batch["labels"][0][29] == -100
-    assert batch["labels"][0][30] == 29871
+    assert batch["labels"][0][31] == -100
+    assert batch["labels"][0][32] == 1152
 
     assert batch["input_ids"][0][0] == 1
     assert batch["labels"][0][-1] == 2
diff --git a/tests/datasets/test_samsum_datasets.py b/tests/datasets/test_samsum_datasets.py
index 392a1e123..96c75ad2c 100644
--- a/tests/datasets/test_samsum_datasets.py
+++ b/tests/datasets/test_samsum_datasets.py
@@ -1,10 +1,12 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
+import pytest
 from functools import partial
 from unittest.mock import patch
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -17,7 +19,7 @@ def test_samsum_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
 
     BATCH_SIZE = 8
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": BATCH_SIZE,
         "val_batch_size": 1,
         "use_peft": False,
@@ -46,7 +48,7 @@ def test_samsum_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
     assert "attention_mask" in batch.keys()
 
     assert batch["labels"][0][268] == -100
-    assert batch["labels"][0][269] == 22291
+    assert batch["labels"][0][269] == 319
 
     assert batch["input_ids"][0][0] == 1
     assert batch["labels"][0][-1] == 2
diff --git a/tests/test_batching.py b/tests/test_batching.py
index 4c8ab98d8..2053c187d 100644
--- a/tests/test_batching.py
+++ b/tests/test_batching.py
@@ -5,6 +5,7 @@
 from unittest.mock import patch
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -16,7 +17,7 @@ def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_
     setup_tokenizer(tokenizer)
 
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": 8,
         "val_batch_size": 1,
         "use_peft": False,
@@ -46,6 +47,7 @@ def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_
     assert batch["attention_mask"][0].size(0) == 4096
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -69,7 +71,7 @@ def test_distributed_packing(dist, is_initialized, fsdp, setup, step_lr, optimiz
     os.environ['MASTER_PORT'] = '12345'
 
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
        "batch_size_training": 8,
        "val_batch_size": 1,
        "use_peft": False,