Fix/unit tests (#318)
HamidShojanazeri authored Dec 8, 2023
2 parents 58d216b + a71a68b commit 1b9934e
Showing 7 changed files with 67 additions and 22 deletions.
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -38,4 +38,9 @@ exclude = [
 packages = ["src/llama_recipes"]
 
 [tool.hatch.metadata.hooks.requirements_txt]
-files = ["requirements.txt"]
\ No newline at end of file
+files = ["requirements.txt"]
+
+[tool.pytest.ini_options]
+markers = [
+    "skip_missing_tokenizer: skip tests when we can not access meta-llama/Llama-2-7b-hf on huggingface hub (Log in with `huggingface-cli login` to unskip).",
+]
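
Registering the marker under [tool.pytest.ini_options] lets test modules apply it without pytest emitting PytestUnknownMarkWarning (or failing under --strict-markers). A minimal sketch of how a test opts in; the test name and body are illustrative, not part of this commit:

import pytest

@pytest.mark.skip_missing_tokenizer()
def test_needs_real_tokenizer():
    # Skipped by the autouse fixture in tests/conftest.py (below) whenever
    # meta-llama/Llama-2-7b-hf cannot be fetched from the Hugging Face Hub.
    ...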
44 changes: 38 additions & 6 deletions tests/conftest.py
@@ -5,14 +5,46 @@
 
 from transformers import LlamaTokenizer
 
+ACCESS_ERROR_MSG = "Could not access tokenizer at 'meta-llama/Llama-2-7b-hf'. Did you log into huggingface hub and provided the correct token?"
+
+unskip_missing_tokenizer = False
+
+@pytest.fixture(scope="module")
+def llama_tokenizer():
+    try:
+        return LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+    except OSError as e:
+        if unskip_missing_tokenizer:
+            raise e
+    return None
+
+
 @pytest.fixture
-def setup_tokenizer():
-    def _helper(tokenizer):
+def setup_tokenizer(llama_tokenizer):
+    def _helper(tokenizer_mock):
         #Align with Llama 2 tokenizer
-        tokenizer.from_pretrained.return_value = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
-        tokenizer.from_pretrained.return_value.add_special_tokens({'bos_token': '<s>', 'eos_token': '</s>'})
-        tokenizer.from_pretrained.return_value.bos_token_id = 1
-        tokenizer.from_pretrained.return_value.eos_token_id = 2
+        tokenizer_mock.from_pretrained.return_value = llama_tokenizer
 
     return _helper
+
+
+@pytest.fixture(autouse=True)
+def skip_if_tokenizer_is_missing(request, llama_tokenizer):
+    if request.node.get_closest_marker("skip_missing_tokenizer") and not unskip_missing_tokenizer:
+        if llama_tokenizer is None:
+            pytest.skip(ACCESS_ERROR_MSG)
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--unskip-missing-tokenizer",
+        action="store_true",
+        default=False, help="disable skip missing tokenizer")
+
+
+@pytest.hookimpl(tryfirst=True)
+def pytest_cmdline_preparse(config, args):
+    if "--unskip-missing-tokenizer" not in args:
+        return
+    global unskip_missing_tokenizer
+    unskip_missing_tokenizer = True
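
Taken together, these conftest.py pieces work as follows: llama_tokenizer tries to download the real Llama 2 tokenizer once per module, setup_tokenizer wires it into the patched LlamaTokenizer mock, and the autouse skip_if_tokenizer_is_missing fixture skips any test carrying the skip_missing_tokenizer marker when that download failed, unless --unskip-missing-tokenizer was passed. A hedged usage sketch; the test itself is illustrative, not from the commit:

from unittest.mock import patch

import pytest


@pytest.mark.skip_missing_tokenizer()
@patch("llama_recipes.finetuning.LlamaTokenizer")
def test_tokenizer_wiring(tokenizer, setup_tokenizer):
    setup_tokenizer(tokenizer)  # the mock now returns the real meta-llama/Llama-2-7b-hf tokenizer
    tok = tokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    assert tok.bos_token_id == 1 and tok.eos_token_id == 2

# To force an error instead of a skip when the tokenizer is unavailable:
#   pytest --unskip-missing-tokenizer tests/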
3 changes: 2 additions & 1 deletion tests/datasets/test_custom_dataset.py
@@ -17,6 +17,7 @@ def check_padded_entry(batch):
     assert batch["input_ids"][0][-1] == 2
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -29,7 +30,7 @@ def test_custom_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
 
     kwargs = {
         "dataset": "custom_dataset",
-        "model_name": "decapoda-research/llama-7b-hf", # We use the tokenizer as a surrogate for llama2 tokenizer here
+        "model_name": "meta-llama/Llama-2-7b-hf",
        "custom_dataset.file": "examples/custom_dataset.py",
         "custom_dataset.train_split": "validation",
         "batch_size_training": 2,
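
The tests now point at the gated meta-llama/Llama-2-7b-hf repository instead of the decapoda-research surrogate, so they only run unskipped when that checkpoint is accessible. A quick way to verify access before running the suite, assuming the Llama 2 license has been accepted on the Hub and `huggingface-cli login` has been run:

from transformers import LlamaTokenizer

# Raises OSError if the account is not logged in or not approved for the repo,
# which is the same condition the conftest.py fixture catches.
LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")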
8 changes: 5 additions & 3 deletions tests/datasets/test_grammar_datasets.py
@@ -1,11 +1,13 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
+import pytest
 from unittest.mock import patch
 
 from transformers import LlamaTokenizer
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -18,7 +20,7 @@ def test_grammar_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker
 
     BATCH_SIZE = 8
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": BATCH_SIZE,
         "val_batch_size": 1,
         "use_peft": False,
@@ -46,8 +48,8 @@ def test_grammar_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker
     assert "input_ids" in batch.keys()
     assert "attention_mask" in batch.keys()
 
-    assert batch["labels"][0][29] == -100
-    assert batch["labels"][0][30] == 29871
+    assert batch["labels"][0][31] == -100
+    assert batch["labels"][0][32] == 1152
 
     assert batch["input_ids"][0][0] == 1
     assert batch["labels"][0][-1] == 2
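
The updated label assertions reflect the real Llama 2 tokenizer's token IDs and the usual prompt-masking convention: positions labelled -100 are ignored by the loss. A small illustration of why -100 is used; apart from the asserted 1152 and 2, the values are made up for the example:

import torch
import torch.nn.functional as F

# -100 is the default ignore_index of PyTorch's cross entropy, so the masked
# prompt positions asserted above contribute nothing to the training loss.
logits = torch.randn(1, 4, 32000)               # batch of 1, 4 positions, Llama vocab size
labels = torch.tensor([[-100, -100, 1152, 2]])  # prompt masked, then target tokens
loss = F.cross_entropy(logits.view(-1, 32000), labels.view(-1), ignore_index=-100)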
6 changes: 4 additions & 2 deletions tests/datasets/test_samsum_datasets.py
@@ -1,10 +1,12 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
+import pytest
 from functools import partial
 from unittest.mock import patch
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -17,7 +19,7 @@ def test_samsum_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
 
     BATCH_SIZE = 8
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": BATCH_SIZE,
         "val_batch_size": 1,
         "use_peft": False,
@@ -46,7 +48,7 @@ def test_samsum_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
     assert "attention_mask" in batch.keys()
 
     assert batch["labels"][0][268] == -100
-    assert batch["labels"][0][269] == 22291
+    assert batch["labels"][0][269] == 319
 
     assert batch["input_ids"][0][0] == 1
     assert batch["labels"][0][-1] == 2
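
As with the grammar dataset, the expected token ID at position 269 changes because the samples are now encoded with the real Llama 2 tokenizer rather than the decapoda-research stand-in. If these hard-coded values drift again, they can be re-derived by tokenizing the same formatted sample and inspecting the positions; a hedged sketch in which the prompt string is a placeholder, not the exact samsum template:

from transformers import LlamaTokenizer

tok = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
ids = tok.encode("Summarize this dialog:\n...")  # placeholder text, not the real template
print(ids[:5])  # starts with the BOS id 1, matching assert batch["input_ids"][0][0] == 1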
6 changes: 4 additions & 2 deletions tests/test_batching.py
@@ -5,6 +5,7 @@
 from unittest.mock import patch
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -16,7 +17,7 @@ def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_
     setup_tokenizer(tokenizer)
 
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": 8,
         "val_batch_size": 1,
         "use_peft": False,
@@ -46,6 +47,7 @@
     assert batch["attention_mask"][0].size(0) == 4096
 
 
+@pytest.mark.skip_missing_tokenizer()
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.LlamaTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
@@ -69,7 +71,7 @@ def test_distributed_packing(dist, is_initialized, fsdp, setup, step_lr, optimiz
     os.environ['MASTER_PORT'] = '12345'
 
     kwargs = {
-        "model_name": "decapoda-research/llama-7b-hf",
+        "model_name": "meta-llama/Llama-2-7b-hf",
         "batch_size_training": 8,
         "val_batch_size": 1,
         "use_peft": False,
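
Both packing tests assert that every batch row is exactly 4096 tokens, which is what sample packing produces: tokenized examples are concatenated and re-split into fixed-length chunks instead of being padded individually. A minimal sketch of the idea, not the actual implementation in llama_recipes:

def pack(tokenized_samples, chunk_size=4096):
    # Concatenate token lists and cut them into full chunks of chunk_size;
    # every returned chunk has len == 4096, matching the sizes the test checks.
    buffer = [tok for sample in tokenized_samples for tok in sample]
    return [buffer[i:i + chunk_size]
            for i in range(0, len(buffer) - chunk_size + 1, chunk_size)]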
15 changes: 8 additions & 7 deletions tests/test_train_utils.py
@@ -12,7 +12,7 @@
 @patch("llama_recipes.utils.train_utils.torch.cuda.amp.GradScaler")
 @patch("llama_recipes.utils.train_utils.torch.cuda.amp.autocast")
 def test_gradient_accumulation(autocast, scaler, nullcontext, mem_trace, mocker):
 
     model = mocker.MagicMock(name="model")
     model().loss.__truediv__().detach.return_value = torch.tensor(1)
     mock_tensor = mocker.MagicMock(name="tensor")
@@ -27,7 +27,8 @@ def test_gradient_accumulation(autocast, scaler, nullcontext, mem_trace, mocker)
     train_config.enable_fsdp = False
     train_config.use_fp16 = False
     train_config.run_validation = False
+    train_config.gradient_clipping = False
 
     train(
         model,
         train_dataloader,
@@ -38,15 +39,15 @@ def test_gradient_accumulation(autocast, scaler, nullcontext, mem_trace, mocker)
         gradient_accumulation_steps,
         train_config,
     )
 
     assert optimizer.zero_grad.call_count == 5
     optimizer.zero_grad.reset_mock()
 
     assert nullcontext.call_count == 5
     nullcontext.reset_mock()
 
     assert autocast.call_count == 0
 
     gradient_accumulation_steps = 2
     train_config.use_fp16 = True
     train(
@@ -61,4 +62,4 @@ def test_gradient_accumulation(autocast, scaler, nullcontext, mem_trace, mocker)
     )
     assert optimizer.zero_grad.call_count == 3
     assert nullcontext.call_count == 0
-    assert autocast.call_count == 5
\ No newline at end of file
+    assert autocast.call_count == 5
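
The call-count assertions follow directly from the accumulation logic: with 5 batches and gradient_accumulation_steps=1 the optimizer steps (and zero_grads) after every batch, giving 5 calls; with gradient_accumulation_steps=2 it steps after batches 2 and 4 plus the final partial group, giving 3. A sketch of the pattern being tested, not the actual llama_recipes train loop:

def run_epoch(model, optimizer, train_dataloader, gradient_accumulation_steps):
    # Accumulation pattern the assertions exercise (sketch only).
    for step, batch in enumerate(train_dataloader):  # 5 batches in this test
        loss = model(**batch).loss / gradient_accumulation_steps
        loss.backward()
        last_step = step == len(train_dataloader) - 1
        if (step + 1) % gradient_accumulation_steps == 0 or last_step:
            optimizer.step()
            optimizer.zero_grad()  # 5 calls with steps=1, 3 calls with steps=2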
