Upgrade Python to 3.9 and add Embedding Regression Tests (#1006)
Upgraded to base image marqo-base:44
All tests now use Python 3.9
Embedding regression tests added for all encoding models except the snowflake and large open_clip models
vicilliar authored Nov 26, 2024
1 parent 9da8619 commit 606b13c
Showing 24 changed files with 168 additions and 55 deletions.
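
The core of the change is a set of embedding regression tests: embeddings computed on the upgraded stack are compared against reference vectors that were generated under Python 3.8.20 and checked in as JSON under tests/s2_inference/embeddings_reference/. Below is a minimal sketch of the comparison pattern, assuming the model → sentence → vector layout that the updated test_encoding.py reads; the helper names and the tolerance are illustrative, not part of Marqo's API.

import json
import os

import numpy as np


def load_reference_embeddings(filename: str) -> dict:
    # Assumed layout, inferred from test_encoding.py:
    #   {model_name: {sentence: [float, ...], ...}, ...}
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, filename), "r") as f:
        return json.load(f)


def matches_reference(reference: dict, model_name: str, sentence: str,
                      embedding, atol: float = 1e-6) -> bool:
    # True if the freshly computed embedding matches the stored Python 3.8 vector.
    try:
        expected = reference[model_name][sentence]
    except KeyError:
        raise KeyError(f"No reference embedding for {model_name!r} / {sentence!r}")
    return np.allclose(np.asarray(embedding), np.asarray(expected), atol=atol)

In the tests below, each model and sentence is wrapped in its own subTest, so drift in one model does not hide failures in the others.
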
1 change: 1 addition & 0 deletions .dockerignore
@@ -139,5 +139,6 @@ local_only/
tests/cache/

cache/
src/marqo/cache/

__pycache__/
2 changes: 1 addition & 1 deletion .github/workflows/arm64_docker_marqo.yml
@@ -84,7 +84,7 @@ jobs:
with:
fetch-depth: 0

- name: Set up Python 3.9 # TODO: Check if 3.9 is okay instead of 3.8. So far, so good
- name: Set up Python 3.9
run: |
apt-get -y update
apt-get -y install python3.9
4 changes: 2 additions & 2 deletions .github/workflows/cpu_docker_marqo.yml
@@ -89,10 +89,10 @@ jobs:
with:
fetch-depth: 0

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
cache: "pip"

- name: Install Dependencies
4 changes: 2 additions & 2 deletions .github/workflows/cpu_local_marqo.yml
@@ -95,10 +95,10 @@ jobs:
with:
fetch-depth: 0

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
cache: "pip"

- name: Install Dependencies
4 changes: 2 additions & 2 deletions .github/workflows/cuda_docker_marqo.yml
@@ -87,10 +87,10 @@ jobs:
with:
fetch-depth: 0

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
cache: "pip"

- name: Install Dependencies
4 changes: 2 additions & 2 deletions .github/workflows/largemodel_unit_test_CI.yml
@@ -70,10 +70,10 @@ jobs:
fetch-depth: 0
path: marqo

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
cache: "pip"

- name: Checkout marqo-base for requirements
4 changes: 2 additions & 2 deletions .github/workflows/locust_perf_test.yml
@@ -112,10 +112,10 @@ jobs:
with:
ref: ${{ github.event.inputs.marqo_ref }}

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"

- name: Set up Docker Buildx
if: github.event.inputs.marqo_host == 'http://localhost:8882' && github.event.inputs.image_to_test == 'marqo_docker_0'
4 changes: 2 additions & 2 deletions .github/workflows/test_documentation.yml
@@ -27,10 +27,10 @@ jobs:
path: marqo-base
fetch-depth: 0

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
cache: "pip"

- name: Install dependencies
4 changes: 2 additions & 2 deletions .github/workflows/unit_test_200gb_CI.yml
@@ -66,10 +66,10 @@ jobs:
fetch-depth: 0
path: marqo

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
cache: "pip"

- name: Checkout marqo-base for requirements
2 changes: 1 addition & 1 deletion Dockerfile
@@ -6,7 +6,7 @@ COPY vespa .
RUN mvn clean package

# Stage 2: Base image for Python setup
FROM marqoai/marqo-base:36 as base_image
FROM marqoai/marqo-base:44 as base_image

# Allow mounting volume containing data and configs for vespa
VOLUME /opt/vespa/var
2 changes: 1 addition & 1 deletion scripts/vespa_local/vespa_local.py
@@ -2,7 +2,7 @@

import os

VESPA_VERSION=os.getenv('VESPA_VERSION', '8.396.18') # default version baked into marqo-base:30
VESPA_VERSION=os.getenv('VESPA_VERSION', '8.431.32') # default version baked into marqo-base:44


def start(args):

Large diffs in 6 of the changed files are not rendered by default.

7 changes: 7 additions & 0 deletions tests/s2_inference/embeddings_reference/info.txt
@@ -0,0 +1,7 @@
16/10/24 - All embeddings were generated with:
- Marqo mainline head: 055237ae6c4a8121b4026650582f3a23bd416564 (2.12.2 release notes)
- Python 3.8.20
- open_clip_torch==2.24.0
- torch==1.12.1
- Ubuntu 22.04.4 LTS
- g4dn.xlarge EC2 instance
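
The JSON reference files that info.txt documents are among the diffs not rendered in this view. From the way the tests index them (embeddings_python_3_8[name][sentence]), each file appears to map a model name to a dict of sentence → embedding. The sketch below shows one way such a file could be regenerated; it is an assumption-level illustration: encode is a caller-supplied stand-in for the real model.encode(sentence, normalize=True), and the output path is only an example.

import json

import numpy as np


def write_reference_file(path: str, model_names, sentences, encode) -> None:
    # encode(model_name, sentence) must return a 1-D embedding;
    # in Marqo's tests this role is played by model.encode(sentence, normalize=True).
    reference = {}
    for model_name in model_names:
        reference[model_name] = {}
        for sentence in sentences:
            vector = np.asarray(encode(model_name, sentence))
            # JSON cannot store numpy arrays, so keep plain lists of floats.
            reference[model_name][sentence] = vector.tolist()
    with open(path, "w") as f:
        json.dump(reference, f)

# Example usage (hypothetical):
# write_reference_file("embeddings_reference/embeddings_all_models_python_3_8.json",
#                      ["hf/all-MiniLM-L6-v1"], ["hello"], my_encode_fn)
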
84 changes: 71 additions & 13 deletions tests/s2_inference/test_encoding.py
@@ -1,12 +1,15 @@
import unittest
import torch
import json
import numpy as np
from unittest.mock import MagicMock, patch
from marqo.s2_inference.types import FloatTensor
from marqo.s2_inference.s2_inference import clear_loaded_models, get_model_properties_from_registry
from marqo.s2_inference.model_registry import load_model_properties, _get_open_clip_properties
from marqo.s2_inference.s2_inference import _convert_tensor_to_numpy
import numpy as np
import functools
import os

from marqo.s2_inference.s2_inference import (
_check_output_type, vectorise,
@@ -17,6 +20,13 @@

_load_model = functools.partial(og_load_model, calling_func = "unit_test")


def get_absolute_file_path(filename: str) -> str:
currentdir = os.path.dirname(os.path.abspath(__file__))
abspath = os.path.join(currentdir, filename)
return abspath


class TestEncoding(unittest.TestCase):

def setUp(self) -> None:
@@ -26,8 +36,12 @@ def tearDown(self) -> None:
clear_loaded_models()

def test_vectorize(self):
names = ["fp16/ViT-B/32", "onnx16/open_clip/ViT-B-32/laion400m_e32",
'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
"""
Ensure that vectorised output from vectorise function matches both the model.encode output and
hardcoded embeddings from Python 3.8.20
"""

names = ["fp16/ViT-B/32", "onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
"all-MiniLM-L6-v1", "all_datasets_v4_MiniLM-L6", "hf/all-MiniLM-L6-v1", "hf/all_datasets_v4_MiniLM-L6",
"hf/bge-small-en-v1.5", "onnx/all-MiniLM-L6-v1", "onnx/all_datasets_v4_MiniLM-L6"]

@@ -36,28 +50,52 @@ def test_vectorize(self):

names_bge = ["hf/bge-small-en-v1.5", "hf/bge-base-en-v1.5"]

names_snowflake = ["hf/snowflake-arctic-embed-m", "hf/snowflake-arctic-embed-m-v1.5"]
# TODO: Re-add snowflake models when HF pooling issue is resolved
# names_snowflake = ["hf/snowflake-arctic-embed-m", "hf/snowflake-arctic-embed-m-v1.5"]
# names = names + names_e5 + names_bge + names_snowflake

names = names + names_e5 + names_bge + names_snowflake
names = names + names_e5 + names_bge

sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
device = 'cpu'
eps = 1e-9
embeddings_file_name = get_absolute_file_path("embeddings_reference/embeddings_all_models_python_3_8.json")

# Load in hardcoded embeddings json file
with open(embeddings_file_name, "r") as f:
embeddings_python_3_8 = json.load(f)

for name in names:
model_properties = get_model_properties_from_registry(name)
model = _load_model(model_properties['name'], model_properties=model_properties, device=device)
with self.subTest(name=name):
# Add hardcoded embeddings into the variable.
model_properties = get_model_properties_from_registry(name)
model = _load_model(model_properties['name'], model_properties=model_properties, device=device)

for sentence in sentences:
output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)
for sentence in sentences:
with self.subTest(sentence=sentence):
output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)
assert _check_output_type(output_v)

assert _check_output_type(output_v)
output_m = model.encode(sentence, normalize=True)

output_m = model.encode(sentence, normalize=True)
# Embeddings must match hardcoded python 3.8.20 embeddings
if isinstance(sentence, str):
with self.subTest("Hardcoded Python 3.8 Embeddings Comparison"):
try:
self.assertEqual(np.allclose(output_m, embeddings_python_3_8[name][sentence],
atol=1e-6),
True)
except KeyError:
raise KeyError(f"Hardcoded Python 3.8 embeddings not found for "
f"model: {name}, sentence: {sentence} in JSON file: "
f"{embeddings_file_name}")

assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
with self.subTest("Model encode vs vectorize"):
self.assertEqual(np.allclose(output_m, output_v, atol=eps), True,
f"Hardcoded embeddings do not match for {name}:{sentence}")

clear_loaded_models()

clear_loaded_models()

def test_vectorize_normalise(self):
open_clip_names = ["open_clip/ViT-B-32/laion2b_s34b_b79k"]
@@ -120,6 +158,7 @@ def test_cpu_encode_type(self):

clear_loaded_models()


def test_load_clip_text_model(self):
names = ["fp16/ViT-B/32", "onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
'RN50', "ViT-B/16"]
@@ -313,6 +352,11 @@ def test_open_clip_vectorize(self):
sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
device = 'cpu'
eps = 1e-9
embeddings_reference_file = get_absolute_file_path("embeddings_reference/embeddings_open_clip_python_3_8.json")

# Load in hardcoded embeddings json file
with open(embeddings_reference_file, "r") as f:
embeddings_python_3_8 = json.load(f)

for name in names:
model_properties = get_model_properties_from_registry(name)
@@ -327,7 +371,21 @@

output_m = model.encode(sentence, normalize=normalize_embeddings)

assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
# Embeddings must match hardcoded python 3.8.20 embeddings
if isinstance(sentence, str):
with self.subTest("Hardcoded Python 3.8 Embeddings Comparison"):
try:
self.assertEqual(np.allclose(output_m, embeddings_python_3_8[name][sentence], atol=1e-5),
True, f"For model {name} and sentence {sentence}: "
f"Calculated embedding is {output_m} but "
f"hardcoded embedding is {embeddings_python_3_8[name][sentence]}")
except KeyError:
raise KeyError(f"Hardcoded Python 3.8 embeddings not found for "
f"model: {name}, sentence: {sentence} in JSON file: "
f"{embeddings_reference_file}")

with self.subTest("Model encode vs vectorize"):
self.assertEqual(np.allclose(output_m, output_v, atol=eps), True)

clear_loaded_models()
