Fix most warning messages in the unit test and large model unit test …

…pipelines (#1069)
marqo-ai · Dec 24, 2024 · 588ea14 · 588ea14
1 parent 5c45824
commit 588ea14
Show file tree

Hide file tree

Showing 29 changed files with 209 additions and 198 deletions.
diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
@@ -179,15 +179,16 @@ jobs:
         run: |
           if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
             export BASE_BRANCH="${{ github.event.pull_request.base.ref }}"
+            cd marqo
+            echo "Running diff-cover against branch $BASE_BRANCH"
+            git fetch origin $BASE_BRANCH:$BASE_BRANCH
+            diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
+              --compare-branch $BASE_BRANCH
           else
-            export BASE_BRANCH=mainline
+            echo "Skipping diff-cover on Push events"
+            echo "Skipped diff-cover on Push events" > marqo/diff_cov.md
+            touch marqo/diff_cov.html
           fi
-          
-          cd marqo
-          echo "Running diff-cover against branch $BASE_BRANCH"
-          git fetch origin $BASE_BRANCH:$BASE_BRANCH
-          diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
-            --compare-branch $BASE_BRANCH
 
       - name: Upload Test Report
         uses: actions/upload-artifact@v4

diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml
@@ -180,15 +180,16 @@ jobs:
         run: |
           if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
             export BASE_BRANCH="${{ github.event.pull_request.base.ref }}"
+            cd marqo
+            echo "Running diff-cover against branch $BASE_BRANCH"
+            git fetch origin $BASE_BRANCH:$BASE_BRANCH
+            diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
+              --compare-branch $BASE_BRANCH
           else
-            export BASE_BRANCH=mainline
+            echo "Skipping diff-cover on Push events"
+            echo "Skipped diff-cover on Push events" > marqo/diff_cov.md
+            touch marqo/diff_cov.html
           fi
-          
-          cd marqo
-          echo "Running diff-cover against branch $BASE_BRANCH"
-          git fetch origin $BASE_BRANCH:$BASE_BRANCH
-          diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
-            --compare-branch $BASE_BRANCH
 
       - name: Upload Test Report
         uses: actions/upload-artifact@v4

diff --git a/requirements.dev.txt b/requirements.dev.txt
@@ -1,6 +1,7 @@
 # test requirements
 pyvespa==0.37.1
-pytest==7.4.3
-pytest-cov==5.0.0
+pytest==8.3.4
+pytest-cov==6.0.0
 diff-cover==9.2.0
-pytest-md-report==0.6.2
+pytest-md-report==0.6.2
+pytest-asyncio==0.23.8
diff --git a/src/marqo/connections.py b/src/marqo/connections.py
@@ -57,7 +57,7 @@ def init_from_app(self, host, port):
             logger.info(f"Took {((t1 - t0) * 1000):.3f}ms to connect to redis and load scripts.")
 
         except Exception as e:
-            logger.warn(generate_redis_warning(skipped_operation="loading throttling scripts", exc=e))
+            logger.warning(generate_redis_warning(skipped_operation="loading throttling scripts", exc=e))
             self.faulty = True
 
     def connect(self) -> redis.Redis:

diff --git a/src/marqo/core/index_management/vespa_application_package.py b/src/marqo/core/index_management/vespa_application_package.py
@@ -58,7 +58,7 @@ def _ensure_only_one(self, xml_path: str) -> ET.Element:
 
     def add_schema(self, name: str) -> None:
         if self._documents.find(f'document[@type="{name}"]') is not None:
-            logger.warn(f'Schema {name} already exists in services.xml, nothing to add')
+            logger.warning(f'Schema {name} already exists in services.xml, nothing to add')
         else:
             new_document = ET.SubElement(self._documents, 'document')
             new_document.set('type', name)
@@ -67,7 +67,7 @@ def add_schema(self, name: str) -> None:
     def remove_schema(self, name: str) -> None:
         docs = self._documents.findall(f'document[@type="{name}"]')
         if not docs:
-            logger.warn(f'Schema {name} does not exist in services.xml, nothing to remove')
+            logger.warning(f'Schema {name} does not exist in services.xml, nothing to remove')
         else:
             for doc in docs:
                 self._documents.remove(doc)
@@ -210,7 +210,7 @@ def save_index_setting(self, index_setting: MarqoIndex) -> None:
 
     def delete_index_setting(self, index_setting_name: str) -> None:
         if index_setting_name not in self._index_settings:
-            logger.warn(f"Index setting {index_setting_name} does not exist, nothing to delete")
+            logger.warning(f"Index setting {index_setting_name} does not exist, nothing to delete")
         else:
             self._move_to_history(index_setting_name)
             del self._index_settings[index_setting_name]
@@ -452,7 +452,7 @@ def read_binary_file(self, *paths: str) -> Optional[bytes]:
     def save_file(self, content: Union[str, bytes], *paths: str, backup: Optional[VespaAppBackup] = None) -> None:
         path = self._full_path(*paths)
         if os.path.exists(path):
-            logger.warn(f"{path} already exists in application package, overwriting")
+            logger.warning(f"{path} already exists in application package, overwriting")
             if backup is not None:
                 backup.backup_file(self.read_binary_file(*paths), *paths)
         else:  # add file
@@ -467,7 +467,7 @@ def save_file(self, content: Union[str, bytes], *paths: str, backup: Optional[Ve
     def remove_file(self, *paths: str, backup: Optional[VespaAppBackup] = None) -> None:
         path = self._full_path(*paths)
         if not os.path.exists(path):
-            logger.warn(f"{path} does not exist in application package, nothing to delete")
+            logger.warning(f"{path} does not exist in application package, nothing to delete")
         else:
             if backup is not None:
                 backup.backup_file(self.read_binary_file(*paths), *paths)
@@ -530,7 +530,7 @@ def deploy_application(self) -> None:
 
 
 class VespaApplicationPackage:
-    """
+    r"""
     Represents a Vespa application package. This class provides useful methods to manage contents in the application
     package. A Vespa application package usually contains the following contents
     app-root

diff --git a/src/marqo/core/monitoring/monitoring.py b/src/marqo/core/monitoring/monitoring.py
@@ -60,9 +60,9 @@ def get_index_stats(self, marqo_index: MarqoIndex) -> MarqoIndexStats:
 
         # Occasionally Vespa returns empty metrics, often for the first call after a restart
         if memory_utilization is None:
-            logger.warn(f'Vespa did not return a value for memory utilization metrics')
+            logger.warning(f'Vespa did not return a value for memory utilization metrics')
         if disk_utilization is None:
-            logger.warn(f'Vespa did not return a value for disk utilization metrics')
+            logger.warning(f'Vespa did not return a value for disk utilization metrics')
 
         return MarqoIndexStats(
             number_of_documents=doc_count_query_result.total_count,
@@ -140,7 +140,7 @@ def _get_vespa_health(self, hostname_filter: Optional[str]) -> VespaHealthStatus
         disk_utilization = metrics.clusterController_resourceUsage_maxDiskUtilization_max
 
         if nodes_above_limit is None:
-            logger.warn(f'Vespa did not return a value for nodes_above_limit metric')
+            logger.warning(f'Vespa did not return a value for nodes_above_limit metric')
             feed_status = HealthStatus.Yellow
         elif nodes_above_limit > 0:
             feed_status = HealthStatus.Yellow

diff --git a/src/marqo/tensor_search/index_meta_cache.py b/src/marqo/tensor_search/index_meta_cache.py
@@ -93,7 +93,7 @@ def _check_refresh_thread(index_management: IndexManagement):
         if refresh_thread is None or not refresh_thread.is_alive():
             if refresh_thread is not None:
                 # If not None, then it has died
-                logger.warn('Dead index cache refresh thread detected. Will start a new one')
+                logger.warning('Dead index cache refresh thread detected. Will start a new one')
 
             logger.info('Starting index cache refresh thread')
 
@@ -110,7 +110,7 @@ def refresh():
                     except VespaError as e:
                         if isinstance(e, VespaStatusError) and e.status_code == 400:
                             # This can happen when settings schema doesn't exist
-                            logger.warn(
+                            logger.warning(
                                 'Failed to populate index cache due to 400 error from vector store. This can happen '
                                 f'if Marqo settings schema does not exist. Error: {e}'
                             )

diff --git a/src/marqo/tensor_search/telemetry.py b/src/marqo/tensor_search/telemetry.py
@@ -26,7 +26,7 @@ def __init__(self):
     def start(self) -> None:
         """Start a new timer"""
         if self.start_time is not None:
-            logger.warn(f"'.start()' called on already running timer.")
+            logger.warning(f"'.start()' called on already running timer.")
         else:
             self.start_time = time.perf_counter()
 
@@ -104,7 +104,7 @@ def stop(self, k: str) -> float:
             self.add_time(k, elapsed_time)
             return elapsed_time
         except TimerError:
-            logger.warn(f"timer {k} stopped incorrectly. Time not recorded.")
+            logger.warning(f"timer {k} stopped incorrectly. Time not recorded.")
 
     def increment_counter(self, k: str, v: int = 1):
         self.counter[k] += v

diff --git a/src/marqo/tensor_search/throttling/redis_throttle.py b/src/marqo/tensor_search/throttling/redis_throttle.py
@@ -52,7 +52,7 @@ def remove_thread_from_set(key, name):
                 try:
                     redis.zrem(key, name)
                 except Exception as e:
-                    logger.warn(generate_redis_warning(skipped_operation="throttling thread count decrement", exc=e))
+                    logger.warning(generate_redis_warning(skipped_operation="throttling thread count decrement", exc=e))
                     redis_driver.set_faulty(True)
 
             # Check current thread count / increment using LUA script
@@ -66,7 +66,7 @@ def remove_thread_from_set(key, name):
                     utils.read_env_vars_and_defaults(EnvVars.MARQO_THREAD_EXPIRY_TIME)  # expire_time
                 )
             except Exception as e:
-                logger.warn(generate_redis_warning(skipped_operation="throttling thread count check", exc=e))
+                logger.warning(generate_redis_warning(skipped_operation="throttling thread count check", exc=e))
                 redis_driver.set_faulty(True)
                 return function(*args, **kwargs)
 

diff --git a/src/marqo/tensor_search/utils.py b/src/marqo/tensor_search/utils.py
@@ -197,7 +197,7 @@ def read_env_vars_and_defaults_ints(var: str) -> Optional[int]:
 
 
 def parse_lexical_query(text: str) -> Tuple[List[str], List[str]]:
-    """
+    r"""
     Find required terms enclosed within double quotes.
 
     All other terms go into blob, split by whitespace. blob starts as a string then splits into

diff --git a/src/marqo/vespa/vespa_client.py b/src/marqo/vespa/vespa_client.py
@@ -1031,7 +1031,7 @@ def _query_raise_for_status(self, resp: httpx.Response) -> None:
                     ):
                         # The soft doom error is a bug in certain Vespa versions. Newer versions should always return
                         # a code 12 for timeouts
-                        logger.warn('Detected soft doomed query')
+                        logger.warning('Detected soft doomed query')
                         raise VespaTimeoutError(message=resp.text, cause=e) from e
 
                 raise e

diff --git a/tests/core/document/test_partial_document_update.py b/tests/core/document/test_partial_document_update.py
@@ -670,7 +670,7 @@ def randomly_update_document(number_of_updates: int = 50) -> None:
                                      "float_field_filter", "bool_field_filter"}
 
             for _ in range(number_of_updates):
-                picked_fields = random.sample(updating_fields_pools, 3)
+                picked_fields = random.sample(list(updating_fields_pools), 3)
 
                 updated_doc = {
                     "_id": "1"

diff --git a/tests/core/index_management/test_index_management.py b/tests/core/index_management/test_index_management.py
@@ -33,7 +33,6 @@
 from tests.marqo_test import MarqoTestCase
 
 
-@pytest.mark.slowtest
 class TestIndexManagement(MarqoTestCase):
 
     def setUp(self):

diff --git a/tests/core/index_management/test_services_xml.py b/tests/core/index_management/test_services_xml.py
@@ -9,7 +9,7 @@
 from marqo.core.index_management.vespa_application_package import ServicesXml
 
 
-@pytest.mark.unnittest
+@pytest.mark.unittest
 class TestIndexSettingStore(unittest.TestCase):
 
     _TEMPLATE = Template(textwrap.dedent("""<?xml version="1.0" encoding="utf-8" ?>

diff --git a/tests/core/inference/test_tensor_field_chunkers.py b/tests/core/inference/test_tensor_field_chunkers.py
@@ -39,8 +39,8 @@ def test_image_chunker_should_chunk_image(self):
                                      image_preprocessing=ImagePreProcessing(patch_method=PatchMethod.DinoV1))
 
         chunks, content_chunks = image_chunker.chunk(image_url)
-        self.assertEquals([[0.0, 0.0, 512.0, 512.0], [0.0, 102.4, 409.6, 375.46666666666664]], chunks)
-        self.assertEquals(2, len(content_chunks))
+        self.assertEqual([[0.0, 0.0, 512.0, 512.0], [0.0, 102.4, 409.6, 375.46666666666664]], chunks)
+        self.assertEqual(2, len(content_chunks))
         self.assertIsInstance(content_chunks[0], Image)
         self.assertIsInstance(content_chunks[1], Image)
 
@@ -51,8 +51,8 @@ def test_image_chunker_should_return_single_chunk_when_patch_method_is_none(self
                                      image_preprocessing=ImagePreProcessing(patch_method=None))
 
         chunks, content_chunks = image_chunker.chunk(image_url)
-        self.assertEquals([TestImageUrls.HIPPO_REALISTIC.value], chunks)
-        self.assertEquals(1, len(content_chunks))
+        self.assertEqual([TestImageUrls.HIPPO_REALISTIC.value], chunks)
+        self.assertEqual(1, len(content_chunks))
         self.assertIsInstance(content_chunks[0], Image)
 
     def test_image_chunker_should_return_single_chunk_when_single_chunk_flag_is_true(self):
@@ -62,8 +62,8 @@ def test_image_chunker_should_return_single_chunk_when_single_chunk_flag_is_true
                                      image_preprocessing=ImagePreProcessing(patch_method=PatchMethod.DinoV1))
 
         chunks, content_chunks = image_chunker.chunk(image_url, single_chunk=True)
-        self.assertEquals([TestImageUrls.HIPPO_REALISTIC.value], chunks)
-        self.assertEquals(1, len(content_chunks))
+        self.assertEqual([TestImageUrls.HIPPO_REALISTIC.value], chunks)
+        self.assertEqual(1, len(content_chunks))
         self.assertIsInstance(content_chunks[0], Image)
 
     @patch('marqo.core.inference.tensor_fields_container.image_processor.chunk_image')
@@ -79,7 +79,7 @@ def test_image_chunker_should_raise_error_when_chunking_fails(self, mock_chunk_i
         with self.assertRaises(AddDocumentsError) as err_context:
             image_chunker.chunk(image_url)
 
-        self.assertEquals('BOOM!', err_context.exception.error_message)
+        self.assertEqual('BOOM!', err_context.exception.error_message)
 
     def test_audio_video_chunker_should_chunk_audio_and_video(self):
         media_repo = {'url': [
@@ -89,7 +89,7 @@ def test_audio_video_chunker_should_chunk_audio_and_video(self):
         audio_video_chunker = AudioVideoChunker(media_repo=media_repo)
 
         chunks, content_chunks = audio_video_chunker.chunk('url')
-        self.assertEquals(['[5, 15]', '[20, 25]'], chunks)
+        self.assertEqual(['[5, 15]', '[20, 25]'], chunks)
         self.assertTrue(torch.equal(tensor([1.0, 2.0]), content_chunks[0]))
         self.assertTrue(torch.equal(tensor([4.0, 6.0]), content_chunks[1]))
 
@@ -99,4 +99,4 @@ def test_audio_video_chunker_should_raise_error_when_single_chunk_flag_is_true(s
         with self.assertRaises(RuntimeError) as err_context:
             audio_video_chunker.chunk('url', single_chunk=True)
 
-        self.assertEquals('Video and Audio chunker does not support single_chunk', str(err_context.exception))
+        self.assertEqual('Video and Audio chunker does not support single_chunk', str(err_context.exception))
diff --git a/tests/core/inference/test_tensor_field_content.py b/tests/core/inference/test_tensor_field_content.py
@@ -57,10 +57,10 @@ def test_populate_chunks_and_embeddings(self):
                 )
 
                 tensor_field_content.populate_chunks_and_embeddings(['hello world'], [[1.0, 1.2]])
-                self.assertEquals(['hello world'], tensor_field_content.chunks)
-                self.assertEquals([], tensor_field_content.content_chunks)
-                self.assertEquals([[1.0, 1.2]], tensor_field_content.embeddings)
-                self.assertEquals(resolved_after_population, tensor_field_content.is_resolved)
+                self.assertEqual(['hello world'], tensor_field_content.chunks)
+                self.assertEqual([], tensor_field_content.content_chunks)
+                self.assertEqual([[1.0, 1.2]], tensor_field_content.embeddings)
+                self.assertEqual(resolved_after_population, tensor_field_content.is_resolved)
 
     def test_chunk_resolved_field_will_not_generate_more_content_chunks(self):
         tensor_field_content = TensorFieldContent(
-Original file line number
+Diff line change
@@ Expand Up @@
     def parse_lexical_query(text: str) -> Tuple[List[str], List[str]]:
-        """
+        r"""
         Find required terms enclosed within double quotes.
         All other terms go into blob, split by whitespace. blob starts as a string then splits into
@@ Expand Down @@