Fix inference time regression by removing CUDA empty cache (#1005)

marqo-ai · Oct 15, 2024 · 117208c
1 parent a8bafb2
commit 117208c
Show file tree

Hide file tree

Showing 2 changed files with 1 addition and 6 deletions.
diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
@@ -139,11 +139,6 @@ def _encode_without_cache(model_cache_key: str, content: Union[str, List[str], L
                 raise RuntimeError(f"Vectorise created an empty list of batches! Content: {content}")
             else:
                 vectorised = np.concatenate(vector_batches, axis=0)
-
-                # Clear CUDA cache
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-
     except (UnidentifiedImageError, OSError) as e:
         if isinstance(e, UnidentifiedImageError) or "image file is truncated" in str(e):
             raise VectoriseError(f"Could not process given image: {content}. Original Error message: {e}") from e

diff --git a/src/marqo/version.py b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "2.12.3"
+__version__ = "2.12.4"
 
 def get_version() -> str:
     return f"{__version__}"