Skip to content

Commit

Permalink
Fix inference time regression by removing CUDA empty cache (#1005)
Browse files: browse the repository at this point in the history
  • Loading branch information
wanliAlex authored Oct 15, 2024
1 parent a8bafb2 commit 117208c
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 6 deletions.
5 changes: 0 additions & 5 deletions src/marqo/s2_inference/s2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,6 @@ def _encode_without_cache(model_cache_key: str, content: Union[str, List[str], L
raise RuntimeError(f"Vectorise created an empty list of batches! Content: {content}")
else:
vectorised = np.concatenate(vector_batches, axis=0)

# Clear CUDA cache
if torch.cuda.is_available():
torch.cuda.empty_cache()

except (UnidentifiedImageError, OSError) as e:
if isinstance(e, UnidentifiedImageError) or "image file is truncated" in str(e):
raise VectoriseError(f"Could not process given image: {content}. Original Error message: {e}") from e
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.12.3"
__version__ = "2.12.4"

def get_version() -> str:
    """Return the package version taken from the module-level ``__version__``."""
    # format() with an empty spec yields the value's default string rendering.
    return format(__version__)

0 comments on commit 117208c

Please sign in to comment.