adding marqtune model names to model_registry.py (#1063)
Fixed KeyError in the latest commit
adityabharadwaj198 authored Dec 15, 2024
1 parent fa0d695 commit 0daa36b
Showing 8 changed files with 45,093 additions and 22 deletions.
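The new keys duplicate existing open_clip entries under Marqtune-style names of the form 'Marqo/<architecture>.<pretrained>'. A minimal sketch of the failure mode being fixed — the registry literal and lookup below are illustrative, not the repo's actual call sites:

# Illustrative only: before this commit the registry had no 'Marqo/...' keys,
# so resolving a model name exported by Marqtune raised KeyError.
registry = {
    'open_clip/ViT-B-32/openai': {'name': 'open_clip/ViT-B-32/openai',
                                  'dimensions': 512, 'type': 'open_clip'},
}

model_name = 'Marqo/ViT-B-32.openai'  # name as produced by Marqtune
try:
    registry[model_name]
except KeyError:
    print(f'{model_name} was unknown before this commit')

# The fix: register the alias alongside the canonical key, pointing at the
# same underlying checkpoint properties.
registry[model_name] = registry['open_clip/ViT-B-32/openai']
assert registry[model_name]['dimensions'] == 512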
153 changes: 151 additions & 2 deletions src/marqo/s2_inference/model_registry.py
@@ -144,26 +144,51 @@ def _get_open_clip_properties() -> Dict:
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'openai'},
'Marqo/ViT-B-32.openai': {'name': 'open_clip/ViT-B-32/openai',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'openai'},
'open_clip/ViT-B-32/laion400m_e31': {'name': 'open_clip/ViT-B-32/laion400m_e31',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e31'},
'Marqo/ViT-B-32.laion400m_e31': {'name': 'open_clip/ViT-B-32/laion400m_e31',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e31'},
'open_clip/ViT-B-32/laion400m_e32': {'name': 'open_clip/ViT-B-32/laion400m_e32',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e32'},
'Marqo/ViT-B-32.laion400m_e32': {'name': 'open_clip/ViT-B-32/laion400m_e32',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e32'},
'open_clip/ViT-B-32/laion2b_e16': {'name': 'open_clip/ViT-B-32/laion2b_e16',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_e16'},
'Marqo/ViT-B-32.laion2b_e16': {'name': 'open_clip/ViT-B-32/laion2b_e16',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_e16'},
'open_clip/ViT-B-32/laion2b_s34b_b79k': {'name': 'open_clip/ViT-B-32/laion2b_s34b_b79k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s34b_b79k'},
'Marqo/ViT-B-32.laion2b_s34b_b79k': {'name': 'open_clip/ViT-B-32/laion2b_s34b_b79k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s34b_b79k'},
'open_clip/ViT-B-32-quickgelu/openai': {'name': 'open_clip/ViT-B-32-quickgelu/openai',
'dimensions': 512,
'note': 'open_clip models',
@@ -184,21 +209,41 @@ def _get_open_clip_properties() -> Dict:
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'openai'},
'Marqo/ViT-B-16.openai': {'name': 'open_clip/ViT-B-16/openai',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'openai'},
'open_clip/ViT-B-16/laion400m_e31': {'name': 'open_clip/ViT-B-16/laion400m_e31',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e31'},
'Marqo/ViT-B-16.laion400m_e31': {'name': 'open_clip/ViT-B-16/laion400m_e31',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e31'},
'open_clip/ViT-B-16/laion400m_e32': {'name': 'open_clip/ViT-B-16/laion400m_e32',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e32'},
'Marqo/ViT-B-16.laion400m_e32': {'name': 'open_clip/ViT-B-16/laion400m_e32',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e32'},
'open_clip/ViT-B-16/laion2b_s34b_b88k': {'name': 'open_clip/ViT-B-16/laion2b_s34b_b88k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s34b_b88k'},
'Marqo/ViT-B-16.laion2b_s34b_b88k': {'name': 'open_clip/ViT-B-16/laion2b_s34b_b88k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s34b_b88k'},
'open_clip/ViT-B-16-plus-240/laion400m_e31': {'name': 'open_clip/ViT-B-16-plus-240/laion400m_e31',
'dimensions': 640,
'note': 'open_clip models',
@@ -214,21 +259,41 @@ def _get_open_clip_properties() -> Dict:
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'openai'},
'Marqo/ViT-L-14.openai': {'name': 'hf-hub:timm/vit_large_patch14_clip_224.openai',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'openai'},
'open_clip/ViT-L-14/laion400m_e31': {'name': 'open_clip/ViT-L-14/laion400m_e31',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e31'},
'Marqo/ViT-L-14.laion400m_e31': {'name': 'hf-hub:timm/vit_large_patch14_clip_224.laion400m_e31',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e31'},
'open_clip/ViT-L-14/laion400m_e32': {'name': 'open_clip/ViT-L-14/laion400m_e32',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e32'},
'Marqo/ViT-L-14.laion400m_e32': {'name': 'hf-hub:timm/vit_large_patch14_clip_224.laion400m_e32',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion400m_e32'},
'open_clip/ViT-L-14/laion2b_s32b_b82k': {'name': 'open_clip/ViT-L-14/laion2b_s32b_b82k',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s32b_b82k'},
'Marqo/ViT-L-14.laion2b_s32b_b82k': {'name': 'hf-hub:laion/CLIP-ViT-L-14-laion2B-s32B-b82K',
'dimensions': 768,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s32b_b82k'},
'open_clip/ViT-L-14-336/openai': {'name': 'open_clip/ViT-L-14-336/openai',
'dimensions': 768,
'note': 'open_clip models',
@@ -259,18 +324,35 @@ def _get_open_clip_properties() -> Dict:
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s12b_b32k'},
'Marqo/roberta-ViT-B-32.laion2b_s12b_b32k': {'name': 'hf-hub:laion/CLIP-ViT-B-32-roberta-base-laion2B-s12B-b32k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion2b_s12b_b32k'},
'open_clip/xlm-roberta-base-ViT-B-32/laion5b_s13b_b90k': {
'name': 'open_clip/xlm-roberta-base-ViT-B-32/laion5b_s13b_b90k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion5b_s13b_b90k'},
'Marqo/xlm-roberta-base-ViT-B-32.laion5b_s13b_b90k': {
'name': 'open_clip/xlm-roberta-base-ViT-B-32/laion5b_s13b_b90k',
'dimensions': 512,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'laion5b_s13b_b90k'},
'open_clip/xlm-roberta-large-ViT-H-14/frozen_laion5b_s13b_b90k': {
'name': 'open_clip/xlm-roberta-large-ViT-H-14/frozen_laion5b_s13b_b90k',
'dimensions': 1024,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'frozen_laion5b_s13b_b90k'},
'Marqo/xlm-roberta-large-ViT-H-14.frozen_laion5b_s13b_b90k': {
'name': 'hf-hub:laion/CLIP-ViT-H-14-frozen-xlm-roberta-large-laion5B-s13B-b90k',
'dimensions': 1024,
'note': 'open_clip models',
'type': 'open_clip',
'pretrained': 'frozen_laion5b_s13b_b90k'},
'open_clip/convnext_base/laion400m_s13b_b51k': {'name': 'open_clip/convnext_base/laion400m_s13b_b51k',
'dimensions': 512,
'note': 'open_clip models',
@@ -610,6 +692,74 @@ def _get_sbert_properties() -> Dict:
"tokens":128,
"type":"sbert",
"notes": ""},
"intfloat/multilingual-e5-small":
{"name": 'intfloat/multilingual-e5-small',
"dimensions": 384,
"tokens": 512,
"type": "sbert",
"model_size": 0.471,
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
"intfloat/multilingual-e5-base":
{"name": 'intfloat/multilingual-e5-base',
"dimensions": 768,
"tokens": 512,
"type": "sbert",
"model_size": 1.11,
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
"intfloat/multilingual-e5-large":
{"name": 'intfloat/multilingual-e5-large',
"dimensions": 1024,
"tokens": 512,
"type": "sbert",
"model_size": 2.24,
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
"intfloat/e5-small-v2":
{"name": 'intfloat/e5-small-v2',
"dimensions": 384,
"tokens": 512,
"type": "sbert",
"model_size": 0.134,
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
"intfloat/e5-base-v2":
{"name": 'intfloat/e5-base-v2',
"dimensions": 768,
"tokens": 512,
"type": "sbert",
"model_size": 0.438,
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
"intfloat/e5-large-v2":
{"name": 'intfloat/e5-large-v2',
"dimensions": 1024,
"tokens": 512,
"type": "sbert",
"model_size": 1.34,
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
"BAAI/bge-base-en-v1.5":
{"name": 'BAAI/bge-base-en-v1.5',
"dimensions": 768,
"tokens": 512,
"type": "sbert",
"text_query_prefix": "Represent this sentence for searching relevant passages: ",
"notes": ""},
"BAAI/bge-large-en-v1.5":
{"name": 'BAAI/bge-large-en-v1.5',
"dimensions": 1024,
"tokens": 512,
"type": "sbert",
"text_query_prefix": "Represent this sentence for searching relevant passages: ",
"notes": ""},
}
return SBERT_MODEL_PROPERTIES
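The E5 entries added above carry 'text_query_prefix' and 'text_chunk_prefix' because E5-family models expect 'query: ' and 'passage: ' prefixes at encode time. A sketch of how such registry metadata can be applied — the helper name is hypothetical, not Marqo's actual code path:

def apply_prefix(text: str, props: dict, is_query: bool) -> str:
    # Hypothetical helper: prepend the prefix declared in the registry, if any.
    key = 'text_query_prefix' if is_query else 'text_chunk_prefix'
    return props.get(key, '') + text

props = {'text_query_prefix': 'query: ', 'text_chunk_prefix': 'passage: '}
print(apply_prefix('how do vector indexes work', props, is_query=True))
# -> query: how do vector indexes work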

@@ -786,8 +936,7 @@ def _get_hf_properties() -> Dict:
"text_query_prefix": "query: ",
"text_chunk_prefix": "passage: ",
"notes": ""},
# New models as of: Marqo 2.7.0
"hf/multilingual-e5-large-instruct":
{"name": 'intfloat/multilingual-e5-large-instruct',
"dimensions": 1024,
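Across the open_clip hunks above, each new key follows one convention: 'Marqo/<architecture>.<pretrained>' mirrors the canonical 'open_clip/<architecture>/<pretrained>' key, with the ViT-L-14 and roberta variants instead resolving to hf-hub: checkpoint paths. A sketch of the name mapping, assuming only the convention visible in this diff (not a function from the repo):

def marqo_alias_to_open_clip(alias: str) -> str:
    # 'Marqo/ViT-B-32.laion2b_e16' -> 'open_clip/ViT-B-32/laion2b_e16'
    arch, pretrained = alias[len('Marqo/'):].rsplit('.', 1)
    return f'open_clip/{arch}/{pretrained}'

assert (marqo_alias_to_open_clip('Marqo/ViT-B-16.laion400m_e32')
        == 'open_clip/ViT-B-16/laion400m_e32')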
2 changes: 0 additions & 2 deletions tests/core/inference/test_open_clip_model_load.py
@@ -10,8 +10,6 @@
from marqo.tensor_search.models.external_apis.s3 import S3Auth
from marqo.tensor_search.models.private_models import ModelAuth, ModelLocation

OPEN_CLIP_MODEL_PROPERTIES = _get_open_clip_properties()

@pytest.mark.unittest
class TestOpenCLIPModelLoad(TestCase):
"""A test suite for loading OpenCLIP models.
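The only change here removes the module-level OPEN_CLIP_MODEL_PROPERTIES binding. A plausible reading is that the suite now fetches the registry where it is needed instead of materializing it at import time; a sketch of that pattern, assuming only the function shown in the diff:

from marqo.s2_inference.model_registry import _get_open_clip_properties

def dimensions_for(model_key: str) -> int:
    # Build the registry lazily inside the helper rather than at import time.
    return _get_open_clip_properties()[model_key]['dimensions']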
44,865 changes: 44,864 additions & 1 deletion tests/s2_inference/embeddings_reference/embeddings_all_models_python_3_8.json

Large diffs are not rendered by default.

@@ -40,6 +40,7 @@ def test_validate_model_into_device(self):
small_list_of_models = ['open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k',
"sentence-transformers/all-MiniLM-L6-v2",
"flax-sentence-embeddings/all_datasets_v4_mpnet-base",
'Marqo/ViT-B-16.laion2b_s34b_b88k',
'open_clip/ViT-B-16/laion2b_s34b_b88k']
content = "Try to kill the cpu"

@@ -56,6 +57,7 @@ def test_check_memory_threshold_for_model(self):
small_list_of_models = ['open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k',
"sentence-transformers/all-MiniLM-L6-v2",
"flax-sentence-embeddings/all_datasets_v4_mpnet-base",
'Marqo/ViT-B-16.laion2b_s34b_b88k',
'open_clip/ViT-B-16/laion2b_s34b_b88k']

content = "Try to kill the cpu"
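Both stress tests above add 'Marqo/ViT-B-16.laion2b_s34b_b88k' next to its canonical open_clip name, so the alias path gets exercised too. An illustrative registry-level check in the same spirit (not the repo's actual assertion):

from marqo.s2_inference.model_registry import _get_open_clip_properties

def test_marqtune_alias_matches_canonical_entry():
    props = _get_open_clip_properties()
    alias = props['Marqo/ViT-B-16.laion2b_s34b_b88k']
    canonical = props['open_clip/ViT-B-16/laion2b_s34b_b88k']
    # Per the registry diff, both keys name the same checkpoint and dimension.
    assert alias['name'] == canonical['name']
    assert alias['dimensions'] == canonical['dimensions'] == 512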