From 309c6eb14ef1010b7bd3a83fcb80012518bc1ee3 Mon Sep 17 00:00:00 2001 From: Isabelle Mohr Date: Tue, 6 Aug 2024 15:10:02 +0200 Subject: [PATCH] Add normalization kwarg for encode --- model_meta.yml | 1 + models.py | 1 + 2 files changed, 2 insertions(+) diff --git a/model_meta.yml b/model_meta.yml index 9f87583..aa09e3e 100644 --- a/model_meta.yml +++ b/model_meta.yml @@ -154,6 +154,7 @@ model_meta: mteb_retrieval: 47.87 mteb_sts: 80.70 mteb_clustering: 41.73 + normalize_embeddings: True mixedbread-ai/mxbai-embed-large-v1: link: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1 revision: 990580e27d329c7408b3741ecff85876e128e203 diff --git a/models.py b/models.py index 2b34f72..e07cb66 100644 --- a/models.py +++ b/models.py @@ -232,6 +232,7 @@ def retrieve(self, query, corpus, model_name, topk=1): model = self.load_model(model_name) kwargs = {} if self.use_gcp_index else {"convert_to_tensor": True} + kwargs["normalize_embeddings"] = self.model_meta[model_name].get("normalize_embeddings", False) if f"instruction_query_{corpus}" in self.model_meta[model_name]: kwargs["instruction"] = self.model_meta[model_name][f"instruction_query_{corpus}"] logger.info(f"Using instruction: {kwargs['instruction']}")