From 3d7a02de0800972575083ce0a852c54552781bac Mon Sep 17 00:00:00 2001 From: Raynor Chavez Date: Tue, 25 Jun 2024 11:51:45 +0800 Subject: [PATCH] Change unstructured vespa score modifier expression (#880) Unstructured Indexes: The new expression fixes the `0` bug in the `multiply_score_by` score modifier, where a score modifier field that is specified in a query but does not exist in a document caused that document's score to become 0. --- .../unstructured_vespa_schema.py | 9 ++++----- .../test_schemas/unstructured_vespa_index_schema.sd | 4 ++-- ...uctured_vespa_index_schema_distance_metric_angular.sd | 2 +- ...ured_vespa_index_schema_distance_metric_dotproduct.sd | 2 +- ...tured_vespa_index_schema_distance_metric_euclidean.sd | 2 +- ...ured_vespa_index_schema_distance_metric_geodegrees.sd | 2 +- ...uctured_vespa_index_schema_distance_metric_hamming.sd | 2 +- ...index_schema_distance_metric_prenormalized-angular.sd | 2 +- .../integ_tests/test_dict_score_modifiers.py | 5 +++-- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/marqo/core/unstructured_vespa_index/unstructured_vespa_schema.py b/src/marqo/core/unstructured_vespa_index/unstructured_vespa_schema.py index 25b584a0a..dc5bbacbf 100644 --- a/src/marqo/core/unstructured_vespa_index/unstructured_vespa_schema.py +++ b/src/marqo/core/unstructured_vespa_index/unstructured_vespa_schema.py @@ -69,11 +69,10 @@ def _generate_unstructured_schema(cls, marqo_index: UnstructuredMarqoIndex) -> s dimension = str(marqo_index.model.get_dimension()) _score_modifier_expression = ( - f'if (count(query(marqo__mult_weights)) == 0, 1, ' - f'reduce(query(marqo__mult_weights) ' - f'* attribute(marqo__score_modifiers), prod)) * score ' - f'+ reduce(query(marqo__add_weights) ' - f'* attribute(marqo__score_modifiers), sum)' + f'if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, ' + f' 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) ' + f'* score ' + f'+ 
reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum)' ) return textwrap.dedent( diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema.sd index f2b0e635b..e7c1aaa4b 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema.sd @@ -118,8 +118,8 @@ schema marqo__test_00unstructured_00schema { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) - } + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) + } } rank-profile bm25_modifiers inherits modifiers { diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_angular.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_angular.sd index c78b458f7..d57a91e90 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_angular.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_angular.sd @@ -118,7 +118,7 @@ schema marqo__test_00unstructured_00schema_00distance_00metric { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * 
attribute(marqo__score_modifiers), sum) + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) } } diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_dotproduct.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_dotproduct.sd index 4bc326cd5..6704e9959 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_dotproduct.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_dotproduct.sd @@ -118,7 +118,7 @@ schema marqo__test_00unstructured_00schema_00distance_00metric { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) } } diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_euclidean.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_euclidean.sd index f0f5d3106..1ec49cb13 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_euclidean.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_euclidean.sd @@ -118,7 +118,7 @@ schema 
marqo__test_00unstructured_00schema_00distance_00metric { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) } } diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_geodegrees.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_geodegrees.sd index 7390a943b..40d420b17 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_geodegrees.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_geodegrees.sd @@ -118,7 +118,7 @@ schema marqo__test_00unstructured_00schema_00distance_00metric { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) } } diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_hamming.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_hamming.sd index 
8876f8395..1ad772ec3 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_hamming.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_hamming.sd @@ -118,7 +118,7 @@ schema marqo__test_00unstructured_00schema_00distance_00metric { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) } } diff --git a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_prenormalized-angular.sd b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_prenormalized-angular.sd index 5692f6fcd..4875a4328 100644 --- a/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_prenormalized-angular.sd +++ b/tests/core/unstructured_vespa_index/test_schemas/unstructured_vespa_index_schema_distance_metric_prenormalized-angular.sd @@ -118,7 +118,7 @@ schema marqo__test_00unstructured_00schema_00distance_00metric { query(marqo__add_weights) tensor(p{}) } function modify(score) { - expression: if (count(query(marqo__mult_weights)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score + reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) + expression: if (count(query(marqo__mult_weights) * attribute(marqo__score_modifiers)) == 0, 1, reduce(query(marqo__mult_weights) * attribute(marqo__score_modifiers), prod)) * score 
+ reduce(query(marqo__add_weights) * attribute(marqo__score_modifiers), sum) } } diff --git a/tests/tensor_search/integ_tests/test_dict_score_modifiers.py b/tests/tensor_search/integ_tests/test_dict_score_modifiers.py index 8bfc12e8d..640e9ca34 100644 --- a/tests/tensor_search/integ_tests/test_dict_score_modifiers.py +++ b/tests/tensor_search/integ_tests/test_dict_score_modifiers.py @@ -2,7 +2,6 @@ from unittest import mock from marqo.api.models.update_documents import UpdateDocumentsBodyParams -from marqo.core.exceptions import UnsupportedFeatureError from marqo.core.models.marqo_index import * from marqo.core.models.marqo_index_request import FieldRequest from marqo.core.structured_vespa_index.structured_vespa_index import StructuredVespaIndex @@ -216,7 +215,9 @@ def test_multiply_score_by_map_score_modifier(self): # Search with score modifier # 0.5 * 0.5 * 4 = 1 (1 and 7) score_modifier = ScoreModifier( - **{"multiply_score_by": [{"field_name": "map_score_mods.a", "weight": 4}]}) + **{"multiply_score_by": [{"field_name": "map_score_mods.a", "weight": 4}, + {"field_name": "map_score_mods.d", "weight": 4}]}) # Nonexistent field. + # Nonexistent field should not zero out the whole score res = tensor_search.search( index_name=index.name, config=self.config, text="", score_modifiers=score_modifier,