diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle
index fe3db254c..cca53f09e 100644
--- a/qa/restart-upgrade/build.gradle
+++ b/qa/restart-upgrade/build.gradle
@@ -102,6 +102,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
     if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
         filter {
             excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
         }
     }
 
@@ -166,6 +167,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
     if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
         filter {
             excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
         }
     }
 
diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java
new file mode 100644
index 000000000..66905bdfc
--- /dev/null
+++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java
@@ -0,0 +1,144 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.opensearch.neuralsearch.bwc;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_COMBINATION_METHOD;
+import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_NORMALIZATION_METHOD;
+import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
+import static org.opensearch.neuralsearch.util.TestUtils.PARAM_NAME_WEIGHTS;
+import static org.opensearch.neuralsearch.util.TestUtils.TEXT_EMBEDDING_PROCESSOR;
+import static org.opensearch.neuralsearch.util.TestUtils.getModelId;
+
+import org.opensearch.index.query.MatchQueryBuilder;
+import org.opensearch.index.query.QueryBuilder;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.knn.index.query.rescore.RescoreContext;
+import org.opensearch.neuralsearch.query.HybridQueryBuilder;
+import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
+
+/**
+ * Restart-upgrade BWC test for a hybrid query combined with a rescore query.
+ * Kept in its own class so the Gradle build can exclude it for BWC versions 2.9 through 2.14.
+ */
+public class HybridSearchWithRescoreIT extends AbstractRestartUpgradeRestTestCase {
+    private static final String PIPELINE_NAME = "nlp-hybrid-with-rescore-pipeline";
+    private static final String SEARCH_PIPELINE_NAME = "nlp-search-with_rescore-pipeline";
+    private static final String TEST_FIELD = "passage_text";
+    private static final String TEXT = "Hello world";
+    private static final String TEXT_UPGRADED = "Hi earth";
+    private static final String QUERY = "Hi world";
+    private static final int NUM_DOCS_PER_ROUND = 1;
+    private static final String VECTOR_EMBEDDING_FIELD = "passage_embedding";
+    protected static final String RESCORE_QUERY = "hi";
+
+    /**
+     * Runs hybrid search with a rescore query end to end across a full cluster restart.
+     * The old cluster uploads and loads the model, then creates the ingest pipeline, index, first document and search
+     * pipeline. The upgraded cluster adds a second document, runs the hybrid query first without and then with method
+     * parameters and a rescore context on the neural sub-query, and finally passes the index, ingest pipeline, model
+     * and search pipeline to wipeOfTestResources. This test is separate because rescore is not compatible with
+     * versions lower than 2.15.
+     *
+     * @throws Exception if any cluster request or resource lookup fails
+     */
+    public void testHybridQueryWithRescore_whenIndexWithMultipleShards_E2EFlow() throws Exception {
+        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
+
+        if (isRunningAgainstOldCluster()) {
+            String modelId = uploadTextEmbeddingModel();
+            loadModel(modelId);
+            createPipelineProcessor(modelId, PIPELINE_NAME);
+            createIndexWithConfiguration(
+                getIndexNameForTest(),
+                Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())),
+                PIPELINE_NAME
+            );
+            addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, null, null);
+            createSearchPipeline(
+                SEARCH_PIPELINE_NAME,
+                DEFAULT_NORMALIZATION_METHOD,
+                DEFAULT_COMBINATION_METHOD,
+                Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f }))
+            );
+        } else {
+            String modelId = null;
+            try {
+                modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
+                loadModel(modelId);
+                addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_UPGRADED, null, null);
+                HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null);
+                QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
+                validateTestIndex(getIndexNameForTest(), hybridQueryBuilder, rescorer);
+                hybridQueryBuilder = getQueryBuilder(modelId, Map.of("ef_search", 100), RescoreContext.getDefault());
+                validateTestIndex(getIndexNameForTest(), hybridQueryBuilder, rescorer);
+            } finally {
+                // Also hand over the search pipeline created on the old cluster so it does not outlive the test.
+                wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, SEARCH_PIPELINE_NAME);
+            }
+        }
+    }
+
+    /**
+     * Asserts that the index holds two documents and that a hybrid search through the search pipeline returns
+     * exactly one hit, with every score from getNormalizationScoreList lying in the range [0, 2].
+     *
+     * @param index name of the index to search
+     * @param queryBuilder hybrid query to execute
+     * @param rescorer query handed to the search helper as the rescore query
+     */
+    private void validateTestIndex(final String index, HybridQueryBuilder queryBuilder, QueryBuilder rescorer) {
+        int docCount = getDocCount(index);
+        assertEquals(2, docCount);
+        Map<String, Object> searchResponseAsMap = search(index, queryBuilder, rescorer, 1, Map.of("search_pipeline", SEARCH_PIPELINE_NAME));
+        assertNotNull(searchResponseAsMap);
+        int hits = getHitCount(searchResponseAsMap);
+        assertEquals(1, hits);
+        List<Double> scoresList = getNormalizationScoreList(searchResponseAsMap);
+        for (Double score : scoresList) {
+            assertTrue(0 <= score && score <= 2);
+        }
+    }
+
+    /**
+     * Builds a hybrid query made of a match query and a neural query.
+     *
+     * @param modelId id of the model used by the neural sub-query
+     * @param methodParameters optional method parameters for the neural sub-query; skipped when {@code null}
+     * @param rescoreContextForNeuralQuery optional rescore context for the neural sub-query; skipped when {@code null}
+     * @return the hybrid query with the match query added first and the neural query second
+     */
+    private HybridQueryBuilder getQueryBuilder(
+        final String modelId,
+        final Map<String, ?> methodParameters,
+        final RescoreContext rescoreContextForNeuralQuery
+    ) {
+        NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
+        neuralQueryBuilder.fieldName(VECTOR_EMBEDDING_FIELD);
+        neuralQueryBuilder.modelId(modelId);
+        neuralQueryBuilder.queryText(QUERY);
+        neuralQueryBuilder.k(5);
+        if (methodParameters != null) {
+            neuralQueryBuilder.methodParameters(methodParameters);
+        }
+        if (Objects.nonNull(rescoreContextForNeuralQuery)) {
+            neuralQueryBuilder.rescoreContext(rescoreContextForNeuralQuery);
+        }
+
+        // NOTE(review): matches on "text" rather than TEST_FIELD ("passage_text"); confirm this is intended.
+        MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY);
+
+        HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
+        hybridQueryBuilder.add(matchQueryBuilder);
+        hybridQueryBuilder.add(neuralQueryBuilder);
+
+        return hybridQueryBuilder;
+    }
+}
diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java
index 78caca0e0..7f1da63c3 100644
--- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java
+++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java
@@ -68,8 +68,7 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr
                 if (isFirstMixedRound()) {
                     totalDocsCountMixed = NUM_DOCS_PER_ROUND;
                     HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null, null);
-                    QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
-                    validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, hybridQueryBuilder, rescorer);
+                    validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, hybridQueryBuilder, null);
                     addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_MIXED, null, null);
                 } else {
                     totalDocsCountMixed = 2 * NUM_DOCS_PER_ROUND;
@@ -84,10 +83,9 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr
                     loadModel(modelId);
                     addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT_UPGRADED, null, null);
                     HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null, null);
-                    QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
-                    validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, rescorer);
+                    validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, null);
                     hybridQueryBuilder = getQueryBuilder(modelId, Boolean.FALSE, Map.of("ef_search", 100), RescoreContext.getDefault());
-                    validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, rescorer);
+                    validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, null);
                 } finally {
                     wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, SEARCH_PIPELINE_NAME);
                 }