Skip to content

Commit

Permalink
Merge from main
Browse files Browse the repository at this point in the history
Signed-off-by: Varun Jain <[email protected]>
  • Loading branch information
vibrantvarun committed Jan 8, 2025
2 parents 62498ad + fea0a7f commit ca81c58
Show file tree
Hide file tree
Showing 41 changed files with 1,663 additions and 897 deletions.
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support new knn query parameter expand_nested ([#1013](https://github.com/opensearch-project/neural-search/pull/1013))
- Implement pruning for neural sparse ingestion pipeline and two phase search processor ([#988](https://github.com/opensearch-project/neural-search/pull/988))
- Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
- Optimize ML inference connection retry logic ([#1054](https://github.com/opensearch-project/neural-search/pull/1054))
- Support for builder constructor in Neural Query Builder ([#1047](https://github.com/opensearch-project/neural-search/pull/1047))
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
- Fixed document source and score field mismatch in sorted hybrid queries ([#1043](https://github.com/opensearch-project/neural-search/pull/1043))
- Fix bug where embedding is missing when ingested document has "." in field name, and mismatches fieldMap config ([#1062](https://github.com/opensearch-project/neural-search/pull/1062))
### Infrastructure
### Documentation
### Maintenance
Expand Down
2 changes: 2 additions & 0 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
}
}

Expand Down Expand Up @@ -166,6 +167,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,12 @@ private HybridQueryBuilder getQueryBuilder(
final Map<String, ?> methodParameters,
final RescoreContext rescoreContext
) {
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName("passage_embedding");
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(QUERY);
neuralQueryBuilder.k(5);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.modelId(modelId)
.queryText(QUERY)
.k(5)
.build();
if (expandNestedDocs != null) {
neuralQueryBuilder.expandNested(expandNestedDocs);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_COMBINATION_METHOD;
import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_NORMALIZATION_METHOD;
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.PARAM_NAME_WEIGHTS;
import static org.opensearch.neuralsearch.util.TestUtils.TEXT_EMBEDDING_PROCESSOR;
import static org.opensearch.neuralsearch.util.TestUtils.getModelId;

import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.knn.index.query.rescore.RescoreContext;
import org.opensearch.neuralsearch.query.HybridQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;

public class HybridSearchWithRescoreIT extends AbstractRestartUpgradeRestTestCase {
private static final String PIPELINE_NAME = "nlp-hybrid-with-rescore-pipeline";
private static final String SEARCH_PIPELINE_NAME = "nlp-search-with_rescore-pipeline";
private static final String TEST_FIELD = "passage_text";
private static final String TEXT = "Hello world";
private static final String TEXT_UPGRADED = "Hi earth";
private static final String QUERY = "Hi world";
private static final int NUM_DOCS_PER_ROUND = 1;
private static final String VECTOR_EMBEDDING_FIELD = "passage_embedding";
protected static final String RESCORE_QUERY = "hi";

/**
* Test normalization with hybrid query and rescore. This test is required as rescore will not be compatible with version lower than 2.15
*/
public void testHybridQueryWithRescore_whenIndexWithMultipleShards_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);

if (isRunningAgainstOldCluster()) {
String modelId = uploadTextEmbeddingModel();
loadModel(modelId);
createPipelineProcessor(modelId, PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())),
PIPELINE_NAME
);
addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, null, null);
createSearchPipeline(
SEARCH_PIPELINE_NAME,
DEFAULT_NORMALIZATION_METHOD,
DEFAULT_COMBINATION_METHOD,
Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f }))
);
} else {
String modelId = null;
try {
modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
loadModel(modelId);
addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_UPGRADED, null, null);
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null);
QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
validateTestIndex(getIndexNameForTest(), hybridQueryBuilder, rescorer);
hybridQueryBuilder = getQueryBuilder(modelId, Map.of("ef_search", 100), RescoreContext.getDefault());
validateTestIndex(getIndexNameForTest(), hybridQueryBuilder, rescorer);
} finally {
wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null);
}
}
}

private void validateTestIndex(final String index, HybridQueryBuilder queryBuilder, QueryBuilder rescorer) {
int docCount = getDocCount(index);
assertEquals(2, docCount);
Map<String, Object> searchResponseAsMap = search(index, queryBuilder, rescorer, 1, Map.of("search_pipeline", SEARCH_PIPELINE_NAME));
assertNotNull(searchResponseAsMap);
int hits = getHitCount(searchResponseAsMap);
assertEquals(1, hits);
List<Double> scoresList = getNormalizationScoreList(searchResponseAsMap);
for (Double score : scoresList) {
assertTrue(0 <= score && score <= 2);
}
}

private HybridQueryBuilder getQueryBuilder(
final String modelId,
final Map<String, ?> methodParameters,
final RescoreContext rescoreContextForNeuralQuery
) {
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName(VECTOR_EMBEDDING_FIELD)
.modelId(modelId)
.queryText(QUERY)
.k(5)
.build();
if (methodParameters != null) {
neuralQueryBuilder.methodParameters(methodParameters);
}
if (Objects.nonNull(rescoreContextForNeuralQuery)) {
neuralQueryBuilder.rescoreContext(rescoreContextForNeuralQuery);
}

MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY);

HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(matchQueryBuilder);
hybridQueryBuilder.add(neuralQueryBuilder);

return hybridQueryBuilder;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,37 +51,25 @@ public void testKnnRadialSearch_E2EFlow() throws Exception {
}

private void validateIndexQuery(final String modelId) {
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = new NeuralQueryBuilder(
"passage_embedding",
TEXT,
TEST_IMAGE_TEXT,
modelId,
null,
null,
0.01f,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.queryImage(TEST_IMAGE_TEXT)
.modelId(modelId)
.minScore(0.01f)
.build();

Map<String, Object> responseWithMinScoreQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1);
assertNotNull(responseWithMinScoreQuery);

NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = new NeuralQueryBuilder(
"passage_embedding",
TEXT,
TEST_IMAGE_TEXT,
modelId,
null,
100000f,
null,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.queryImage(TEST_IMAGE_TEXT)
.modelId(modelId)
.maxDistance(100000f)
.build();

Map<String, Object> responseWithMaxDistanceQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1);
assertNotNull(responseWithMaxDistanceQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,13 @@ public void testTextImageEmbeddingProcessor_E2EFlow() throws Exception {
private void validateTestIndex(final String modelId) throws Exception {
int docCount = getDocCount(getIndexNameForTest());
assertEquals(2, docCount);
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
"passage_embedding",
TEXT,
TEST_IMAGE_TEXT,
modelId,
1,
null,
null,
null,
null,
null,
null,
null
);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.queryImage(TEST_IMAGE_TEXT)
.modelId(modelId)
.k(1)
.build();
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
assertNotNull(response);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,14 @@ public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws

public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = NeuralQueryBuilder.builder()
.fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1)
.build();
NeuralQueryBuilder neuralQueryBuilderWithModelId = NeuralQueryBuilder.builder()
.fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1)
.build();

if (isRunningAgainstOldCluster()) {
String modelId = uploadTextEmbeddingModel();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ public void testTextEmbeddingProcessor_E2EFlow() throws Exception {
private void validateTestIndex(final String modelId) throws Exception {
int docCount = getDocCount(getIndexNameForTest());
assertEquals(2, docCount);
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName("passage_embedding");
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(TEXT);
neuralQueryBuilder.k(1);
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName("passage_embedding")
.queryText(TEXT)
.modelId(modelId)
.k(1)
.build();
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
assertNotNull(response);
}
Expand Down
Loading

0 comments on commit ca81c58

Please sign in to comment.