diff --git a/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldDerivedVectorInjector.java b/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldDerivedVectorInjector.java index c85aacd9c..016b4153a 100644 --- a/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldDerivedVectorInjector.java +++ b/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldDerivedVectorInjector.java @@ -38,17 +38,12 @@ public class NestedPerFieldDerivedVectorInjector implements PerFieldDerivedVecto @Override public void inject(int parentDocId, Map sourceAsMap) throws IOException { // If the parent has the field, then it is just an object field. - if (getLowestDocIdForField(childFieldInfo.name, parentDocId) == parentDocId) { + int lowestDocIdForFieldWithParentAsOffset = getLowestDocIdForField(childFieldInfo.name, parentDocId); + if (lowestDocIdForFieldWithParentAsOffset == parentDocId) { injectObject(parentDocId, sourceAsMap); return; } - if (ParentChildHelper.splitPath(childFieldInfo.name).length > 2) { - // We do not support nested fields beyond one level - log.warn("Nested fields beyond one level are not supported. Field: {}", childFieldInfo.name); - return; - } - // Setup the iterator. Return if no parent String childFieldName = ParentChildHelper.getChildField(childFieldInfo.name); String parentFieldName = ParentChildHelper.getParentField(childFieldInfo.name); @@ -62,6 +57,10 @@ public void inject(int parentDocId, Map sourceAsMap) throws IOEx parentDocId ); + if (nestedPerFieldParentToDocIdIterator.numChildren() == 0) { + return; + } + // Initializes the parent field so that there is a list to put each of the children Object originalParentValue = sourceAsMap.get(parentFieldName); List> reconstructedSource; diff --git a/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldParentToDocIdIterator.java b/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldParentToDocIdIterator.java index d6d4e5062..d2bc1a32f 100644 --- a/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldParentToDocIdIterator.java +++ b/src/main/java/org/opensearch/knn/index/codec/derivedsource/NestedPerFieldParentToDocIdIterator.java @@ -91,6 +91,14 @@ public int childId() { return children.get(currentChild); } + /** + * + * @return the number of children for this parent + */ + public int numChildren() { + return children.size(); + } + /** * For parentDocId of this class, find the one just before it to be used for matching children. * @@ -122,6 +130,10 @@ private int previousParent() throws IOException { * @throws IOException if there is an error reading the children */ private List getChildren() throws IOException { + if (this.parentDocId - this.previousParentDocId <= 1) { + return Collections.emptyList(); + } + // First, we need to get the currect PostingsEnum for the key as _nested_path and the value the actual parent // path. String childField = childFieldInfo.name; diff --git a/src/test/java/org/opensearch/knn/integ/DerivedSourceIT.java b/src/test/java/org/opensearch/knn/integ/DerivedSourceIT.java index 4b569a41a..2607a9695 100644 --- a/src/test/java/org/opensearch/knn/integ/DerivedSourceIT.java +++ b/src/test/java/org/opensearch/knn/integ/DerivedSourceIT.java @@ -9,7 +9,6 @@ import lombok.Builder; import lombok.Data; import lombok.SneakyThrows; -import org.junit.Ignore; import org.opensearch.common.CheckedConsumer; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentFactory; @@ -30,7 +29,7 @@ * Integration tests for derived source feature for vector fields. Currently, with derived source, there are * a few gaps in functionality. Ignoring tests for now as feature is experimental. */ -@Ignore +// @Ignore public class DerivedSourceIT extends KNNRestTestCase { private final static String NESTED_NAME = "test_nested"; @@ -52,10 +51,14 @@ public class DerivedSourceIT extends KNNRestTestCase { .build(); /** - * Testing flat, single field base case with index configuration: + * Testing flat, single field base case with index configuration. The test will automatically skip adding fields for + * random documents to ensure it works robustly. To ensure correctness, we repeat same operations against an + * index without derived source enabled (baseline). + * Test mapping: * { * "settings": { * "index.knn" true, + * "index.knn.derived_source.enabled": true * }, * "mappings":{ * "properties": { @@ -66,10 +69,11 @@ public class DerivedSourceIT extends KNNRestTestCase { * } * } * } - * Comparing to the baseline: + * Baseline mapping: * { * "settings": { * "index.knn" true, + * "index.knn.derived_source.enabled": true * }, * "mappings":{ * "properties": { @@ -155,6 +159,53 @@ public void testFlatBaseCase() { testDerivedSourceE2E(indexConfigContexts); } + /** + * Testing multiple flat fields. + * Test mapping: + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": true + * }, + * "mappings":{ + * "properties": { + * "test_vector1": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "test_vector1": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "text": { + * "type": "text", + * }, + * } + * } + * } + * Baseline mapping: + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": false + * }, + * "mappings":{ + * "properties": { + * "test_vector1": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "test_vector1": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "text": { + * "type": "text", + * }, + * } + * } + * } + */ @SneakyThrows public void testMultiFlatFields() { XContentBuilder builder = XContentFactory.jsonBuilder() @@ -263,6 +314,55 @@ public void testMultiFlatFields() { testDerivedSourceE2E(indexConfigContexts); } + /** + * Testing single nested doc per parent doc. + * Test mapping: + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": true + * }, + * "mappings":{ + * "properties": { + * "test_nested" : { + * "type": "nested", + * "properties": { + * "test_vector": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "text": { + * "type": "text", + * }, + * } + * } + * } + * } + * } + * Baseline mapping: + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": false + * }, + * "mappings":{ + * "properties": { + * "test_nested" : { + * "type": "nested", + * "properties": { + * "test_vector": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "text": { + * "type": "text", + * }, + * } + * } + * } + * } + * } + */ public void testNestedSingleDocBasic() { String nestedMapping = createVectorNestedMappings(TEST_DIMENSION); List indexConfigContexts = List.of( @@ -351,6 +451,56 @@ public void testNestedSingleDocBasic() { testDerivedSourceE2E(indexConfigContexts); } + /** + * Testing single nested doc per parent doc. + * Test mapping: + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": true + * }, + * "mappings":{ + * "properties": { + * "test_nested" : { + * "type": "nested", + * "properties": { + * "test_vector": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "text": { + * "type": "text", + * }, + * } + * } + * + * } + * } + * } + * Baseline mapping: + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": false + * }, + * "mappings":{ + * "properties": { + * "test_nested" : { + * "type": "nested", + * "properties": { + * "test_vector": { + * "type": "knn_vector", + * "dimension": 128 + * }, + * "text": { + * "type": "text", + * }, + * } + * } + * } + * } + * } + */ @SneakyThrows public void testNestedMultiDocBasic() { String nestedMapping = createVectorNestedMappings(TEST_DIMENSION); @@ -443,28 +593,71 @@ public void testNestedMultiDocBasic() { } /** + * Test object (non-nested field) + * Test * { - * "properties": { - * "vector_field_1" : { - * "type" : "knn_vector", - * "dimension" : 2 - * }, - * "path_1": { - * "properties" : { - * "vector_field_2" : { - * "type" : "knn_vector", - * "dimension" : 2 - * }, - * "path_2": { - * "properties" : { - * "vector_field_3" : { - * "type" : "knn_vector", - * "dimension" : 2 - * }, - * } - * } - * } - * } + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": true + * }, + * "mappings":{ + * { + * "properties": { + * "vector_field_1" : { + * "type" : "knn_vector", + * "dimension" : 2 + * }, + * "path_1": { + * "properties" : { + * "vector_field_2" : { + * "type" : "knn_vector", + * "dimension" : 2 + * }, + * "path_2": { + * "properties" : { + * "vector_field_3" : { + * "type" : "knn_vector", + * "dimension" : 2 + * }, + * } + * } + * } + * } + * } + * } + * } + * } + * Baseline + * { + * "settings": { + * "index.knn" true, + * "index.knn.derived_source.enabled": false + * }, + * "mappings":{ + * { + * "properties": { + * "vector_field_1" : { + * "type" : "knn_vector", + * "dimension" : 2 + * }, + * "path_1": { + * "properties" : { + * "vector_field_2" : { + * "type" : "knn_vector", + * "dimension" : 2 + * }, + * "path_2": { + * "properties" : { + * "vector_field_3" : { + * "type" : "knn_vector", + * "dimension" : 2 + * }, + * } + * } + * } + * } + * } + * } * } * } */ @@ -625,8 +818,7 @@ public void testObjectFieldTypes() { } // TODO Test configurations - // 1. Object fields - // 2. FLS index + // 1. Ensure compatibility with FLS // We need to write a single method that will run through all the different possible combinations and // abstact when necessary. diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index bd4858bb5..e95deb558 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -1477,7 +1477,6 @@ public void bulkIngestRandomVectorsMultiFieldsWithSkips( for (int k = 0; k < fields.length - 1; k++) { String field = fields[k]; Object value = currentMap.get(field); - log.info("Value: " + value); currentMap = (Map) currentMap.computeIfAbsent(field, t -> new HashMap<>()); } currentMap.put(fields[fields.length - 1], vectors.get(j)[i]); @@ -1486,14 +1485,10 @@ public void bulkIngestRandomVectorsMultiFieldsWithSkips( for (int j = 0; j < includeTextFields.size(); j++) { if (includeTextFields.get(j)) { String[] fields = ParentChildHelper.splitPath(textFields.get(j)); - log.info("Fields: " + Arrays.toString(fields)); Map currentMap = source; for (int k = 0; k < fields.length - 1; k++) { String field = fields[k]; Object value = currentMap.get(field); - log.info("FUll path: " + textFields.get(j)); - log.info("Key: " + field); - log.info("Value: " + value); currentMap = (Map) currentMap.computeIfAbsent(field, t -> new HashMap<>()); } currentMap.put(fields[fields.length - 1], "test-test"); @@ -1503,7 +1498,6 @@ public void bulkIngestRandomVectorsMultiFieldsWithSkips( XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); mapToBuilder(builder, source); builder.endObject(); - log.info(builder.toString()); addKnnDoc(indexName, String.valueOf(i + 1), builder.toString()); } } @@ -1569,7 +1563,6 @@ public void bulkIngestRandomVectorsWithSkipsAndNestedMultiDoc( } builder.endArray(); builder.endObject(); - // log.info(builder.toString()); addKnnDoc(indexName, String.valueOf(i + 1), builder.toString()); } }