Skip to content

Commit

Permalink
Fix issues with object type
Browse files Browse the repository at this point in the history
Signed-off-by: John Mazanec <[email protected]>
  • Loading branch information
jmazanec15 committed Jan 29, 2025
1 parent c3483df commit 399d281
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,12 @@ public class NestedPerFieldDerivedVectorInjector implements PerFieldDerivedVecto
@Override
public void inject(int parentDocId, Map<String, Object> sourceAsMap) throws IOException {
// If the parent has the field, then it is just an object field.
if (getLowestDocIdForField(childFieldInfo.name, parentDocId) == parentDocId) {
int lowestDocIdForFieldWithParentAsOffset = getLowestDocIdForField(childFieldInfo.name, parentDocId);
if (lowestDocIdForFieldWithParentAsOffset == parentDocId) {
injectObject(parentDocId, sourceAsMap);
return;
}

if (ParentChildHelper.splitPath(childFieldInfo.name).length > 2) {
// We do not support nested fields beyond one level
log.warn("Nested fields beyond one level are not supported. Field: {}", childFieldInfo.name);
return;
}

// Setup the iterator. Return if no parent
String childFieldName = ParentChildHelper.getChildField(childFieldInfo.name);
String parentFieldName = ParentChildHelper.getParentField(childFieldInfo.name);
Expand All @@ -62,6 +57,10 @@ public void inject(int parentDocId, Map<String, Object> sourceAsMap) throws IOEx
parentDocId
);

if (nestedPerFieldParentToDocIdIterator.numChildren() == 0) {
return;
}

// Initializes the parent field so that there is a list to put each of the children
Object originalParentValue = sourceAsMap.get(parentFieldName);
List<Map<String, Object>> reconstructedSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ public int childId() {
return children.get(currentChild);
}

/**
*
* @return the number of children for this parent
*/
public int numChildren() {
return children.size();
}

/**
* For parentDocId of this class, find the one just before it to be used for matching children.
*
Expand Down Expand Up @@ -122,6 +130,10 @@ private int previousParent() throws IOException {
* @throws IOException if there is an error reading the children
*/
private List<Integer> getChildren() throws IOException {
if (this.parentDocId - this.previousParentDocId <= 1) {
return Collections.emptyList();
}

// First, we need to get the currect PostingsEnum for the key as _nested_path and the value the actual parent
// path.
String childField = childFieldInfo.name;
Expand Down
246 changes: 219 additions & 27 deletions src/test/java/org/opensearch/knn/integ/DerivedSourceIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import lombok.Builder;
import lombok.Data;
import lombok.SneakyThrows;
import org.junit.Ignore;
import org.opensearch.common.CheckedConsumer;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
Expand All @@ -30,7 +29,7 @@
* Integration tests for derived source feature for vector fields. Currently, with derived source, there are
* a few gaps in functionality. Ignoring tests for now as feature is experimental.
*/
@Ignore
// @Ignore
public class DerivedSourceIT extends KNNRestTestCase {

private final static String NESTED_NAME = "test_nested";
Expand All @@ -52,10 +51,14 @@ public class DerivedSourceIT extends KNNRestTestCase {
.build();

/**
* Testing flat, single field base case with index configuration:
* Testing flat, single field base case with index configuration. The test will automatically skip adding fields for
* random documents to ensure it works robustly. To ensure correctness, we repeat same operations against an
* index without derived source enabled (baseline).
* Test mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": true
* },
* "mappings":{
* "properties": {
Expand All @@ -66,10 +69,11 @@ public class DerivedSourceIT extends KNNRestTestCase {
* }
* }
* }
* Comparing to the baseline:
* Baseline mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": true
* },
* "mappings":{
* "properties": {
Expand Down Expand Up @@ -155,6 +159,53 @@ public void testFlatBaseCase() {
testDerivedSourceE2E(indexConfigContexts);
}

/**
* Testing multiple flat fields.
* Test mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": true
* },
* "mappings":{
* "properties": {
* "test_vector1": {
* "type": "knn_vector",
* "dimension": 128
* },
* "test_vector1": {
* "type": "knn_vector",
* "dimension": 128
* },
* "text": {
* "type": "text",
* },
* }
* }
* }
* Baseline mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": false
* },
* "mappings":{
* "properties": {
* "test_vector1": {
* "type": "knn_vector",
* "dimension": 128
* },
* "test_vector1": {
* "type": "knn_vector",
* "dimension": 128
* },
* "text": {
* "type": "text",
* },
* }
* }
* }
*/
@SneakyThrows
public void testMultiFlatFields() {
XContentBuilder builder = XContentFactory.jsonBuilder()
Expand Down Expand Up @@ -263,6 +314,55 @@ public void testMultiFlatFields() {
testDerivedSourceE2E(indexConfigContexts);
}

/**
* Testing single nested doc per parent doc.
* Test mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": true
* },
* "mappings":{
* "properties": {
* "test_nested" : {
* "type": "nested",
* "properties": {
* "test_vector": {
* "type": "knn_vector",
* "dimension": 128
* },
* "text": {
* "type": "text",
* },
* }
* }
* }
* }
* }
* Baseline mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": false
* },
* "mappings":{
* "properties": {
* "test_nested" : {
* "type": "nested",
* "properties": {
* "test_vector": {
* "type": "knn_vector",
* "dimension": 128
* },
* "text": {
* "type": "text",
* },
* }
* }
* }
* }
* }
*/
public void testNestedSingleDocBasic() {
String nestedMapping = createVectorNestedMappings(TEST_DIMENSION);
List<IndexConfigContext> indexConfigContexts = List.of(
Expand Down Expand Up @@ -351,6 +451,56 @@ public void testNestedSingleDocBasic() {
testDerivedSourceE2E(indexConfigContexts);
}

/**
* Testing single nested doc per parent doc.
* Test mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": true
* },
* "mappings":{
* "properties": {
* "test_nested" : {
* "type": "nested",
* "properties": {
* "test_vector": {
* "type": "knn_vector",
* "dimension": 128
* },
* "text": {
* "type": "text",
* },
* }
* }
*
* }
* }
* }
* Baseline mapping:
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": false
* },
* "mappings":{
* "properties": {
* "test_nested" : {
* "type": "nested",
* "properties": {
* "test_vector": {
* "type": "knn_vector",
* "dimension": 128
* },
* "text": {
* "type": "text",
* },
* }
* }
* }
* }
* }
*/
@SneakyThrows
public void testNestedMultiDocBasic() {
String nestedMapping = createVectorNestedMappings(TEST_DIMENSION);
Expand Down Expand Up @@ -443,28 +593,71 @@ public void testNestedMultiDocBasic() {
}

/**
* Test object (non-nested field)
* Test
* {
* "properties": {
* "vector_field_1" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* "path_1": {
* "properties" : {
* "vector_field_2" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* "path_2": {
* "properties" : {
* "vector_field_3" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* }
* }
* }
* }
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": true
* },
* "mappings":{
* {
* "properties": {
* "vector_field_1" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* "path_1": {
* "properties" : {
* "vector_field_2" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* "path_2": {
* "properties" : {
* "vector_field_3" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* }
* }
* }
* }
* }
* }
* }
* }
* Baseline
* {
* "settings": {
* "index.knn" true,
* "index.knn.derived_source.enabled": false
* },
* "mappings":{
* {
* "properties": {
* "vector_field_1" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* "path_1": {
* "properties" : {
* "vector_field_2" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* "path_2": {
* "properties" : {
* "vector_field_3" : {
* "type" : "knn_vector",
* "dimension" : 2
* },
* }
* }
* }
* }
* }
* }
* }
* }
*/
Expand Down Expand Up @@ -625,8 +818,7 @@ public void testObjectFieldTypes() {
}

// TODO Test configurations
// 1. Object fields
// 2. FLS index
// 1. Ensure compatibility with FLS

// We need to write a single method that will run through all the different possible combinations and
// abstact when necessary.
Expand Down
Loading

0 comments on commit 399d281

Please sign in to comment.