-
Notifications
You must be signed in to change notification settings - Fork 141
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce derived vector source via stored fields (#2467)
Generates the vector source in the source field from the KnnVectorsFormat or BVD. It does this by adding a StoredFieldsFormat to our existing custom codec. Currently, the feature is experimental and gated behind a feature flag via an index setting. In the future, we need to iterate to improve performance and stability for the nested/object portions. Signed-off-by: John Mazanec <[email protected]>
- Loading branch information
1 parent
168ee3c
commit 9b07a12
Showing
29 changed files
with
3,015 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
82 changes: 82 additions & 0 deletions
82
...ain/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsFormat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN9120Codec; | ||
|
||
import lombok.AllArgsConstructor; | ||
import org.apache.lucene.codecs.StoredFieldsFormat; | ||
import org.apache.lucene.codecs.StoredFieldsReader; | ||
import org.apache.lucene.codecs.StoredFieldsWriter; | ||
import org.apache.lucene.index.FieldInfo; | ||
import org.apache.lucene.index.FieldInfos; | ||
import org.apache.lucene.index.SegmentInfo; | ||
import org.apache.lucene.index.SegmentReadState; | ||
import org.apache.lucene.store.Directory; | ||
import org.apache.lucene.store.IOContext; | ||
import org.opensearch.common.Nullable; | ||
import org.opensearch.index.mapper.MappedFieldType; | ||
import org.opensearch.index.mapper.MapperService; | ||
import org.opensearch.knn.index.KNNSettings; | ||
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier; | ||
import org.opensearch.knn.index.mapper.KNNVectorFieldType; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import static org.opensearch.knn.common.KNNConstants.DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY; | ||
import static org.opensearch.knn.common.KNNConstants.DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE; | ||
|
||
@AllArgsConstructor | ||
public class DerivedSourceStoredFieldsFormat extends StoredFieldsFormat { | ||
|
||
private final StoredFieldsFormat delegate; | ||
private final DerivedSourceReadersSupplier derivedSourceReadersSupplier; | ||
// IMPORTANT Do not rely on this for the reader, it will be null if SPI is used | ||
@Nullable | ||
private final MapperService mapperService; | ||
|
||
@Override | ||
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext ioContext) | ||
throws IOException { | ||
List<FieldInfo> derivedVectorFields = null; | ||
for (FieldInfo fieldInfo : fieldInfos) { | ||
if (DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE.equals(fieldInfo.attributes().get(DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY))) { | ||
// Lazily initialize the list of fields | ||
if (derivedVectorFields == null) { | ||
derivedVectorFields = new ArrayList<>(); | ||
} | ||
derivedVectorFields.add(fieldInfo); | ||
} | ||
} | ||
// If no fields have it enabled, we can just short-circuit and return the delegate's fieldReader | ||
if (derivedVectorFields == null || derivedVectorFields.isEmpty()) { | ||
return delegate.fieldsReader(directory, segmentInfo, fieldInfos, ioContext); | ||
} | ||
return new DerivedSourceStoredFieldsReader( | ||
delegate.fieldsReader(directory, segmentInfo, fieldInfos, ioContext), | ||
derivedVectorFields, | ||
derivedSourceReadersSupplier, | ||
new SegmentReadState(directory, segmentInfo, fieldInfos, ioContext) | ||
); | ||
} | ||
|
||
@Override | ||
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo segmentInfo, IOContext ioContext) throws IOException { | ||
StoredFieldsWriter delegateWriter = delegate.fieldsWriter(directory, segmentInfo, ioContext); | ||
if (mapperService != null && KNNSettings.isKNNDerivedSourceEnabled(mapperService.getIndexSettings().getSettings())) { | ||
List<String> vectorFieldTypes = new ArrayList<>(); | ||
for (MappedFieldType fieldType : mapperService.fieldTypes()) { | ||
if (fieldType instanceof KNNVectorFieldType) { | ||
vectorFieldTypes.add(fieldType.name()); | ||
} | ||
} | ||
if (vectorFieldTypes.isEmpty() == false) { | ||
return new DerivedSourceStoredFieldsWriter(delegateWriter, vectorFieldTypes); | ||
} | ||
} | ||
return delegateWriter; | ||
} | ||
} |
128 changes: 128 additions & 0 deletions
128
...ain/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN9120Codec; | ||
|
||
import org.apache.lucene.codecs.StoredFieldsReader; | ||
import org.apache.lucene.index.FieldInfo; | ||
import org.apache.lucene.index.SegmentReadState; | ||
import org.apache.lucene.index.StoredFieldVisitor; | ||
import org.apache.lucene.util.IOUtils; | ||
import org.opensearch.index.fieldvisitor.FieldsVisitor; | ||
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier; | ||
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldVisitor; | ||
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceVectorInjector; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
public class DerivedSourceStoredFieldsReader extends StoredFieldsReader { | ||
private final StoredFieldsReader delegate; | ||
private final List<FieldInfo> derivedVectorFields; | ||
private final DerivedSourceReadersSupplier derivedSourceReadersSupplier; | ||
private final SegmentReadState segmentReadState; | ||
private final boolean shouldInject; | ||
|
||
private final DerivedSourceVectorInjector derivedSourceVectorInjector; | ||
|
||
/** | ||
* | ||
* @param delegate delegate StoredFieldsReader | ||
* @param derivedVectorFields List of fields that are derived source fields | ||
* @param derivedSourceReadersSupplier Supplier for the derived source readers | ||
* @param segmentReadState SegmentReadState for the segment | ||
* @throws IOException in case of I/O error | ||
*/ | ||
public DerivedSourceStoredFieldsReader( | ||
StoredFieldsReader delegate, | ||
List<FieldInfo> derivedVectorFields, | ||
DerivedSourceReadersSupplier derivedSourceReadersSupplier, | ||
SegmentReadState segmentReadState | ||
) throws IOException { | ||
this(delegate, derivedVectorFields, derivedSourceReadersSupplier, segmentReadState, true); | ||
} | ||
|
||
private DerivedSourceStoredFieldsReader( | ||
StoredFieldsReader delegate, | ||
List<FieldInfo> derivedVectorFields, | ||
DerivedSourceReadersSupplier derivedSourceReadersSupplier, | ||
SegmentReadState segmentReadState, | ||
boolean shouldInject | ||
) throws IOException { | ||
this.delegate = delegate; | ||
this.derivedVectorFields = derivedVectorFields; | ||
this.derivedSourceReadersSupplier = derivedSourceReadersSupplier; | ||
this.segmentReadState = segmentReadState; | ||
this.shouldInject = shouldInject; | ||
this.derivedSourceVectorInjector = createDerivedSourceVectorInjector(); | ||
} | ||
|
||
private DerivedSourceVectorInjector createDerivedSourceVectorInjector() throws IOException { | ||
return new DerivedSourceVectorInjector(derivedSourceReadersSupplier, segmentReadState, derivedVectorFields); | ||
} | ||
|
||
@Override | ||
public void document(int docId, StoredFieldVisitor storedFieldVisitor) throws IOException { | ||
// If the visitor has explicitly indicated it does not need the fields, we should not inject them | ||
boolean isVisitorNeedFields = true; | ||
if (storedFieldVisitor instanceof FieldsVisitor) { | ||
isVisitorNeedFields = derivedSourceVectorInjector.shouldInject( | ||
((FieldsVisitor) storedFieldVisitor).includes(), | ||
((FieldsVisitor) storedFieldVisitor).excludes() | ||
); | ||
} | ||
if (shouldInject && isVisitorNeedFields) { | ||
delegate.document(docId, new DerivedSourceStoredFieldVisitor(storedFieldVisitor, docId, derivedSourceVectorInjector)); | ||
return; | ||
} | ||
delegate.document(docId, storedFieldVisitor); | ||
} | ||
|
||
@Override | ||
public StoredFieldsReader clone() { | ||
try { | ||
return new DerivedSourceStoredFieldsReader( | ||
delegate.clone(), | ||
derivedVectorFields, | ||
derivedSourceReadersSupplier, | ||
segmentReadState, | ||
shouldInject | ||
); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
@Override | ||
public void checkIntegrity() throws IOException { | ||
delegate.checkIntegrity(); | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
IOUtils.close(delegate, derivedSourceVectorInjector); | ||
} | ||
|
||
/** | ||
* For merging, we need to tell the derived source stored fields reader to skip injecting the source. Otherwise, | ||
* on merge we will end up just writing the source to disk | ||
* | ||
* @return Merged instance that wont inject by default | ||
*/ | ||
@Override | ||
public StoredFieldsReader getMergeInstance() { | ||
try { | ||
return new DerivedSourceStoredFieldsReader( | ||
delegate.getMergeInstance(), | ||
derivedVectorFields, | ||
derivedSourceReadersSupplier, | ||
segmentReadState, | ||
false | ||
); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
Oops, something went wrong.