Skip to content

Commit

Permalink
Initial commit with Todos identifying places to change
Browse files Browse the repository at this point in the history
  • Loading branch information
expani committed Dec 2, 2024
1 parent e9f77e3 commit f663733
Show file tree
Hide file tree
Showing 17 changed files with 218 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,9 @@ private void createConfiguration() {
baseConfig.put("logger.org.opensearch.action.support.master", "DEBUG");
baseConfig.put("logger.org.opensearch.cluster.coordination", "DEBUG");

baseConfig.put("opensearch.experimental.feature.composite_index.star_tree.enabled", "true");
baseConfig.put("indices.composite_index.star_tree.enabled", "true");

HashSet<String> overriden = new HashSet<>(baseConfig.keySet());
overriden.retainAll(settings.keySet());
OVERRIDABLE_SETTINGS.forEach(overriden::remove);
Expand Down
3 changes: 3 additions & 0 deletions distribution/src/config/jvm.options
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
-Xms${heap.min}
-Xmx${heap.max}

-Xdebug
-Xrunjdwp:server=y,transport=dt_socket,address=4000,suspend=n

################################################################
## Expert settings
################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ public StarTreeNode getChildStarNode() throws IOException {

@Override
public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOException {
// TODO : Better to move it inside binarySearchChild() to avoid NPE for callers who don't handle this in future
// there will be no children for leaf nodes
if (isLeaf()) {
return null;
Expand Down Expand Up @@ -255,6 +256,8 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IO
high--;
}

// TODO : Check if dimension value is within the range of low and high.

while (low <= high) {
int mid = low + (high - low) / 2;
FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,9 @@ public interface StarTreeNode {

/**
* Returns the child node for the given dimension value in the star-tree.
* Responsibility of callers to handle conversion to ordinal ( dimensionValue ) for non-numeric fields.
*
* @param dimensionValue the dimension value
* @param dimensionValue the dimension value which will be Doc Value ordinals for non-numeric fields
* @return the child node for the given dimension value or null if child is not present
* @throws IOException if an I/O error occurs while retrieving the child node
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
Expand All @@ -22,6 +24,7 @@
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator;
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedSetStarTreeValuesIterator;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
import org.opensearch.index.query.MatchAllQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
Expand All @@ -37,12 +40,16 @@
import org.opensearch.search.startree.StarTreeQueryContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

/**
* Helper class for building star-tree query
*
Expand Down Expand Up @@ -73,6 +80,7 @@ public static StarTreeQueryContext getStarTreeQueryContext(SearchContext context
compositeMappedFieldType.getCompositeIndexType()
);

// TODO : Handle different types of validations in a better way
for (AggregatorFactory aggregatorFactory : context.aggregations().factories().getFactories()) {
MetricStat metricStat = validateStarTreeMetricSupport(compositeMappedFieldType, aggregatorFactory);
if (metricStat == null) {
Expand All @@ -96,12 +104,17 @@ private static StarTreeQueryContext tryCreateStarTreeQueryContext(
QueryBuilder queryBuilder,
int cacheStarTreeValuesSize
) {
Map<String, Long> queryMap;

// TODO : Handle single valued keyword and IP field queries ( convert to ordinal )
// TODO : Also, handle multi-valued term aggregation and range query from low to high.
// TODO : Keep it extensible for boolean queries that can contain nested term and range query over separate fields.
Map<String, Object> queryMap;
if (queryBuilder == null || queryBuilder instanceof MatchAllQueryBuilder) {
queryMap = null;
} else if (queryBuilder instanceof TermQueryBuilder) {
// TODO: Add support for keyword fields
if (compositeFieldType.getDimensions().stream().anyMatch(d -> d.getDocValuesType() != DocValuesType.SORTED_NUMERIC)) {
// TODO: Add support for keyword fields which has DocValuesType.SORTED_SET
// FIXME : This should also check if the non-numeric field is present in query or not.
if (compositeFieldType.getDimensions().stream().anyMatch(d -> d.getDocValuesType() != DocValuesType.SORTED_NUMERIC && d.getDocValuesType() != DocValuesType.SORTED_SET)) {
// return null for non-numeric fields
return null;
}
Expand All @@ -117,25 +130,33 @@ private static StarTreeQueryContext tryCreateStarTreeQueryContext(
} else {
return null;
}
return new StarTreeQueryContext(compositeIndexFieldInfo, queryMap, cacheStarTreeValuesSize);
StarTreeQueryContext starTreeQueryContext = new StarTreeQueryContext(compositeIndexFieldInfo, queryMap, cacheStarTreeValuesSize);
starTreeQueryContext.getFilterRegistry().registerQueryBuilder(queryBuilder);
return starTreeQueryContext;
}

/**
* Parse query body to star-tree predicates
* @param queryBuilder to match star-tree supported query shape
* @return predicates to match
*/
private static Map<String, Long> getStarTreePredicates(QueryBuilder queryBuilder, List<String> supportedDimensions) {
private static Map<String, Object> getStarTreePredicates(QueryBuilder queryBuilder, List<String> supportedDimensions) {
// TODO : Make this generic to handle range queries, boolean queries; etc.
TermQueryBuilder tq = (TermQueryBuilder) queryBuilder;
String field = tq.fieldName();
if (!supportedDimensions.contains(field)) {
return null;
}
long inputQueryVal = Long.parseLong(tq.value().toString());

// Create a map with the field and the value
Map<String, Long> predicateMap = new HashMap<>();
predicateMap.put(field, inputQueryVal);
Map<String, Object> predicateMap = new HashMap<>();
Object objValue = tq.value();
// Term query builder is returning Integer for int fields as expected.
// TODO : Properly handle conversion to long for other fields like keyword.
if (objValue instanceof Integer) {
objValue =((Integer) objValue).longValue();
}
predicateMap.put(field, objValue);
return predicateMap;
}

Expand Down Expand Up @@ -173,6 +194,19 @@ public static StarTreeValues getStarTreeValues(LeafReaderContext context, Compos
return (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(starTree);
}

private static void iterateOverMetric(String metricName, StarTreeValues starTreeValues) throws IOException {
SortedNumericStarTreeValuesIterator metricsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues.getMetricValuesIterator(
metricName
);
int docId = 0;
List<Object> values = new ArrayList<>();
while (metricsIterator.advance(docId) != NO_MORE_DOCS) {
values.add(NumericUtils.sortableLongToDouble(metricsIterator.nextValue()));
docId++;
}
System.out.printf("Metric Values for metric %s has %s values and is %s%n", metricName, values.size(), values);
}

/**
* Get the star-tree leaf collector
* This collector computes the aggregation prematurely and invokes an early termination collector
Expand All @@ -189,13 +223,16 @@ public static LeafBucketCollector getStarTreeLeafCollector(
) throws IOException {
StarTreeValues starTreeValues = getStarTreeValues(ctx, starTree);
assert starTreeValues != null;
// FIXME : Add support for fetching field name for keyword and IP value sources as well.
String fieldName = ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName();
String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues(starTree.getField(), fieldName, metric);

assert starTreeValues != null;
// TODO : Handle for keyword, IP and other data types.
SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues.getMetricValuesIterator(
metricName
);
iterateOverMetric(metricName, starTreeValues);
// Obtain a FixedBitSet of matched star tree document IDs
FixedBitSet filteredValues = getStarTreeFilteredValues(context, ctx, starTreeValues);
assert filteredValues != null;
Expand All @@ -212,7 +249,8 @@ public static LeafBucketCollector getStarTreeLeafCollector(
}

// Iterate over the values for the current entryId
for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) {
for (int i = 0, count = valuesIterator.docValueCount(); i < count; i++) {
// TODO : Handle for keyword, IP and other data types ??
long value = valuesIterator.nextValue();
valueConsumer.accept(value); // Apply the consumer operation (e.g., max, sum)
}
Expand Down Expand Up @@ -240,6 +278,9 @@ public static FixedBitSet getStarTreeFilteredValues(SearchContext context, LeafR
throws IOException {
FixedBitSet result = context.getStarTreeQueryContext().getStarTreeValues(ctx);
if (result == null) {
// TODO : Consider passing ctx.reader().postings() for converting the non-numeric dimensions in queryMap for append-only indices.
// FIXME : Convert term bytesref to ordinals by performing a lookup using StarTreeValues here
// FIXME : Move the validation of metric support in aggregators and filter dimension ( mandatory ) support here.
result = StarTreeFilter.getStarTreeResult(starTreeValues, context.getStarTreeQueryContext().getQueryMap());
context.getStarTreeQueryContext().setStarTreeValues(ctx, result);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,18 @@ public SortedNumericStarTreeValuesIterator(DocIdSetIterator docIdSetIterator) {
super(docIdSetIterator);
}

public long nextValue() throws IOException {
return ((SortedNumericDocValues) docIdSetIterator).nextValue();
@Override
public int docValueCount() {
return ((SortedNumericDocValues) docIdSetIterator).docValueCount();
}

public int entryValueCount() throws IOException {
return ((SortedNumericDocValues) docIdSetIterator).docValueCount();
public long nextValue() throws IOException {
return ((SortedNumericDocValues) docIdSetIterator).nextValue();
}

public boolean advanceExact(int target) throws IOException {
return ((SortedNumericDocValues) docIdSetIterator).advanceExact(target);
}

// TODO : We need a Iterator<Long> valuesInRange(long low, long high)
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public long nextOrd() throws IOException {
return ((SortedSetDocValues) docIdSetIterator).nextOrd();
}

@Override
public int docValueCount() {
return ((SortedSetDocValues) docIdSetIterator).docValueCount();
}
Expand All @@ -45,6 +46,7 @@ public long getValueCount() {
return ((SortedSetDocValues) docIdSetIterator).getValueCount();
}

// TODO : Use range(int, int) for range queries and aggregation.
public long lookupTerm(BytesRef key) throws IOException {
return ((SortedSetDocValues) docIdSetIterator).lookupTerm(key);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,7 @@ public int advance(int target) throws IOException {
public long cost() {
return docIdSetIterator.cost();
}

abstract public int docValueCount();

}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
package org.opensearch.search.aggregations.bucket.filter;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.opensearch.common.lucene.Lucene;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public LeafBucketCollector getStarTreeLeafCollector(LeafReaderContext ctx, LeafB
}

// Iterate over the values for the current entryId
for (int i = 0; i < sumValuesIterator.entryValueCount(); i++) {
for (int i = 0; i < sumValuesIterator.docValueCount(); i++) {
kahanSummation.add(NumericUtils.sortableLongToDouble(sumValuesIterator.nextValue()));
counts.increment(0, countValueIterator.nextValue()); // Apply the consumer operation (e.g., max, sum)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,10 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc
}
}


CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context);
if (supportedStarTree != null) {
// FIXME : Check whether the returned collector is null to determine if support is possible.
return getStarTreeCollector(ctx, sub, supportedStarTree);
}
return getDefaultLeafCollector(ctx, sub);
Expand Down
Loading

0 comments on commit f663733

Please sign in to comment.