diff --git a/CHANGELOG.md b/CHANGELOG.md index f386b092cf074..128b35dec28c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Propagate the sourceIncludes and excludes fields from fetchSourceContext to FieldsVisitor. ([#17080](https://github.com/opensearch-project/OpenSearch/pull/17080)) - [Star Tree] [Search] Resolving Date histogram with metric aggregation using star-tree ([#16674](https://github.com/opensearch-project/OpenSearch/pull/16674)) - [Star Tree] [Search] Extensible design to support different query and field types ([#17137](https://github.com/opensearch-project/OpenSearch/pull/17137)) +- [Star Tree] [Search] Resolving keyword & numeric bucket aggregation with metric aggregation using star-tree ([#17165](https://github.com/opensearch-project/OpenSearch/pull/17165)) ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedSetStarTreeValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedSetStarTreeValuesIterator.java index 1605bd9cfc014..a50e3306c49fe 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedSetStarTreeValuesIterator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedSetStarTreeValuesIterator.java @@ -67,5 +67,4 @@ public TermsEnum termsEnum() throws IOException { public TermsEnum intersect(CompiledAutomaton automaton) throws IOException { return ((SortedSetDocValues) docIdSetIterator).intersect(automaton); } - } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 451b96dc3cf9c..015f35b0f1ca3 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -108,7 +108,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg private boolean starTreeDateRoundingRequired = true; private final FilterRewriteOptimizationContext filterRewriteOptimizationContext; - public final String STARTREE_TIMESTAMP_FIELD = "@timestamp"; + public final String fieldName; DateHistogramAggregator( String name, @@ -137,6 +137,9 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg this.hardBounds = hardBounds; // TODO: Stop using null here this.valuesSource = valuesSourceConfig.hasValues() ? (ValuesSource.Numeric) valuesSourceConfig.getValuesSource() : null; + this.fieldName = valuesSourceConfig.hasValues() + ? ((ValuesSource.Numeric.FieldData) valuesSourceConfig.getValuesSource()).getIndexFieldName() + : null; this.formatter = valuesSourceConfig.format(); bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality); @@ -244,13 +247,13 @@ private String fetchStarTreeCalendarUnit() { .next(); DateDimension starTreeDateDimension = (DateDimension) compositeMappedFieldType.getDimensions() .stream() - .filter(dim -> dim.getField().equals(STARTREE_TIMESTAMP_FIELD)) + .filter(dim -> dim.getField().equals(fieldName)) .findFirst() // Get the first matching time dimension - .orElseThrow(() -> new AssertionError(String.format(Locale.ROOT, "Date dimension '%s' not found", STARTREE_TIMESTAMP_FIELD))); + .orElseThrow(() -> new AssertionError(String.format(Locale.ROOT, "Date dimension '%s' not found", fieldName))); DateTimeUnitAdapter dateTimeUnitRounding = new DateTimeUnitAdapter(this.rounding.unit()); DateTimeUnitRounding rounding = starTreeDateDimension.findClosestValidInterval(dateTimeUnitRounding); - String dimensionName = STARTREE_TIMESTAMP_FIELD + "_" + rounding.shortName(); + String dimensionName = fieldName + "_" + rounding.shortName(); if (rounding.shortName().equals(this.rounding.unit().shortName())) { this.starTreeDateRoundingRequired = false; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 9dbc97f7d2cb6..349a82e4aabcd 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -41,9 +41,11 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.PriorityQueue; import org.opensearch.common.SetOnce; import org.opensearch.common.lease.Releasable; @@ -52,6 +54,12 @@ import org.opensearch.common.util.LongHash; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedSetStarTreeValuesIterator; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.AggregationExecutionException; @@ -64,14 +72,20 @@ import org.opensearch.search.aggregations.InternalOrder; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; import org.opensearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes; import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.startree.StarTreeQueryHelper; +import org.opensearch.search.startree.StarTreeTraversalUtil; +import org.opensearch.search.startree.filter.DimensionFilter; import java.io.IOException; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; @@ -86,7 +100,7 @@ * * @opensearch.internal */ -public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { +public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator implements StarTreePreComputeCollector { protected final ResultStrategy resultStrategy; protected final ValuesSource.Bytes.WithOrdinals valuesSource; @@ -98,6 +112,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final SetOnce dvs = new SetOnce<>(); protected int segmentsWithSingleValuedOrds = 0; protected int segmentsWithMultiValuedOrds = 0; + LongUnaryOperator globalOperator; /** * Lookup global ordinals @@ -229,6 +244,14 @@ public void collect(int doc, long owningBucketOrd) throws IOException { public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); + globalOperator = valuesSource.globalOrdinalsMapping(ctx); + + CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); + if (supportedStarTree != null) { + if (preComputeWithStarTree(ctx, supportedStarTree) == true) { + throw new CollectionTerminatedException(); + } + } if (collectionStrategy instanceof DenseGlobalOrds && this.resultStrategy instanceof StandardTermsResults @@ -314,6 +337,87 @@ public void collect(int doc, long owningBucketOrd) throws IOException { }); } + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parent + ) throws IOException { + assert parent == null; + StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + return new StarTreeBucketCollector( + starTreeValues, + StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + fieldName, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + ) { + @Override + public void setSubCollectors() throws IOException { + for (Aggregator aggregator : subAggregators) { + this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); + } + } + + SortedSetStarTreeValuesIterator valuesIterator = (SortedSetStarTreeValuesIterator) starTreeValues.getDimensionValuesIterator( + fieldName + ); + + String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + "_doc_count", + MetricStat.DOC_COUNT.getTypeName() + ); + SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getMetricValuesIterator(metricName); + + @Override + public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { + + if (valuesIterator.advanceExact(starTreeEntry) == false) { + return; + } + + for (int i = 0, count = valuesIterator.docValueCount(); i < count; i++) { + long dimensionValue = valuesIterator.nextOrd(); + long ord = globalOperator.applyAsLong(dimensionValue); + + if (docCountsIterator.advanceExact(starTreeEntry)) { + long metricValue = docCountsIterator.nextValue(); + + long bucketOrd = collectionStrategy.globalOrdToBucketOrd(0, ord); + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } else { + grow(bucketOrd + 1); + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } + } + } + } + }; + } + + private boolean preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo supportedStarTree) throws IOException { + StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); + FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); + + int numBits = matchingDocsBitSet.length(); + if (numBits > 0) { + for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) + ? matchingDocsBitSet.nextSetBit(bit + 1) + : DocIdSetIterator.NO_MORE_DOCS) { + starTreeBucketCollector.collectStarTreeEntry(bit, 0); + } + } + return true; + } + @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index 9d095bbf7dccf..72f4a097fa297 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -34,14 +34,23 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.PriorityQueue; import org.opensearch.common.Numbers; import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; import org.opensearch.common.util.LongArray; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; import org.opensearch.index.fielddata.FieldData; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -52,6 +61,8 @@ import org.opensearch.search.aggregations.InternalOrder; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; import org.opensearch.search.aggregations.bucket.terms.IncludeExclude.LongFilter; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds.BucketOrdsEnum; @@ -60,6 +71,9 @@ import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.internal.ContextIndexSearcher; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.startree.StarTreeQueryHelper; +import org.opensearch.search.startree.StarTreeTraversalUtil; +import org.opensearch.search.startree.filter.DimensionFilter; import java.io.IOException; import java.math.BigInteger; @@ -72,17 +86,19 @@ import static java.util.Collections.emptyList; import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** * Aggregate all docs that contain numeric terms * * @opensearch.internal */ -public class NumericTermsAggregator extends TermsAggregator { +public class NumericTermsAggregator extends TermsAggregator implements StarTreePreComputeCollector { private final ResultStrategy resultStrategy; private final ValuesSource.Numeric valuesSource; private final LongKeyedBucketOrds bucketOrds; private final LongFilter longFilter; + private final String fieldName; public NumericTermsAggregator( String name, @@ -104,6 +120,9 @@ public NumericTermsAggregator( this.valuesSource = valuesSource; this.longFilter = longFilter; bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality); + this.fieldName = (this.valuesSource instanceof ValuesSource.Numeric.FieldData) + ? ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName() + : null; } @Override @@ -116,6 +135,13 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); + if (supportedStarTree != null) { + if (preComputeWithStarTree(ctx, supportedStarTree) == true) { + throw new CollectionTerminatedException(); + } + } + SortedNumericDocValues values = resultStrategy.getValues(ctx); return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, values) { @Override @@ -145,6 +171,93 @@ public void collect(int doc, long owningBucketOrd) throws IOException { }); } + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parent + ) throws IOException { + assert parent == null; + StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + return new StarTreeBucketCollector( + starTreeValues, + StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + fieldName, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + ) { + @Override + public void setSubCollectors() throws IOException { + for (Aggregator aggregator : subAggregators) { + this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); + } + } + + SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getDimensionValuesIterator(fieldName); + + String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + "_doc_count", + MetricStat.DOC_COUNT.getTypeName() + ); + SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getMetricValuesIterator(metricName); + + @Override + public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { + if (valuesIterator.advance(starTreeEntry) == NO_MORE_DOCS) { + return; + } + long dimensionValue = valuesIterator.nextValue(); + // Only numeric & floating points are supported as of now in star-tree + // TODO: Add support for isBigInteger() when it gets supported in star-tree + if (valuesSource.isFloatingPoint()) { + double doubleValue = ((NumberFieldMapper.NumberFieldType) context.mapperService().fieldType(fieldName)).toDoubleValue( + dimensionValue + ); + dimensionValue = NumericUtils.doubleToSortableLong(doubleValue); + } + + for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) { + + if (docCountsIterator.advanceExact(starTreeEntry)) { + long metricValue = docCountsIterator.nextValue(); + long bucketOrd = bucketOrds.add(owningBucketOrd, dimensionValue); + + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } else { + grow(bucketOrd + 1); + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } + + } + } + } + }; + } + + private boolean preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo supportedStarTree) throws IOException { + StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); + FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); + + int numBits = matchingDocsBitSet.length(); + if (numBits > 0) { + for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) + ? matchingDocsBitSet.nextSetBit(bit + 1) + : DocIdSetIterator.NO_MORE_DOCS) { + starTreeBucketCollector.collectStarTreeEntry(bit, 0); + } + } + return true; + } + @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java index 918cc0276ed13..e73012d83ea14 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java @@ -291,6 +291,10 @@ private boolean subAggsNeedScore() { @Override protected boolean shouldDefer(Aggregator aggregator) { - return collectMode == SubAggCollectionMode.BREADTH_FIRST && !aggsUsedForSorting.contains(aggregator); + // don't defer when StarTreeContext is set, don't defer when collectMode == SubAggCollectionMode.BREADTH_FIRST + // this boolean condition can be further simplified but affects readability. + return (context.getQueryShardContext().getStarTreeQueryContext() == null || collectMode != SubAggCollectionMode.BREADTH_FIRST) + && collectMode == SubAggCollectionMode.BREADTH_FIRST + && !aggsUsedForSorting.contains(aggregator); } } diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java index ca0ab9ce52f6e..a7b7cf62ff3be 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java @@ -21,6 +21,7 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.search.aggregations.AggregatorFactory; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregatorFactory; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.opensearch.search.aggregations.metrics.MetricAggregatorFactory; import org.opensearch.search.internal.SearchContext; import org.opensearch.search.startree.filter.StarTreeFilter; @@ -113,6 +114,13 @@ public boolean consolidateAllFilters(SearchContext context) { if (validateDateHistogramSupport(compositeMappedFieldType, aggregatorFactory)) { continue; } + + // validation for terms aggregation + if (validateKeywordTermsAggregationSupport(compositeMappedFieldType, aggregatorFactory)) { + continue; + } + + // invalid query shape return false; } @@ -151,6 +159,32 @@ private static boolean validateStarTreeMetricSupport( return false; } + private static boolean validateKeywordTermsAggregationSupport( + CompositeDataCubeFieldType compositeIndexFieldInfo, + AggregatorFactory aggregatorFactory + ) { + if (!(aggregatorFactory instanceof TermsAggregatorFactory termsAggregatorFactory) + || aggregatorFactory.getSubFactories().getFactories().length < 1) { + return false; + } + + // Validate request field is part of dimensions + if (compositeIndexFieldInfo.getDimensions() + .stream() + .map(Dimension::getField) + .noneMatch(termsAggregatorFactory.getField()::equals)) { + return false; + } + + // Validate all sub-factories + for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { + if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) { + return false; + } + } + return true; + } + private StarTreeFilter getStarTreeFilter( SearchContext context, QueryBuilder queryBuilder, diff --git a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java index b548c844b2476..709f4c4340f27 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java @@ -47,10 +47,12 @@ import org.opensearch.search.aggregations.SearchContextAggregations; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder; import org.opensearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; import org.opensearch.search.aggregations.metrics.SumAggregationBuilder; import org.opensearch.search.aggregations.startree.DateHistogramAggregatorTests; +import org.opensearch.search.aggregations.startree.NumericTermsAggregatorTests; import org.opensearch.search.aggregations.startree.StarTreeFilterTests; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.search.builder.SearchSourceBuilder; @@ -70,6 +72,7 @@ import static org.opensearch.search.aggregations.AggregationBuilders.max; import static org.opensearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation; import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.CoreMatchers.nullValue; import static org.mockito.Mockito.mock; @@ -536,6 +539,152 @@ public void testInvalidQueryParsingForDateHistogramAggregations() throws IOExcep setStarTreeIndexSetting(null); } + /** + * Test query parsing for bucket aggregations, with/without numeric term query + */ + public void testQueryParsingForBucketAggregations() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + setStarTreeIndexSetting("true"); + + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + CreateIndexRequestBuilder builder = client().admin() + .indices() + .prepareCreate("test") + .setSettings(settings) + .setMapping(NumericTermsAggregatorTests.getExpandedMapping(1, false)); + createIndex("test", builder); + + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + IndexService indexService = indicesService.indexServiceSafe(resolveIndex("test")); + IndexShard indexShard = indexService.getShard(0); + ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + indexShard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + String KEYWORD_FIELD = "clientip"; + String NUMERIC_FIELD = "size"; + + MaxAggregationBuilder maxAggNoSub = max("max").field(FIELD_NAME); + MaxAggregationBuilder sumAggNoSub = max("sum").field(FIELD_NAME); + SumAggregationBuilder sumAggSub = sum("sum").field(FIELD_NAME).subAggregation(maxAggNoSub); + MedianAbsoluteDeviationAggregationBuilder medianAgg = medianAbsoluteDeviation("median").field(FIELD_NAME); + + QueryBuilder baseQuery; + SearchContext searchContext = createSearchContext(indexService); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( + 1, + Collections.emptySet(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + + // Case 1: MatchAllQuery and non-nested metric aggregations is nested within keyword term aggregation, should use star tree + TermsAggregationBuilder termsAggregationBuilder = terms("term").field(KEYWORD_FIELD).subAggregation(maxAggNoSub); + baseQuery = new MatchAllQueryBuilder(); + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0).query(baseQuery).aggregation(termsAggregationBuilder); + + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 2: MatchAllQuery and non-nested metric aggregations is nested within numeric term aggregation, should use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(maxAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new MatchAllQueryBuilder()).aggregation(termsAggregationBuilder); + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 3: NumericTermsQuery and non-nested metric aggregations is nested within keyword term aggregation, should use star tree + termsAggregationBuilder = terms("term").field(KEYWORD_FIELD).subAggregation(maxAggNoSub); + baseQuery = new TermQueryBuilder(FIELD_NAME, 1); + sourceBuilder = new SearchSourceBuilder().size(0).query(baseQuery).aggregation(termsAggregationBuilder); + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD), new NumericDimension(FIELD_NAME)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 4: NumericTermsQuery and multiple non-nested metric aggregations is within numeric term aggregation, should use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(maxAggNoSub).subAggregation(sumAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder(FIELD_NAME, 1)).aggregation(termsAggregationBuilder); + + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD), new NumericDimension(FIELD_NAME)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 5: Nested metric aggregations is nested within numeric term aggregation, should not use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(sumAggSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder(FIELD_NAME, 1)).aggregation(termsAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 6: Unsupported aggregations is nested within numeric term aggregation, should not use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(medianAgg); + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder(FIELD_NAME, 1)).aggregation(termsAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + setStarTreeIndexSetting(null); + } + private void setStarTreeIndexSetting(String value) { client().admin() .cluster() diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/KeywordTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/KeywordTermsAggregatorTests.java new file mode 100644 index 0000000000000..9b1627e5b3fe5 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/KeywordTermsAggregatorTests.java @@ -0,0 +1,239 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.startree; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite101.Composite101Codec; +import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.OrdinalDimension; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.aggregations.Aggregator; +import org.opensearch.search.aggregations.AggregatorTestCase; +import org.opensearch.search.aggregations.bucket.terms.InternalTerms; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; + +import static org.opensearch.search.aggregations.AggregationBuilders.avg; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; + +public class KeywordTermsAggregatorTests extends AggregatorTestCase { + final static String STATUS = "status"; + final static String SIZE = "size"; + final static String CLIENTIP = "clientip"; + private static final MappedFieldType STATUS_FIELD_TYPE = new NumberFieldMapper.NumberFieldType( + STATUS, + NumberFieldMapper.NumberType.LONG + ); + private static final MappedFieldType SIZE_FIELD_NAME = new NumberFieldMapper.NumberFieldType(SIZE, NumberFieldMapper.NumberType.FLOAT); + private static final MappedFieldType CLIENTIP_FIELD_NAME = new KeywordFieldMapper.KeywordFieldType(CLIENTIP); + + @Before + public void setup() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + } + + @After + public void teardown() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(KeywordTermsAggregatorTests.class); + MapperService mapperService; + try { + mapperService = StarTreeDocValuesFormatTests.createMapperService(NumericTermsAggregatorTests.getExpandedMapping(1, false)); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new Composite101Codec(Lucene101Codec.Mode.BEST_SPEED, mapperService, testLogger); + } + + public void testStarTreeKeywordTerms() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + Random random = RandomizedTest.getRandom(); + int totalDocs = 100; + + long val; + + List docs = new ArrayList<>(); + // Index 100 random documents + for (int i = 0; i < totalDocs; i++) { + Document doc = new Document(); + if (random.nextBoolean()) { + val = random.nextInt(10); // Random int between 0 and 9 for status + doc.add(new SortedNumericDocValuesField(STATUS, val)); + } + if (random.nextBoolean()) { + val = NumericUtils.doubleToSortableLong(random.nextInt(100) + 0.5f); + doc.add(new SortedNumericDocValuesField(SIZE, val)); + } + if (random.nextBoolean()) { + val = random.nextInt(10); // Random strings for int between 0 and 9 for clientip + doc.add(new SortedSetDocValuesField(CLIENTIP, new BytesRef(String.valueOf(val)))); + doc.add(new StringField(CLIENTIP, String.valueOf(val), Field.Store.NO)); + } + iw.addDocument(doc); + docs.add(doc); + } + + if (randomBoolean()) { + iw.forceMerge(1); + } + iw.close(); + DirectoryReader ir = DirectoryReader.open(directory); + LeafReaderContext context = ir.leaves().get(0); + + SegmentReader reader = Lucene.segmentReader(context.reader()); + IndexSearcher indexSearcher = newSearcher(wrapInMockESDirectoryReader(ir), false, false); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); + + LinkedHashMap supportedDimensions = new LinkedHashMap<>(); + supportedDimensions.put(new NumericDimension(STATUS), STATUS_FIELD_TYPE); + supportedDimensions.put(new NumericDimension(SIZE), SIZE_FIELD_NAME); + supportedDimensions.put(new OrdinalDimension(CLIENTIP), CLIENTIP_FIELD_NAME); + + ValuesSourceAggregationBuilder[] aggBuilders = { + sum("_sum").field(SIZE), + max("_max").field(SIZE), + min("_min").field(SIZE), + count("_count").field(SIZE), + avg("_avg").field(SIZE) }; + + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + TermsAggregationBuilder termsAggregationBuilder = terms("terms_agg").field(CLIENTIP) + .subAggregation(aggregationBuilder) + .collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // Numeric-terms query with keyword terms aggregation + for (int cases = 0; cases < 100; cases++) { + // query of status field + String queryField = STATUS; + long queryValue = random.nextInt(10); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // query on size field + queryField = SIZE; + queryValue = NumericUtils.floatToSortableInt(random.nextInt(20) - 14.5f); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + } + } + ir.close(); + directory.close(); + } + + private void testCase( + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + TermsAggregationBuilder termsAggregationBuilder, + CompositeIndexFieldInfo starTree, + LinkedHashMap supportedDimensions + ) throws IOException { + InternalTerms starTreeAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + starTree, + supportedDimensions, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + true, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME, + CLIENTIP_FIELD_NAME + ); + + InternalTerms defaultAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + null, + null, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + false, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME, + CLIENTIP_FIELD_NAME + ); + + assertEquals(defaultAggregation.getBuckets().size(), starTreeAggregation.getBuckets().size()); + assertEquals(defaultAggregation.getBuckets(), starTreeAggregation.getBuckets()); + } +} diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/NumericTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/NumericTermsAggregatorTests.java new file mode 100644 index 0000000000000..a14d0a15aa31e --- /dev/null +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/NumericTermsAggregatorTests.java @@ -0,0 +1,338 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.startree; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite101.Composite101Codec; +import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.aggregations.AggregatorTestCase; +import org.opensearch.search.aggregations.bucket.terms.InternalTerms; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; + +import static org.opensearch.index.codec.composite912.datacube.startree.AbstractStarTreeDVFormatTests.topMapping; +import static org.opensearch.search.aggregations.AggregationBuilders.avg; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; + +public class NumericTermsAggregatorTests extends AggregatorTestCase { + final static String STATUS = "status"; + final static String SIZE = "size"; + private static final MappedFieldType STATUS_FIELD_TYPE = new NumberFieldMapper.NumberFieldType( + STATUS, + NumberFieldMapper.NumberType.LONG + ); + private static final MappedFieldType SIZE_FIELD_NAME = new NumberFieldMapper.NumberFieldType(SIZE, NumberFieldMapper.NumberType.FLOAT); + + @Before + public void setup() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + } + + @After + public void teardown() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(NumericTermsAggregatorTests.class); + MapperService mapperService; + try { + mapperService = StarTreeDocValuesFormatTests.createMapperService(getExpandedMapping(1, false)); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new Composite101Codec(Lucene101Codec.Mode.BEST_SPEED, mapperService, testLogger); + } + + public void testStarTreeNumericTerms() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + Random random = RandomizedTest.getRandom(); + int totalDocs = 100; + + long val; + + List docs = new ArrayList<>(); + // Index 100 random documents + for (int i = 0; i < totalDocs; i++) { + Document doc = new Document(); + if (random.nextBoolean()) { + val = random.nextInt(10); // Random int between (0 and 9) for status + doc.add(new SortedNumericDocValuesField(STATUS, val)); + } + if (random.nextBoolean()) { + val = NumericUtils.doubleToSortableLong(random.nextInt(100) + 0.5f); + // Random float between (0 and 99)+0.5f for size + doc.add(new SortedNumericDocValuesField(SIZE, val)); + } + iw.addDocument(doc); + docs.add(doc); + } + + if (randomBoolean()) { + iw.forceMerge(1); + } + iw.close(); + DirectoryReader ir = DirectoryReader.open(directory); + LeafReaderContext context = ir.leaves().get(0); + + SegmentReader reader = Lucene.segmentReader(context.reader()); + IndexSearcher indexSearcher = newSearcher(reader, false, false); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); + + LinkedHashMap supportedDimensions = new LinkedHashMap<>(); + supportedDimensions.put(new NumericDimension(STATUS), STATUS_FIELD_TYPE); + supportedDimensions.put(new NumericDimension(SIZE), SIZE_FIELD_NAME); + + ValuesSourceAggregationBuilder[] aggBuilders = { + sum("_sum").field(SIZE), + max("_max").field(SIZE), + min("_min").field(SIZE), + count("_count").field(SIZE), + avg("_avg").field(SIZE) }; + + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + TermsAggregationBuilder termsAggregationBuilder = terms("terms_agg").field(STATUS).subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // Numeric-terms query with numeric terms aggregation + for (int cases = 0; cases < 100; cases++) { + + // query of status field + String queryField = STATUS; + long queryValue = random.nextInt(10); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // query on size field + queryField = SIZE; + queryValue = NumericUtils.floatToSortableInt(random.nextInt(20) - 14.5f); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + } + } + + aggBuilders = new ValuesSourceAggregationBuilder[] { + sum("_sum").field(STATUS), + max("_max").field(STATUS), + min("_min").field(STATUS), + count("_count").field(STATUS), + avg("_avg").field(STATUS) }; + + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + TermsAggregationBuilder termsAggregationBuilder = terms("terms_agg").field(SIZE).subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + } + + ir.close(); + directory.close(); + } + + private void testCase( + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + TermsAggregationBuilder termsAggregationBuilder, + CompositeIndexFieldInfo starTree, + LinkedHashMap supportedDimensions + ) throws IOException { + InternalTerms starTreeAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + starTree, + supportedDimensions, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + true, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME + ); + + InternalTerms defaultAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + null, + null, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + false, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME + ); + + assertEquals(defaultAggregation.getBuckets().size(), starTreeAggregation.getBuckets().size()); + assertEquals(defaultAggregation.getBuckets(), starTreeAggregation.getBuckets()); + } + + public static XContentBuilder getExpandedMapping(int maxLeafDocs, boolean skipStarNodeCreationForStatusDimension) throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree1"); // Use the same name as the provided mapping + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", maxLeafDocs); + if (skipStarNodeCreationForStatusDimension) { + b.startArray("skip_star_node_creation_for_dimensions"); + b.value("status"); // Skip for "status" dimension + b.endArray(); + } + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "status"); + b.endObject(); + b.startObject(); + b.field("name", "size"); + b.endObject(); + b.startObject(); + b.field("name", "clientip"); + b.endObject(); + b.startObject(); + b.field("name", "@timestamp"); + b.startArray("calendar_intervals"); + b.value("month"); + b.value("day"); + b.endArray(); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", "size"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", "status"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("@timestamp"); + b.field("type", "date"); + b.field("format", "strict_date_optional_time||epoch_second"); + b.endObject(); + b.startObject("message"); + b.field("type", "keyword"); + b.field("index", false); + b.field("doc_values", false); + b.endObject(); + b.startObject("clientip"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("request"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("raw"); + b.field("type", "keyword"); + b.field("ignore_above", 256); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("status"); + b.field("type", "integer"); + b.endObject(); + b.startObject("size"); + b.field("type", "float"); + b.endObject(); + b.startObject("geoip"); + b.startObject("properties"); + b.startObject("country_name"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("city_name"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("location"); + b.field("type", "geo_point"); + b.endObject(); + b.endObject(); + b.endObject(); + b.endObject(); + }); + } +} diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 78e3d4f50a0d5..2f6bb11d59ca6 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -168,6 +168,7 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; @@ -441,6 +442,8 @@ protected SearchContext createSearchContextWithStarTreeContext( searchContext.getQueryShardContext().setStarTreeQueryContext(starTreeQueryContext); } + Stream.of(fieldTypes).forEach(fieldType -> when(mapperService.fieldType(fieldType.name())).thenReturn(fieldType)); + return searchContext; }