Skip to content

Commit

Permalink
Improve performance of bitmap terms filtering (#16936)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: bowenlan-amzn <[email protected]>
(cherry picked from commit ba0c4f3)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] committed Jan 22, 2025
1 parent c2c15bb commit 72ac22b
Show file tree
Hide file tree
Showing 7 changed files with 586 additions and 80 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Introduce framework for auxiliary transports and an experimental gRPC transport plugin ([#16534](https://github.com/opensearch-project/OpenSearch/pull/16534))
- Support searching from doc_value using termQueryCaseInsensitive/termQuery in flat_object/keyword field ([#16974](https://github.com/opensearch-project/OpenSearch/pull/16974/))
- Added a new `time` field to replace the deprecated `getTime` field in `GetStats`. ([#17009](https://github.com/opensearch-project/OpenSearch/pull/17009))
- Improve performance of the bitmap filtering ([#16936](https://github.com/opensearch-project/OpenSearch/pull/16936/))

### Dependencies
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,8 @@
import org.apache.lucene.sandbox.document.HalfFloatPoint;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
Expand All @@ -73,6 +71,7 @@
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.query.BitmapDocValuesQuery;
import org.opensearch.search.query.BitmapIndexQuery;

import java.io.IOException;
import java.math.BigInteger;
Expand All @@ -81,7 +80,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -888,10 +886,10 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha
}

if (isSearchable && hasDocValues) {
return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
return new IndexOrDocValuesQuery(new BitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
}
if (isSearchable) {
return bitmapIndexQuery(field, bitmap);
return new BitmapIndexQuery(field, bitmap);
}
return new BitmapDocValuesQuery(field, bitmap);
}
Expand Down Expand Up @@ -1507,40 +1505,6 @@ public static Query unsignedLongRangeQuery(
}
return builder.apply(l, u);
}

/**
 * Builds a point-index query over {@code field} from the values held in {@code bitmap}.
 * <p>
 * The bitmap's values are handed to the {@link PointInSetQuery} one at a time through a
 * stream that encodes each value into a single, reused scratch buffer.
 *
 * @param field  name of the field to query
 * @param bitmap values to feed into the query
 * @return a {@link PointInSetQuery} with one dimension of {@code Integer.BYTES} bytes
 */
static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) {
    // Scratch buffer shared by every value the stream below hands out.
    final BytesRef scratch = new BytesRef(new byte[Integer.BYTES]);

    final PointInSetQuery.Stream encodedValues = new PointInSetQuery.Stream() {

        final Iterator<Integer> values = bitmap.iterator();

        @Override
        public BytesRef next() {
            if (!values.hasNext()) {
                // No more values left in the bitmap.
                return null;
            }
            final int current = values.next();
            IntPoint.encodeDimension(current, scratch.bytes, 0);
            return scratch;
        }
    };

    return new PointInSetQuery(field, 1, Integer.BYTES, encodedValues) {
        @Override
        public Query rewrite(IndexSearcher indexSearcher) throws IOException {
            // An empty bitmap is rewritten to a query that matches no documents.
            return bitmap.isEmpty() ? new MatchNoDocsQuery() : super.rewrite(indexSearcher);
        }

        @Override
        protected String toString(byte[] value) {
            assert value.length == Integer.BYTES;
            final int decoded = IntPoint.decodeDimension(value, 0);
            return Integer.toString(decoded);
        }
    };
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

import org.roaringbitmap.RoaringBitmap;

import static org.opensearch.search.query.BitmapIndexQuery.checkArgs;

/**
* Filter with bitmap
* <p>
Expand All @@ -43,6 +45,7 @@ public class BitmapDocValuesQuery extends Query implements Accountable {
final long max;

public BitmapDocValuesQuery(String field, RoaringBitmap bitmap) {
checkArgs(field, bitmap);
this.field = field;
this.bitmap = bitmap;
if (!bitmap.isEmpty()) {
Expand Down Expand Up @@ -111,8 +114,7 @@ public boolean isCacheable(LeafReaderContext ctx) {

@Override
public String toString(String field) {
// bitmap may contain high cardinality, so choose to not show the actual values in it
return field + " cardinality: " + bitmap.getLongCardinality();
return "BitmapDocValuesQuery(field=" + this.field + ")";
}

@Override
Expand All @@ -139,8 +141,8 @@ public int hashCode() {

@Override
public long ramBytesUsed() {
return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOfObject(field)
+ RamUsageEstimator.sizeOfObject(bitmap);
return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOf(field) + bitmap
.getLongSizeInBytes();
}

@Override
Expand Down
Loading

0 comments on commit 72ac22b

Please sign in to comment.