From 36664edc5b125459413fb5acb6550e8f01983663 Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Wed, 2 Oct 2024 15:31:10 -0700 Subject: [PATCH 1/8] lazy load the score of point in set query Signed-off-by: bowenlan-amzn --- .../index/mapper/NumberFieldMapper.java | 104 ++++++++++++++---- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 43e975f95757b..376686a7a89ce 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -42,15 +42,22 @@ import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.sandbox.document.BigIntegerPoint; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.opensearch.common.Explicit; @@ -1508,36 +1515,89 @@ public static Query unsignedLongRangeQuery( return builder.apply(l, u); } - static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) { - final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); - return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { + static Query bitmapIndexQuery(String field, RoaringBitmap bitmap) { + return new Query() { - final Iterator iterator = bitmap.iterator(); + @Override + public String toString(String field) { + return ""; + } @Override - public BytesRef next() { - int value; - if (iterator.hasNext()) { - value = iterator.next(); - } else { - return null; - } - IntPoint.encodeDimension(value, encoded.bytes, 0); - return encoded; + public void visit(QueryVisitor visitor) { + } - }) { + @Override - public Query rewrite(IndexSearcher indexSearcher) throws IOException { - if (bitmap.isEmpty()) { - return new MatchNoDocsQuery(); - } - return super.rewrite(indexSearcher); + public boolean equals(Object obj) { + return false; } @Override - protected String toString(byte[] value) { - assert value.length == Integer.BYTES; - return Integer.toString(IntPoint.decodeDimension(value, 0)); + public int hashCode() { + return 0; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + return scorerSupplier(context).get(Long.MAX_VALUE); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + return new ScorerSupplier() { + @Override + public Scorer get(long leadCost) throws IOException { + + final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); + Query query = new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { + + final Iterator iterator = bitmap.iterator(); + + @Override + public BytesRef next() { + int value; + if (iterator.hasNext()) { + value = iterator.next(); + } else { + return null; + } + IntPoint.encodeDimension(value, encoded.bytes, 0); + return encoded; + } + }) { + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (bitmap.isEmpty()) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + protected String toString(byte[] value) { + assert value.length == Integer.BYTES; + return Integer.toString(IntPoint.decodeDimension(value, 0)); + } + }; + return query.createWeight(searcher, scoreMode, boost).scorer(context); + } + + @Override + public long cost() { + return bitmap.getLongCardinality(); + } + }; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + }; } }; } From 151eb77e90bb23249d59d078d3cff1526d706b1e Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Thu, 2 Jan 2025 10:14:12 -0800 Subject: [PATCH 2/8] implement bitmap set query Signed-off-by: bowenlan-amzn --- .../index/mapper/NumberFieldMapper.java | 9 +- .../search/query/BitmapDocValuesQuery.java | 3 +- .../search/query/BitmapIndexQuery.java | 266 ++++++++++++++++++ .../query/BitmapDocValuesQueryTests.java | 34 +-- .../search/query/BitmapIndexQueryTests.java | 141 ++++++++++ 5 files changed, 416 insertions(+), 37 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java create mode 100644 server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 376686a7a89ce..b0f13059dd7af 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -97,6 +97,7 @@ import java.util.function.Function; import java.util.function.Supplier; +import org.opensearch.search.query.BitmapIndexQuery; import org.roaringbitmap.RoaringBitmap; /** @@ -895,10 +896,10 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha } if (isSearchable && hasDocValues) { - return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); + return new IndexOrDocValuesQuery(new BitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); } if (isSearchable) { - return bitmapIndexQuery(field, bitmap); + return new BitmapIndexQuery(field, bitmap); } return new BitmapDocValuesQuery(field, bitmap); } @@ -1551,12 +1552,9 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti return new ScorerSupplier() { @Override public Scorer get(long leadCost) throws IOException { - final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); Query query = new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { - final Iterator iterator = bitmap.iterator(); - @Override public BytesRef next() { int value; @@ -1583,6 +1581,7 @@ protected String toString(byte[] value) { return Integer.toString(IntPoint.decodeDimension(value, 0)); } }; + return query.createWeight(searcher, scoreMode, boost).scorer(context); } diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java index dfa5fc4567f80..ee8d1641ec082 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -111,8 +111,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public String toString(String field) { - // bitmap may contain high cardinality, so choose to not show the actual values in it - return field + " cardinality: " + bitmap.getLongCardinality(); + return "BitmapDocValuesQuery(field=" + field + ")"; } @Override diff --git a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java new file mode 100644 index 0000000000000..f403764bbe734 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java @@ -0,0 +1,266 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; +import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.RamUsageEstimator; +import org.roaringbitmap.RoaringBitmap; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Objects; + +/** + * A query that matches all documents that contain a set of integer numbers represented by bitmap + * + * @opensearch.internal + */ +public class BitmapIndexQuery extends Query implements Accountable { + + private final RoaringBitmap bitmap; + private final String field; + + public BitmapIndexQuery(String field, RoaringBitmap bitmap) { + this.bitmap = bitmap; + this.field = field; + } + + private static BytesRefIterator bitmapEncodedIterator(RoaringBitmap bitmap) { + return new BytesRefIterator() { + private final Iterator iterator = bitmap.iterator(); + private final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); + + @Override + public BytesRef next() { + int value; + if (iterator.hasNext()) { + value = iterator.next(); + } else { + return null; + } + IntPoint.encodeDimension(value, encoded.bytes, 0); + return encoded; + } + }; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); + if (scorerSupplier == null) { + return null; + } + return scorerSupplier.get(Long.MAX_VALUE); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final Weight weight = this; + LeafReader reader = context.reader(); + // get point value + // only works for one dimension + PointValues values = reader.getPointValues(field); + if (values == null) { + return null; + } + if (values.getNumIndexDimensions() != 1) { + throw new IllegalArgumentException("field must have only one dimension"); + } + + return new ScorerSupplier() { + long cost = -1; // calculate lazily, and only once + + @Override + public Scorer get(long leadCost) throws IOException { + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + MergePointVisitor visitor = new MergePointVisitor(result); + values.intersect(visitor); + return new ConstantScoreScorer(weight, score(), scoreMode, result.build().iterator()); + } + + @Override + public long cost() { + if (cost == -1) { + cost = bitmap.getLongCardinality(); + } + return cost; + } + }; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // This query depend only on segment-immutable structure points + return true; + } + }; + } + + private class MergePointVisitor implements PointValues.IntersectVisitor { + private final DocIdSetBuilder result; + private final BytesRefIterator iterator; + private BytesRef nextQueryPoint; + private final ArrayUtil.ByteArrayComparator comparator; + private DocIdSetBuilder.BulkAdder adder; + + public MergePointVisitor(DocIdSetBuilder result) + throws IOException { + this.result = result; + this.comparator = ArrayUtil.getUnsignedComparator(Integer.BYTES); + this.iterator = bitmapEncodedIterator(bitmap); + nextQueryPoint = iterator.next(); + } + + @Override + public void grow(int count) { + adder = result.grow(count); + } + + @Override + public void visit(int docID) { + adder.add(docID); + } + + @Override + public void visit(DocIdSetIterator iterator) throws IOException { + adder.add(iterator); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (matches(packedValue)) { + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue)) { + adder.add(iterator); + } + } + + private boolean matches(byte[] packedValue) { + while (nextQueryPoint != null) { + int cmp = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, packedValue, 0); + if (cmp == 0) { + return true; + } else if (cmp < 0) { + // Query point is before index point, so we move to next query point + try { + nextQueryPoint = iterator.next(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } else { + // Query point is after index point, so we don't collect and we return: + break; + } + } + return false; + } + + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + while (nextQueryPoint != null) { + int cmpMin = + comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0); + if (cmpMin < 0) { + // query point is before the start of this cell + try { + nextQueryPoint = iterator.next(); + } catch (IOException e) { + throw new RuntimeException(e); + } + continue; + } + int cmpMax = + comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0); + if (cmpMax > 0) { + // query point is after the end of this cell + return PointValues.Relation.CELL_OUTSIDE_QUERY; + } + + if (cmpMin == 0 && cmpMax == 0) { + // NOTE: we only hit this if we are on a cell whose min and max values are exactly equal + // to our point, + // which can easily happen if many (> 512) docs share this one value + return PointValues.Relation.CELL_INSIDE_QUERY; + } else { + return PointValues.Relation.CELL_CROSSES_QUERY; + } + } + + // We exhausted all points in the query: + return PointValues.Relation.CELL_OUTSIDE_QUERY; + } + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (bitmap.isEmpty()) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public String toString(String field) { + return "BitmapIndexQuery(field=" + field + ")"; + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + BitmapIndexQuery that = (BitmapIndexQuery) other; + return field.equals(that.field) && bitmap.equals(that.bitmap); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, bitmap); + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field) + + RamUsageEstimator.sizeOfObject(bitmap); + } +} diff --git a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java index 6e293d1ec69fd..d3e43e5f63979 100644 --- a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java +++ b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java @@ -34,6 +34,8 @@ import org.roaringbitmap.RoaringBitmap; +import static org.opensearch.search.query.BitmapIndexQueryTests.getMatchingValues; + public class BitmapDocValuesQueryTests extends OpenSearchTestCase { private Directory dir; private IndexWriter w; @@ -81,21 +83,7 @@ public void testScore() throws IOException { Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); - List actual = new LinkedList<>(); - for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { - // use doc values to get the actual value of the matching docs and assert - // cannot directly check the docId because test can randomize segment numbers - SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); - Scorer scorer = weight.scorer(leaf); - DocIdSetIterator disi = scorer.iterator(); - int docId; - while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - dv.advanceExact(docId); - for (int count = 0; count < dv.docValueCount(); ++count) { - actual.add((int) dv.nextValue()); - } - } - } + List actual = getMatchingValues(weight, searcher.getIndexReader()); List expected = List.of(1, 4); assertEquals(expected, actual); } @@ -128,21 +116,7 @@ public void testScoreMutilValues() throws IOException { Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); - Set actual = new HashSet<>(); - for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { - // use doc values to get the actual value of the matching docs and assert - // cannot directly check the docId because test can randomize segment numbers - SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); - Scorer scorer = weight.scorer(leaf); - DocIdSetIterator disi = scorer.iterator(); - int docId; - while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - dv.advanceExact(docId); - for (int count = 0; count < dv.docValueCount(); ++count) { - actual.add((int) dv.nextValue()); - } - } - } + Set actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); Set expected = Set.of(2, 3); assertEquals(expected, actual); } diff --git a/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java new file mode 100644 index 0000000000000..13e29f78a1d5f --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java @@ -0,0 +1,141 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.junit.After; +import org.junit.Before; +import org.opensearch.test.OpenSearchTestCase; +import org.roaringbitmap.RoaringBitmap; + +import java.io.IOException; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +public class BitmapIndexQueryTests extends OpenSearchTestCase { + private Directory dir; + private IndexWriter w; + private DirectoryReader reader; + private IndexSearcher searcher; + + @Before + public void initSearcher() throws IOException { + dir = newDirectory(); + w = new IndexWriter(dir, newIndexWriterConfig()); + } + + @After + public void closeAllTheReaders() throws IOException { + reader.close(); + w.close(); + dir.close(); + } + + public void testScore() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1); + bitmap.add(4); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + List actual = getMatchingValues(weight, searcher.getIndexReader()); + List expected = List.of(1, 4); + assertEquals(expected, actual); + } + + static List getMatchingValues(Weight weight, IndexReader reader) throws IOException { + List actual = new LinkedList<>(); + for (LeafReaderContext leaf : reader.leaves()) { + // use doc values to get the actual value of the matching docs and assert + // cannot directly check the docId because test can randomize segment numbers + SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); + Scorer scorer = weight.scorer(leaf); + DocIdSetIterator disi = scorer.iterator(); + int docId; + while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + dv.advanceExact(docId); + for (int count = 0; count < dv.docValueCount(); ++count) { + actual.add((int) dv.nextValue()); + } + } + } + return actual; + } + + public void testScoreMutilValues() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(3); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); + Set expected = Set.of(2, 3); + assertEquals(expected, actual); + } + +} From 462887117ff2996cf7b6b57883581323ae831f61 Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Tue, 7 Jan 2025 10:05:40 -0800 Subject: [PATCH 3/8] Use iterator that can advance, add random value unit test Signed-off-by: bowenlan-amzn --- .../index/mapper/NumberFieldMapper.java | 3 +- .../search/query/BitmapIndexQuery.java | 58 ++++++++++--------- .../query/BitmapDocValuesQueryTests.java | 6 -- .../search/query/BitmapIndexQueryTests.java | 49 ++++++++++++++-- 4 files changed, 77 insertions(+), 39 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index b0f13059dd7af..425c5c5efe50b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -80,6 +80,7 @@ import org.opensearch.search.DocValueFormat; import org.opensearch.search.lookup.SearchLookup; import org.opensearch.search.query.BitmapDocValuesQuery; +import org.opensearch.search.query.BitmapIndexQuery; import java.io.IOException; import java.math.BigInteger; @@ -97,7 +98,6 @@ import java.util.function.Function; import java.util.function.Supplier; -import org.opensearch.search.query.BitmapIndexQuery; import org.roaringbitmap.RoaringBitmap; /** @@ -1555,6 +1555,7 @@ public Scorer get(long leadCost) throws IOException { final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); Query query = new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { final Iterator iterator = bitmap.iterator(); + @Override public BytesRef next() { int value; diff --git a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java index f403764bbe734..9aa355541f291 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java @@ -29,12 +29,13 @@ import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.RamUsageEstimator; -import org.roaringbitmap.RoaringBitmap; import java.io.IOException; -import java.util.Iterator; import java.util.Objects; +import org.roaringbitmap.PeekableIntIterator; +import org.roaringbitmap.RoaringBitmap; + /** * A query that matches all documents that contain a set of integer numbers represented by bitmap * @@ -50,12 +51,19 @@ public BitmapIndexQuery(String field, RoaringBitmap bitmap) { this.field = field; } - private static BytesRefIterator bitmapEncodedIterator(RoaringBitmap bitmap) { - return new BytesRefIterator() { - private final Iterator iterator = bitmap.iterator(); + interface BitmapIterator extends BytesRefIterator { + // wrap IntIterator.next() + BytesRef next(); + + // expose PeekableIntIterator.advanceIfNeeded, advance as long as the next value is smaller than target + void advance(byte[] target); + } + + private static BitmapIterator bitmapEncodedIterator(RoaringBitmap bitmap) { + return new BitmapIterator() { + private final PeekableIntIterator iterator = bitmap.getIntIterator(); private final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); - @Override public BytesRef next() { int value; if (iterator.hasNext()) { @@ -66,6 +74,10 @@ public BytesRef next() { IntPoint.encodeDimension(value, encoded.bytes, 0); return encoded; } + + public void advance(byte[] target) { + iterator.advanceIfNeeded(IntPoint.decodeDimension(target, 0)); + } }; } @@ -85,8 +97,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { final Weight weight = this; LeafReader reader = context.reader(); - // get point value - // only works for one dimension + // get the point value which should be one dimension, since bitmap saves integers PointValues values = reader.getPointValues(field); if (values == null) { return null; @@ -118,7 +129,7 @@ public long cost() { @Override public boolean isCacheable(LeafReaderContext ctx) { - // This query depend only on segment-immutable structure points + // This query depend only on segment-immutable structure — points return true; } }; @@ -126,13 +137,12 @@ public boolean isCacheable(LeafReaderContext ctx) { private class MergePointVisitor implements PointValues.IntersectVisitor { private final DocIdSetBuilder result; - private final BytesRefIterator iterator; + private final BitmapIterator iterator; private BytesRef nextQueryPoint; private final ArrayUtil.ByteArrayComparator comparator; private DocIdSetBuilder.BulkAdder adder; - public MergePointVisitor(DocIdSetBuilder result) - throws IOException { + public MergePointVisitor(DocIdSetBuilder result) throws IOException { this.result = result; this.comparator = ArrayUtil.getUnsignedComparator(Integer.BYTES); this.iterator = bitmapEncodedIterator(bitmap); @@ -175,11 +185,8 @@ private boolean matches(byte[] packedValue) { return true; } else if (cmp < 0) { // Query point is before index point, so we move to next query point - try { - nextQueryPoint = iterator.next(); - } catch (IOException e) { - throw new RuntimeException(e); - } + iterator.advance(packedValue); + nextQueryPoint = iterator.next(); } else { // Query point is after index point, so we don't collect and we return: break; @@ -191,19 +198,14 @@ private boolean matches(byte[] packedValue) { @Override public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { while (nextQueryPoint != null) { - int cmpMin = - comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0); + int cmpMin = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0); if (cmpMin < 0) { // query point is before the start of this cell - try { - nextQueryPoint = iterator.next(); - } catch (IOException e) { - throw new RuntimeException(e); - } + iterator.advance(minPackedValue); + nextQueryPoint = iterator.next(); continue; } - int cmpMax = - comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0); + int cmpMax = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0); if (cmpMax > 0) { // query point is after the end of this cell return PointValues.Relation.CELL_OUTSIDE_QUERY; @@ -260,7 +262,7 @@ public int hashCode() { @Override public long ramBytesUsed() { - return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field) - + RamUsageEstimator.sizeOfObject(bitmap); + return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field) + RamUsageEstimator + .sizeOfObject(bitmap); } } diff --git a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java index d3e43e5f63979..d103b335588bc 100644 --- a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java +++ b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java @@ -12,14 +12,9 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; import org.opensearch.test.OpenSearchTestCase; @@ -28,7 +23,6 @@ import java.io.IOException; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Set; diff --git a/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java index 13e29f78a1d5f..b5f7e35e92520 100644 --- a/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java +++ b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java @@ -23,17 +23,22 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; import org.junit.After; import org.junit.Before; -import org.opensearch.test.OpenSearchTestCase; -import org.roaringbitmap.RoaringBitmap; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Random; import java.util.Set; +import org.roaringbitmap.RoaringBitmap; + public class BitmapIndexQueryTests extends OpenSearchTestCase { private Directory dir; private IndexWriter w; @@ -86,11 +91,11 @@ public void testScore() throws IOException { assertEquals(expected, actual); } + // use doc values to get the actual value of the matching docs + // cannot directly check the docId because test can randomize segment numbers static List getMatchingValues(Weight weight, IndexReader reader) throws IOException { List actual = new LinkedList<>(); for (LeafReaderContext leaf : reader.leaves()) { - // use doc values to get the actual value of the matching docs and assert - // cannot directly check the docId because test can randomize segment numbers SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); Scorer scorer = weight.scorer(leaf); DocIdSetIterator disi = scorer.iterator(); @@ -138,4 +143,40 @@ public void testScoreMutilValues() throws IOException { assertEquals(expected, actual); } + public void testRandomDocumentsAndQueries() throws IOException { + Random random = Randomness.get(); + int valueRange = 10_000; // the range of query values should be within indexed values + + for (int i = 0; i < valueRange + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", i, Field.Store.NO)); + w.addDocument(d); + } + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + // Generate random values for bitmap query + Set queryValues = new HashSet<>(); + int numberOfValues = 5; + for (int i = 0; i < numberOfValues; i++) { + int value = random.nextInt(valueRange) + 1; + queryValues.add(value); + } + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(queryValues.stream().mapToInt(Integer::intValue).toArray()); + + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actualSet = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); + + List expected = new ArrayList<>(queryValues); + Collections.sort(expected); + List actual = new ArrayList<>(actualSet); + Collections.sort(actual); + assertEquals(expected, actual); + } + } From eee3eb72907136ffc2c1a5974ad1949707c6f9ec Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Tue, 7 Jan 2025 11:40:42 -0800 Subject: [PATCH 4/8] fix ut Signed-off-by: bowenlan-amzn --- .../java/org/opensearch/index/mapper/NumberFieldTypeTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index c06371bed9357..c08c6d3821645 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -76,6 +76,7 @@ import org.opensearch.search.DocValueFormat; import org.opensearch.search.MultiValueMode; import org.opensearch.search.query.BitmapDocValuesQuery; +import org.opensearch.search.query.BitmapIndexQuery; import org.junit.Before; import java.io.ByteArrayInputStream; @@ -962,7 +963,7 @@ public void testBitmapQuery() throws IOException { NumberFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.INTEGER); assertEquals( - new IndexOrDocValuesQuery(NumberType.bitmapIndexQuery("field", r), new BitmapDocValuesQuery("field", r)), + new IndexOrDocValuesQuery(new BitmapIndexQuery("field", r), new BitmapDocValuesQuery("field", r)), ft.bitmapQuery(bitmap) ); From 1f378a74a5c0e363571f845c2932de8ca296e7d0 Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Tue, 7 Jan 2025 22:26:43 -0800 Subject: [PATCH 5/8] update Signed-off-by: bowenlan-amzn --- .../index/mapper/NumberFieldMapper.java | 96 ------------------- .../search/query/BitmapDocValuesQuery.java | 3 + .../search/query/BitmapIndexQuery.java | 10 ++ 3 files changed, 13 insertions(+), 96 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 425c5c5efe50b..702e5db50e841 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -42,22 +42,13 @@ import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.sandbox.document.BigIntegerPoint; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryVisitor; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScorerSupplier; -import org.apache.lucene.search.Weight; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.opensearch.common.Explicit; @@ -89,7 +80,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -1515,92 +1505,6 @@ public static Query unsignedLongRangeQuery( } return builder.apply(l, u); } - - static Query bitmapIndexQuery(String field, RoaringBitmap bitmap) { - return new Query() { - - @Override - public String toString(String field) { - return ""; - } - - @Override - public void visit(QueryVisitor visitor) { - - } - - @Override - public boolean equals(Object obj) { - return false; - } - - @Override - public int hashCode() { - return 0; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new ConstantScoreWeight(this, boost) { - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - return scorerSupplier(context).get(Long.MAX_VALUE); - } - - @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - return new ScorerSupplier() { - @Override - public Scorer get(long leadCost) throws IOException { - final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); - Query query = new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { - final Iterator iterator = bitmap.iterator(); - - @Override - public BytesRef next() { - int value; - if (iterator.hasNext()) { - value = iterator.next(); - } else { - return null; - } - IntPoint.encodeDimension(value, encoded.bytes, 0); - return encoded; - } - }) { - @Override - public Query rewrite(IndexSearcher indexSearcher) throws IOException { - if (bitmap.isEmpty()) { - return new MatchNoDocsQuery(); - } - return super.rewrite(indexSearcher); - } - - @Override - protected String toString(byte[] value) { - assert value.length == Integer.BYTES; - return Integer.toString(IntPoint.decodeDimension(value, 0)); - } - }; - - return query.createWeight(searcher, scoreMode, boost).scorer(context); - } - - @Override - public long cost() { - return bitmap.getLongCardinality(); - } - }; - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return false; - } - }; - } - }; - } } /** diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java index ee8d1641ec082..05cf6791d066c 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -30,6 +30,8 @@ import org.roaringbitmap.RoaringBitmap; +import static org.opensearch.search.query.BitmapIndexQuery.checkArgs; + /** * Filter with bitmap *

@@ -43,6 +45,7 @@ public class BitmapDocValuesQuery extends Query implements Accountable { final long max; public BitmapDocValuesQuery(String field, RoaringBitmap bitmap) { + checkArgs(field, bitmap); this.field = field; this.bitmap = bitmap; if (!bitmap.isEmpty()) { diff --git a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java index 9aa355541f291..b28b05de4792b 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java @@ -47,10 +47,20 @@ public class BitmapIndexQuery extends Query implements Accountable { private final String field; public BitmapIndexQuery(String field, RoaringBitmap bitmap) { + checkArgs(field, bitmap); this.bitmap = bitmap; this.field = field; } + static void checkArgs(String field, RoaringBitmap bitmap) { + if (field == null) { + throw new IllegalArgumentException("field must not be null"); + } + if (bitmap == null) { + throw new IllegalArgumentException("bitmap must not be null"); + } + } + interface BitmapIterator extends BytesRefIterator { // wrap IntIterator.next() BytesRef next(); From 56ead5916c7895de4d6db0ba0425c454e29d1c87 Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Mon, 13 Jan 2025 09:38:38 -0800 Subject: [PATCH 6/8] update the cost estimation Signed-off-by: bowenlan-amzn --- .../search/query/BitmapDocValuesQuery.java | 2 +- .../search/query/BitmapIndexQuery.java | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java index 05cf6791d066c..6dce33b76b0a4 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -114,7 +114,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public String toString(String field) { - return "BitmapDocValuesQuery(field=" + field + ")"; + return "BitmapDocValuesQuery(field=" + this.field + ")"; } @Override diff --git a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java index b28b05de4792b..62ccc50ca318d 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java @@ -94,6 +94,9 @@ public void advance(byte[] target) { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { + // get cardinality is not cheap enough to do when supplying scorers, so do it once per weight + final long cardinality = bitmap.getLongCardinality(); + @Override public Scorer scorer(LeafReaderContext context) throws IOException { ScorerSupplier scorerSupplier = scorerSupplier(context); @@ -117,12 +120,13 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti } return new ScorerSupplier() { - long cost = -1; // calculate lazily, and only once + long cost = -1; + + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + final MergePointVisitor visitor = new MergePointVisitor(result); @Override public Scorer get(long leadCost) throws IOException { - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); - MergePointVisitor visitor = new MergePointVisitor(result); values.intersect(visitor); return new ConstantScoreScorer(weight, score(), scoreMode, result.build().iterator()); } @@ -130,7 +134,9 @@ public Scorer get(long leadCost) throws IOException { @Override public long cost() { if (cost == -1) { - cost = bitmap.getLongCardinality(); + // rough estimate of the cost, 20 times penalty is based on the experiment results + // details in https://github.com/opensearch-project/OpenSearch/pull/16936 + cost = cardinality * 20; } return cost; } @@ -246,7 +252,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException { @Override public String toString(String field) { - return "BitmapIndexQuery(field=" + field + ")"; + return "BitmapIndexQuery(field=" + this.field + ")"; } @Override From c7fd161c234f5a1c532074eaae6eaf31e8f075eb Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Mon, 13 Jan 2025 11:54:14 -0800 Subject: [PATCH 7/8] Add changelog Signed-off-by: bowenlan-amzn --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9d7d9a60a3e5..3d8a5d63579a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support object fields in star-tree index([#16728](https://github.com/opensearch-project/OpenSearch/pull/16728/)) - Support searching from doc_value using termQueryCaseInsensitive/termQuery in flat_object/keyword field([#16974](https://github.com/opensearch-project/OpenSearch/pull/16974/)) - Added a new `time` field to replace the deprecated `getTime` field in `GetStats`. ([#17009](https://github.com/opensearch-project/OpenSearch/pull/17009)) +- Improve performance of the bitmap filtering([#16936](https://github.com/opensearch-project/OpenSearch/pull/16936/)) ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) From ed293c426b708ec047b6c586e79ffc39ba1a99ff Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Thu, 16 Jan 2025 11:15:44 -0800 Subject: [PATCH 8/8] increase coverage Signed-off-by: bowenlan-amzn --- .../search/query/BitmapDocValuesQuery.java | 4 +- .../search/query/BitmapIndexQuery.java | 4 +- .../index/mapper/NumberFieldTypeTests.java | 3 + .../search/query/BitmapIndexQueryTests.java | 101 ++++++++++++++++++ 4 files changed, 108 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java index 6dce33b76b0a4..9fce29ae7fbfb 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -141,8 +141,8 @@ public int hashCode() { @Override public long ramBytesUsed() { - return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOfObject(field) - + RamUsageEstimator.sizeOfObject(bitmap); + return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOf(field) + bitmap + .getLongSizeInBytes(); } @Override diff --git a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java index 62ccc50ca318d..87a7c132be848 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java @@ -278,7 +278,7 @@ public int hashCode() { @Override public long ramBytesUsed() { - return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOfObject(field) + RamUsageEstimator - .sizeOfObject(bitmap); + return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOf(field) + bitmap + .getLongSizeInBytes(); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index c08c6d3821645..e1551e225b307 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -970,6 +970,9 @@ public void testBitmapQuery() throws IOException { ft = new NumberFieldType("field", NumberType.INTEGER, false, false, true, true, null, Collections.emptyMap()); assertEquals(new BitmapDocValuesQuery("field", r), ft.bitmapQuery(bitmap)); + ft = new NumberFieldType("field", NumberType.INTEGER, true, false, false, true, null, Collections.emptyMap()); + assertEquals(new BitmapIndexQuery("field", r), ft.bitmapQuery(bitmap)); + Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); DirectoryReader reader = DirectoryReader.open(w); diff --git a/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java index b5f7e35e92520..32228fce4c8c5 100644 --- a/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java +++ b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java @@ -15,14 +15,18 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; import org.opensearch.common.Randomness; import org.opensearch.test.OpenSearchTestCase; import org.junit.After; @@ -49,6 +53,7 @@ public class BitmapIndexQueryTests extends OpenSearchTestCase { public void initSearcher() throws IOException { dir = newDirectory(); w = new IndexWriter(dir, newIndexWriterConfig()); + reader = DirectoryReader.open(w); } @After @@ -179,4 +184,100 @@ public void testRandomDocumentsAndQueries() throws IOException { assertEquals(expected, actual); } + public void testCheckArgsNullBitmap() { + /** + * Test that checkArgs throws IllegalArgumentException when bitmap is null + */ + assertThrows(IllegalArgumentException.class, () -> BitmapIndexQuery.checkArgs("field", null)); + } + + public void testCheckArgsNullField() { + /** + * Test that checkArgs throws IllegalArgumentException when field is null + */ + RoaringBitmap bitmap = new RoaringBitmap(); + assertThrows(IllegalArgumentException.class, () -> BitmapIndexQuery.checkArgs(null, bitmap)); + } + + public void testCheckArgsWithNullBitmap() { + assertThrows(IllegalArgumentException.class, () -> { BitmapIndexQuery.checkArgs("product_id", null); }); + } + + public void testCheckArgsWithNullFieldAndBitmap() { + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> { BitmapIndexQuery.checkArgs(null, null); } + ); + assertEquals("field must not be null", exception.getMessage()); + } + + public void testCreateWeight() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f); + assertNotNull(weight); + Scorer scorer = weight.scorer(reader.leaves().get(0)); + assertNotNull(scorer); + ScorerSupplier supplier = weight.scorerSupplier(reader.leaves().get(0)); + assertNotNull(supplier); + long cost = supplier.cost(); + assertEquals(20, cost); + } + + public void testRewrite() throws IOException { + RoaringBitmap bitmap = new RoaringBitmap(); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + assertEquals(new MatchNoDocsQuery(), query.rewrite(searcher)); + } + + public void testPointVisitor() throws IOException { + w.close(); + // default codec uses 512 documents per leaf node, so we can cover the visit disi methods in PointVisitor + w = new IndexWriter(dir, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec())); + + for (int i = 0; i < 512 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + } + + for (int i = 0; i < 256 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + w.addDocument(d); + } + + for (int i = 0; i < 256 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + } + + for (int i = 0; i < 512 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + } + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(0, 1, 2, 3, 5); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); + Set expected = Set.of(1, 2, 3); + assertEquals(expected, actual); + } }