Skip to content

Commit

Permalink
Update codecs to Apache Lucene 9.12.0
Browse files Browse the repository at this point in the history
Signed-off-by: Andriy Redko <[email protected]>
  • Loading branch information
reta committed Oct 10, 2024
1 parent 5b5d693 commit 96e5601
Show file tree
Hide file tree
Showing 33 changed files with 1,304 additions and 175 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.Strings;
import org.opensearch.index.codec.customcodecs.Lucene99QatCodec;
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
import org.opensearch.test.rest.OpenSearchRestTestCase;

Expand Down Expand Up @@ -103,7 +103,10 @@ public void testCreateIndexWithQatSPICodecWithQatHardwareUnavailable() throws IO
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", randomFrom(Lucene99QatCodec.Mode.QAT_LZ4.getCodec(), Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))
.put(
"index.codec",
randomFrom(Lucene912QatCodec.Mode.QAT_LZ4.getCodec(), Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
)
.put("index.codec.compression_level", randomIntBetween(1, 6))
.build()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.opensearch.common.settings.Setting;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.codec.CodecServiceFactory;
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
import org.opensearch.index.engine.EngineConfig;
import org.opensearch.plugins.EnginePlugin;
import org.opensearch.plugins.Plugin;
Expand Down Expand Up @@ -49,12 +50,19 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
|| codecName.equals(CustomCodecService.QAT_DEFLATE_CODEC)) {
return Optional.of(new CustomCodecServiceFactory());
} else {
if (!QatZipperFactory.isQatAvailable()
&& (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
if (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec())) {
if (!QatZipperFactory.isQatAvailable()) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
}
}

if (codecName.equals(Lucene912QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())) {
if (!QatZipperFactory.isQatAvailable()) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
}
}
}
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.Map;
import java.util.stream.Stream;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING;
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;

/** CustomCodecService provides ZSTD, ZSTD_NO_DICT, QAT_LZ4, and QAT_DEFLATE compression codecs. */
Expand Down Expand Up @@ -49,25 +50,26 @@ public CustomCodecService(MapperService mapperService, IndexSettings indexSettin
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
if (mapperService == null) {
codecs.put(ZSTD_CODEC, new Zstd99Codec(compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(compressionLevel));
codecs.put(ZSTD_CODEC, new Zstd912Codec(compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(compressionLevel));
if (QatZipperFactory.isQatAvailable()) {
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
}));
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(
QAT_LZ4_CODEC,
new QatLz4912Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
);
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
}
} else {
codecs.put(ZSTD_CODEC, new Zstd99Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_CODEC, new Zstd912Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(mapperService, logger, compressionLevel));
if (QatZipperFactory.isQatAvailable()) {
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(QAT_LZ4_CODEC, new QatLz4912Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.opensearch.common.settings.Settings;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
import org.opensearch.index.mapper.MapperService;

import java.util.Set;

import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;

/**
*
Expand All @@ -28,21 +27,18 @@
*
* @opensearch.internal
*/
public abstract class Lucene99CustomCodec extends FilterCodec {

/** Default compression level used for compression */
public static final int DEFAULT_COMPRESSION_LEVEL = INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getDefault(Settings.EMPTY);
public abstract class Lucene912CustomCodec extends FilterCodec {

/** Each mode represents a compression algorithm. */
public enum Mode {
/**
* ZStandard mode with dictionary
*/
ZSTD("ZSTD99", Set.of("zstd")),
ZSTD("ZSTD912", Set.of("zstd")),
/**
* ZStandard mode without dictionary
*/
ZSTD_NO_DICT("ZSTDNODICT99", Set.of("zstd_no_dict"));
ZSTD_NO_DICT("ZSTDNODICT912", Set.of("zstd_no_dict"));

private final String codec;
private final Set<String> aliases;
Expand Down Expand Up @@ -74,7 +70,7 @@ public Set<String> getAliases() {
*
* @param mode The compression codec (ZSTD or ZSTDNODICT).
*/
public Lucene99CustomCodec(Mode mode) {
public Lucene912CustomCodec(Mode mode) {
this(mode, DEFAULT_COMPRESSION_LEVEL);
}

Expand All @@ -86,9 +82,9 @@ public Lucene99CustomCodec(Mode mode) {
* @param mode The compression codec (ZSTD or ZSTDNODICT).
* @param compressionLevel The compression level.
*/
public Lucene99CustomCodec(Mode mode, int compressionLevel) {
super(mode.getCodec(), new Lucene99Codec());
this.storedFieldsFormat = new Lucene99CustomStoredFieldsFormat(mode, compressionLevel);
public Lucene912CustomCodec(Mode mode, int compressionLevel) {
super(mode.getCodec(), new Lucene912Codec());
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
}

/**
Expand All @@ -101,9 +97,9 @@ public Lucene99CustomCodec(Mode mode, int compressionLevel) {
* @param mapperService The mapper service.
* @param logger The logger.
*/
public Lucene99CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger));
this.storedFieldsFormat = new Lucene99CustomStoredFieldsFormat(mode, compressionLevel);
public Lucene912CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.customcodecs;

import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

import java.io.IOException;
import java.util.Objects;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;

/** Stored field format used by pluggable codec */
public class Lucene912CustomStoredFieldsFormat extends StoredFieldsFormat {

/** A key that we use to map to a mode */
public static final String MODE_KEY = Lucene912CustomStoredFieldsFormat.class.getSimpleName() + ".mode";

protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
protected static final int ZSTD_BLOCK_SHIFT = 10;

private final CompressionMode zstdCompressionMode;
private final CompressionMode zstdNoDictCompressionMode;

private final Lucene912CustomCodec.Mode mode;
private final int compressionLevel;

/** default constructor */
public Lucene912CustomStoredFieldsFormat() {
this(Lucene912CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new instance.
*
* @param mode The mode represents ZSTD or ZSTDNODICT
*/
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode) {
this(mode, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new instance with the specified mode and compression level.
*
* @param mode The mode represents ZSTD or ZSTDNODICT
* @param compressionLevel The compression level for the mode.
*/
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode, int compressionLevel) {
this.mode = Objects.requireNonNull(mode);
this.compressionLevel = compressionLevel;
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
}

/**
* Returns a {@link StoredFieldsReader} to load stored fields.
* @param directory The index directory.
* @param si The SegmentInfo that stores segment information.
* @param fn The fieldInfos.
* @param context The IOContext that holds additional details on the merge/search context.
*/
@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
if (si.getAttribute(MODE_KEY) != null) {
String value = si.getAttribute(MODE_KEY);
Lucene912CustomCodec.Mode mode = Lucene912CustomCodec.Mode.valueOf(value);
return impl(mode).fieldsReader(directory, si, fn, context);
} else {
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
}
}

/**
* Returns a {@link StoredFieldsReader} to write stored fields.
* @param directory The index directory.
* @param si The SegmentInfo that stores segment information.
* @param context The IOContext that holds additional details on the merge/search context.
*/
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
String previous = si.putAttribute(MODE_KEY, mode.name());
if (previous != null && previous.equals(mode.name()) == false) {
throw new IllegalStateException(
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
);
}
return impl(mode).fieldsWriter(directory, si, context);
}

StoredFieldsFormat impl(Lucene912CustomCodec.Mode mode) {
switch (mode) {
case ZSTD:
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
case ZSTD_NO_DICT:
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
default:
throw new IllegalStateException("Unsupported compression mode: " + mode);
}
}

private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
return new Lucene90CompressingStoredFieldsFormat(
formatName,
compressionMode,
ZSTD_BLOCK_LENGTH,
ZSTD_MAX_DOCS_PER_BLOCK,
ZSTD_BLOCK_SHIFT
);
}

public Lucene912CustomCodec.Mode getMode() {
return mode;
}

/**
* Returns the compression level.
*/
public int getCompressionLevel() {
return compressionLevel;
}

public CompressionMode getCompressionMode() {
return mode == Lucene912CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
}

}
Loading

0 comments on commit 96e5601

Please sign in to comment.