Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CNDB-11613 SAI compressed indexes #1474

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
Future version (tbd)
* Require only MODIFY permission on base when updating table with MV (STAR-564)
Merged from 5.1:
* Expose current compaction throughput in nodetool (CASSANDRA-13890)
Merged from 5.0:
* Disable chronicle analytics (CASSANDRA-19656)
* Remove mocking in InternalNodeProbe spying on StorageServiceMBean (CASSANDRA-18152)
Expand Down
2 changes: 2 additions & 0 deletions conf/cassandra.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,8 @@ commitlog_segment_size_in_mb: 32
# none : Flush without compressing blocks but while still doing checksums.
# fast : Flush with a fast compressor. If the table is already using a
# fast compressor that compressor is used.
# adaptive : Flush with a fast adaptive compressor. If the table is already using a
# fast compressor that compressor is used.
# table: Always flush with the same compressor that the table uses. This
# was the pre 4.0 behavior.
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,9 @@ public enum CassandraRelevantProperties
/** Watcher used when opening sstables to discover extra components, eg. archive component */
CUSTOM_SSTABLE_WATCHER("cassandra.custom_sstable_watcher"),

/** When enabled, a user can set compression options in the index schema */
INDEX_COMPRESSION("cassandra.index.compression_enabled", "false"),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a DS-only property, should we use a different prefix, as in ds.index.compression_enabled, so it's easier for us to identify these properties?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, the property could be named INDEX_COMPRESSION_ENABLED, or perhaps USE_INDEX_COMPRESSION, so the name suggests that it's a boolean property.


/** Controls the maximum top-k limit for vector search */
SAI_VECTOR_SEARCH_MAX_TOP_K("cassandra.sai.vector_search.max_top_k", "1000"),

Expand Down
6 changes: 5 additions & 1 deletion src/java/org/apache/cassandra/config/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -37,6 +38,7 @@
import org.apache.cassandra.fql.FullQueryLoggerOptions;
import org.apache.cassandra.db.ConsistencyLevel;
import org.apache.cassandra.guardrails.GuardrailsConfig;
import org.apache.cassandra.io.compress.AdaptiveCompressor;
import org.apache.cassandra.utils.FBUtilities;

/**
Expand Down Expand Up @@ -294,8 +296,9 @@ public class Config
public double commitlog_sync_group_window_in_ms = Double.NaN;
public int commitlog_sync_period_in_ms;
public int commitlog_segment_size_in_mb = 32;

public ParameterizedClass commitlog_compression;
public FlushCompression flush_compression = FlushCompression.fast;
public FlushCompression flush_compression;
public int commitlog_max_compression_buffers_in_pool = 3;
public Integer periodic_commitlog_sync_lag_block_in_ms;
public TransparentDataEncryptionOptions transparent_data_encryption_options = new TransparentDataEncryptionOptions();
Expand Down Expand Up @@ -666,6 +669,7 @@ public enum FlushCompression
{
none,
fast,
adaptive,
table
}

Expand Down
11 changes: 9 additions & 2 deletions src/java/org/apache/cassandra/config/DatabaseDescriptor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2118,15 +2118,22 @@ public static void setCommitLogCompression(ParameterizedClass compressor)

public static Config.FlushCompression getFlushCompression()
{
return conf.flush_compression;
return Objects.requireNonNullElseGet(conf.flush_compression, () -> shouldUseAdaptiveCompressionByDefault()
? Config.FlushCompression.adaptive
: Config.FlushCompression.fast);
}

public static void setFlushCompression(Config.FlushCompression compression)
{
conf.flush_compression = compression;
}

/**
public static boolean shouldUseAdaptiveCompressionByDefault()
{
return System.getProperty("cassandra.default_sstable_compression", "fast").equals("adaptive");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would probably be better in CassandraRelevantProperties.

}

/**
* Maximum number of buffers in the compression pool. The default value is 3, it should not be set lower than that
* (one segment in compression, one written to, one in reserve); delays in compression may cause the log to use
* more, depending on how soon the sync policy stops all writing threads.
Expand Down
33 changes: 33 additions & 0 deletions src/java/org/apache/cassandra/cql3/CqlBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.function.Consumer;

import com.google.common.annotations.VisibleForTesting;

Expand Down Expand Up @@ -238,4 +239,36 @@ public String toString()
{
return builder.toString();
}

/**
* Builds a {@code WITH option1 = ... AND option2 = ... AND option3 = ...} clause.
* @param builder a receiver to receive a builder allowing to add each option
Comment on lines +244 to +245

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Builds a `WITH option1 = ... AND option2 = ... AND option3 = ... clause
* @param builder a receiver to receive a builder allowing to add each option
* Builds a {@code WITH option1 = ... AND option2 = ... AND option3 = ...} clause.
*
* @param builder a consumer to receive a builder allowing to add each option

*/
/**
 * Builds a {@code WITH option1 = ... AND option2 = ... AND option3 = ...} clause.
 *
 * @param optionsConsumer a consumer that receives an {@link OptionsBuilder} used to add each option
 * @return this builder
 */
public CqlBuilder appendOptions(Consumer<OptionsBuilder> optionsConsumer)
{
    OptionsBuilder optionsBuilder = new OptionsBuilder(this);
    optionsConsumer.accept(optionsBuilder);
    return this;
}

public static class OptionsBuilder
{
private CqlBuilder builder;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be final

boolean empty = true;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be private


/** Creates an options builder that appends its output into the given {@code CqlBuilder}. */
OptionsBuilder(CqlBuilder builder)
{
this.builder = builder;
}

/**
 * Appends {@code name = options} to the underlying builder. The first non-empty
 * option appended is prefixed with {@code " WITH "}; subsequent ones with
 * {@code " AND "}. An empty options map is skipped entirely, producing no output.
 *
 * @param name the option name to append
 * @param options the option values; ignored when empty
 * @return this options builder, for chaining
 */
public OptionsBuilder append(String name, Map<String, String> options)
{
    if (!options.isEmpty())
    {
        String separator = empty ? " WITH " : " AND ";
        builder.append(separator + name + " = ");
        empty = false;
        builder.append(options);
    }
    return this;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
import org.apache.cassandra.audit.AuditLogContext;
import org.apache.cassandra.audit.AuditLogEntryType;
import org.apache.cassandra.auth.Permission;
import org.apache.cassandra.config.CassandraRelevantProperties;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.config.ParameterizedClass;
import org.apache.cassandra.cql3.ColumnIdentifier;
import org.apache.cassandra.cql3.QualifiedName;
import org.apache.cassandra.cql3.statements.RawKeyspaceAwareStatement;
Expand Down Expand Up @@ -161,7 +163,21 @@ public Keyspaces apply(Keyspaces schema)

Map<String, String> options = attrs.isCustom ? attrs.getOptions() : Collections.emptyMap();

IndexMetadata index = IndexMetadata.fromIndexTargets(indexTargets, name, kind, options);
Map<String, String> keyCompressionOptions = attrs.getMap("key_compression");
Copy link

@adelapena adelapena Feb 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can make IndexAttributes.KW_KEY_COMPRESSION public and use it here, and probably in other uses in IndexMetadata.

CompressionParams keyCompression = keyCompressionOptions != null
? CompressionParams.fromMap(keyCompressionOptions)
: CompressionParams.noCompression();

Map<String, String> valueCompressionOptions = attrs.getMap("value_compression");
Copy link

@adelapena adelapena Feb 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above, we can make IndexAttributes.KW_VALUE_COMPRESSION public and use it here, and probably in other uses in IndexMetadata.

CompressionParams valueCompression = valueCompressionOptions != null
? CompressionParams.fromMap(valueCompressionOptions)
: CompressionParams.noCompression();

if ((keyCompression.isEnabled() || valueCompression.isEnabled()) && !CassandraRelevantProperties.INDEX_COMPRESSION.getBoolean())
throw ire("Cannot create a compressed index, because index compression is disabled. " +
"Please set " + CassandraRelevantProperties.INDEX_COMPRESSION.getKey() + " property to enable it.");

IndexMetadata index = IndexMetadata.fromIndexTargets(indexTargets, name, kind, options, keyCompression, valueCompression);

String className = index.getIndexClassName();
IndexGuardrails guardRails = IndexGuardrails.forClassName(className);
Expand Down Expand Up @@ -194,7 +210,10 @@ public Keyspaces apply(Keyspaces schema)
throw ire("Index %s is a duplicate of existing index %s", index.name, equalIndex.name);
}

TableMetadata newTable = table.withSwapped(table.indexes.with(index));
// All indexes on one table must use the same key_compression.
// The newly created index forces key_compression on the previous indexes.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to emit a client warning about this?

Indexes newIndexes = table.indexes.withKeyCompression(index.keyCompression).with(index);
TableMetadata newTable = table.withSwapped(newIndexes);
newTable.validate();

return schema.withAddedOrUpdated(keyspace.withSwapped(keyspace.tables.withSwapped(newTable)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public class IndexAttributes extends PropertyDefinitions
private static final String DEFAULT_INDEX_CLASS_PROPERTY = "cassandra.default_index_implementation_class";

private static final String KW_OPTIONS = "options";
private static final String KW_KEY_COMPRESSION = "key_compression";
private static final String KW_VALUE_COMPRESSION = "value_compression";

private static final Set<String> keywords = new HashSet<>();
private static final Set<String> obsoleteKeywords = new HashSet<>();
Expand All @@ -39,6 +41,8 @@ public class IndexAttributes extends PropertyDefinitions
static
{
keywords.add(KW_OPTIONS);
keywords.add(KW_KEY_COMPRESSION);
keywords.add(KW_VALUE_COMPRESSION);
}

public void maybeApplyDefaultIndex()
Expand Down
14 changes: 10 additions & 4 deletions src/java/org/apache/cassandra/db/compaction/CompactionManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
import com.google.common.util.concurrent.Uninterruptibles;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codahale.metrics.Meter;
import io.netty.util.concurrent.FastThreadLocal;
import org.apache.cassandra.cache.AutoSavingCache;
import org.apache.cassandra.concurrent.DebuggableThreadPoolExecutor;
Expand Down Expand Up @@ -290,6 +290,11 @@ public void setRate(final double throughPutMbPerSec)
compactionRateLimiter.setRate(throughput);
}

/**
 * Returns the meter tracking compaction throughput in bytes. The meter is marked
 * with the number of bytes read as compactions pass through the rate limiter.
 *
 * @return the bytes-compacted throughput meter
 */
public Meter getCompactionThroughput()
{
return metrics.bytesCompactedThroughput;
}

/**
* Call this whenever a compaction might be needed on the given column family store.
* It's okay to over-call (within reason) if a call is unnecessary, it will
Expand All @@ -314,7 +319,7 @@ public CompletableFuture<?>[] startCompactionTasks(ColumnFamilyStore cfs, Collec
{
return backgroundCompactionRunner.startCompactionTasks(cfs, tasks);
}

public int getOngoingBackgroundUpgradesCount()
{
return backgroundCompactionRunner.getOngoingUpgradesCount();
Expand Down Expand Up @@ -1373,7 +1378,7 @@ private void doCleanupOne(final ColumnFamilyStore cfs,

}

static boolean compactionRateLimiterAcquire(RateLimiter limiter, long bytesScanned, long lastBytesScanned, double compressionRatio)
protected boolean compactionRateLimiterAcquire(RateLimiter limiter, long bytesScanned, long lastBytesScanned, double compressionRatio)
{
if (DatabaseDescriptor.getCompactionThroughputMbPerSec() == 0)
return false;
Expand All @@ -1386,8 +1391,9 @@ static boolean compactionRateLimiterAcquire(RateLimiter limiter, long bytesScann
return actuallyAcquire(limiter, lengthRead);
}

private static boolean actuallyAcquire(RateLimiter limiter, long lengthRead)
private boolean actuallyAcquire(RateLimiter limiter, long lengthRead)
{
metrics.bytesCompactedThroughput.mark(lengthRead);
while (lengthRead >= Integer.MAX_VALUE)
{
limiter.acquire(Integer.MAX_VALUE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@

import static org.apache.cassandra.config.CassandraRelevantProperties.COMPACTION_HISTORY_ENABLED;
import static org.apache.cassandra.config.CassandraRelevantProperties.CURSORS_ENABLED;
import static org.apache.cassandra.db.compaction.CompactionManager.compactionRateLimiterAcquire;
import static org.apache.cassandra.utils.FBUtilities.prettyPrintMemory;
import static org.apache.cassandra.utils.FBUtilities.prettyPrintMemoryPerSecond;

Expand Down Expand Up @@ -715,7 +714,7 @@ void execute0()
long bytesScanned = compactionIterator.bytesRead();

// Rate limit the scanners, and account for compression
if (compactionRateLimiterAcquire(limiter, bytesScanned, lastBytesScanned, compressionRatio))
if (CompactionManager.instance.compactionRateLimiterAcquire(limiter, bytesScanned, lastBytesScanned, compressionRatio))
lastBytesScanned = bytesScanned;

maybeStopOrUpdateState();
Expand Down
11 changes: 11 additions & 0 deletions src/java/org/apache/cassandra/index/sai/IndexContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.CompressionParams;
import org.apache.cassandra.schema.IndexMetadata;
import org.apache.cassandra.schema.TableId;
import org.apache.cassandra.utils.CloseableIterator;
Expand Down Expand Up @@ -609,6 +610,16 @@ public String getIndexName()
return this.config == null ? null : config.name;
}

public CompressionParams getKeyCompression()
{
return this.config.keyCompression;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: unneeded this.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method seems unused.

}

public CompressionParams getValueCompression()
{
return this.config.valueCompression;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: unneeded this.

}

public int getIntOption(String name, int defaultValue)
{
String value = this.config.options.get(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.cassandra.io.sstable.SSTable;
import org.apache.cassandra.io.sstable.format.SSTableFlushObserver;
import org.apache.cassandra.metrics.TableMetrics;
import org.apache.cassandra.schema.CompressionParams;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.utils.ApproximateTime;
import org.apache.cassandra.utils.Throwables;
Expand Down Expand Up @@ -107,11 +108,13 @@ public StorageAttachedIndexWriter(IndexDescriptor indexDescriptor,
.filter(Objects::nonNull) // a null here means the column had no data to flush
.collect(Collectors.toList());

CompressionParams keyCompression = indices.iterator().next().getIndexMetadata().keyCompression;

// If the SSTable components are already being built by another index build then we don't want
// to build them again so use a NO-OP writer
this.perSSTableWriter = perIndexComponentsOnly
? PerSSTableWriter.NONE
: onDiskFormat.newPerSSTableWriter(indexDescriptor);
: onDiskFormat.newPerSSTableWriter(indexDescriptor, keyCompression);
this.tableMetrics = tableMetrics;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@

import org.apache.cassandra.index.sai.disk.io.IndexInput;
import org.apache.cassandra.index.sai.disk.io.IndexOutputWriter;
import org.apache.cassandra.io.compress.CompressionMetadata;
import org.apache.cassandra.io.sstable.Component;
import org.apache.cassandra.io.util.File;
import org.apache.cassandra.io.util.FileHandle;
import org.apache.cassandra.schema.CompressionParams;
import org.apache.lucene.store.ChecksumIndexInput;

public interface IndexComponent
Expand All @@ -38,8 +40,11 @@ public interface IndexComponent

String fileNamePart();
Component asCustomComponent();

File file();

File compressionMetaFile();

default boolean isCompletionMarker()
{
return componentType() == parent().completionMarkerComponent();
Expand Down Expand Up @@ -77,7 +82,18 @@ default IndexOutputWriter openOutput() throws IOException
return openOutput(false);
}

IndexOutputWriter openOutput(boolean append) throws IOException;
/**
 * Opens an output writer for this component with compression disabled.
 *
 * @param append whether to append to the existing component file rather than overwrite it
 * @return a writer for this component's file
 * @throws IOException if the output cannot be opened
 */
default IndexOutputWriter openOutput(boolean append) throws IOException
{
return openOutput(append, CompressionParams.noCompression());
}

/**
 * Opens an output writer for this component in overwrite (non-append) mode with
 * the given compression parameters.
 *
 * @param compression the compression parameters to apply to the written data
 * @return a writer for this component's file
 * @throws IOException if the output cannot be opened
 */
default IndexOutputWriter openOutput(CompressionParams compression) throws IOException
{
return openOutput(false, compression);
}

IndexOutputWriter openOutput(boolean append, CompressionParams compression) throws IOException;


void createEmpty() throws IOException;
}
Expand Down
Loading