Skip to content

Commit

Permalink
[orc] Enable READER_USE_SELECTED only when deletion vectors are disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi committed Nov 12, 2024
1 parent aee2b8b commit f0e4cd7
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 31 deletions.
15 changes: 0 additions & 15 deletions paimon-format/src/main/java/org/apache/orc/OrcConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -305,21 +305,6 @@ public enum OrcConf {
+ "must have the filter\n"
+ "reapplied to avoid using unset values in the unselected rows.\n"
+ "If unsure please leave this as false."),

READER_ONLY_ALLOW_SARG_TO_FILTER(
"orc.reader.sarg.to.filter",
"orc.reader.sarg.to.filter",
false,
"A boolean flag to determine if a SArg is allowed to become a filter, only for reader."),
READER_ONLY_USE_SELECTED(
"orc.reader.filter.use.selected",
"orc.reader.filter.use.selected",
false,
"A boolean flag to determine if the selected vector is supported by\n"
+ "the reading application, only for reader. If false, the output of the ORC reader "
+ "must have the filter\n"
+ "reapplied to avoid using unset values in the unselected rows.\n"
+ "If unsure please leave this as false."),
ALLOW_PLUGIN_FILTER(
"orc.filter.plugin",
"orc.filter.plugin",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import java.util.Properties;
import java.util.stream.Collectors;

import static org.apache.paimon.CoreOptions.DELETION_VECTORS_ENABLED;
import static org.apache.paimon.types.DataTypeChecks.getFieldTypes;

/** Orc {@link FileFormat}. */
Expand All @@ -69,6 +70,7 @@ public class OrcFileFormat extends FileFormat {
private final org.apache.hadoop.conf.Configuration writerConf;
private final int readBatchSize;
private final int writeBatchSize;
private final boolean deletionVectorsEnabled;

public OrcFileFormat(FormatContext formatContext) {
super(IDENTIFIER);
Expand All @@ -79,6 +81,7 @@ public OrcFileFormat(FormatContext formatContext) {
this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(), v.toString()));
this.readBatchSize = formatContext.readBatchSize();
this.writeBatchSize = formatContext.writeBatchSize();
this.deletionVectorsEnabled = formatContext.options().get(DELETION_VECTORS_ENABLED);
}

@VisibleForTesting
Expand Down Expand Up @@ -113,7 +116,8 @@ public FormatReaderFactory createReaderFactory(
readerConf,
(RowType) refineDataType(projectedRowType),
orcPredicates,
readBatchSize);
readBatchSize,
deletionVectorsEnabled);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,11 @@
public class OrcReaderFactory implements FormatReaderFactory {

protected final Configuration hadoopConfig;

protected final TypeDescription schema;

private final RowType tableType;

protected final RowType tableType;
protected final List<OrcFilters.Predicate> conjunctPredicates;

protected final int batchSize;
protected final boolean deletionVectorsEnabled;

/**
* @param hadoopConfig the hadoop config for orc reader.
Expand All @@ -80,12 +77,14 @@ public OrcReaderFactory(
final org.apache.hadoop.conf.Configuration hadoopConfig,
final RowType readType,
final List<OrcFilters.Predicate> conjunctPredicates,
final int batchSize) {
final int batchSize,
final boolean deletionVectorsEnabled) {
this.hadoopConfig = checkNotNull(hadoopConfig);
this.schema = toOrcType(readType);
this.tableType = readType;
this.conjunctPredicates = checkNotNull(conjunctPredicates);
this.batchSize = batchSize;
this.deletionVectorsEnabled = deletionVectorsEnabled;
}

// ------------------------------------------------------------------------
Expand All @@ -108,7 +107,8 @@ public OrcVectorizedReader createReader(FormatReaderFactory.Context context)
context.filePath(),
0,
context.fileSize(),
context.fileIndex());
context.fileIndex(),
deletionVectorsEnabled);
return new OrcVectorizedReader(orcReader, poolOfBatches);
}

Expand Down Expand Up @@ -258,7 +258,8 @@ private static RecordReader createRecordReader(
org.apache.paimon.fs.Path path,
long splitStart,
long splitLength,
FileIndexResult fileIndexResult)
FileIndexResult fileIndexResult,
boolean deletionVectorsEnabled)
throws IOException {
org.apache.orc.Reader orcReader = createReader(conf, fileIO, path, fileIndexResult);
try {
Expand All @@ -275,12 +276,11 @@ private static RecordReader createRecordReader(
.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
.tolerateMissingSchema(
OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
if (!conjunctPredicates.isEmpty()) {
// TODO fix it , if open this option,future deletion vectors would not work,
// cased by getRowNumber would be changed .
options.useSelected(OrcConf.READER_ONLY_USE_SELECTED.getBoolean(conf));
options.allowSARGToFilter(
OrcConf.READER_ONLY_ALLOW_SARG_TO_FILTER.getBoolean(conf));
if (!conjunctPredicates.isEmpty() && !deletionVectorsEnabled) {
// deletion vectors cannot enable this feature, caused by getRowNumber, which
// would be changed.
options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf));
options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf));
}
// configure filters
if (!conjunctPredicates.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ protected OrcReaderFactory createFormat(
new Configuration(),
Projection.of(selectedFields).project(formatType),
conjunctPredicates,
BATCH_SIZE);
BATCH_SIZE,
false);
}

private RecordReader<InternalRow> createReader(OrcReaderFactory format, Path split)
Expand Down

0 comments on commit f0e4cd7

Please sign in to comment.