Skip to content

Commit

Permalink
[orc] Enable READER_USE_SELECTED only when deletion vectors are disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi committed Nov 12, 2024
1 parent aee2b8b commit f0e4cd7
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 31 deletions.
15 changes: 0 additions & 15 deletions paimon-format/src/main/java/org/apache/orc/OrcConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -305,21 +305,6 @@ public enum OrcConf {
+ "must have the filter\n"
+ "reapplied to avoid using unset values in the unselected rows.\n"
+ "If unsure please leave this as false."),

READER_ONLY_ALLOW_SARG_TO_FILTER(
"orc.reader.sarg.to.filter",
"orc.reader.sarg.to.filter",
false,
"A boolean flag to determine if a SArg is allowed to become a filter, only for reader."),
READER_ONLY_USE_SELECTED(
"orc.reader.filter.use.selected",
"orc.reader.filter.use.selected",
false,
"A boolean flag to determine if the selected vector is supported by\n"
+ "the reading application, only for reader. If false, the output of the ORC reader "
+ "must have the filter\n"
+ "reapplied to avoid using unset values in the unselected rows.\n"
+ "If unsure please leave this as false."),
ALLOW_PLUGIN_FILTER(
"orc.filter.plugin",
"orc.filter.plugin",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import java.util.Properties;
import java.util.stream.Collectors;

import static org.apache.paimon.CoreOptions.DELETION_VECTORS_ENABLED;
import static org.apache.paimon.types.DataTypeChecks.getFieldTypes;

/** Orc {@link FileFormat}. */
Expand All @@ -69,6 +70,7 @@ public class OrcFileFormat extends FileFormat {
private final org.apache.hadoop.conf.Configuration writerConf;
private final int readBatchSize;
private final int writeBatchSize;
private final boolean deletionVectorsEnabled;

public OrcFileFormat(FormatContext formatContext) {
super(IDENTIFIER);
Expand All @@ -79,6 +81,7 @@ public OrcFileFormat(FormatContext formatContext) {
this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(), v.toString()));
this.readBatchSize = formatContext.readBatchSize();
this.writeBatchSize = formatContext.writeBatchSize();
this.deletionVectorsEnabled = formatContext.options().get(DELETION_VECTORS_ENABLED);
}

@VisibleForTesting
Expand Down Expand Up @@ -113,7 +116,8 @@ public FormatReaderFactory createReaderFactory(
readerConf,
(RowType) refineDataType(projectedRowType),
orcPredicates,
readBatchSize);
readBatchSize,
deletionVectorsEnabled);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,11 @@
public class OrcReaderFactory implements FormatReaderFactory {

protected final Configuration hadoopConfig;

protected final TypeDescription schema;

private final RowType tableType;

protected final RowType tableType;
protected final List<OrcFilters.Predicate> conjunctPredicates;

protected final int batchSize;
protected final boolean deletionVectorsEnabled;

/**
* @param hadoopConfig the hadoop config for orc reader.
Expand All @@ -80,12 +77,14 @@ public OrcReaderFactory(
final org.apache.hadoop.conf.Configuration hadoopConfig,
final RowType readType,
final List<OrcFilters.Predicate> conjunctPredicates,
final int batchSize) {
final int batchSize,
final boolean deletionVectorsEnabled) {
this.hadoopConfig = checkNotNull(hadoopConfig);
this.schema = toOrcType(readType);
this.tableType = readType;
this.conjunctPredicates = checkNotNull(conjunctPredicates);
this.batchSize = batchSize;
this.deletionVectorsEnabled = deletionVectorsEnabled;
}

// ------------------------------------------------------------------------
Expand All @@ -108,7 +107,8 @@ public OrcVectorizedReader createReader(FormatReaderFactory.Context context)
context.filePath(),
0,
context.fileSize(),
context.fileIndex());
context.fileIndex(),
deletionVectorsEnabled);
return new OrcVectorizedReader(orcReader, poolOfBatches);
}

Expand Down Expand Up @@ -258,7 +258,8 @@ private static RecordReader createRecordReader(
org.apache.paimon.fs.Path path,
long splitStart,
long splitLength,
FileIndexResult fileIndexResult)
FileIndexResult fileIndexResult,
boolean deletionVectorsEnabled)
throws IOException {
org.apache.orc.Reader orcReader = createReader(conf, fileIO, path, fileIndexResult);
try {
Expand All @@ -275,12 +276,11 @@ private static RecordReader createRecordReader(
.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
.tolerateMissingSchema(
OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
if (!conjunctPredicates.isEmpty()) {
// TODO fix it , if open this option,future deletion vectors would not work,
// cased by getRowNumber would be changed .
options.useSelected(OrcConf.READER_ONLY_USE_SELECTED.getBoolean(conf));
options.allowSARGToFilter(
OrcConf.READER_ONLY_ALLOW_SARG_TO_FILTER.getBoolean(conf));
if (!conjunctPredicates.isEmpty() && !deletionVectorsEnabled) {
// deletion vectors cannot enable this feature, caused by getRowNumber, which
// would be changed.
options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf));
options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf));
}
// configure filters
if (!conjunctPredicates.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ protected OrcReaderFactory createFormat(
new Configuration(),
Projection.of(selectedFields).project(formatType),
conjunctPredicates,
BATCH_SIZE);
BATCH_SIZE,
false);
}

private RecordReader<InternalRow> createReader(OrcReaderFactory format, Path split)
Expand Down

0 comments on commit f0e4cd7

Please sign in to comment.