diff --git a/lib/trino-hive-formats/pom.xml b/lib/trino-hive-formats/pom.xml
index c8d6fa745f58..72d911457f39 100644
--- a/lib/trino-hive-formats/pom.xml
+++ b/lib/trino-hive-formats/pom.xml
@@ -24,6 +24,12 @@
1.11.9
+
+ com.amazon.ion
+ ion-java-path-extraction
+ 1.5.0
+
+
com.fasterxml.jackson.corejackson-core
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java
index 422ba16fb712..4463bc79c500 100644
--- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java
@@ -15,7 +15,6 @@
import com.amazon.ion.IonException;
import com.amazon.ion.IonReader;
-import io.trino.spi.PageBuilder;
public interface IonDecoder
{
@@ -25,6 +24,6 @@ public interface IonDecoder
* Expects that the calling code has called IonReader.next()
* to position the reader at the value to be decoded.
*/
- void decode(IonReader reader, PageBuilder builder)
+ void decode(IonReader reader)
throws IonException;
}
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderConfig.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderConfig.java
new file mode 100644
index 000000000000..755d00ae9010
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderConfig.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats.ion;
+
+import java.util.Map;
+
+/**
+ * Captures the SerDe properties that affect decoding.
+ *
+ * @param pathExtractors Map of column name => ion paths
+ * for each entry in the map, the value bound to the column will be the result
+ * of extracting the given search path.
+ * @param strictTyping whether the path extractions should enforce type expectations.
+ * this only affects type checking of path extractions; any value decoded into
+ * a trino column will be correctly typed or coerced for that column.
+ * @param caseSensitive whether field name matching should be case-sensitive or not.
+ */
+public record IonDecoderConfig(Map pathExtractors, Boolean strictTyping, Boolean caseSensitive)
+{
+ static IonDecoderConfig defaultConfig()
+ {
+ return new IonDecoderConfig(Map.of(), false, false);
+ }
+
+ IonDecoderConfig withStrictTyping()
+ {
+ return new IonDecoderConfig(pathExtractors, true, caseSensitive);
+ }
+
+ IonDecoderConfig withCaseSensitive()
+ {
+ return new IonDecoderConfig(pathExtractors, strictTyping, true);
+ }
+
+ IonDecoderConfig withPathExtractors(Map pathExtractors)
+ {
+ return new IonDecoderConfig(pathExtractors, strictTyping, caseSensitive);
+ }
+}
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java
index 255882bb43ef..f3051ef620b7 100644
--- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java
@@ -19,11 +19,15 @@
import com.amazon.ion.IonWriter;
import com.amazon.ion.Timestamp;
import com.amazon.ion.system.IonTextWriterBuilder;
+import com.amazon.ionpathextraction.PathExtractor;
+import com.amazon.ionpathextraction.PathExtractorBuilder;
+import com.amazon.ionpathextraction.pathcomponents.Text;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;
import io.trino.hive.formats.DistinctMapKeys;
import io.trino.hive.formats.line.Column;
+import io.trino.spi.PageBuilder;
import io.trino.spi.StandardErrorCode;
import io.trino.spi.TrinoException;
import io.trino.spi.block.ArrayBlockBuilder;
@@ -65,8 +69,8 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
-import java.util.Optional;
import java.util.Set;
+import java.util.function.BiFunction;
import java.util.function.IntFunction;
public class IonDecoderFactory
@@ -79,39 +83,66 @@ private IonDecoderFactory() {}
* The decoder expects to decode the _current_ Ion Value.
* It also expects that the calling code will manage the PageBuilder.
*
- *
- * @param strictPathing controls behavior when encountering mistyped
- * values during path extraction. That is outside (before), the trino
- * type model. The ion-hive-serde used path extraction for navigating
- * the top-level-values even if no path extractions were configured.
- * So, in absence of support for path extraction configurations this
- * still affects the handling of mistyped top-level-values.
- * todo: revisit the above once path extraction config is supported.
*/
- public static IonDecoder buildDecoder(List columns, boolean strictPathing)
+ public static IonDecoder buildDecoder(
+ List columns,
+ IonDecoderConfig decoderConfig,
+ PageBuilder pageBuilder)
{
- RowDecoder rowDecoder = RowDecoder.forFields(
- columns.stream()
- .map(c -> new RowType.Field(Optional.of(c.name()), c.type()))
- .toList());
+ PathExtractorBuilder extractorBuilder = PathExtractorBuilder.standard()
+ .withMatchCaseInsensitive(!decoderConfig.caseSensitive());
- return (ionReader, pageBuilder) -> {
- IonType ionType = ionReader.getType();
- IntFunction blockSelector = pageBuilder::getBlockBuilder;
+ for (int pos = 0; pos < columns.size(); pos++) {
+ String name = columns.get(pos).name();
+ BlockDecoder decoder = decoderForType(columns.get(pos).type());
+ BiFunction callback = callbackFor(decoder, pos);
- if (ionType == IonType.STRUCT && !ionReader.isNullValue()) {
- rowDecoder.decode(ionReader, blockSelector);
+ String extractionPath = decoderConfig.pathExtractors().get(name);
+ if (extractionPath == null) {
+ extractorBuilder.withSearchPath(List.of(new Text(name)), callback);
}
- else if (ionType == IonType.STRUCT || ionType == IonType.NULL || !strictPathing) {
- rowDecoder.appendNulls(blockSelector);
+ else {
+ extractorBuilder.withSearchPath(extractionPath, callback);
+ }
+ }
+ PathExtractor extractor = extractorBuilder.buildStrict(decoderConfig.strictTyping());
+ PageExtractionContext context = new PageExtractionContext(pageBuilder, new boolean[columns.size()]);
+
+ return (ionReader) -> {
+ extractor.matchCurrentValue(ionReader, context);
+ context.completeRowAndReset();
+ };
+ }
+
+ private static BiFunction callbackFor(BlockDecoder decoder, int pos)
+ {
+ return (ionReader, context) -> {
+ BlockBuilder blockBuilder = context.pageBuilder.getBlockBuilder(pos);
+ if (context.encountered[pos]) {
+ blockBuilder.resetTo(blockBuilder.getPositionCount() - 1);
}
else {
- throw new TrinoException(StandardErrorCode.GENERIC_USER_ERROR,
- "Top-level-value of IonType %s is not valid with strict typing.".formatted(ionType));
+ context.encountered[pos] = true;
}
+
+ decoder.decode(ionReader, context.pageBuilder.getBlockBuilder(pos));
+ return 0;
};
}
+ private record PageExtractionContext(PageBuilder pageBuilder, boolean[] encountered)
+ {
+ private void completeRowAndReset()
+ {
+ for (int i = 0; i < encountered.length; i++) {
+ if (!encountered[i]) {
+ pageBuilder.getBlockBuilder(i).appendNull();
+ }
+ encountered[i] = false;
+ }
+ }
+ }
+
private interface BlockDecoder
{
void decode(IonReader reader, BlockBuilder builder);
@@ -169,10 +200,6 @@ private static BlockDecoder wrapDecoder(BlockDecoder decoder, Type trinoType, Io
};
}
- /**
- * The RowDecoder is used as the BlockDecoder for nested RowTypes and is used for decoding
- * top-level structs into pages.
- */
private record RowDecoder(Map fieldPositions, List fieldDecoders)
implements BlockDecoder
{
@@ -224,13 +251,6 @@ private void decode(IonReader ionReader, IntFunction blockSelector
ionReader.stepOut();
}
-
- private void appendNulls(IntFunction blockSelector)
- {
- for (int i = 0; i < fieldDecoders.size(); i++) {
- blockSelector.apply(i).appendNull();
- }
- }
}
private static class MapDecoder
diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java
index ead606fe3e1b..3e7dd843c7af 100644
--- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java
@@ -19,6 +19,7 @@
import com.amazon.ion.IonWriter;
import com.amazon.ion.system.IonReaderBuilder;
import com.amazon.ion.system.IonSystemBuilder;
+import com.amazon.ionpathextraction.exceptions.PathExtractionException;
import com.google.common.collect.ImmutableMap;
import io.trino.hive.formats.line.Column;
import io.trino.spi.Page;
@@ -48,6 +49,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
import java.util.stream.IntStream;
import static io.trino.hive.formats.FormatTestUtils.assertColumnValuesEquals;
@@ -108,18 +110,20 @@ public void testVariousTlvsStrict()
throws IOException
{
RowType rowType = RowType.rowType(field("foo", INTEGER), field("bar", VARCHAR));
+ IonDecoderConfig decoderConfig = IonDecoderConfig.defaultConfig().withStrictTyping();
List