diff --git a/lib/trino-hive-formats/pom.xml b/lib/trino-hive-formats/pom.xml index c8d6fa745f58..72d911457f39 100644 --- a/lib/trino-hive-formats/pom.xml +++ b/lib/trino-hive-formats/pom.xml @@ -24,6 +24,12 @@ 1.11.9 + + com.amazon.ion + ion-java-path-extraction + 1.5.0 + + com.fasterxml.jackson.core jackson-core diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java index 422ba16fb712..4463bc79c500 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoder.java @@ -15,7 +15,6 @@ import com.amazon.ion.IonException; import com.amazon.ion.IonReader; -import io.trino.spi.PageBuilder; public interface IonDecoder { @@ -25,6 +24,6 @@ public interface IonDecoder * Expects that the calling code has called IonReader.next() * to position the reader at the value to be decoded. */ - void decode(IonReader reader, PageBuilder builder) + void decode(IonReader reader) throws IonException; } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderConfig.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderConfig.java new file mode 100644 index 000000000000..755d00ae9010 --- /dev/null +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderConfig.java @@ -0,0 +1,50 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hive.formats.ion; + +import java.util.Map; + +/** + * Captures the SerDe properties that affect decoding. + * + * @param pathExtractors Map of column name => ion paths + * for each entry in the map, the value bound to the column will be the result + * of extracting the given search path. + * @param strictTyping whether the path extractions should enforce type expectations. + * this only affects type checking of path extractions; any value decoded into + * a trino column will be correctly typed or coerced for that column. + * @param caseSensitive whether field name matching should be case-sensitive or not. + */ +public record IonDecoderConfig(Map pathExtractors, Boolean strictTyping, Boolean caseSensitive) +{ + static IonDecoderConfig defaultConfig() + { + return new IonDecoderConfig(Map.of(), false, false); + } + + IonDecoderConfig withStrictTyping() + { + return new IonDecoderConfig(pathExtractors, true, caseSensitive); + } + + IonDecoderConfig withCaseSensitive() + { + return new IonDecoderConfig(pathExtractors, strictTyping, true); + } + + IonDecoderConfig withPathExtractors(Map pathExtractors) + { + return new IonDecoderConfig(pathExtractors, strictTyping, caseSensitive); + } +} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java index 255882bb43ef..f3051ef620b7 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java @@ -19,11 +19,15 @@ import com.amazon.ion.IonWriter; import com.amazon.ion.Timestamp; import com.amazon.ion.system.IonTextWriterBuilder; +import com.amazon.ionpathextraction.PathExtractor; +import com.amazon.ionpathextraction.PathExtractorBuilder; +import com.amazon.ionpathextraction.pathcomponents.Text; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.slice.Slices; import io.trino.hive.formats.DistinctMapKeys; import io.trino.hive.formats.line.Column; +import io.trino.spi.PageBuilder; import io.trino.spi.StandardErrorCode; import io.trino.spi.TrinoException; import io.trino.spi.block.ArrayBlockBuilder; @@ -65,8 +69,8 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Optional; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.IntFunction; public class IonDecoderFactory @@ -79,39 +83,66 @@ private IonDecoderFactory() {} * The decoder expects to decode the _current_ Ion Value. * It also expects that the calling code will manage the PageBuilder. *

- * - * @param strictPathing controls behavior when encountering mistyped - * values during path extraction. That is outside (before), the trino - * type model. The ion-hive-serde used path extraction for navigating - * the top-level-values even if no path extractions were configured. - * So, in absence of support for path extraction configurations this - * still affects the handling of mistyped top-level-values. - * todo: revisit the above once path extraction config is supported. */ - public static IonDecoder buildDecoder(List columns, boolean strictPathing) + public static IonDecoder buildDecoder( + List columns, + IonDecoderConfig decoderConfig, + PageBuilder pageBuilder) { - RowDecoder rowDecoder = RowDecoder.forFields( - columns.stream() - .map(c -> new RowType.Field(Optional.of(c.name()), c.type())) - .toList()); + PathExtractorBuilder extractorBuilder = PathExtractorBuilder.standard() + .withMatchCaseInsensitive(!decoderConfig.caseSensitive()); - return (ionReader, pageBuilder) -> { - IonType ionType = ionReader.getType(); - IntFunction blockSelector = pageBuilder::getBlockBuilder; + for (int pos = 0; pos < columns.size(); pos++) { + String name = columns.get(pos).name(); + BlockDecoder decoder = decoderForType(columns.get(pos).type()); + BiFunction callback = callbackFor(decoder, pos); - if (ionType == IonType.STRUCT && !ionReader.isNullValue()) { - rowDecoder.decode(ionReader, blockSelector); + String extractionPath = decoderConfig.pathExtractors().get(name); + if (extractionPath == null) { + extractorBuilder.withSearchPath(List.of(new Text(name)), callback); } - else if (ionType == IonType.STRUCT || ionType == IonType.NULL || !strictPathing) { - rowDecoder.appendNulls(blockSelector); + else { + extractorBuilder.withSearchPath(extractionPath, callback); + } + } + PathExtractor extractor = extractorBuilder.buildStrict(decoderConfig.strictTyping()); + PageExtractionContext context = new PageExtractionContext(pageBuilder, new boolean[columns.size()]); + + return (ionReader) -> { + extractor.matchCurrentValue(ionReader, context); + context.completeRowAndReset(); + }; + } + + private static BiFunction callbackFor(BlockDecoder decoder, int pos) + { + return (ionReader, context) -> { + BlockBuilder blockBuilder = context.pageBuilder.getBlockBuilder(pos); + if (context.encountered[pos]) { + blockBuilder.resetTo(blockBuilder.getPositionCount() - 1); } else { - throw new TrinoException(StandardErrorCode.GENERIC_USER_ERROR, - "Top-level-value of IonType %s is not valid with strict typing.".formatted(ionType)); + context.encountered[pos] = true; } + + decoder.decode(ionReader, context.pageBuilder.getBlockBuilder(pos)); + return 0; }; } + private record PageExtractionContext(PageBuilder pageBuilder, boolean[] encountered) + { + private void completeRowAndReset() + { + for (int i = 0; i < encountered.length; i++) { + if (!encountered[i]) { + pageBuilder.getBlockBuilder(i).appendNull(); + } + encountered[i] = false; + } + } + } + private interface BlockDecoder { void decode(IonReader reader, BlockBuilder builder); @@ -169,10 +200,6 @@ private static BlockDecoder wrapDecoder(BlockDecoder decoder, Type trinoType, Io }; } - /** - * The RowDecoder is used as the BlockDecoder for nested RowTypes and is used for decoding - * top-level structs into pages. - */ private record RowDecoder(Map fieldPositions, List fieldDecoders) implements BlockDecoder { @@ -224,13 +251,6 @@ private void decode(IonReader ionReader, IntFunction blockSelector ionReader.stepOut(); } - - private void appendNulls(IntFunction blockSelector) - { - for (int i = 0; i < fieldDecoders.size(); i++) { - blockSelector.apply(i).appendNull(); - } - } } private static class MapDecoder diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java index ead606fe3e1b..3e7dd843c7af 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java @@ -19,6 +19,7 @@ import com.amazon.ion.IonWriter; import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.system.IonSystemBuilder; +import com.amazon.ionpathextraction.exceptions.PathExtractionException; import com.google.common.collect.ImmutableMap; import io.trino.hive.formats.line.Column; import io.trino.spi.Page; @@ -48,6 +49,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.stream.IntStream; import static io.trino.hive.formats.FormatTestUtils.assertColumnValuesEquals; @@ -108,18 +110,20 @@ public void testVariousTlvsStrict() throws IOException { RowType rowType = RowType.rowType(field("foo", INTEGER), field("bar", VARCHAR)); + IonDecoderConfig decoderConfig = IonDecoderConfig.defaultConfig().withStrictTyping(); List expected = new ArrayList<>(2); expected.add(null); expected.add(null); assertValues(rowType, + decoderConfig, // empty struct, untyped null, struct null, and explicitly typed null null, phew. "{} null null.struct null.null", expected, expected, expected, expected); - Assertions.assertThrows(TrinoException.class, () -> { - assertValues(rowType, "null.int", expected); - assertValues(rowType, "[]", expected); + Assertions.assertThrows(PathExtractionException.class, () -> { + assertValues(rowType, decoderConfig, "null.int", expected); + assertValues(rowType, decoderConfig, "[]", expected); }); } @@ -133,7 +137,6 @@ public void testVariousTlvsLax() expected.add(null); assertValues(rowType, - false, "{} 37 null.list null.struct null spam false", expected, expected, expected, expected, expected, expected, expected); } @@ -236,6 +239,20 @@ public void testCaseInsensitivityOfDuplicateKeys() List.of(5, "baz")); } + @Test + public void testCaseSensitiveExtraction() + throws IOException + { + assertValues( + RowType.rowType( + field("Foo", INTEGER), + field("Bar", VARCHAR)), + IonDecoderConfig.defaultConfig().withCaseSensitive(), + // assumes duplicate fields overwrite, which is asserted in the test above + "{ Bar: baz, bar: blegh, Foo: 31, foo: 67 }", + List.of(31, "baz")); + } + @Test public void testStructWithNullAndMissingValues() throws IOException @@ -448,6 +465,53 @@ public void testNumbersTooBigForDecimal128() } } + @Test + public void testPathExtraction() + throws IOException + { + Map pathExtractions = Map.of("bar", "(foo bar)", "baz", "(foo baz)"); + assertValues( + RowType.rowType(field("qux", BOOLEAN), field("bar", INTEGER), field("baz", VARCHAR)), + IonDecoderConfig.defaultConfig().withPathExtractors(pathExtractions), + "{ foo: { bar: 31, baz: quux }, qux: true }", + List.of(true, 31, "quux")); + } + + @Test + public void testNonStructTlvPathExtraction() + throws IOException + { + Map pathExtractions = Map.of("tlv", "()"); + assertValues( + RowType.rowType(field("tlv", new ArrayType(INTEGER))), + IonDecoderConfig.defaultConfig().withPathExtractors(pathExtractions), + "[13, 17] [19, 23]", + List.of(List.of(13, 17)), + List.of(List.of(19, 23))); + } + + /** + * Shows how users can configure mapping sequence positions from Ion values to a Trino row. + */ + @Test + public void testPositionalPathExtraction() + throws IOException + { + Map pathExtractions = Map.of( + "foo", "(0)", + "bar", "(1)"); + RowType rowType = RowType.rowType( + field("foo", INTEGER), + field("bar", VARCHAR)); + + assertValues( + rowType, + IonDecoderConfig.defaultConfig().withPathExtractors(pathExtractions), + "[13, baz] [17, qux]", + List.of(13, "baz"), + List.of(17, "qux")); + } + @Test public void testEncode() throws IOException @@ -531,10 +595,10 @@ public void testEncodeWithNullNestedField() private void assertValues(RowType rowType, String ionText, List... expected) throws IOException { - assertValues(rowType, true, ionText, expected); + assertValues(rowType, IonDecoderConfig.defaultConfig(), ionText, expected); } - private void assertValues(RowType rowType, Boolean strictTlvs, String ionText, List... expected) + private void assertValues(RowType rowType, IonDecoderConfig config, String ionText, List... expected) throws IOException { List fields = rowType.getFields(); @@ -545,14 +609,14 @@ private void assertValues(RowType rowType, Boolean strictTlvs, String ionText, L return new Column(field.getName().get(), field.getType(), i); }) .toList(); - IonDecoder decoder = IonDecoderFactory.buildDecoder(columns, strictTlvs); PageBuilder pageBuilder = new PageBuilder(expected.length, rowType.getFields().stream().map(RowType.Field::getType).toList()); + IonDecoder decoder = IonDecoderFactory.buildDecoder(columns, config, pageBuilder); try (IonReader ionReader = IonReaderBuilder.standard().build(ionText)) { for (int i = 0; i < expected.length; i++) { assertThat(ionReader.next()).isNotNull(); pageBuilder.declarePosition(); - decoder.decode(ionReader, pageBuilder); + decoder.decode(ionReader); } assertThat(ionReader.next()).isNull(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSource.java index 5fbc9549429a..b2472b78ea58 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSource.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSource.java @@ -112,7 +112,7 @@ private boolean readNextValue() } pageBuilder.declarePosition(); - decoder.decode(ionReader, pageBuilder); + decoder.decode(ionReader); return true; } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSourceFactory.java index 2bc98396f76e..d7b051c26896 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonPageSourceFactory.java @@ -26,6 +26,7 @@ import io.trino.hive.formats.compression.Codec; import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.ion.IonDecoder; +import io.trino.hive.formats.ion.IonDecoderConfig; import io.trino.hive.formats.ion.IonDecoderFactory; import io.trino.hive.formats.line.Column; import io.trino.plugin.hive.AcidInfo; @@ -62,9 +63,6 @@ import static io.trino.plugin.hive.ion.IonReaderOptions.FAIL_ON_OVERFLOW_PROPERTY_WITH_COLUMN; import static io.trino.plugin.hive.ion.IonReaderOptions.IGNORE_MALFORMED; import static io.trino.plugin.hive.ion.IonReaderOptions.IGNORE_MALFORMED_DEFAULT; -import static io.trino.plugin.hive.ion.IonReaderOptions.PATH_EXTRACTION_CASE_SENSITIVITY; -import static io.trino.plugin.hive.ion.IonReaderOptions.PATH_EXTRACTION_CASE_SENSITIVITY_DEFAULT; -import static io.trino.plugin.hive.ion.IonReaderOptions.PATH_EXTRACTOR_PROPERTY; import static io.trino.plugin.hive.ion.IonWriterOptions.ION_SERIALIZATION_AS_NULL_DEFAULT; import static io.trino.plugin.hive.ion.IonWriterOptions.ION_SERIALIZATION_AS_NULL_PROPERTY; import static io.trino.plugin.hive.ion.IonWriterOptions.ION_SERIALIZATION_AS_PROPERTY; @@ -82,14 +80,12 @@ public class IonPageSourceFactory private static final Map TABLE_PROPERTIES = ImmutableMap.of( FAIL_ON_OVERFLOW_PROPERTY, FAIL_ON_OVERFLOW_PROPERTY_DEFAULT, IGNORE_MALFORMED, IGNORE_MALFORMED_DEFAULT, - PATH_EXTRACTION_CASE_SENSITIVITY, PATH_EXTRACTION_CASE_SENSITIVITY_DEFAULT, ION_TIMESTAMP_OFFSET_PROPERTY, ION_TIMESTAMP_OFFSET_DEFAULT, ION_SERIALIZATION_AS_NULL_PROPERTY, ION_SERIALIZATION_AS_NULL_DEFAULT); private static final Set COLUMN_PROPERTIES = ImmutableSet.of( Pattern.compile(FAIL_ON_OVERFLOW_PROPERTY_WITH_COLUMN), - Pattern.compile(ION_SERIALIZATION_AS_PROPERTY), - Pattern.compile(PATH_EXTRACTOR_PROPERTY)); + Pattern.compile(ION_SERIALIZATION_AS_PROPERTY)); @Inject public IonPageSourceFactory(TrinoFileSystemFactory trinoFileSystemFactory, HiveConfig hiveConfig) @@ -172,8 +168,9 @@ public Optional createPageSource( List decoderColumns = projectedReaderColumns.stream() .map(hc -> new Column(hc.getName(), hc.getType(), hc.getBaseHiveColumnIndex())) .toList(); - boolean strictPathing = IonReaderOptions.useStrictPathTyping(schema.serdeProperties()); - IonDecoder decoder = IonDecoderFactory.buildDecoder(decoderColumns, strictPathing); + + IonDecoderConfig decoderConfig = IonReaderOptions.decoderConfigFor(schema.serdeProperties()); + IonDecoder decoder = IonDecoderFactory.buildDecoder(decoderColumns, decoderConfig, pageBuilder); IonPageSource pageSource = new IonPageSource(ionReader, countingInputStream::getCount, decoder, pageBuilder); return Optional.of(new ReaderPageSource(pageSource, readerProjections)); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonReaderOptions.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonReaderOptions.java index 5c510c2a5d09..a87a2280fc67 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonReaderOptions.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/ion/IonReaderOptions.java @@ -13,13 +13,18 @@ */ package io.trino.plugin.hive.ion; +import com.google.common.collect.ImmutableMap; +import io.trino.hive.formats.ion.IonDecoderConfig; + import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public final class IonReaderOptions { public static final String STRICT_PATH_TYPING_PROPERTY = "ion.path_extractor.strict"; public static final String STRICT_PATH_TYPING_DEFAULT = "false"; - public static final String PATH_EXTRACTOR_PROPERTY = "ion.\\w+.path_extractor"; + public static final String PATH_EXTRACTOR_PROPERTY = "ion.(\\w+).path_extractor"; public static final String PATH_EXTRACTION_CASE_SENSITIVITY = "ion.path_extractor.case_sensitive"; public static final String PATH_EXTRACTION_CASE_SENSITIVITY_DEFAULT = "false"; public static final String FAIL_ON_OVERFLOW_PROPERTY_WITH_COLUMN = "ion.\\w+.fail_on_overflow"; @@ -28,11 +33,27 @@ public final class IonReaderOptions public static final String IGNORE_MALFORMED = "ion.ignore_malformed"; public static final String IGNORE_MALFORMED_DEFAULT = "false"; + private static final Pattern pathExtractorPattern = Pattern.compile(PATH_EXTRACTOR_PROPERTY); + private IonReaderOptions() {} - static boolean useStrictPathTyping(Map propertiesMap) + public static IonDecoderConfig decoderConfigFor(Map propertiesMap) { - return Boolean.parseBoolean( + ImmutableMap.Builder extractionsBuilder = ImmutableMap.builder(); + + for (Map.Entry property : propertiesMap.entrySet()) { + Matcher matcher = pathExtractorPattern.matcher(property.getKey()); + if (matcher.matches()) { + extractionsBuilder.put(matcher.group(1), property.getValue()); + } + } + + Boolean strictTyping = Boolean.parseBoolean( propertiesMap.getOrDefault(STRICT_PATH_TYPING_PROPERTY, STRICT_PATH_TYPING_DEFAULT)); + Boolean caseSensitive = Boolean.parseBoolean( + propertiesMap.getOrDefault(PATH_EXTRACTION_CASE_SENSITIVITY, PATH_EXTRACTION_CASE_SENSITIVITY_DEFAULT)); + + // n.b.: the hive serde overwrote when there were duplicate extractors defined for a column + return new IonDecoderConfig(extractionsBuilder.buildOrThrow(), strictTyping, caseSensitive); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/ion/IonPageSourceSmokeTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/ion/IonPageSourceSmokeTest.java index 58ba7e743e13..a806ad4ace26 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/ion/IonPageSourceSmokeTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/ion/IonPageSourceSmokeTest.java @@ -67,8 +67,6 @@ import static io.trino.plugin.hive.ion.IonReaderOptions.FAIL_ON_OVERFLOW_PROPERTY_DEFAULT; import static io.trino.plugin.hive.ion.IonReaderOptions.IGNORE_MALFORMED; import static io.trino.plugin.hive.ion.IonReaderOptions.IGNORE_MALFORMED_DEFAULT; -import static io.trino.plugin.hive.ion.IonReaderOptions.PATH_EXTRACTION_CASE_SENSITIVITY; -import static io.trino.plugin.hive.ion.IonReaderOptions.PATH_EXTRACTION_CASE_SENSITIVITY_DEFAULT; import static io.trino.plugin.hive.ion.IonWriterOptions.BINARY_ENCODING; import static io.trino.plugin.hive.ion.IonWriterOptions.ION_ENCODING_PROPERTY; import static io.trino.plugin.hive.ion.IonWriterOptions.ION_SERIALIZATION_AS_NULL_DEFAULT; @@ -130,16 +128,39 @@ public void testStrictAndLaxPathTyping() throws IOException { TestFixture defaultFixture = new TestFixture(FOO_BAR_COLUMNS); - defaultFixture.assertRowCount("37 null.timestamp", 2); + defaultFixture.assertRowCount("37 null.timestamp []", 3); TestFixture laxFixture = new TestFixture(FOO_BAR_COLUMNS); laxFixture.withStrictPathTyping("false"); - laxFixture.assertRowCount("37 null.timestamp", 2); + laxFixture.assertRowCount("37 null.timestamp []", 3); TestFixture strictFixture = new TestFixture(FOO_BAR_COLUMNS); strictFixture.withStrictPathTyping("true"); + Assertions.assertThrows(TrinoException.class, () -> - strictFixture.assertRowCount("37 null.timestamp", 2)); + strictFixture.assertRowCount("37 null.timestamp []", 3)); + } + + @Test + public void testPathExtraction() + throws IOException + { + TestFixture fixture = new TestFixture(List.of(toHiveBaseColumnHandle("bar", INTEGER, 0))) + .withSerdeProperty("ion.bar.path_extractor", "(foo bar)"); + + // these would result in errors if we tried to extract the bar field from the root instead of the nested bar + fixture.assertRowCount("{ foo: { bar: 17 }, bar: not_this_bar } { foo: { bar: 31 }, bar: not_this_bar }", 2); + } + + @Test + public void testCaseSensitive() + throws IOException + { + TestFixture fixture = new TestFixture(List.of(toHiveBaseColumnHandle("bar", INTEGER, 0))) + .withSerdeProperty("ion.path_extractor.case_sensitive", "true"); + + // this would result in errors if we tried to extract the BAR field + fixture.assertRowCount("{ BAR: should_be_skipped } { bar: 17 }", 2); } @Test @@ -179,7 +200,6 @@ private static Stream> propertiesWithDefaults() { return Stream.of( entry(FAIL_ON_OVERFLOW_PROPERTY, FAIL_ON_OVERFLOW_PROPERTY_DEFAULT), - entry(PATH_EXTRACTION_CASE_SENSITIVITY, PATH_EXTRACTION_CASE_SENSITIVITY_DEFAULT), entry(IGNORE_MALFORMED, IGNORE_MALFORMED_DEFAULT), entry(ION_TIMESTAMP_OFFSET_PROPERTY, ION_TIMESTAMP_OFFSET_DEFAULT), entry(ION_SERIALIZATION_AS_NULL_PROPERTY, ION_SERIALIZATION_AS_NULL_DEFAULT)); @@ -189,7 +209,6 @@ private static Stream> propertiesWithValues() { return Stream.of( entry(FAIL_ON_OVERFLOW_PROPERTY, "false"), - entry(PATH_EXTRACTION_CASE_SENSITIVITY, "true"), entry(IGNORE_MALFORMED, "true"), entry(ION_TIMESTAMP_OFFSET_PROPERTY, "01:00"), entry(ION_SERIALIZATION_AS_NULL_PROPERTY, "TYPED"), @@ -197,8 +216,8 @@ private static Stream> propertiesWithValues() // Any presence of these properties in the schema will result in an empty PageSource, // regardless of their assigned values. entry("ion.foo.fail_on_overflow", "property_value"), - entry("ion.foo.serialize_as", "property_value"), - entry("ion.foo.path_extractor", "property_value")); + entry("ion.foo.serialize_as", "property_value")); + //entry("ion.foo.path_extractor", "property_value")); } private static Map.Entry entry(String key, String value) @@ -212,7 +231,7 @@ void testPropertiesWithValues(Map.Entry property) throws IOException { TestFixture fixture = new TestFixture(FOO_BAR_COLUMNS) - .withSerdeProperties(property); + .withSerdeProperty(property.getKey(), property.getValue()); fixture.writeIonTextFile("{ foo: 31, bar: baz } { foo: 31, bar: \"baz\" }"); Optional connectorPageSource = fixture.getOptionalPageSource(); @@ -221,11 +240,11 @@ void testPropertiesWithValues(Map.Entry property) @ParameterizedTest @MethodSource("propertiesWithDefaults") - void testPropertiesWithDefaults(Map.Entry propertyEntry) + void testPropertiesWithDefaults(Map.Entry property) throws IOException { TestFixture fixture = new TestFixture(FOO_BAR_COLUMNS) - .withSerdeProperties(propertyEntry); + .withSerdeProperty(property.getKey(), property.getValue()); fixture.assertRowCount("{ foo: 31, bar: baz } { foo: 31, bar: \"baz\" }", 2); } @@ -345,10 +364,9 @@ TestFixture withStrictPathTyping(String strict) return this; } - TestFixture withSerdeProperties(Map.Entry propertyEntry) + TestFixture withSerdeProperty(String key, String value) { - // The value of the property is just placeholder - tableProperties.put(propertyEntry.getKey(), propertyEntry.getValue()); + tableProperties.put(key, value); return this; }