From 4b95129f962b0dfa2b1119f54d096c3a8464cfa5 Mon Sep 17 00:00:00 2001
From: dan-s1
Date: Mon, 16 Oct 2023 18:14:49 +0000
Subject: [PATCH] NIFI-11197 Added YamlTreeReader
- Adjusted JsonTreeReader implementation for sharing common Jackson components
This closes #7665
Signed-off-by: David Handermann
---
.../json/AbstractJsonRowRecordReader.java | 41 +-
.../apache/nifi/json/JsonParserFactory.java | 42 +
.../nifi/json/JsonPathRowRecordReader.java | 2 +-
.../apache/nifi/json/JsonRecordSource.java | 28 +-
.../nifi/json/JsonTreeRowRecordReader.java | 18 +-
.../apache/nifi/json/TokenParserFactory.java | 36 +
.../nifi-yaml-record-utils/pom.xml | 45 +
.../apache/nifi/yaml/YamlParserFactory.java | 44 +
.../apache/nifi/yaml/YamlRecordSource.java | 29 +
.../nifi/yaml/YamlTreeRowRecordReader.java | 47 +
.../nifi-record-utils/pom.xml | 1 +
.../pom.xml | 6 +
.../org/apache/nifi/json/JsonTreeReader.java | 51 +-
.../org/apache/nifi/yaml/YamlTreeReader.java | 81 ++
...g.apache.nifi.controller.ControllerService | 1 +
.../additionalDetails.html | 2 +-
.../additionalDetails.html | 454 ++++++
.../json/TestJsonTreeRowRecordReader.java | 3 +-
.../yaml/TestYamlTreeRowRecordReader.java | 1241 +++++++++++++++++
.../bank-account-array-different-schemas.yaml | 24 +
.../bank-account-array-optional-balance.yaml | 23 +
.../resources/yaml/bank-account-array.yaml | 16 +
.../resources/yaml/bank-account-comments.yaml | 20 +
.../test/resources/yaml/capture-fields.yaml | 15 +
...of-different-arrays-with-extra-fields.yaml | 15 +
...of-embedded-arrays-and-single-records.yaml | 16 +
.../choice-of-embedded-similar-records.yaml | 7 +
...ed-embedded-arrays-and-single-records.yaml | 19 +
.../choice-of-string-or-array-record.yaml | 4 +
.../yaml/elements-for-record-choice.yaml | 6 +
.../resources/yaml/multiple-nested-field.yaml | 13 +
.../yaml/nested-array-then-start-object.yaml | 9 +
.../test/resources/yaml/similar-records.yaml | 7 +
.../single-bank-account-wrong-field-type.yaml | 10 +
.../resources/yaml/single-bank-account.yaml | 8 +
.../yaml/single-element-deep-nested.yaml | 8 +
.../yaml/single-element-nested-array.yaml | 12 +
.../resources/yaml/single-element-nested.yaml | 10 +
.../src/test/resources/yaml/timestamp.yaml | 2 +
.../resources/yaml/yaml-with-unicode.yaml | 6 +
40 files changed, 2342 insertions(+), 80 deletions(-)
create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonParserFactory.java
create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/TokenParserFactory.java
create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/pom.xml
create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlParserFactory.java
create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlRecordSource.java
create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlTreeRowRecordReader.java
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/yaml/YamlTreeReader.java
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.yaml.YamlTreeReader/additionalDetails.html
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/yaml/TestYamlTreeRowRecordReader.java
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/bank-account-array-different-schemas.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/bank-account-array-optional-balance.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/bank-account-array.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/bank-account-comments.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/capture-fields.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/choice-of-different-arrays-with-extra-fields.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/choice-of-embedded-arrays-and-single-records.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/choice-of-embedded-similar-records.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/choice-of-merged-embedded-arrays-and-single-records.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/choice-of-string-or-array-record.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/elements-for-record-choice.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/multiple-nested-field.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/nested-array-then-start-object.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/similar-records.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/single-bank-account-wrong-field-type.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/single-bank-account.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/single-element-deep-nested.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/single-element-nested-array.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/single-element-nested.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/timestamp.yaml
create mode 100644 nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/yaml/yaml-with-unicode.yaml
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
index d718e7e4bcb3..e3f6d8567ee9 100644
--- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
@@ -22,7 +22,6 @@
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.logging.ComponentLog;
@@ -56,7 +55,7 @@
public abstract class AbstractJsonRowRecordReader implements RecordReader {
public static final String DEFAULT_MAX_STRING_LENGTH = "20 MB";
- static final PropertyDescriptor MAX_STRING_LENGTH = new PropertyDescriptor.Builder()
+ public static final PropertyDescriptor MAX_STRING_LENGTH = new PropertyDescriptor.Builder()
.name("Max String Length")
.displayName("Max String Length")
.description("The maximum allowed length of a string value when parsing the JSON document")
@@ -88,7 +87,6 @@ public abstract class AbstractJsonRowRecordReader implements RecordReader {
private JsonParser jsonParser;
private JsonNode firstJsonNode;
private StartingFieldStrategy strategy;
-
private Map capturedFields;
private BiPredicate captureFieldPredicate;
@@ -104,28 +102,6 @@ private AbstractJsonRowRecordReader(final ComponentLog logger, final String date
lazyTimestampFormat = () -> tsf;
}
- protected AbstractJsonRowRecordReader(final InputStream in,
- final ComponentLog logger,
- final String dateFormat,
- final String timeFormat,
- final String timestampFormat)
- throws IOException, MalformedRecordException {
-
- this(in, logger, dateFormat, timeFormat, timestampFormat, null, null, null, false, null);
- }
-
- protected AbstractJsonRowRecordReader(final InputStream in,
- final ComponentLog logger,
- final String dateFormat,
- final String timeFormat,
- final String timestampFormat,
- final boolean allowComments,
- final StreamReadConstraints streamReadConstraints)
- throws IOException, MalformedRecordException {
-
- this(in, logger, dateFormat, timeFormat, timestampFormat, null, null, null, allowComments, streamReadConstraints);
- }
-
/**
* Constructor with initial logic for JSON to NiFi record parsing.
*
@@ -140,7 +116,7 @@ protected AbstractJsonRowRecordReader(final InputStream in,
* be accessed by calling {@link #getCapturedFields()}
* @param allowComments whether to allow comments within the JSON stream
* @param streamReadConstraints configuration for the JsonFactory stream reader {@link StreamReadConstraints}
- *
+ * @param tokenParserFactory factory to provide an instance of com.fasterxml.jackson.core.JsonParser
* @throws IOException in case of JSON stream processing failure
* @throws MalformedRecordException in case of malformed JSON input
*/
@@ -153,7 +129,8 @@ protected AbstractJsonRowRecordReader(final InputStream in,
final String nestedFieldName,
final BiPredicate captureFieldPredicate,
final boolean allowComments,
- final StreamReadConstraints streamReadConstraints)
+ final StreamReadConstraints streamReadConstraints,
+ final TokenParserFactory tokenParserFactory)
throws IOException, MalformedRecordException {
this(logger, dateFormat, timeFormat, timestampFormat);
@@ -163,14 +140,8 @@ protected AbstractJsonRowRecordReader(final InputStream in,
capturedFields = new LinkedHashMap<>();
try {
- final ObjectMapper codec = new ObjectMapper();
- if (allowComments) {
- codec.enable(JsonParser.Feature.ALLOW_COMMENTS);
- }
- codec.getFactory().setStreamReadConstraints(streamReadConstraints != null ? streamReadConstraints : DEFAULT_STREAM_READ_CONSTRAINTS);
-
- jsonParser = codec.getFactory().createParser(in);
- jsonParser.setCodec(codec);
+ final StreamReadConstraints configuredStreamReadConstraints = streamReadConstraints == null ? DEFAULT_STREAM_READ_CONSTRAINTS : streamReadConstraints;
+ jsonParser = tokenParserFactory.getJsonParser(in, configuredStreamReadConstraints, allowComments);
if (strategy == StartingFieldStrategy.NESTED_FIELD) {
while (jsonParser.nextToken() != null) {
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonParserFactory.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonParserFactory.java
new file mode 100644
index 000000000000..d2e7484b3dee
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonParserFactory.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.json;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.StreamReadConstraints;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Objects;
+
+public class JsonParserFactory implements TokenParserFactory {
+ @Override
+ public JsonParser getJsonParser(final InputStream in, final StreamReadConstraints streamReadConstraints, final boolean allowComments) throws IOException {
+ Objects.requireNonNull(in, "Input Stream required");
+ Objects.requireNonNull(streamReadConstraints, "Stream Read Constraints required");
+
+ final ObjectMapper objectMapper = new ObjectMapper();
+ objectMapper.getFactory().setStreamReadConstraints(streamReadConstraints);
+ if (allowComments) {
+ objectMapper.enable(JsonParser.Feature.ALLOW_COMMENTS);
+ }
+ final JsonFactory jsonFactory = objectMapper.getFactory();
+ return jsonFactory.createParser(in);
+ }
+}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonPathRowRecordReader.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonPathRowRecordReader.java
index 328cf0c8683e..bc9ba69800a6 100644
--- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonPathRowRecordReader.java
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonPathRowRecordReader.java
@@ -66,7 +66,7 @@ public JsonPathRowRecordReader(final LinkedHashMap jsonPaths,
final boolean allowComments, final StreamReadConstraints streamReadConstraints)
throws MalformedRecordException, IOException {
- super(in, logger, dateFormat, timeFormat, timestampFormat, allowComments, streamReadConstraints);
+ super(in, logger, dateFormat, timeFormat, timestampFormat, null, null, null, allowComments, streamReadConstraints, new JsonParserFactory());
this.schema = schema;
this.jsonPaths = jsonPaths;
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonRecordSource.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonRecordSource.java
index 348c2ef02feb..5ff735fbddde 100644
--- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonRecordSource.java
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonRecordSource.java
@@ -16,12 +16,11 @@
*/
package org.apache.nifi.json;
-import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.core.io.SerializedString;
import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.nifi.schema.inference.RecordSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -31,29 +30,28 @@
public class JsonRecordSource implements RecordSource {
private static final Logger logger = LoggerFactory.getLogger(JsonRecordSource.class);
- private static final JsonFactory jsonFactory;
+
+ private static final StreamReadConstraints DEFAULT_STREAM_READ_CONSTRAINTS = StreamReadConstraints.defaults();
+
+ private static final boolean ALLOW_COMMENTS_ENABLED = true;
+
private final JsonParser jsonParser;
private final StartingFieldStrategy strategy;
- private final String startingFieldName;
-
- static {
- jsonFactory = new JsonFactory();
- jsonFactory.setCodec(new ObjectMapper());
- }
public JsonRecordSource(final InputStream in) throws IOException {
- jsonParser = jsonFactory.createParser(in);
- strategy = null;
- startingFieldName = null;
+ this(in, null, null);
}
public JsonRecordSource(final InputStream in, final StartingFieldStrategy strategy, final String startingFieldName) throws IOException {
- jsonParser = jsonFactory.createParser(in);
+ this(in , strategy, startingFieldName, new JsonParserFactory());
+ }
+
+ public JsonRecordSource(final InputStream in, final StartingFieldStrategy strategy, final String startingFieldName, TokenParserFactory tokenParserFactory) throws IOException {
+ jsonParser = tokenParserFactory.getJsonParser(in, DEFAULT_STREAM_READ_CONSTRAINTS, ALLOW_COMMENTS_ENABLED);
this.strategy = strategy;
- this.startingFieldName = startingFieldName;
if (strategy == StartingFieldStrategy.NESTED_FIELD) {
- final SerializedString serializedNestedField = new SerializedString(this.startingFieldName);
+ final SerializedString serializedNestedField = new SerializedString(startingFieldName);
while (!jsonParser.nextFieldName(serializedNestedField) && jsonParser.hasCurrentToken());
logger.debug("Parsing starting at nested field [{}]", startingFieldName);
}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonTreeRowRecordReader.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonTreeRowRecordReader.java
index 758cd96a4815..489474e804dc 100644
--- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonTreeRowRecordReader.java
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/JsonTreeRowRecordReader.java
@@ -56,16 +56,7 @@ public JsonTreeRowRecordReader(final InputStream in, final ComponentLog logger,
final String dateFormat, final String timeFormat, final String timestampFormat)
throws IOException, MalformedRecordException {
- this(in, logger, schema, dateFormat, timeFormat, timestampFormat, false, null);
- }
-
- public JsonTreeRowRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
- final String dateFormat, final String timeFormat, final String timestampFormat,
- final boolean allowComments, final StreamReadConstraints streamReadConstraints)
- throws IOException, MalformedRecordException {
-
- this(in, logger, schema, dateFormat, timeFormat, timestampFormat, null, null, null, null,
- allowComments, streamReadConstraints);
+ this(in, logger, schema, dateFormat, timeFormat, timestampFormat, null, null, null, null);
}
public JsonTreeRowRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
@@ -75,18 +66,18 @@ public JsonTreeRowRecordReader(final InputStream in, final ComponentLog logger,
throws IOException, MalformedRecordException {
this(in, logger, schema, dateFormat, timeFormat, timestampFormat, startingFieldStrategy, startingFieldName, schemaApplicationStrategy,
- captureFieldPredicate, false, null);
+ captureFieldPredicate, false, null, new JsonParserFactory());
}
public JsonTreeRowRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
final String dateFormat, final String timeFormat, final String timestampFormat,
final StartingFieldStrategy startingFieldStrategy, final String startingFieldName,
final SchemaApplicationStrategy schemaApplicationStrategy, final BiPredicate captureFieldPredicate,
- final boolean allowComments, final StreamReadConstraints streamReadConstraints)
+ final boolean allowComments, final StreamReadConstraints streamReadConstraints, final TokenParserFactory tokenParserFactory)
throws IOException, MalformedRecordException {
super(in, logger, dateFormat, timeFormat, timestampFormat, startingFieldStrategy, startingFieldName, captureFieldPredicate,
- allowComments, streamReadConstraints);
+ allowComments, streamReadConstraints, tokenParserFactory);
if (startingFieldStrategy == StartingFieldStrategy.NESTED_FIELD && schemaApplicationStrategy == SchemaApplicationStrategy.WHOLE_JSON) {
this.schema = getSelectedSchema(schema, startingFieldName);
@@ -110,7 +101,6 @@ private RecordSchema getSelectedSchema(final RecordSchema schema, final String s
}
}
}
-
}
throw new RuntimeException(String.format("Selected schema field [%s] not found.", startingFieldName));
}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/TokenParserFactory.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/TokenParserFactory.java
new file mode 100644
index 000000000000..1c3a812fe2a4
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/TokenParserFactory.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.json;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.StreamReadConstraints;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public interface TokenParserFactory {
+ /**
+ * Get JSON Parser implementation for provided Input Stream with configured settings
+ *
+ * @param in Input Stream to be parsed
+ * @param streamReadConstraints Stream Read Constraints applied
+ * @param allowComments Whether to allow comments when parsing
+ * @return JSON Parser
+ * @throws IOException Thrown on failures to read the Input Stream
+ */
+ JsonParser getJsonParser(InputStream in, StreamReadConstraints streamReadConstraints, boolean allowComments) throws IOException;
+}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/pom.xml b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/pom.xml
new file mode 100644
index 000000000000..3fd1f0d8b9d9
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/pom.xml
@@ -0,0 +1,45 @@
+
+
+
+ 4.0.0
+
+ org.apache.nifi
+ nifi-record-utils
+ 2.0.0-SNAPSHOT
+
+ nifi-yaml-record-utils
+
+
+ org.apache.nifi
+ nifi-json-record-utils
+ 2.0.0-SNAPSHOT
+
+
+ org.apache.nifi
+ nifi-record
+
+
+ org.apache.nifi
+ nifi-api
+
+
+ com.fasterxml.jackson.dataformat
+ jackson-dataformat-yaml
+
+
+
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlParserFactory.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlParserFactory.java
new file mode 100644
index 000000000000..a6df6c198a15
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlParserFactory.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.yaml;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.StreamReadConstraints;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
+import org.apache.nifi.json.TokenParserFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class YamlParserFactory implements TokenParserFactory {
+ private static final YAMLFactory YAML_FACTORY = new YAMLFactory(new YAMLMapper());
+
+ /**
+ * Get Parser implementation for YAML
+ *
+ * @param in Input Stream to be parsed
+ * @param streamReadConstraints Stream Read Constraints are not supported in YAML
+ * @param allowComments Whether to allow comments when parsing does not apply to YAML
+ * @return YAML Parser
+ * @throws IOException Thrown on parser creation failures
+ */
+ @Override
+ public JsonParser getJsonParser(final InputStream in, final StreamReadConstraints streamReadConstraints, final boolean allowComments) throws IOException {
+ return YAML_FACTORY.createParser(in);
+ }
+}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlRecordSource.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlRecordSource.java
new file mode 100644
index 000000000000..f47a591de624
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlRecordSource.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.yaml;
+
+import org.apache.nifi.json.JsonRecordSource;
+import org.apache.nifi.json.StartingFieldStrategy;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class YamlRecordSource extends JsonRecordSource {
+ public YamlRecordSource(final InputStream in, final StartingFieldStrategy strategy, final String startingFieldName) throws IOException {
+ super(in, strategy, startingFieldName, new YamlParserFactory());
+ }
+}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlTreeRowRecordReader.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlTreeRowRecordReader.java
new file mode 100644
index 000000000000..25136d85aa3c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-yaml-record-utils/src/main/java/org/apache/nifi/yaml/YamlTreeRowRecordReader.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.yaml;
+
+import org.apache.nifi.json.JsonTreeRowRecordReader;
+import org.apache.nifi.json.SchemaApplicationStrategy;
+import org.apache.nifi.json.StartingFieldStrategy;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.record.RecordSchema;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.function.BiPredicate;
+
+public class YamlTreeRowRecordReader extends JsonTreeRowRecordReader {
+
+ public YamlTreeRowRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
+ final String dateFormat, final String timeFormat, final String timestampFormat) throws IOException, MalformedRecordException {
+ this(in, logger, schema, dateFormat, timeFormat, timestampFormat, null, null, null, null);
+ }
+
+ public YamlTreeRowRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
+ final String dateFormat, final String timeFormat, final String timestampFormat,
+ final StartingFieldStrategy startingFieldStrategy, final String startingFieldName,
+ final SchemaApplicationStrategy schemaApplicationStrategy, final BiPredicate captureFieldPredicate)
+ throws IOException, MalformedRecordException {
+
+ super(in, logger, schema, dateFormat, timeFormat, timestampFormat, startingFieldStrategy, startingFieldName, schemaApplicationStrategy,
+ captureFieldPredicate, true, null, new YamlParserFactory());
+ }
+}
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/pom.xml b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/pom.xml
index 89820015c749..d87e21a9a333 100644
--- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/pom.xml
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/pom.xml
@@ -30,6 +30,7 @@
nifi-json-record-utils
nifi-mock-record-utils
nifi-schema-inference-utils
+ nifi-yaml-record-utils
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
index 8011be927d73..8227f6f6e18f 100755
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
@@ -68,6 +68,11 @@
nifi-json-record-utils
2.0.0-SNAPSHOT
+
+ org.apache.nifi
+ nifi-yaml-record-utils
+ 2.0.0-SNAPSHOT
+
org.apache.commons
commons-csv
@@ -270,6 +275,7 @@
src/test/resources/xml/testschema
src/test/resources/xml/testschema2
src/test/resources/xml/testschema3
+ src/test/resources/yaml/*.yaml
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonTreeReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonTreeReader.java
index bcf7d66c95ae..596b3880bd6e 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonTreeReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonTreeReader.java
@@ -70,12 +70,12 @@
+ "See the Usage of the Controller Service for more information and examples.")
@SeeAlso(JsonPathReader.class)
public class JsonTreeReader extends SchemaRegistryService implements RecordReaderFactory {
- private volatile String dateFormat;
- private volatile String timeFormat;
- private volatile String timestampFormat;
- private volatile String startingFieldName;
- private volatile StartingFieldStrategy startingFieldStrategy;
- private volatile SchemaApplicationStrategy schemaApplicationStrategy;
+ protected volatile String dateFormat;
+ protected volatile String timeFormat;
+ protected volatile String timestampFormat;
+ protected volatile String startingFieldName;
+ protected volatile StartingFieldStrategy startingFieldStrategy;
+ protected volatile SchemaApplicationStrategy schemaApplicationStrategy;
private volatile boolean allowComments;
private volatile StreamReadConstraints streamReadConstraints;
@@ -138,9 +138,29 @@ public void storePropertyValues(final ConfigurationContext context) {
this.startingFieldStrategy = StartingFieldStrategy.valueOf(context.getProperty(STARTING_FIELD_STRATEGY).getValue());
this.startingFieldName = context.getProperty(STARTING_FIELD_NAME).getValue();
this.schemaApplicationStrategy = SchemaApplicationStrategy.valueOf(context.getProperty(SCHEMA_APPLICATION_STRATEGY).getValue());
+ this.streamReadConstraints = buildStreamReadConstraints(context);
+ this.allowComments = isAllowCommentsEnabled(context);
+ }
+
+ /**
+ * Build Stream Read Constraints based on available properties
+ *
+ * @param context Configuration Context with property values
+ * @return Stream Read Constraints
+ */
+ protected StreamReadConstraints buildStreamReadConstraints(final ConfigurationContext context) {
final int maxStringLength = context.getProperty(AbstractJsonRowRecordReader.MAX_STRING_LENGTH).asDataSize(DataUnit.B).intValue();
- this.streamReadConstraints = StreamReadConstraints.builder().maxStringLength(maxStringLength).build();
- this.allowComments = context.getProperty(AbstractJsonRowRecordReader.ALLOW_COMMENTS).asBoolean();
+ return StreamReadConstraints.builder().maxStringLength(maxStringLength).build();
+ }
+
+ /**
+ * Determine whether to allow comments when parsing based on available properties
+ *
+ * @param context Configuration Context with property values
+ * @return Allow comments status
+ */
+ protected boolean isAllowCommentsEnabled(final ConfigurationContext context) {
+ return context.getProperty(AbstractJsonRowRecordReader.ALLOW_COMMENTS).asBoolean();
}
@Override
@@ -153,9 +173,7 @@ protected List getSchemaAccessStrategyValues() {
@Override
protected SchemaAccessStrategy getSchemaAccessStrategy(final String schemaAccessStrategy, final SchemaRegistry schemaRegistry, final PropertyContext context) {
- final RecordSourceFactory jsonSourceFactory =
- (var, in) -> new JsonRecordSource(in, startingFieldStrategy, startingFieldName);
-
+ final RecordSourceFactory jsonSourceFactory = createJsonRecordSourceFactory();
final Supplier> inferenceSupplier =
() -> new JsonSchemaInference(new TimeValueInference(dateFormat, timeFormat, timestampFormat));
@@ -163,16 +181,23 @@ protected SchemaAccessStrategy getSchemaAccessStrategy(final String schemaAccess
() -> super.getSchemaAccessStrategy(schemaAccessStrategy, schemaRegistry, context));
}
+ protected RecordSourceFactory createJsonRecordSourceFactory() {
+ return (variables, in) -> new JsonRecordSource(in, startingFieldStrategy, startingFieldName);
+ }
+
@Override
protected AllowableValue getDefaultSchemaAccessStrategy() {
return INFER_SCHEMA;
}
- @Override
public RecordReader createRecordReader(final Map variables, final InputStream in, final long inputLength, final ComponentLog logger)
throws IOException, MalformedRecordException, SchemaNotFoundException {
final RecordSchema schema = getSchema(variables, in, null);
+ return createJsonTreeRowRecordReader(in, logger, schema);
+ }
+
+ protected JsonTreeRowRecordReader createJsonTreeRowRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema) throws IOException, MalformedRecordException {
return new JsonTreeRowRecordReader(in, logger, schema, dateFormat, timeFormat, timestampFormat, startingFieldStrategy, startingFieldName,
- schemaApplicationStrategy, null, allowComments, streamReadConstraints);
+ schemaApplicationStrategy, null, allowComments, streamReadConstraints, new JsonParserFactory());
}
}
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/yaml/YamlTreeReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/yaml/YamlTreeReader.java
new file mode 100644
index 000000000000..ab1a5682ed70
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/yaml/YamlTreeReader.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.yaml;
+
+import com.fasterxml.jackson.core.StreamReadConstraints;
+import com.fasterxml.jackson.databind.JsonNode;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.controller.ConfigurationContext;
+import org.apache.nifi.json.AbstractJsonRowRecordReader;
+import org.apache.nifi.json.JsonTreeReader;
+import org.apache.nifi.json.JsonTreeRowRecordReader;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.schema.inference.RecordSourceFactory;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.record.RecordSchema;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+@Tags({"yaml", "tree", "record", "reader", "parser"})
+@CapabilityDescription("Parses YAML into individual Record objects. While the reader expects each record "
+ + "to be well-formed YAML, the content of a FlowFile may consist of many records, each as a well-formed "
+ + "YAML array or YAML object. "
+ + "If an array is encountered, each element in that array will be treated as a separate record. "
+ + "If the schema that is configured contains a field that is not present in the YAML, a null value will be used. If the YAML contains "
+ + "a field that is not present in the schema, that field will be skipped. "
+ + "See the Usage of the Controller Service for more information and examples.")
+public class YamlTreeReader extends JsonTreeReader {
+
+ private static final boolean ALLOW_COMMENTS_DISABLED = false;
+
+ @Override
+ protected List getSupportedPropertyDescriptors() {
+ final List properties = new ArrayList<>(super.getSupportedPropertyDescriptors());
+ // Remove those properties which are not applicable for YAML
+ properties.remove(AbstractJsonRowRecordReader.MAX_STRING_LENGTH);
+ properties.remove(AbstractJsonRowRecordReader.ALLOW_COMMENTS);
+
+ return properties;
+ }
+
+ @Override
+ protected RecordSourceFactory createJsonRecordSourceFactory() {
+ return (var, in) -> new YamlRecordSource(in, startingFieldStrategy, startingFieldName);
+ }
+
+ @Override
+ protected JsonTreeRowRecordReader createJsonTreeRowRecordReader(InputStream in, ComponentLog logger, RecordSchema schema) throws IOException, MalformedRecordException {
+ return new YamlTreeRowRecordReader(in, logger, schema, dateFormat, timeFormat, timestampFormat, startingFieldStrategy, startingFieldName,
+ schemaApplicationStrategy, null);
+ }
+
+ @Override
+ protected StreamReadConstraints buildStreamReadConstraints(final ConfigurationContext context) {
+ return StreamReadConstraints.defaults();
+ }
+
+ @Override
+ protected boolean isAllowCommentsEnabled(final ConfigurationContext context) {
+ return ALLOW_COMMENTS_DISABLED;
+ }
+}
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService
index 7c391e244378..0b8484c3dbd4 100755
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService
@@ -38,3 +38,4 @@ org.apache.nifi.xml.XMLReader
org.apache.nifi.xml.XMLRecordSetWriter
org.apache.nifi.windowsevent.WindowsEventLogReader
org.apache.nifi.schema.inference.VolatileSchemaCache
+org.apache.nifi.yaml.YamlTreeReader
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html
index f46b52375821..d80f3719130a 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html
@@ -411,7 +411,7 @@ Schema Application Strategies
When using JsonTreeReader with "Nested Field Strategy" and the "Schema Access Strategy" is not "Infer Schema",
- it can be configured for the entire original JSON ("Whole JSON" strategy) or for the nested field section ("Selected part" strategy).
+ it can be configured for the entire original JSON ("Whole document" strategy) or for the nested field section ("Selected part" strategy).
+
+ The YamlTreeReader Controller Service reads a YAML Object and creates a Record object either for the
+ entire YAML Object tree or a subpart (see "Starting Field Strategies" section). The Controller Service
+ must be configured with a Schema that describes the structure of the YAML data. If any field exists in
+ the YAML that is not in the schema, that field will be skipped. If the schema contains a field for which
+ no YAML field exists, a null value will be used in the Record (or the default value defined in the schema,
+ if applicable).
+
+
+
+ If the root element of the YAML is a YAML Array, each YAML Object within that array will be treated as
+ its own separate Record. If the root element is a YAML Object, the YAML will all be treated as a single
+ Record.
+
+
+
+ Schemas and Type Coercion
+
+
+ When a record is parsed from incoming data, it is separated into fields. Each of these fields is then looked up against the
+ configured schema (by field name) in order to determine what the type of the data should be. If the field is not present in
+ the schema, that field is omitted from the Record. If the field is found in the schema, the data type of the received data
+ is compared against the data type specified in the schema. If the types match, the value of that field is used as-is. If the
+ schema indicates that the field should be of a different type, then the Controller Service will attempt to coerce the data
+ into the type specified by the schema. If the field cannot be coerced into the specified type, an Exception will be thrown.
+
+
+
+ The following rules apply when attempting to coerce a field value from one data type to another:
+
+
+
+ - Any data type can be coerced into a String type.
+ - Any numeric data type (Byte, Short, Int, Long, Float, Double) can be coerced into any other numeric data type.
+ - Any numeric value can be coerced into a Date, Time, or Timestamp type, by assuming that the Long value is the number of
+ milliseconds since epoch (Midnight GMT, January 1, 1970).
+ - A String value can be coerced into a Date, Time, or Timestamp type, if its format matches the configured "Date Format," "Time Format,"
+ or "Timestamp Format."
+ - A String value can be coerced into a numeric value if the value is of the appropriate type. For example, the String value
+
8
can be coerced into any numeric type. However, the String value 8.2
can be coerced into a Double or Float
+ type but not an Integer.
+ - A String value of "true" or "false" (regardless of case) can be coerced into a Boolean value.
+ - A String value that is not empty can be coerced into a Char type. If the String contains more than 1 character, the first character is used
+ and the rest of the characters are ignored.
+ - Any "date/time" type (Date, Time, Timestamp) can be coerced into any other "date/time" type.
+ - Any "date/time" type can be coerced into a Long type, representing the number of milliseconds since epoch (Midnight GMT, January 1, 1970).
+ - Any "date/time" type can be coerced into a String. The format of the String is whatever DateFormat is configured for the corresponding
+ property (Date Format, Time Format, Timestamp Format property). If no value is specified, then the value will be converted into a String
+ representation of the number of milliseconds since epoch (Midnight GMT, January 1, 1970).
+
+
+
+ If none of the above rules apply when attempting to coerce a value from one data type to another, the coercion will fail and an Exception
+ will be thrown.
+
+
+
+
+ Schema Inference
+
+
+ While NiFi's Record API does require that each Record have a schema, it is often convenient to infer the schema based on the values in the data,
+ rather than having to manually create a schema. This is accomplished by selecting a value of "Infer Schema" for the "Schema Access Strategy" property.
+ When using this strategy, the Reader will determine the schema by first parsing all data in the FlowFile, keeping track of all fields that it has encountered
+ and the type of each field. Once all data has been parsed, a schema is formed that encompasses all fields that have been encountered.
+
+
+
+ A common concern when inferring schemas is how to handle the condition of two values that have different types. For example, consider a FlowFile with the following two records:
+
+
+[{
+ "name": "John",
+ "age": 8,
+ "values": "N/A"
+}, {
+ "name": "Jane",
+ "age": "Ten",
+ "values": [ 8, "Ten" ]
+}]
+
+
+
+ It is clear that the "name" field will be inferred as a STRING type. However, how should we handle the "age" field? Should the field be an CHOICE between INT and STRING? Should we
+ prefer LONG over INT? Should we just use a STRING? Should the field be considered nullable?
+
+
+
+ To help understand how this Record Reader infers schemas, we have the following list of rules that are followed in the inference logic:
+
+
+
+ - All fields are inferred to be nullable.
+ -
+ When two values are encountered for the same field in two different records (or two values are encountered for an ARRAY type), the inference engine prefers
+ to use a "wider" data type over using a CHOICE data type. A data type "A" is said to be wider than data type "B" if and only if data type "A" encompasses all
+ values of "B" in addition to other values. For example, the LONG type is wider than the INT type but not wider than the BOOLEAN type (and BOOLEAN is also not wider
+ than LONG). INT is wider than SHORT. The STRING type is considered wider than all other types with the Exception of MAP, RECORD, ARRAY, and CHOICE.
+
+ -
+ If two values are encountered for the same field in two different records (or two values are encountered for an ARRAY type), but neither value is of a type that
+ is wider than the other, then a CHOICE type is used. In the example above, the "values" field will be inferred as a CHOICE between a STRING or an ARRRAY<STRING>.
+
+ -
+ If the "Time Format," "Timestamp Format," or "Date Format" properties are configured, any value that would otherwise be considered a STRING type is first checked against
+ the configured formats to see if it matches any of them. If the value matches the Timestamp Format, the value is considered a Timestamp field. If it matches the Date Format,
+ it is considered a Date field. If it matches the Time Format, it is considered a Time field. In the unlikely event that the value matches more than one of the configured
+ formats, they will be matched in the order: Timestamp, Date, Time. I.e., if a value matched both the Timestamp Format and the Date Format, the type that is inferred will be
+ Timestamp. Because parsing dates and times can be expensive, it is advisable not to configure these formats if dates, times, and timestamps are not expected, or if processing
+ the data as a STRING is acceptable. For use cases when this is important, though, the inference engine is intelligent enough to optimize the parsing by first checking several
+ very cheap conditions. For example, the string's length is examined to see if it is too long or too short to match the pattern. This results in far more efficient processing
+ than would result if attempting to parse each string value as a timestamp.
+
+ - The MAP type is never inferred. Instead, the RECORD type is used.
+ - If a field exists but all values are null, then the field is inferred to be of type STRING.
+
+
+
+
+ Caching of Inferred Schemas
+
+
+ This Record Reader requires that if a schema is to be inferred, that all records be read in order to ensure that the schema that gets inferred is applicable for all
+ records in the FlowFile. However, this can become expensive, especially if the data undergoes many different transformations. To alleviate the cost of inferring schemas,
+ the Record Reader can be configured with a "Schema Inference Cache" by populating the property with that name. This is a Controller Service that can be shared by Record
+ Readers and Record Writers.
+
+
+
+ Whenever a Record Writer is used to write data, if it is configured with a "Schema Cache," it will also add the schema to the Schema Cache. This will result in an
+ identifier for that schema being added as an attribute to the FlowFile.
+
+
+
+ Whenever a Record Reader is used to read data, if it is configured with a "Schema Inference Cache", it will first look for a "schema.cache.identifier" attribute on the FlowFile.
+ If the attribute exists, it will use the value of that attribute to lookup the schema in the schema cache. If it is able to find a schema in the cache with that identifier,
+ then it will use that schema instead of reading, parsing, and analyzing the data to infer the schema. If the attribute is not available on the FlowFile, or if the attribute is
+ available but the cache does not have a schema with that identifier, then the Record Reader will proceed to infer the schema as described above.
+
+
+
+ The end result is that users are able to chain together many different Processors to operate on Record-oriented data. Typically, only the first such Processor in the chain will
+ incur the "penalty" of inferring the schema. For all other Processors in the chain, the Record Reader is able to simply lookup the schema in the Schema Cache by identifier.
+ This allows the Record Reader to infer a schema accurately, since it is inferred based on all data in the FlowFile, and still allows this to happen efficiently since the schema
+ will typically only be inferred once, regardless of how many Processors handle the data.
+
+
+
+ Starting Field Strategies
+
+
+ When using YamlTreeReader, two different starting field strategies can be selected. With the default Root Node strategy, the YamlTreeReader begins processing from the root element
+ of the YAML and creates a Record object for the entire YAML Object tree, while the Nested Field strategy defines a nested field from which to begin processing.
+
+
+ Using the Nested Field strategy, a schema corresponding to the nested YAML part should be specified. In case of schema inference, the YamlTreeReader will automatically
+ infer a schema from nested records.
+
+
+ Root Node Strategy
+
+
+ Consider the following YAML is read with the default Root Node strategy:
+
+
+
+- id: 17
+ name: John
+ child:
+ id: "1"
+ dob: 10-29-1982
+ siblings:
+ - name: Jeremy
+ id: 4
+ - name: Julia
+ id: 8
+- id: 98
+ name: Jane
+ child:
+ id: 2
+ dob: 08-30-1984
+ gender: F
+ siblingIds: []
+ siblings: []
+
+
+
+
+ Also, consider that the schema that is configured for this YAML is as follows (assuming that the AvroSchemaRegistry
+ Controller Service is chosen to denote the Schema):
+
+
+
+
+{
+ "type": "record",
+ "name": "nifiRecord",
+ "namespace": "org.apache.nifi",
+ "fields": [
+ {
+ "name": "id",
+ "type": ["int","null"]
+ },
+ {
+ "name": "name",
+ "type": ["string","null"]
+ },
+ {
+ "name": "child",
+ "type": [
+ {
+ "type": "record",
+ "name": "childType",
+ "fields": [
+ {
+ "name": "id",
+ "type": ["int","string","null"]
+ }
+ ]
+ },
+ "null"
+ ]
+ },
+ {
+ "name": "dob",
+ "type": ["string","null"]
+ },
+ {
+ "name": "siblings",
+ "type": [
+ {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "siblingsType",
+ "fields": [
+ {
+ "name": "name",
+ "type": ["string","null"]
+ },
+ {
+ "name": "id",
+ "type": ["int","null"]
+ }
+ ]
+ }
+ },
+ "null"
+ ]
+ },
+ {
+ "name": "gender",
+ "type": ["string","null"]
+ },
+ {
+ "name": "siblingIds",
+ "type": [
+ {
+ "type": "array",
+ "items": "string"
+ },
+ "null"
+ ]
+ }
+ ]
+}
+
+
+
+
+ Let us also assume that this Controller Service is configured with the "Date Format" property set to "MM-dd-yyyy", as this
+ matches the date format used for our YAML data. This will result in the YAML creating two separate records, because the root
+ element is a YAML array with two elements.
+
+
+
+ The first Record will consist of the following values:
+
+
+
+
+ Field Name |
+ Field Value |
+
+
+ id |
+ 17 |
+
+
+ name |
+ John |
+
+
+ gender |
+ null |
+
+
+ dob |
+ 11-30-1983 |
+
+
+ siblings |
+
+ array with two elements, each of which is itself a Record:
+
+
+
+ Field Name |
+ Field Value |
+
+
+ name |
+ Jeremy |
+
+
+
+ and:
+
+
+
+ Field Name |
+ Field Value |
+
+
+ name |
+ Julia |
+
+
+ |
+
+
+
+
+ The second Record will consist of the following values:
+
+
+
+
+ Field Name |
+ Field Value |
+
+
+ id |
+ 98 |
+
+
+ name |
+ Jane |
+
+
+ gender |
+ F |
+
+
+ dob |
+ 08-30-1984 |
+
+
+ siblings |
+ empty array |
+
+
+
+ Nested Field Strategy
+
+
+ Using the Nested Field strategy, consider the same YAML where the specified Starting Field Name is
+ "siblings". The schema that is configured for this YAML is as follows:
+
+
+
+
+{
+ "namespace": "nifi",
+ "name": "siblings",
+ "type": "record",
+ "fields": [
+ { "name": "name", "type": "string" },
+ { "name": "id", "type": "int" }
+ ]
+}
+
+
+
+
+ The first Record will consist of the following values:
+
+
+
+
+ Field Name |
+ Field Value |
+
+
+ name |
+ Jeremy |
+
+
+ id |
+ 4 |
+
+
+
+
+ The second Record will consist of the following values:
+
+
+
+
+ Field Name |
+ Field Value |
+
+
+ name |
+ Julia |
+
+
+ id |
+ 8 |
+
+
+
+ Schema Application Strategies
+
+
+ When using YamlTreeReader with "Nested Field Strategy" and the "Schema Access Strategy" is not "Infer Schema",
+ it can be configured for the entire original YAML ("Whole document" strategy) or for the nested field section ("Selected part" strategy).
+
+
+
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.yaml.YamlTreeReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.yaml.YamlTreeReader/additionalDetails.html
new file mode 100644
index 000000000000..ece3862e05d8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.yaml.YamlTreeReader/additionalDetails.html
@@ -0,0 +1,454 @@
+
+
+
+