diff --git a/DRAFT_RELEASE_NOTES.md b/DRAFT_RELEASE_NOTES.md
index 0638a399c..456fadc00 100644
--- a/DRAFT_RELEASE_NOTES.md
+++ b/DRAFT_RELEASE_NOTES.md
@@ -13,7 +13,10 @@ Data access through [GraphQL](https://graphql.org/) has been deprecated and repl
Spark and PySpark have been upgraded from version 3.5.2 to 3.5.4.
## Record Relation
-To enable nested data records, we have added a new relation feature to the record metamodel. This allows records to reference other records. For more details, refer to the [Record Relation Options].(https://boozallen.github.io/aissemble/aissemble/current-dev/record-metamodel.html#_record_relation_options)
+To enable nested data records, we have added a new relation feature to the record metamodel. This allows records to reference other records. For more details, refer to the [Record Relation Options](https://boozallen.github.io/aissemble/aissemble/current-dev/record-metamodel.html#_record_relation_options).
+Several features are still a work in progress:
+- Spark-based validation for records with a One to Many multiplicity. (POJO validation is available.)
+- PySpark schema generation for records with any multiplicity
# Breaking Changes
_Note: instructions for adapting to these changes are outlined in the upgrade instructions below._
diff --git a/build-parent/pom.xml b/build-parent/pom.xml
index f8b0afba6..8292dd73e 100644
--- a/build-parent/pom.xml
+++ b/build-parent/pom.xml
@@ -48,7 +48,7 @@
2.9.1
4.24.0
0.10.1
- 3.0.8
+ 3.1.12
2.15.0
2.6.0
3.8.6
diff --git a/docs/modules/ROOT/pages/record-metamodel.adoc b/docs/modules/ROOT/pages/record-metamodel.adoc
index a46f063fd..676836c76 100644
--- a/docs/modules/ROOT/pages/record-metamodel.adoc
+++ b/docs/modules/ROOT/pages/record-metamodel.adoc
@@ -247,13 +247,20 @@ namespacing (e.g., package in Java, namespace in XSD).
| `relations/relation/documentation`
| No
| None
-| A description of the field.
+| A description of the relation.
| `relations/relation/multiplicity`
| No
| One to Many (1-M)
| Defines the multiplicity of the relation. Options are ONE_TO_MANY (1-M), ONE_TO_ONE (1-1), and MANY_TO_ONE (M-1).
-|===
+| `relations/relation/required`
+| No
+| false
+| Setting `required` to `true` will mandate that the relation must be populated for a record to pass validation.
-\
\ No newline at end of file
+| `relations/relation/column`
+| No
+| None
+| The name of the storage field for data persistence.
+|===
diff --git a/extensions/extensions-data-delivery/extensions-data-delivery-spark/src/main/java/com/boozallen/aiops/data/delivery/spark/SparkSchema.java b/extensions/extensions-data-delivery/extensions-data-delivery-spark/src/main/java/com/boozallen/aiops/data/delivery/spark/SparkSchema.java
index 6eec43801..1872f50eb 100644
--- a/extensions/extensions-data-delivery/extensions-data-delivery-spark/src/main/java/com/boozallen/aiops/data/delivery/spark/SparkSchema.java
+++ b/extensions/extensions-data-delivery/extensions-data-delivery-spark/src/main/java/com/boozallen/aiops/data/delivery/spark/SparkSchema.java
@@ -127,6 +127,22 @@ protected void add(String name, DataType dataType, boolean nullable, String comm
schema = schema.add(name, dataType, nullable, comment);
}
+ /**
+ * Adds a field to the schema.
+ *
+ * @param name
+ * the name of the field to add
+ * @param structType
+ * the struct type of the field to add
+ * @param nullable
+ * whether the field is nullable
+ * @param comment
+ * a description of the field
+ */
+ protected void add(String name, StructType structType, boolean nullable, String comment) {
+ schema = schema.add(name, structType, nullable, comment);
+ }
+
/**
* Updates the data type of a field in the schema.
*
diff --git a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/BaseRecordRelationDecorator.java b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/BaseRecordRelationDecorator.java
index 566edaca0..75a897a52 100644
--- a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/BaseRecordRelationDecorator.java
+++ b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/BaseRecordRelationDecorator.java
@@ -2,7 +2,7 @@
/*-
* #%L
- * AIOps Foundation::AIOps MDA
+ * aiSSEMBLE::Foundation::MDA
* %%
* Copyright (C) 2021 Booz Allen
* %%
@@ -10,11 +10,14 @@
* #L%
*/
+import org.apache.commons.lang3.StringUtils;
import org.technologybrewery.fermenter.mda.metamodel.element.MetamodelUtils;
+import com.boozallen.aiops.mda.generator.util.PipelineUtils;
+
/**
* Provides baseline decorator functionality for {@link Relation}.
- *
+ *
* The goal is to make it easier to apply the decorator pattern in various implementations of generators (e.g., Java,
* python, Docker) so that each concrete decorator only has to decorate those aspects of the class that are needed, not
* all the pass-through methods that each decorator would otherwise need to implement (that add no real value).
@@ -59,6 +62,16 @@ public String getName() {
return wrapped.getName();
}
+ @Override
+ public Boolean isRequired() {
+ return wrapped.isRequired();
+ }
+
+ @Override
+ public String getColumn() {
+ return wrapped.getColumn();
+ }
+
@Override
public void validate() {
wrapped.validate();
@@ -71,4 +84,56 @@ public void validate() {
public boolean isOneToManyRelation() {
return wrapped.getMultiplicity().equals(Multiplicity.ONE_TO_MANY);
}
+
+ /**
+ * Whether the Spark relation is nullable.
+ *
+ * @return true if the Spark field is nullable
+ */
+ public boolean isNullable() {
+ return wrapped.isRequired() == null || !wrapped.isRequired();
+ }
+
+ /**
+ * Returns the column name for the Spark relation.
+ *
+ * @return column name
+ */
+ public String getColumnName() {
+ String columnName;
+
+ if (StringUtils.isNotBlank(wrapped.getColumn())) {
+ columnName = wrapped.getColumn();
+ } else {
+ columnName = wrapped.getName();
+ }
+ return columnName;
+ }
+
+ /**
+ * Returns the relation name formatted to uppercase with underscores.
+ *
+ * @return name formatted to uppercase with underscores
+ */
+ public String getUpperSnakecaseName() {
+ return PipelineUtils.deriveUpperUnderscoreNameFromUpperCamelName(getName());
+ }
+
+ /**
+ * Returns the relation name, capitalized.
+ *
+ * @return capitalized name
+ */
+ public String getCapitalizedName() {
+ return StringUtils.capitalize(getName());
+ }
+
+ /**
+ * Returns the relation name, uncapitalized.
+ *
+ * @return uncapitalized name
+ */
+ public String getUncapitalizeName() {
+ return StringUtils.uncapitalize(getName());
+ }
}
diff --git a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/Relation.java b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/Relation.java
index 23e4aeac3..8f5bace8f 100644
--- a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/Relation.java
+++ b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/Relation.java
@@ -33,6 +33,20 @@ public interface Relation extends NamespacedMetamodel {
*/
Multiplicity getMultiplicity();
+ /**
+ * Returns whether the relation is required.
+ *
+ * @return required
+ */
+ Boolean isRequired();
+
+ /**
+ * Returns the column of the relation.
+ *
+ * @return column
+ */
+ String getColumn();
+
/**
* Enumerated values representing multiplicity options.
*/
diff --git a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/RelationElement.java b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/RelationElement.java
index 0204f571f..0e8d00eaf 100644
--- a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/RelationElement.java
+++ b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/RelationElement.java
@@ -34,6 +34,11 @@ public class RelationElement extends NamespacedMetamodelElement implements Relat
@JsonInclude(Include.NON_NULL)
protected Multiplicity multiplicity;
+ @JsonInclude(Include.NON_NULL)
+ protected Boolean required;
+
+ @JsonInclude(Include.NON_NULL)
+ protected String column;
/**
* {@inheritDoc}
@@ -51,6 +56,16 @@ public Multiplicity getMultiplicity() {
return multiplicity;
}
+ @Override
+ public Boolean isRequired() {
+ return required;
+ }
+
+ @Override
+ public String getColumn() {
+ return column;
+ }
+
/**
* {@inheritDoc}
diff --git a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecord.java b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecord.java
index 308a34287..d51636159 100644
--- a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecord.java
+++ b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecord.java
@@ -15,6 +15,7 @@
import java.util.Set;
import java.util.TreeSet;
+import com.boozallen.aiops.mda.metamodel.element.Relation;
import com.boozallen.aiops.mda.metamodel.element.java.JavaRecordFieldType;
import com.boozallen.aiops.mda.metamodel.element.util.JavaElementUtils;
import org.technologybrewery.fermenter.mda.TypeManager;
@@ -63,6 +64,18 @@ public List getFields() {
return fields;
}
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public List getRelations() {
+ List relations = new ArrayList<>();
+ for (Relation relation: super.getRelations()){
+ relations.add(new SparkRecordRelation(relation));
+ }
+ return relations;
+ }
+
/**
* {@inheritDoc}
*/
diff --git a/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecordRelation.java b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecordRelation.java
new file mode 100644
index 000000000..e978f8796
--- /dev/null
+++ b/foundation/foundation-mda/src/main/java/com/boozallen/aiops/mda/metamodel/element/spark/SparkRecordRelation.java
@@ -0,0 +1,38 @@
+package com.boozallen.aiops.mda.metamodel.element.spark;
+
+/*-
+ * #%L
+ * aiSSEMBLE::Foundation::MDA
+ * %%
+ * Copyright (C) 2021 Booz Allen
+ * %%
+ * This software package is licensed under the Booz Allen Public License. All Rights Reserved.
+ * #L%
+ */
+
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.boozallen.aiops.mda.metamodel.element.BaseRecordRelationDecorator;
+import com.boozallen.aiops.mda.metamodel.element.Relation;
+
+/**
+ * Decorates a record Relation with Spark-specific functionality.
+ */
+public class SparkRecordRelation extends BaseRecordRelationDecorator {
+
+ /**
+ * Creates a new SparkRecordRelation that decorates the given relation.
+ */
+ public SparkRecordRelation(Relation recordRelationToDecorate) {
+ super(recordRelationToDecorate);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String getDocumentation() {
+ return StringUtils.isNotBlank(super.getDocumentation()) ? super.getDocumentation() : "";
+ }
+}
diff --git a/foundation/foundation-mda/src/main/resources/aiops-record-schema.json b/foundation/foundation-mda/src/main/resources/aiops-record-schema.json
index 258d045e6..62cc01599 100644
--- a/foundation/foundation-mda/src/main/resources/aiops-record-schema.json
+++ b/foundation/foundation-mda/src/main/resources/aiops-record-schema.json
@@ -95,6 +95,12 @@
},
"multiplicity": {
"type": "string"
+ },
+ "required": {
+ "type": "string"
+ },
+ "column": {
+ "type": "string"
}
},
"required": [
@@ -106,4 +112,4 @@
"required": [
"name", "package", "fields"
]
-}
\ No newline at end of file
+}
diff --git a/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm b/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm
index 6b05a3f91..01fa21bca 100644
--- a/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm
+++ b/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm
@@ -12,13 +12,19 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.NotImplementedException;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.types.DataTypes;
import scala.collection.JavaConverters;
import scala.collection.Seq;
+import scala.collection.mutable.WrappedArray;
import com.boozallen.aiops.data.delivery.spark.SparkSchema;
import com.boozallen.aiops.data.delivery.spark.SparkDatasetUtils;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.lit;
@@ -32,11 +38,17 @@ import static org.apache.spark.sql.functions.lit;
*/
public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
+ private static final String ARRAY = "array";
#set($columnVars = {})
#foreach ($field in $record.fields)
#set ($columnVars[$field.name] = "${field.upperSnakecaseName}_COLUMN")
public static final String ${columnVars[$field.name]} = "${field.sparkAttributes.columnName}";
#end
+#set($relationVars = {})
+#foreach ($relation in $record.relations)
+ #set ($relationVars[$relation.name] = "${relation.upperSnakecaseName}_COLUMN")
+ public static final String ${relationVars[$relation.name]} = "${relation.columnName}";
+#end
public ${record.capitalizedName}SchemaBase() {
super();
@@ -47,10 +59,17 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
#else
add(${columnVars[$field.name]}, ${field.shortType}, ${field.sparkAttributes.isNullable()}, "${field.description}");
#end
+ #end
+ #foreach ($relation in $record.relations)
+ #if ($relation.isOneToManyRelation())
+ add(${relationVars[$relation.name]}, DataTypes.createArrayType(new ${relation.name}Schema().getStructType()), ${relation.isNullable()}, "${relation.documentation}");
+ #else
+ add(${relationVars[$relation.name]}, new ${relation.name}Schema().getStructType(), ${relation.isNullable()}, "${relation.documentation}");
+ #end
#end
}
- #if ($record.hasFields())
+ #if (($record.hasFields()) || ($record.hasRelations()))
/**
* Casts the given dataset to this schema.
*
@@ -62,10 +81,16 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
#foreach ($field in $record.fields)
DataType ${field.name}Type = getDataType(${columnVars[$field.name]});
#end
+ #foreach ($relation in $record.relations)
+ DataType ${relation.uncapitalizeName}Type = getDataType(${relationVars[$relation.name]});
+ #end
return dataset
#foreach ($field in $record.fields)
- .withColumn(${columnVars[$field.name]}, col(${columnVars[$field.name]}).cast(${field.name}Type))#if (!$foreach.hasNext);#end
+ .withColumn(${columnVars[$field.name]}, col(${columnVars[$field.name]}).cast(${field.name}Type))#if((!$foreach.hasNext) && ($relationVars == {}));#end
+ #end
+ #foreach ($relation in $record.relations)
+ .withColumn(${relationVars[$relation.name]}, col(${relationVars[$relation.name]}).cast(${relation.uncapitalizeName}Type))#if(!$foreach.hasNext);#end
#end
}
#end
@@ -90,34 +115,53 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
/***
* Appends spark validation logic to an unvalidated spark DataFrame (org.apache.spark.sql.Dataset)
+ *
+ * @param data the spark Dataset to perform validation against
* @return Dataset with appended validation logic
*/
public Dataset validateDataFrame(Dataset data) {
+ return validateDataFrame(data, "");
+ }
+
+ /***
+ * Appends spark validation logic to an unvalidated spark DataFrame (org.apache.spark.sql.Dataset)
+ *
+ * @param data the spark Dataset to perform validation against
+ * @param columnPrefix a prefix to append to the dataset columns that validation is being performed on
+ * @return Dataset with appended validation logic
+ */
+ protected Dataset validateDataFrame(Dataset data, String columnPrefix) {
+ for (String headerField : getStructType().fieldNames()) {
+ if (StringUtils.equals(ARRAY, getDataType(headerField).typeName())) {
+ throw new NotImplementedException(
+ "Validation against relations with One to Many multiplicity is not yet implemented");
+ }
+ }
Dataset dataWithValidations = data
#foreach ($field in $record.fields)
#if (${field.isRequired()})
- .withColumn(${columnVars[$field.name]} + "_IS_NOT_NULL", col(${columnVars[$field.name]}).isNotNull())
+ .withColumn(${columnVars[$field.name]} + "_IS_NOT_NULL", col(columnPrefix + ${columnVars[$field.name]}).isNotNull())
#end
#if (${field.getValidation().getMinValue()})
- .withColumn(${columnVars[$field.name]} + "_GREATER_THAN_MIN", col(${columnVars[$field.name]}).gt(lit(${field.getValidation().getMinValue()})).or(col(${columnVars[$field.name]}).equalTo(lit(${field.getValidation().getMinValue()}))))
+ .withColumn(${columnVars[$field.name]} + "_GREATER_THAN_MIN", col(columnPrefix + ${columnVars[$field.name]}).gt(lit(${field.getValidation().getMinValue()})).or(col(columnPrefix + ${columnVars[$field.name]}).equalTo(lit(${field.getValidation().getMinValue()}))))
#end
#if (${field.getValidation().getMaxValue()})
- .withColumn(${columnVars[$field.name]} + "_LESS_THAN_MAX", col(${columnVars[$field.name]}).lt(lit(${field.getValidation().getMaxValue()})).or(col(${columnVars[$field.name]}).equalTo(lit(${field.getValidation().getMaxValue()}))))
+ .withColumn(${columnVars[$field.name]} + "_LESS_THAN_MAX", col(columnPrefix + ${columnVars[$field.name]}).lt(lit(${field.getValidation().getMaxValue()})).or(col(columnPrefix + ${columnVars[$field.name]}).equalTo(lit(${field.getValidation().getMaxValue()}))))
#end
#if (${field.getValidation().getScale()})
- .withColumn(${columnVars[$field.name]} + "_MATCHES_SCALE", col(${columnVars[$field.name]}).rlike(("^[0-9]*(?:\\.[0-9]{0,${field.getValidation().getScale()}})?$")))
+ .withColumn(${columnVars[$field.name]} + "_MATCHES_SCALE", col(columnPrefix + ${columnVars[$field.name]}).rlike(("^[0-9]*(?:\\.[0-9]{0,${field.getValidation().getScale()}})?$")))
#end
#if (${field.getValidation().getMinLength()})
- .withColumn(${columnVars[$field.name]} + "_GREATER_THAN_MAX_LENGTH", col(${columnVars[$field.name]}).rlike(("^.{${field.getValidation().getMinLength()},}")))
+ .withColumn(${columnVars[$field.name]} + "_GREATER_THAN_MAX_LENGTH", col(columnPrefix + ${columnVars[$field.name]}).rlike(("^.{${field.getValidation().getMinLength()},}")))
#end
#if (${field.getValidation().getMaxLength()})
- .withColumn(${columnVars[$field.name]} + "_LESS_THAN_MAX_LENGTH", col(${columnVars[$field.name]}).rlike(("^.{${field.getValidation().getMaxLength()},}")).equalTo(lit(false)))
+ .withColumn(${columnVars[$field.name]} + "_LESS_THAN_MAX_LENGTH", col(columnPrefix + ${columnVars[$field.name]}).rlike(("^.{${field.getValidation().getMaxLength()},}")).equalTo(lit(false)))
#end
#foreach ($format in $field.getValidation().getFormats())
#if ($foreach.first)
- .withColumn(${columnVars[$field.name]} + "_MATCHES_FORMAT", col(${columnVars[$field.name]}).rlike(("$format.replace("\","\\")"))
+ .withColumn(${columnVars[$field.name]} + "_MATCHES_FORMAT", col(columnPrefix +${columnVars[$field.name]}).rlike(("$format.replace("\","\\")"))
#else
- .or(col(${columnVars[$field.name]}).rlike(("$format.replace("\","\\")")))
+ .or(col(columnPrefix + ${columnVars[$field.name]}).rlike(("$format.replace("\","\\")")))
#end
#if ($foreach.last)
)
@@ -125,6 +169,15 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
#end
#end ;
+ #foreach($relation in $record.relations)
+ #if($relation.isOneToManyRelation())
+ ## TODO implement validation for records with one to Many relations
+ #else
+ ${relation.capitalizedName}Schema ${relation.uncapitalizeName}Schema = new ${relation.capitalizedName}Schema();
+ dataWithValidations = dataWithValidations.withColumn(${relationVars[$relation.name]} + "_VALID", lit(!${relation.uncapitalizeName}Schema.validateDataFrame(data.select(col(${relationVars[$relation.name]})), ${relationVars[$relation.name]} + ".").isEmpty()));
+ #end
+ #end
+
Column filterSchema = null;
List validationColumns = new ArrayList<>();
Collections.addAll(validationColumns, dataWithValidations.columns());
@@ -159,14 +212,34 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
return RowFactory.create(
#foreach ($field in $record.fields)
#if ($field.type.dictionaryType.isComplex())
- record.get${field.capitalizedName}() != null ? record.get${field.capitalizedName}().getValue() : null#if ($foreach.hasNext),#end
+ record.get${field.capitalizedName}() != null ? record.get${field.capitalizedName}().getValue() : null#if (($foreach.hasNext) || ($record.hasRelations())),#end
#else
- record.get${field.capitalizedName}()#if ($foreach.hasNext),#end
+ record.get${field.capitalizedName}()#if (($foreach.hasNext) || ($record.hasRelations())),#end
+ #end
+ #end
+ #foreach ($relation in $record.relations)
+ #if ($relation.isOneToManyRelation())
+ record.get${relation.capitalizedName}() != null ? ${relation.capitalizedName}Schema.asRows(record.get${relation.capitalizedName}()) : null#if ($foreach.hasNext),#end
+ #else
+ record.get${relation.capitalizedName}() != null ? ${relation.capitalizedName}Schema.asRow(record.get${relation.capitalizedName}()) : null#if ($foreach.hasNext),#end
#end
#end
);
}
+ /**
+ * Returns a list of given records as Spark dataset rows.
+ *
+ * @return the list of records as Spark dataset rows
+ */
+ public static List asRows(List<${record.capitalizedName}> records) {
+ List rows = new ArrayList<>();
+ for(${record.capitalizedName} ${record.name}: records){
+ rows.add(asRow(${record.name}));
+ }
+ return rows;
+ }
+
/**
* Returns a given Spark dataset row as a record.
@@ -183,7 +256,28 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
record.set${field.capitalizedName}(${field.name}Value);
#end
#end
+ #foreach ($relation in $record.relations)
+ #if ($relation.isOneToManyRelation())
+ Row[] ${relation.uncapitalizeName}Rows = (Row[])((WrappedArray) SparkDatasetUtils.getRowValue(row, "${relation.columnName}")).array();
+ record.set${relation.capitalizedName}(${relation.capitalizedName}Schema.mapRows(Arrays.asList(${relation.uncapitalizeName}Rows)));
+ #else
+ ${relation.capitalizedName} ${relation.uncapitalizeName}Value = ${relation.capitalizedName}Schema.mapRow((Row) SparkDatasetUtils.getRowValue(row, "${relation.columnName}"));
+ record.set${relation.capitalizedName}(${relation.uncapitalizeName}Value);
+ #end
+ #end
return record;
}
+ /**
+ * Returns a given list of Spark dataset rows as a list of records.
+ *
+ * @return the list of records mapped from the given rows
+ */
+ public static List<${record.capitalizedName}> mapRows(List rows) {
+ List<${record.capitalizedName}> ${record.name} = new ArrayList<>();
+ for(Row row : rows){
+ ${record.name}.add(mapRow(row));
+ }
+ return ${record.name};
+ }
}
diff --git a/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/encryption.java.vm b/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/encryption.java.vm
index df1a40bb9..770145cf0 100644
--- a/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/encryption.java.vm
+++ b/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/encryption.java.vm
@@ -111,4 +111,4 @@ ${step.encryptionSignature} {
return datasetWithEncryptionPolicyApplied;
#end
#end
-}
\ No newline at end of file
+}
diff --git a/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/synchronous.processor.base.java.vm b/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/synchronous.processor.base.java.vm
index 517ea13aa..22d2659d6 100644
--- a/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/synchronous.processor.base.java.vm
+++ b/foundation/foundation-mda/src/main/resources/templates/data-delivery-spark/synchronous.processor.base.java.vm
@@ -404,4 +404,4 @@ public abstract class ${step.capitalizedName}Base extends AbstractPipelineStep {
this.$store.lowerName = fileStore;
}
#end
-}
\ No newline at end of file
+}
diff --git a/foundation/foundation-mda/src/test/java/com/boozallen/aiops/mda/metamodel/element/RecordSteps.java b/foundation/foundation-mda/src/test/java/com/boozallen/aiops/mda/metamodel/element/RecordSteps.java
index beebc49e2..526874579 100644
--- a/foundation/foundation-mda/src/test/java/com/boozallen/aiops/mda/metamodel/element/RecordSteps.java
+++ b/foundation/foundation-mda/src/test/java/com/boozallen/aiops/mda/metamodel/element/RecordSteps.java
@@ -12,6 +12,8 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
@@ -495,5 +497,39 @@ private RecordElement createRecordWithRelation(String name, String packageName,
return newRecord;
}
+ @Given("a record with a relation that does not define the required fields")
+ public void aRecordWithARelationThatDoesNotDefineRequired() {
+ RecordElement childRecord = createNewRecordWithNameAndPackage("ChildRecord", RELATION_PACKAGE);
+ RecordFieldElement field = new RecordFieldElement();
+ field.setName("FieldB");
+ RecordFieldTypeElement type = new RecordFieldTypeElement();
+ type.setName("phoneNumber");
+ field.setType(type);
+ childRecord.addField(field);
+ saveRecordToFile(childRecord);
+
+ RelationElement recordRelation = new RelationElement();
+ recordRelation.setName("ChildRecord");
+ recordRelation.setPackage(RELATION_PACKAGE);
+
+ RecordElement parentRecord = createNewRecordWithNameAndPackage("ParentRecord", TEST_RECORD_RELATIONS);
+ parentRecord.addRelation(recordRelation);
+ saveRecordToFile(parentRecord);
+ }
+
+ @Then("the relation has the correct default values")
+ public void theRelationHasTheCorrectDefaultValues() {
+ Record parentRecord = this.metadataRepo.getRecord(TEST_RECORD_RELATIONS,"ParentRecord");
+ assertNotNull("Parent Record with relation was not created successfully", parentRecord);
+ assertNotNull("Parent record does not have the appropriate relation",
+ parentRecord.getRelations());
+ for(Relation childRelation : parentRecord.getRelations()) {
+ assertNull("Child relation should not be required", childRelation.isRequired());
+ assertNull("Child relation should not have a description", childRelation.getDocumentation());
+ assertNull("Child relation should not have a column", childRelation.getColumn());
+ assertEquals("Child relation should default multiplicity to 1-M", Relation.Multiplicity.ONE_TO_MANY,
+ childRelation.getMultiplicity());
+ }
+ }
}
diff --git a/foundation/foundation-mda/src/test/resources/specifications/record-model.feature b/foundation/foundation-mda/src/test/resources/specifications/record-model.feature
index 0b99338ad..83f1dbc30 100644
--- a/foundation/foundation-mda/src/test/resources/specifications/record-model.feature
+++ b/foundation/foundation-mda/src/test/resources/specifications/record-model.feature
@@ -208,3 +208,8 @@ Feature: Specify record of semantically defined types
Then the records are successfully created
And you can reference record B from record A
And you can reference record A from record B
+
+ Scenario: A records relation is not required by default
+ Given a record with a relation that does not define the required fields
+ When records are read
+ Then the relation has the correct default values
diff --git a/test/test-mda-models/test-data-delivery-spark-model/pom.xml b/test/test-mda-models/test-data-delivery-spark-model/pom.xml
index 191b76e7b..80937bc62 100644
--- a/test/test-mda-models/test-data-delivery-spark-model/pom.xml
+++ b/test/test-mda-models/test-data-delivery-spark-model/pom.xml
@@ -34,10 +34,12 @@
test/java/com/boozallen/aiops/mda/pattern/RecordTest.java
test/java/com/boozallen/aiops/mda/pattern/LineageTest.java
test/java/com/boozallen/aiops/mda/pattern/RelationTest.java
+ test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java
test/resources/config/
test/resources/specifications/record.feature
test/resources/specifications/lineage.feature
test/resources/specifications/relation.feature
+ test/resources/specifications/sparkSchema.feature
test/resources/krausening/base/
test/resources/apps
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/dictionaries/SparkJavaDataDeliveryDictionary.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/dictionaries/SparkJavaDataDeliveryDictionary.json
index 5685d10f3..bab14e772 100644
--- a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/dictionaries/SparkJavaDataDeliveryDictionary.json
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/dictionaries/SparkJavaDataDeliveryDictionary.json
@@ -84,6 +84,10 @@
{
"name": "binarydata",
"simpleType": "bytearray"
+ },
+ {
+ "name": "string",
+ "simpleType": "string"
}
]
}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Address.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Address.json
index 3b0e33eb4..04a7a4a7e 100644
--- a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Address.json
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Address.json
@@ -5,6 +5,7 @@
"fields": [
{
"name": "street",
+ "required": true,
"type": {
"name": "street",
"package": "com.boozallen.aiops.mda.pattern.dictionary"
@@ -32,4 +33,4 @@
}
}
]
-}
\ No newline at end of file
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/City.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/City.json
new file mode 100644
index 000000000..f911d5a80
--- /dev/null
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/City.json
@@ -0,0 +1,27 @@
+{
+ "name": "City",
+ "package": "com.boozallen.aiops.mda.pattern.record",
+ "description": "Example City record for testing Spark Java Data Relations",
+ "relations": [
+ {
+ "name": "Mayor",
+ "package": "com.boozallen.aiops.mda.pattern.records",
+ "multiplicity": "1-1",
+ "column": "MAYOR",
+ "required": true,
+ "documentation": "There is one mayor in the city"
+ },
+ {
+ "name": "State",
+ "package": "com.boozallen.aiops.mda.pattern.records",
+ "multiplicity": "M-1",
+ "column": "STATE"
+ },
+ {
+ "name": "Street",
+ "package": "com.boozallen.aiops.mda.pattern.records",
+ "multiplicity": "1-M",
+ "column": "STREET"
+ }
+ ]
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Mayor.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Mayor.json
new file mode 100644
index 000000000..636622c44
--- /dev/null
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Mayor.json
@@ -0,0 +1,22 @@
+{
+ "name": "Mayor",
+ "package": "com.boozallen.aiops.mda.pattern.record",
+ "description": "Example Mayor record for testing Spark Java Data Relations",
+ "fields": [
+ {
+ "name": "name",
+ "type": {
+ "name": "string",
+ "package": "com.boozallen.aiops.mda.pattern.dictionary"
+ }
+ },
+ {
+ "name": "integerValidation",
+ "type": {
+ "name": "integerWithValidation",
+ "package": "com.boozallen.aiops.mda.pattern.dictionary"
+ },
+ "column": "int_v8n"
+ }
+ ]
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/PersonWithMToOneRelation.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/PersonWithMToOneRelation.json
index a9e1e3701..12226ec4b 100644
--- a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/PersonWithMToOneRelation.json
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/PersonWithMToOneRelation.json
@@ -19,4 +19,4 @@
"documentation":"Many to One Relation between Person and Address"
}
]
-}
\ No newline at end of file
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/State.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/State.json
new file mode 100644
index 000000000..2a0fce643
--- /dev/null
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/State.json
@@ -0,0 +1,14 @@
+{
+ "name": "State",
+ "package": "com.boozallen.aiops.mda.pattern.record",
+ "description": "Example State record for testing Spark Java Data Relations",
+ "fields": [
+ {
+ "name": "name",
+ "type": {
+ "name": "string",
+ "package": "com.boozallen.aiops.mda.pattern.dictionary"
+ }
+ }
+ ]
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Street.json b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Street.json
new file mode 100644
index 000000000..a6e227368
--- /dev/null
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/main/resources/records/Street.json
@@ -0,0 +1,29 @@
+{
+ "name": "Street",
+ "package": "com.boozallen.aiops.mda.pattern.record",
+ "description": "Example Street record for testing Spark Java Data Relations",
+ "fields": [
+ {
+ "name": "name",
+ "type": {
+ "name": "string",
+ "package": "com.boozallen.aiops.mda.pattern.dictionary"
+ }
+ },
+ {
+ "name": "county",
+ "type": {
+ "name": "string",
+ "package": "com.boozallen.aiops.mda.pattern.dictionary"
+ }
+ },
+ {
+ "name": "integerValidation",
+ "type": {
+ "name": "integerWithValidation",
+ "package": "com.boozallen.aiops.mda.pattern.dictionary"
+ },
+ "column": "int_v8n"
+ }
+ ]
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java b/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java
new file mode 100644
index 000000000..f93925295
--- /dev/null
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java
@@ -0,0 +1,247 @@
+package com.boozallen.aiops.mda.pattern;
+
+/*-
+ * #%L
+ * aiSSEMBLE::Test::MDA::Data Delivery Spark
+ * %%
+ * Copyright (C) 2021 Booz Allen
+ * %%
+ * This software package is licensed under the Booz Allen Public License. All Rights Reserved.
+ * #L%
+ */
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+import com.boozallen.aiops.mda.pattern.dictionary.IntegerWithValidation;
+import com.boozallen.aiops.mda.pattern.record.Address;
+import com.boozallen.aiops.mda.pattern.record.City;
+import com.boozallen.aiops.mda.pattern.record.CitySchema;
+import com.boozallen.aiops.mda.pattern.record.Mayor;
+import com.boozallen.aiops.mda.pattern.record.MayorSchema;
+import com.boozallen.aiops.mda.pattern.record.PersonWithMToOneRelation;
+import com.boozallen.aiops.mda.pattern.record.PersonWithMToOneRelationSchema;
+import com.boozallen.aiops.mda.pattern.record.PersonWithOneToOneRelation;
+import com.boozallen.aiops.mda.pattern.record.PersonWithOneToOneRelationSchema;
+import com.boozallen.aiops.mda.pattern.record.State;
+import com.boozallen.aiops.mda.pattern.record.Street;
+
+import io.cucumber.java.Before;
+import io.cucumber.java.en.And;
+import io.cucumber.java.en.Given;
+import io.cucumber.java.en.Then;
+import io.cucumber.java.en.When;
+
+public class SparkSchemaTest {
+ CitySchema citySchema;
+ PersonWithOneToOneRelationSchema personWithOneToOneRelationSchema;
+ PersonWithMToOneRelationSchema personWithMToOneRelationSchema;
+ SparkSession spark;
+ Dataset<Row> cityDataSet;
+ Dataset<Row> personWithOneToOneRelationDataSet;
+ Dataset<Row> personWithMToOneRelationDataSet;
+ Dataset<Row> validatedDataSet;
+ Exception exception;
+
+ @Before("@SparkSchema")
+ public void setUp() {
+ this.spark = SparkTestHarness.getSparkSession();
+ }
+
+ @Given("the record \"City\" exists with the following relations")
+ public void theRecordExistsWithTheFollowingRelations(Map multiplicity) {
+ // Handled with MDA generation
+ }
+
+ @Given("the spark schema is generate for the \"PersonWithOneToOneRelation\" record")
+ public void theSparkSchemaIsGenerateForThePersonWithOneToOneRelationRecord() {
+ this.personWithOneToOneRelationSchema = new PersonWithOneToOneRelationSchema();
+ }
+
+ @Given("a {string} \"PersonWithOneToOneRelation\" dataSet exists")
+ public void aValidPersonWithOneToOneRelationDataSetExists(String validity) {
+ PersonWithOneToOneRelation personWithOneToOneRelation = new PersonWithOneToOneRelation();
+ if (StringUtils.equals("valid", validity)){
+ personWithOneToOneRelation.setAddress(createAddress());
+ } else {
+ Address address = createAddress();
+ address.setStreet(null);
+ personWithOneToOneRelation.setAddress(address);
+ }
+
+ List<Row> rows = Collections.singletonList(PersonWithOneToOneRelationSchema.asRow(personWithOneToOneRelation));
+ this.personWithOneToOneRelationDataSet = spark.createDataFrame(rows,
+ this.personWithOneToOneRelationSchema.getStructType());
+ }
+
+ @Given("the spark schema is generate for the \"PersonWithMToOneRelation\" record")
+ public void theSparkSchemaIsGenerateForThePersonWithMToOneRelationRecord() {
+ this.personWithMToOneRelationSchema = new PersonWithMToOneRelationSchema();
+ }
+
+ @Given("a {string} \"PersonWithMToOneRelation\" dataSet exists")
+ public void aValidPersonWithManyToOneRelationDataSetExists(String validity) {
+ PersonWithMToOneRelation personWithMToOneRelation = new PersonWithMToOneRelation();
+ if (StringUtils.equals("valid", validity)){
+ personWithMToOneRelation.setAddress(createAddress());
+ } else {
+ Address address = createAddress();
+ address.setStreet(null);
+ personWithMToOneRelation.setAddress(address);
+ }
+
+ List<Row> rows = Collections.singletonList(PersonWithMToOneRelationSchema.asRow(personWithMToOneRelation));
+ this.personWithMToOneRelationDataSet = spark.createDataFrame(rows,
+ this.personWithMToOneRelationSchema.getStructType());
+ }
+
+ @Given("a valid \"City\" dataSet exists")
+ public void aValidDataSetExists() {
+ List<Row> rows = Collections.singletonList(CitySchema.asRow(createCity()));
+ this.cityDataSet = spark.createDataFrame(rows, this.citySchema.getStructType());
+ }
+
+ @Given("a \"City\" dataSet with an invalid relation exists")
+ public void aCityDataSetWithAnInvalidRelationExists() {
+ IntegerWithValidation integerWithValidation = new IntegerWithValidation(0);
+ Mayor mayor = new Mayor();
+ mayor.setName("Sam");
+ mayor.setIntegerValidation(integerWithValidation);
+
+ City city = createCity();
+ city.setMayor(mayor);
+ List<Row> rows = Collections.singletonList(CitySchema.asRow(city));
+ this.cityDataSet = spark.createDataFrame(rows, this.citySchema.getStructType());
+ }
+
+ @When("the spark schema is generate for the \"City\" record")
+ public void theSparkSchemaIsGenerateForTheCityRecord() {
+ this.citySchema = new CitySchema();
+ }
+
+ @When("a \"City\" POJO is mapped to a spark dataset using the schema")
+ public void aSparkDatasetExists() {
+ City expectedCity = createCity();
+ List<Row> cityRows = Collections.singletonList(CitySchema.asRow(expectedCity));
+
+ this.cityDataSet = this.spark.createDataFrame(cityRows, this.citySchema.getStructType());
+ }
+
+ @When("spark schema validation is performed on the dataSet")
+ public void sparkSchemaValidationIsPerformedOnTheDataSet() {
+ try {
+ this.validatedDataSet = this.citySchema.validateDataFrame(this.cityDataSet);
+ }catch (Exception e) {
+ this.exception = e;
+ }
+ }
+
+ @When("spark schema validation is performed on the \"PersonWithOneToOneRelation\" dataSet")
+ public void sparkSchemaValidationIsPerformedOnThePersonWithOneToOneRelationDataSet() {
+ try {
+ this.validatedDataSet = this.personWithOneToOneRelationSchema.validateDataFrame(this.personWithOneToOneRelationDataSet);
+ }catch (Exception e) {
+ this.exception = e;
+ }
+ }
+
+ @When("spark schema validation is performed on the \"PersonWithMToOneRelation\" dataSet")
+ public void sparkSchemaValidationIsPerformedOnThePersonWithMToOneRelationDataSet() {
+ try {
+ this.validatedDataSet =
+ this.personWithMToOneRelationSchema.validateDataFrame(this.personWithMToOneRelationDataSet);
+ }catch (Exception e) {
+ this.exception = e;
+ }
+ }
+
+ @Then("the schema data type for {string} is {string}")
+ public void theSchemaDataTypeForIs(String record, String type) {
+ assertEquals("The type for record is not correct", type,
+ this.citySchema.getDataType(record.toUpperCase()).toString());
+ }
+
+ @Then("the dataset has the correct values for the relational objects")
+ public void aPOJOCanBeMappedToASparkRow() {
+ City expectedCity = createCity();
+ for (Row row : this.cityDataSet.collectAsList()) {
+ City actualCity = CitySchema.mapRow(row);
+ assertEquals("City did not map correctly. Incorrect number of Street relations",
+ expectedCity.getStreet().size(), actualCity.getStreet().size());
+ assertEquals("City did not map correctly. Incorrect Street relation",
+ expectedCity.getStreet().get(0).toJson(), actualCity.getStreet().get(0).toJson());
+ assertEquals("City did not map correctly. Incorrect Mayor relation", expectedCity.getMayor().toJson(),
+ actualCity.getMayor().toJson());
+ assertEquals("City did not map correctly. Incorrect State relation", expectedCity.getState().toJson(),
+ actualCity.getState().toJson());
+ }
+ }
+
+ @Then("the validation fails with NotYetImplementedException")
+ public void theValidationFailsWithNotYetImplementedException() {
+ assertNotNull("No exception was thrown", this.exception);
+ assertTrue("Thrown exception is not an instance of NotImplementedException",
+ this.exception instanceof NotImplementedException);
+ }
+
+ @Then("the dataSet validation {string}")
+ public void theDataSetValidationIsSuccessful(String succeed) {
+ if(StringUtils.equals("fails", succeed)) {
+ assertTrue("Validation passed when it should have failed", validatedDataSet.isEmpty());
+ } else {
+ assertNotNull("Validation failed when it should have passed", validatedDataSet);
+ assertFalse("Validation failed when it should have passed", validatedDataSet.isEmpty());
+ }
+ }
+
+ private City createCity(){
+ IntegerWithValidation integerWithValidation = new IntegerWithValidation(0);
+
+ List<Street> streets = new ArrayList<>();
+ Street street = new Street();
+ street.setName("Street 1 Name");
+ street.setCounty("County 2 Name");
+ street.setIntegerValidation(integerWithValidation);
+ streets.add(street);
+ Street street2 = new Street();
+ street2.setName("Street 2 Name");
+ street2.setCounty("County 2 Name");
+ street2.setIntegerValidation(integerWithValidation);
+ streets.add(street2);
+
+ State state = new State();
+ state.setName("Maryland");
+
+ Mayor mayor = new Mayor();
+ mayor.setName("Sam");
+ mayor.setIntegerValidation(integerWithValidation);
+
+ City city = new City();
+ city.setStreet(streets);
+ city.setMayor(mayor);
+ city.setState(state);
+ return city;
+ }
+
+ private Address createAddress(){
+ Address address = new Address();
+ address.setZipcode(12);
+ address.setCity("City");
+ address.setState("State");
+ address.setStreet("Street");
+ return address;
+ }
+}
diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature b/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature
new file mode 100644
index 000000000..e2771d07c
--- /dev/null
+++ b/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature
@@ -0,0 +1,49 @@
+@SparkSchema
+Feature: Records with relations are generated correctly and function as expected
+
+ Background:
+ Given the record "City" exists with the following relations
+ | multiplicity | record |
+ | 1-1 | Mayor |
+ | 1-M | Street |
+ | M-1 | State |
+
+ Scenario Outline: Generated Spark schemas are able to get the correct data types
+ When the spark schema is generate for the "City" record
+ Then the schema data type for "<record>" is "<type>"
+ Examples:
+ | record | type |
+ | Mayor | StructType(StructField(name,StringType,true),StructField(int_v8n,IntegerType,true)) |
+ | Street | ArrayType(StructType(StructField(name,StringType,true),StructField(county,StringType,true),StructField(int_v8n,IntegerType,true)),true) |
+ | State | StructType(StructField(name,StringType,true)) |
+
+ Scenario: Generated Spark schemas have working to and from POJO functionality
+ Given the spark schema is generate for the "City" record
+ When a "City" POJO is mapped to a spark dataset using the schema
+ Then the dataset has the correct values for the relational objects
+
+ Scenario: Generated Spark schemas fail to validate with a not yet implemented exception
+ Given the spark schema is generate for the "City" record
+ And a valid "City" dataSet exists
+ When spark schema validation is performed on the dataSet
+ Then the validation fails with NotYetImplementedException
+
+ Scenario Outline: Records with a One to One relation can be validated using the spark schema
+ Given the spark schema is generate for the "PersonWithOneToOneRelation" record
+ And a "<validity>" "PersonWithOneToOneRelation" dataSet exists
+ When spark schema validation is performed on the "PersonWithOneToOneRelation" dataSet
+ Then the dataSet validation "<success>"
+ Examples:
+ | validity | success |
+ | valid | passes |
+ | invalid | fails |
+
+ Scenario Outline: Records with a Many to One relation can be validated using the spark schema
+ Given the spark schema is generate for the "PersonWithMToOneRelation" record
+ And a "<validity>" "PersonWithMToOneRelation" dataSet exists
+ When spark schema validation is performed on the "PersonWithMToOneRelation" dataSet
+ Then the dataSet validation "<success>"
+ Examples:
+ | validity | success |
+ | valid | passes |
+ | invalid | fails |