From d3951b8436e07ff816b76287c558cdb72c6a9b30 Mon Sep 17 00:00:00 2001 From: Chaoyi Sun Date: Wed, 5 Feb 2025 10:30:05 -0800 Subject: [PATCH] #560 address pr comments --- DRAFT_RELEASE_NOTES.md | 1 - .../spark.schema.base.java.vm | 7 +++---- .../aiops/mda/pattern/SparkSchemaTest.java | 21 +------------------ .../specifications/sparkSchema.feature | 6 ------ 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/DRAFT_RELEASE_NOTES.md b/DRAFT_RELEASE_NOTES.md index 0b814c848..804ea521d 100644 --- a/DRAFT_RELEASE_NOTES.md +++ b/DRAFT_RELEASE_NOTES.md @@ -15,7 +15,6 @@ Spark and PySpark have been upgraded from version 3.5.2 to 3.5.4. ## Record Relation To enable nested data records, we have added a new relation feature to the record metamodel. This allows records to reference other records. For more details, refer to the [Record Relation Options](https://boozallen.github.io/aissemble/aissemble/current-dev/record-metamodel.html#_record_relation_options). Several features are still a work in progress: -- Spark-based validation for records with a One to Many multiplicity. (POJO validation is available.) 
- PySpark schema generation for records with any multiplicity ## Helm Charts Resource Specification diff --git a/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm b/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm index 7b10e5331..671cda446 100644 --- a/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm +++ b/foundation/foundation-mda/src/main/resources/templates/data-delivery-data-records/spark.schema.base.java.vm @@ -39,7 +39,6 @@ import static org.apache.spark.sql.functions.explode; */ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema { - private static final String ARRAY = "array"; #set($columnVars = {}) #foreach ($field in $record.fields) #set ($columnVars[$field.name] = "${field.upperSnakecaseName}_COLUMN") @@ -277,12 +276,13 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema { return ${record.name}; } -#if ($record.hasRelations()) + #foreach ($relation in $record.relations) #if ($relation.isOneToManyRelation()) /** - * Validate the given ${relation.capitalizedName} 1:M multiplicity relation dataset against ${relation.capitalizedName}Schema + * Validate the given ${relation.capitalizedName} 1:M multiplicity relation dataset against ${relation.capitalizedName}Schema. + * False will be returned if schema validation fails for any of the relation records. 
* @param ${relation.uncapitalizedName}Dataset * @return boolean value to indicate validation result */ @@ -299,5 +299,4 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema { #end #end -#end } diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java b/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java index 524eaa1af..9115a9fc3 100644 --- a/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java +++ b/test/test-mda-models/test-data-delivery-spark-model/src/test/java/com/boozallen/aiops/mda/pattern/SparkSchemaTest.java @@ -131,12 +131,6 @@ public void aValidPersonWithOneToManyRelationDataSetExists(String validity) { this.personWithOneToMRelationSchema.getStructType()); } - @Given("a valid \"City\" dataSet exists") - public void aValidDataSetExists() { - List rows = Collections.singletonList(CitySchema.asRow(createCity())); - this.cityDataSet = spark.createDataFrame(rows, this.citySchema.getStructType()); - } - @Given("a \"City\" dataSet with an invalid relation exists") public void aCityDataSetWithAnInvalidRelationExists() { IntegerWithValidation integerWithValidation = new IntegerWithValidation(0); @@ -163,15 +157,6 @@ public void aSparkDatasetExists() { this.cityDataSet = this.spark.createDataFrame(cityRows, this.citySchema.getStructType()); } - @When("spark schema validation is performed on the dataSet") - public void sparkSchemaValidationIsPerformedOnTheDataSet() { - try { - this.validatedDataSet = this.citySchema.validateDataFrame(this.cityDataSet); - }catch (Exception e) { - this.exception = e; - } - } - @When("spark schema validation is performed on the \"PersonWithOneToOneRelation\" dataSet") public void sparkSchemaValidationIsPerformedOnThePersonWithOneToOneRelationDataSet() { try { @@ -193,12 +178,8 @@ public void 
sparkSchemaValidationIsPerformedOnThePersonWithMToOneRelationDataSet @When("spark schema validation is performed on the \"PersonWithOneToMRelation\" dataSet") public void sparkSchemaValidationIsPerformedOnThePersonWithOneToMRelationDataSet() { - try { - this.validatedDataSet = + this.validatedDataSet = this.personWithOneToMRelationSchema.validateDataFrame(this.personWithOneToMRelationDataSet); - }catch (Exception e) { - this.exception = e; - } } @Then("the schema data type for {string} is {string}") diff --git a/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature b/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature index 02666dbdf..5cda1cc35 100644 --- a/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature +++ b/test/test-mda-models/test-data-delivery-spark-model/src/test/resources/specifications/sparkSchema.feature @@ -22,12 +22,6 @@ Feature: Records with relations are generated correctly and function as expected When a "City" POJO is mapped to a spark dataset using the schema Then the dataset has the correct values for the relational objects - Scenario: Spark schemas generated fails to validate with not yet implemented exception - Given the spark schema is generated for the "City" record - And a valid "City" dataSet exists - When spark schema validation is performed on the dataSet - Then the dataSet validation "passes" - Scenario Outline: Records with a One to One relation can be validated using the spark schema Given the spark schema is generated for the "PersonWithOneToOneRelation" record And a "" "PersonWithOneToOneRelation" dataSet exists