
Commit

Merge pull request #566 from boozallen/560-relation-record-spark-schema-validation

#560 address pr comments
csun-cpointe authored Feb 5, 2025
2 parents dd747b2 + d3951b8 commit 7e6706d
Showing 4 changed files with 4 additions and 31 deletions.
1 change: 0 additions & 1 deletion DRAFT_RELEASE_NOTES.md
@@ -15,7 +15,6 @@ Spark and PySpark have been upgraded from version 3.5.2 to 3.5.4.
## Record Relation
To enable nested data records, we have added a new relation feature to the record metamodel. This allows records to reference other records. For more details, refer to the [Record Relation Options](https://boozallen.github.io/aissemble/aissemble/current-dev/record-metamodel.html#_record_relation_options).
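As a rough illustration only (the Person and City names and field shapes below are assumptions for this note, not actual generated output), a record with a one-to-many relation surfaces in the generated Java POJOs roughly like this:

```java
import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of POJOs for records joined by a relation; the real
// classes are generated from the record metamodel and will differ in detail.
public class Person {
    private String name;
    // A one-to-many (1:M) relation is represented as a collection of the related record.
    private List<City> cities = new ArrayList<>();

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public List<City> getCities() { return cities; }
    public void setCities(List<City> cities) { this.cities = cities; }
}

class City {
    private String name;

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
}
```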
Several features are still a work in progress:
- Spark-based validation for records with a One to Many multiplicity. (POJO validation is available.)
- PySpark schema generation for records with any multiplicity

## Helm Charts Resource Specification
@@ -39,7 +39,6 @@ import static org.apache.spark.sql.functions.explode;
*/
public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {

private static final String ARRAY = "array";
#set($columnVars = {})
#foreach ($field in $record.fields)
#set ($columnVars[$field.name] = "${field.upperSnakecaseName}_COLUMN")
@@ -277,12 +276,13 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {
return ${record.name};
}

#if ($record.hasRelations())

#foreach ($relation in $record.relations)
#if ($relation.isOneToManyRelation())

/**
* Validate the given ${relation.capitalizedName} 1:M multiplicity relation dataset against ${relation.capitalizedName}Schema.
* Returns false if schema validation fails for any of the relation records.
* @param ${relation.uncapitalizedName}Dataset
* @return boolean value to indicate validation result
*/
@@ -299,5 +299,4 @@ public abstract class ${record.capitalizedName}SchemaBase extends SparkSchema {

#end
#end
#end
}
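The body of the generated 1:M validation method is collapsed in this view. Purely as a sketch of the general approach (the City relation, its "city" column name, the CitySchema helper, and the count-based pass/fail criterion are assumptions for illustration, not the template's actual output), a hand-written analogue might look like:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.explode;

// Hypothetical, hand-written analogue of the generated 1:M relation check.
public class PersonSchemaExample {

    public boolean validateCityDataFrame(Dataset<Row> personDataset) {
        // Flatten the array-typed relation column so every nested City record becomes its own row.
        Dataset<Row> cityRows = personDataset
                .select(explode(col("city")).as("city"))
                .select("city.*");

        // Delegate to the relation's own schema; treat the relation as invalid if
        // schema validation drops any nested row (assumes validateDataFrame returns
        // only the rows that passed validation).
        Dataset<Row> validRows = new CitySchema().validateDataFrame(cityRows);
        return validRows.count() == cityRows.count();
    }
}
```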
@@ -131,12 +131,6 @@ public void aValidPersonWithOneToManyRelationDataSetExists(String validity) {
this.personWithOneToMRelationSchema.getStructType());
}

@Given("a valid \"City\" dataSet exists")
public void aValidDataSetExists() {
List<Row> rows = Collections.singletonList(CitySchema.asRow(createCity()));
this.cityDataSet = spark.createDataFrame(rows, this.citySchema.getStructType());
}

@Given("a \"City\" dataSet with an invalid relation exists")
public void aCityDataSetWithAnInvalidRelationExists() {
IntegerWithValidation integerWithValidation = new IntegerWithValidation(0);
@@ -163,15 +157,6 @@ public void aSparkDatasetExists() {
this.cityDataSet = this.spark.createDataFrame(cityRows, this.citySchema.getStructType());
}

@When("spark schema validation is performed on the dataSet")
public void sparkSchemaValidationIsPerformedOnTheDataSet() {
try {
this.validatedDataSet = this.citySchema.validateDataFrame(this.cityDataSet);
}catch (Exception e) {
this.exception = e;
}
}

@When("spark schema validation is performed on the \"PersonWithOneToOneRelation\" dataSet")
public void sparkSchemaValidationIsPerformedOnThePersonWithOneToOneRelationDataSet() {
try {
@@ -193,12 +178,8 @@ public void sparkSchemaValidationIsPerformedOnThePersonWithMToOneRelationDataSet

@When("spark schema validation is performed on the \"PersonWithOneToMRelation\" dataSet")
public void sparkSchemaValidationIsPerformedOnThePersonWithOneToMRelationDataSet() {
try {
this.validatedDataSet =
this.personWithOneToMRelationSchema.validateDataFrame(this.personWithOneToMRelationDataSet);
}catch (Exception e) {
this.exception = e;
}
}
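For context, the @When steps above only record the outcome; a companion @Then step inside the same step-definition class would assert on it. The sketch below is hypothetical (the step text, the JUnit 4 assertions, and the exact pass/fail criteria are assumptions; the repository's actual assertion step may differ):

```java
import io.cucumber.java.en.Then;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;

// Hypothetical assertion step; relies on the validatedDataSet and exception
// fields already defined in the step-definition class shown above.
@Then("the dataSet validation {string}")
public void theDataSetValidation(String expectedResult) {
    if ("passes".equals(expectedResult)) {
        assertNull("Validation should not have raised an exception", this.exception);
        assertNotNull("Validation should have produced a dataset", this.validatedDataSet);
    } else {
        assertNotNull("Validation should have raised an exception", this.exception);
    }
}
```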

@Then("the schema data type for {string} is {string}")
@@ -22,12 +22,6 @@ Feature: Records with relations are generated correctly and function as expected
When a "City" POJO is mapped to a spark dataset using the schema
Then the dataset has the correct values for the relational objects

Scenario: Generated Spark schemas fail to validate with a not yet implemented exception
Given the spark schema is generated for the "City" record
And a valid "City" dataSet exists
When spark schema validation is performed on the dataSet
Then the dataSet validation "passes"

Scenario Outline: Records with a One to One relation can be validated using the spark schema
Given the spark schema is generated for the "PersonWithOneToOneRelation" record
And a "<validity>" "PersonWithOneToOneRelation" dataSet exists
