From 6789ec95c9bf2f3ce86859ba8c31a80fb377905e Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Tue, 18 Jun 2024 14:40:26 +0200 Subject: [PATCH 1/3] created method to add custom validators to json schema --- .../assets/schema_utils/custom_validators.py | 25 +++++++++++++++ relecov_tools/conf/amr_genes.json | 12 +++++++ relecov_tools/conf/configuration.json | 5 ++- relecov_tools/json_validation.py | 32 +++++++++++++++++++ 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 relecov_tools/assets/schema_utils/custom_validators.py create mode 100644 relecov_tools/conf/amr_genes.json diff --git a/relecov_tools/assets/schema_utils/custom_validators.py b/relecov_tools/assets/schema_utils/custom_validators.py new file mode 100644 index 00000000..09387f35 --- /dev/null +++ b/relecov_tools/assets/schema_utils/custom_validators.py @@ -0,0 +1,25 @@ +import os +import json +from jsonschema import ValidationError + +# TODO: ADD AN USAGE DOC HERE. + +# Disable default enum validation for amr_genes +def ignore_enum(validator, enums, instance, schema): + pass + +def validate_amr_genes(validator, value, instance, schema): + # Load Config from File + amr_config = os.path.join(os.path.dirname(__file__), "../../conf/amr_genes.json") + with open(amr_config, 'r') as file: + amr_json = json.load(file) + + amr_genes = instance.get("amr_genes", []) + for gene in amr_genes: + if gene not in amr_json.keys(): + yield ValidationError(f"Gene '{gene}' is not annotated in any group.") + +# Map of custom validators +available = { + "amr_genes_validator": validate_amr_genes +} \ No newline at end of file diff --git a/relecov_tools/conf/amr_genes.json b/relecov_tools/conf/amr_genes.json new file mode 100644 index 00000000..a2009c28 --- /dev/null +++ b/relecov_tools/conf/amr_genes.json @@ -0,0 +1,12 @@ +{ + "geneA": "group1", + "geneB": "group2", + "geneC": "group3", + "geneD": "group1", + "geneE": "group2", + "geneF": "group3", + "geneG": "group1", + "geneH": "group2", + "geneI": "group3", 
+ "geneJ": "group1" +} \ No newline at end of file diff --git a/relecov_tools/conf/configuration.json b/relecov_tools/conf/configuration.json index 18ccb7b9..13ec3b7f 100755 --- a/relecov_tools/conf/configuration.json +++ b/relecov_tools/conf/configuration.json @@ -400,5 +400,8 @@ "fastq_r2_md5" ] } - } + }, + "json_schema_validators": [ + "amr_genes_validator" + ] } diff --git a/relecov_tools/json_validation.py b/relecov_tools/json_validation.py index 94ccec00..83d57b2e 100755 --- a/relecov_tools/json_validation.py +++ b/relecov_tools/json_validation.py @@ -9,6 +9,8 @@ import relecov_tools.utils from relecov_tools.config_json import ConfigJson +import relecov_tools.assets.schema_utils.custom_validators + log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -63,6 +65,35 @@ def validate_schema(self): except jsonschema.ValidationError: stderr.print("[red] Json schema does not fulfill Draft 202012 Validation") sys.exit(1) + + def register_custom_validators(self): + """Register custom JSON schema validators based on configuration.""" + + # Get custom validators utils + stderr.print("Loading configuration for custom JSON schema validators.") + conf = ConfigJson() + conf_path = os.path.join(os.path.dirname(__file__), "conf", "configuration.json") + + custom_validator_list = conf.get_configuration("json_schema_validators") + if custom_validator_list is None: + stderr.print(f"No 'json_schema_validators' key found in {conf_path}") + return + + if not len(custom_validator_list) > 0: + stderr.print(f"No validators defined yet under 'json_schema_validators' key in {conf_path}") + return + + # Append custom validators to Draft202012Validator + stderr.print("Registering custom validators.") + available_validators = relecov_tools.assets.schema_utils.custom_validators.available + for validator_name in custom_validator_list: + if validator_name in available_validators: + Draft202012Validator.VALIDATORS[validator_name] = available_validators[validator_name] + 
stderr.print(f"Custom validator '{validator_name}' successfully added.") + else: + stderr.print(f"No validator found for '{validator_name}'. Exiting.") + sys.exit(1) + def validate_instances(self): """Validate data instances against a validated json schema""" @@ -189,5 +220,6 @@ def validate(self): """Write invalid samples from metadata to excel""" self.validate_schema() + self.register_custom_validators() invalid_json = self.validate_instances() self.create_invalid_metadata(invalid_json, self.metadata, self.out_folder) From d4a825f15416bf92a7c26327ef86e5b418057ba0 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Tue, 18 Jun 2024 14:51:37 +0200 Subject: [PATCH 2/3] tmp implementation to test schema validation method --- relecov_tools/schema/relecov_schema.json | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/relecov_tools/schema/relecov_schema.json b/relecov_tools/schema/relecov_schema.json index d631537c..7fe2f732 100755 --- a/relecov_tools/schema/relecov_schema.json +++ b/relecov_tools/schema/relecov_schema.json @@ -18,7 +18,8 @@ "sequencing_instrument_model", "sequencing_instrument_platform", "enrichment_panel", - "enrichment_panel_version" + "enrichment_panel_version", + "amr_genes" ], "type": "object", "properties": { @@ -2899,6 +2900,18 @@ "description": "", "clasification": "Submission ENA", "label": "Run Alias" + }, + "tmp-amr_genes": { "$ref": "#/$defs/gene_name"} + }, + "amr_genes_validator": true, + "$defs": { + "gene_name" : { + "type": "array", + "items": { + "type": "string", + "enum": ["geneA", "geneB", "geneC", "geneD", "geneE", "geneF", "geneG", "geneH", "geneI", "geneJ"], + "uniqueItems": true + } } } } From 9590d0804a722e9b3e1bd038ca243b0608337482 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Tue, 18 Jun 2024 16:15:13 +0200 Subject: [PATCH 3/3] fix linting --- .../assets/schema_utils/custom_validators.py | 12 ++++++---- relecov_tools/json_validation.py | 23 ++++++++++++------- 2 files changed, 22 insertions(+), 13 
deletions(-) diff --git a/relecov_tools/assets/schema_utils/custom_validators.py b/relecov_tools/assets/schema_utils/custom_validators.py index 09387f35..f674e298 100644 --- a/relecov_tools/assets/schema_utils/custom_validators.py +++ b/relecov_tools/assets/schema_utils/custom_validators.py @@ -2,16 +2,19 @@ import json from jsonschema import ValidationError -# TODO: ADD AN USAGE DOC HERE. + +# TODO: ADD AN USAGE DOC HERE. + # Disable default enum validation for amr_genes def ignore_enum(validator, enums, instance, schema): pass + def validate_amr_genes(validator, value, instance, schema): # Load Config from File amr_config = os.path.join(os.path.dirname(__file__), "../../conf/amr_genes.json") - with open(amr_config, 'r') as file: + with open(amr_config, "r") as file: amr_json = json.load(file) amr_genes = instance.get("amr_genes", []) @@ -19,7 +22,6 @@ def validate_amr_genes(validator, value, instance, schema): if gene not in amr_json.keys(): yield ValidationError(f"Gene '{gene}' is not annotated in any group.") + # Map of custom validators -available = { - "amr_genes_validator": validate_amr_genes -} \ No newline at end of file +available = {"amr_genes_validator": validate_amr_genes} diff --git a/relecov_tools/json_validation.py b/relecov_tools/json_validation.py index 83d57b2e..946c751e 100755 --- a/relecov_tools/json_validation.py +++ b/relecov_tools/json_validation.py @@ -65,36 +65,43 @@ def validate_schema(self): except jsonschema.ValidationError: stderr.print("[red] Json schema does not fulfill Draft 202012 Validation") sys.exit(1) - + def register_custom_validators(self): """Register custom JSON schema validators based on configuration.""" # Get custom validators utils stderr.print("Loading configuration for custom JSON schema validators.") conf = ConfigJson() - conf_path = os.path.join(os.path.dirname(__file__), "conf", "configuration.json") - + conf_path = os.path.join( + os.path.dirname(__file__), "conf", "configuration.json" + ) + custom_validator_list = 
conf.get_configuration("json_schema_validators") if custom_validator_list is None: stderr.print(f"No 'json_schema_validators' key found in {conf_path}") return - + if not len(custom_validator_list) > 0: - stderr.print(f"No validators defined yet under 'json_schema_validators' key in {conf_path}") + stderr.print( + f"No validators defined yet under 'json_schema_validators' key in {conf_path}" + ) return # Append custom validators to Draft202012Validator stderr.print("Registering custom validators.") - available_validators = relecov_tools.assets.schema_utils.custom_validators.available + available_validators = ( + relecov_tools.assets.schema_utils.custom_validators.available + ) for validator_name in custom_validator_list: if validator_name in available_validators: - Draft202012Validator.VALIDATORS[validator_name] = available_validators[validator_name] + Draft202012Validator.VALIDATORS[validator_name] = available_validators[ + validator_name + ] stderr.print(f"Custom validator '{validator_name}' successfully added.") else: stderr.print(f"No validator found for '{validator_name}'. Exiting.") sys.exit(1) - def validate_instances(self): """Validate data instances against a validated json schema"""