From 129bda27859b6303b7d85621eb6543afc21ea822 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 12 Nov 2022 23:59:12 -0500 Subject: [PATCH 01/38] Next development iteration `v0.4.8-SNAPSHOT`. Signed-off-by: Daniel Danis --- phenopacket-tools-builder/pom.xml | 2 +- phenopacket-tools-cli/pom.xml | 2 +- phenopacket-tools-converter/pom.xml | 2 +- phenopacket-tools-core/pom.xml | 2 +- phenopacket-tools-io/pom.xml | 2 +- phenopacket-tools-test/pom.xml | 2 +- phenopacket-tools-util/pom.xml | 2 +- phenopacket-tools-validator-core/pom.xml | 2 +- phenopacket-tools-validator-jsonschema/pom.xml | 2 +- pom.xml | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/phenopacket-tools-builder/pom.xml b/phenopacket-tools-builder/pom.xml index 5570812c..11c05303 100644 --- a/phenopacket-tools-builder/pom.xml +++ b/phenopacket-tools-builder/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-builder diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 0622ae02..0ad46fbc 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-cli diff --git a/phenopacket-tools-converter/pom.xml b/phenopacket-tools-converter/pom.xml index 2d48914e..8a2a17cc 100644 --- a/phenopacket-tools-converter/pom.xml +++ b/phenopacket-tools-converter/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-converter diff --git a/phenopacket-tools-core/pom.xml b/phenopacket-tools-core/pom.xml index 46f0eab8..7e08e4bb 100644 --- a/phenopacket-tools-core/pom.xml +++ b/phenopacket-tools-core/pom.xml @@ -6,7 +6,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-core diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml index 3768f2eb..205f3a08 100644 --- a/phenopacket-tools-io/pom.xml +++ b/phenopacket-tools-io/pom.xml @@ -6,7 +6,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-io diff --git a/phenopacket-tools-test/pom.xml b/phenopacket-tools-test/pom.xml index d0f5ce0e..6f457067 100644 --- a/phenopacket-tools-test/pom.xml +++ b/phenopacket-tools-test/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7 + 0.4.8-SNAPSHOT 4.0.0 diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index 7bb365de..e43e07df 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7 + 0.4.8-SNAPSHOT 4.0.0 diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index 97b8b750..d81c6ca6 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-validator-core diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index 10ef5f3d..e6ada963 100644 --- a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7 + 0.4.8-SNAPSHOT phenopacket-tools-validator-jsonschema diff --git a/pom.xml b/pom.xml index 38051a77..72450942 100644 --- 
a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7 + 0.4.8-SNAPSHOT pom From 1337bb9df8f99ca2775668f4021b174c58d402dd Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sun, 13 Nov 2022 00:02:31 -0500 Subject: [PATCH 02/38] Update typo in `pages.yml`. Signed-off-by: Daniel Danis --- .github/workflows/pages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index d36b4686..11ec1e90 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -42,7 +42,7 @@ jobs: pip install sphinx-rtd-theme - name: MVN clean - run: ./mvnw -Prerelease clean package + run: ./mvnw -Prelease clean package - name: Get CLI JAR file id: cli-jar From 6272a48e6854e141c9b9b499c3f3d7e9cb7bc7ae Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 15 Nov 2022 12:56:20 -0500 Subject: [PATCH 03/38] Next development iteration `v0.4.8-SNAPSHOT`. Signed-off-by: Daniel Danis --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 56357ef7..54d465f2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,7 @@ # The short X.Y version. version = '0.4' # The full version, including alpha/beta/rc tags. -release = '0.4.7' +release = '0.4.8-SNAPSHOT' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration From 3bc1017b3ad40fdef86aa2b5e8f84f4c19ce1ad9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 15 Nov 2022 13:02:42 -0500 Subject: [PATCH 04/38] Update Javadoc link, remove Javadoc badge. Signed-off-by: Daniel Danis --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index d0b5ef10..3a2b766e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ [![Java CI](https://github.com/phenopackets/phenopacket-tools/workflows/Java%20CI/badge.svg)](https://github.com/phenopackets/phenopacket-tools/actions/workflows/main.yml) [![GitHub release](https://img.shields.io/github/release/phenopackets/phenopacket-tools.svg)](https://github.com/phenopackets/phenopacket-tools/releases) -[![Javadoc](https://javadoc.io/badge2/org.phenopackets.phenopackettools/phenopacket-tools-core/javadoc.svg)](https://javadoc.io/doc/org.phenopackets.phenopackettools) # Phenopacket-tools @@ -20,7 +19,7 @@ The documentation includes a [Tutorial](http://phenopackets.org/phenopacket-tool how to use the CLI, a comprehensive [CLI user guide](http://phenopackets.org/phenopacket-tools/cli.html), a [Library user guide](http://phenopackets.org/phenopacket-tools), -and the [Java API documentation](https://javadoc.io/doc/org.phenopackets.phenopackettools). +and the [Javadoc API documentation](http://phenopackets.org/phenopacket-tools/apidocs). ## Availability From f1051f4478d1b976d5aa9770e06f28fa7ef25104 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 15 Nov 2022 13:13:15 -0500 Subject: [PATCH 05/38] Update constants README.md. Signed-off-by: Daniel Danis --- constants/README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/constants/README.md b/constants/README.md index a0e9f65c..7336ad98 100644 --- a/constants/README.md +++ b/constants/README.md @@ -1,8 +1,16 @@ # README This directory contains spreadsheets with definitions of recommended constants (OntologyClass objects) that -are useful for common data elements needed to create Phenopackets. 
+are useful for common data elements needed to create phenopackets. -The python script create_classes.py transforms the CSV files into the corresponding Java classes. +The script `create_classes.py` transforms the CSV files into the corresponding Java classes: -The python script create_rtd.py creates read the docs documentation files. \ No newline at end of file +```shell +python3 create_classes.py +``` + +The script `create_rtd.py` creates read the docs documentation files: + +```shell +python3 create_rtd.py +``` From c2db8da285105e6d6de9090ecf4a592c1752b3b7 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 15 Nov 2022 13:16:05 -0500 Subject: [PATCH 06/38] Add javadocs for the `test` module. Signed-off-by: Daniel Danis --- phenopacket-tools-test/src/main/java/module-info.java | 3 +++ .../org/phenopackets/phenopackettools/test/package-info.java | 5 +++++ 2 files changed, 8 insertions(+) create mode 100644 phenopacket-tools-test/src/main/java/org/phenopackets/phenopackettools/test/package-info.java diff --git a/phenopacket-tools-test/src/main/java/module-info.java b/phenopacket-tools-test/src/main/java/module-info.java index 2592812c..87573a15 100644 --- a/phenopacket-tools-test/src/main/java/module-info.java +++ b/phenopacket-tools-test/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * A module with example phenopackets suitable for unit tests. + */ module org.phenopackets.phenopackettools.test { requires org.phenopackets.schema; requires com.google.protobuf; diff --git a/phenopacket-tools-test/src/main/java/org/phenopackets/phenopackettools/test/package-info.java b/phenopacket-tools-test/src/main/java/org/phenopackets/phenopackettools/test/package-info.java new file mode 100644 index 00000000..d349223c --- /dev/null +++ b/phenopacket-tools-test/src/main/java/org/phenopackets/phenopackettools/test/package-info.java @@ -0,0 +1,5 @@ +/** + * A package with {@link org.phenopackets.phenopackettools.test.TestData} - a static utility class for serving + * hard-coded phenopacket examples. + */ +package org.phenopackets.phenopackettools.test; \ No newline at end of file From 2af51d91d00d9d558941b39491d79a9cc25cab3b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 15 Nov 2022 13:19:06 -0500 Subject: [PATCH 07/38] Update availability description, reorder sections. Signed-off-by: Daniel Danis --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3a2b766e..bdd8230f 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,11 @@ such as anatomical organs, age of onset, biospecimen type, and clinical modifier The library validates the basic syntax and semantic requirements of the Phenopacket Schema as well as the adherence to additional user-defined requirements. +## Availability + +Most users should download the latest distribution archive from the [Releases page](https://github.com/phenopackets/phenopacket-tools/releases). +Alternatively, *phenopacket-tools* can be built from source, as described in the user guide. + ## Documentation The documentation includes a [Tutorial](http://phenopackets.org/phenopacket-tools/tutorial.html) showing @@ -21,10 +26,6 @@ a comprehensive [CLI user guide](http://phenopackets.org/phenopacket-tools/cli.h a [Library user guide](http://phenopackets.org/phenopacket-tools), and the [Javadoc API documentation](http://phenopackets.org/phenopacket-tools/apidocs). 
-## Availability - -Most users should download the latest distribution archive from the [Releases page](https://github.com/phenopackets/phenopacket-tools/releases). - ## Issues Comments, questions or issues? Feel free to submit a ticket to our [GitHub tracker](https://github.com/phenopackets/phenopacket-tools/issues). From d88b1fdad0a2077f21ec22597606385d3207977d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 17 Nov 2022 13:12:03 -0500 Subject: [PATCH 08/38] Rename example phenopacket files. Signed-off-by: Daniel Danis --- docs/tutorial.rst | 27 ++++++++++++------- .../{marfan.valid.json => marfan.json} | 0 ....invalid.json => marfan.no-phenotype.json} | 0 ...ct.invalid.json => marfan.no-subject.json} | 0 ... => marfan.no-time-at-last-encounter.json} | 0 ...t-hpo.invalid.json => marfan.not-hpo.json} | 0 ...=> marfan.all-organ-system-annotated.json} | 0 ...son => marfan.missing-eye-annotation.json} | 0 ...alid.json => marfan.no-abnormalities.json} | 0 ...> marfan.annotation-propagation-rule.json} | 0 .../{marfan.valid.json => marfan.json} | 0 ...invalid.json => marfan.obsolete-term.json} | 0 12 files changed, 17 insertions(+), 10 deletions(-) rename phenopacket-tools-cli/src/examples/validate/custom-json-schema/{marfan.valid.json => marfan.json} (100%) rename phenopacket-tools-cli/src/examples/validate/custom-json-schema/{marfan.no-phenotype.invalid.json => marfan.no-phenotype.json} (100%) rename phenopacket-tools-cli/src/examples/validate/custom-json-schema/{marfan.no-subject.invalid.json => marfan.no-subject.json} (100%) rename phenopacket-tools-cli/src/examples/validate/custom-json-schema/{marfan.no-time-at-last-encounter.invalid.json => marfan.no-time-at-last-encounter.json} (100%) rename phenopacket-tools-cli/src/examples/validate/custom-json-schema/{marfan.not-hpo.invalid.json => marfan.not-hpo.json} (100%) rename phenopacket-tools-cli/src/examples/validate/organ-systems/{marfan.all-organ-system-annotated.valid.json => marfan.all-organ-system-annotated.json} (100%) rename phenopacket-tools-cli/src/examples/validate/organ-systems/{marfan.missing-eye-annotation.invalid.json => marfan.missing-eye-annotation.json} (100%) rename phenopacket-tools-cli/src/examples/validate/organ-systems/{marfan.no-abnormalities.valid.json => marfan.no-abnormalities.json} (100%) rename phenopacket-tools-cli/src/examples/validate/phenotype-validation/{marfan.annotation-propagation-rule.invalid.json => marfan.annotation-propagation-rule.json} (100%) rename phenopacket-tools-cli/src/examples/validate/phenotype-validation/{marfan.valid.json => marfan.json} (100%) rename phenopacket-tools-cli/src/examples/validate/phenotype-validation/{marfan.obsolete-term.invalid.json => marfan.obsolete-term.json} (100%) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index c1af15f0..29cd4ec9 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -214,10 +214,10 @@ The schema is located at ``examples/custom-json-schema/hpo-rare-disease-schema.j Using the custom JSON schema via ``--require`` option will point out issues in the 4 example phenopackets:: pxf validate --require ${examples}/validate/custom-json-schema/hpo-rare-disease-schema.json \ - -i ${examples}/validate/custom-json-schema/marfan.no-subject.invalid.json \ - -i ${examples}/validate/custom-json-schema/marfan.no-phenotype.invalid.json \ - -i ${examples}/validate/custom-json-schema/marfan.not-hpo.invalid.json \ - -i ${examples}/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json + -i 
${examples}/validate/custom-json-schema/marfan.no-subject.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-phenotype.json \ + -i ${examples}/validate/custom-json-schema/marfan.not-hpo.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-time-at-last-encounter.json .. csv-table:: :header: "Validation error", "Solution" @@ -253,9 +253,9 @@ Phenopackets use non-obsolete term IDs The `HpoPhenotypeValidator` checks if the phenopacket contains obsolete HPO terms:: - pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.obsolete-term.invalid.json + pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.obsolete-term.json -It turns out that ``marfan.obsolete-term.invalid.json`` uses an obsolete ``HP:0002631`` instead of +It turns out that ``marfan.obsolete-term.json`` uses an obsolete ``HP:0002631`` instead of the primary ``HP:0002616`` for *Aortic root aneurysm*: .. csv-table:: @@ -275,13 +275,13 @@ In contrary, the *least* specific terms should be used for the *excluded* clinic The `HpoAncestryValidator` checks that the annotation propagation rule is not violated:: - pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json + pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.annotation-propagation-rule.json .. csv-table:: :header: "Validation error", "Solution" :widths: 350, 550 - "Phenotypic features of id-C must not contain both an observed term (Aortic root aneurysm, HP:0002616) and an observed ancestor (Aortic aneurysm, HP:0004942)", Remove the less specific term + "Phenotypic features of id-C must not contain both an observed term (Aortic root aneurysm, HP:0002616) and an observed ancestor (Aortic aneurysm, HP:0004942)", Remove the ancestor term .. note:: Presence of excluded descendant and observed ancestor does not violate the annotation propagation rule. @@ -303,14 +303,14 @@ in 3 phenopackets of toy `Marfan syndrome Date: Thu, 17 Nov 2022 14:14:53 -0500 Subject: [PATCH 09/38] Remove the concept of syntax and semantic validators. 
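
A minimal migration sketch for client code (illustrative only; it mirrors the
`ValidateCommand` change below, and the deprecated `addSyntaxValidator` /
`addSemanticValidator` methods keep working by delegating to the shared
validator list):

    // before (deprecated since 0.4.8)
    JsonSchemaValidationWorkflowRunner.phenopacketBuilder()
            .addAllJsonSchemaUrls(customJsonSchemas)
            .addSemanticValidator(MetaDataValidators.phenopacketValidator())
            .addAllSemanticValidators(semanticValidators)
            .build();

    // after
    JsonSchemaValidationWorkflowRunner.phenopacketBuilder()
            .addAllJsonSchemaUrls(customJsonSchemas)
            .addValidator(MetaDataValidators.phenopacketValidator())
            .addValidators(validators)
            .build();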
Signed-off-by: Daniel Danis --- .../cli/command/ValidateCommand.java | 18 ++--- .../validate/ValidateCohortCommand.java | 1 + .../validate/ValidateFamilyCommand.java | 1 + .../validate/ValidatePhenopacketCommand.java | 1 + .../core/ValidationWorkflowRunnerBuilder.java | 46 +++++++++++-- .../validator/core/package-info.java | 2 - .../BaseValidationWorkflowRunnerBuilder.java | 3 +- .../JsonSchemaValidationWorkflowRunner.java | 69 ++++++++----------- ...SchemaValidationWorkflowRunnerBuilder.java | 9 +-- 9 files changed, 87 insertions(+), 63 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index ce2a34cf..394bbed2 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -97,27 +97,27 @@ private ValidationWorkflowRunner prepareWorkflowRunner() { List customJsonSchemas = prepareCustomSchemaUrls(); Object runner = switch (inputSection.element) { case PHENOPACKET -> { - List> semanticValidators = configureSemanticValidators(); + List> validators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.phenopacketBuilder() .addAllJsonSchemaUrls(customJsonSchemas) - .addSemanticValidator(MetaDataValidators.phenopacketValidator()) - .addAllSemanticValidators(semanticValidators) + .addValidator(MetaDataValidators.phenopacketValidator()) + .addValidators(validators) .build(); } case FAMILY -> { - List> semanticValidators = configureSemanticValidators(); + List> validators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.familyBuilder() .addAllJsonSchemaUrls(customJsonSchemas) - .addSemanticValidator(MetaDataValidators.familyValidator()) - .addAllSemanticValidators(semanticValidators) + .addValidator(MetaDataValidators.familyValidator()) + .addValidators(validators) .build(); } case COHORT -> { - List> semanticValidators = configureSemanticValidators(); + List> validators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.cohortBuilder() .addAllJsonSchemaUrls(customJsonSchemas) - .addSemanticValidator(MetaDataValidators.cohortValidator()) - .addAllSemanticValidators(semanticValidators) + .addValidator(MetaDataValidators.cohortValidator()) + .addValidators(validators) .build(); } }; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java index cd9f5ece..6355f625 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java @@ -17,6 +17,7 @@ @CommandLine.Command(name = "cohort", description = "Validate cohorts using builtin and custom JSON Schemas.", mixinStandardHelpOptions = true) +@Deprecated(forRemoval = true) public class ValidateCohortCommand extends BaseValidateCommand { @Override diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java index ba0ce5ef..25390c06 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java @@ -17,6 +17,7 @@ @CommandLine.Command(name = "family", description = "Validate families using builtin and custom JSON Schemas.", mixinStandardHelpOptions = true) +@Deprecated(forRemoval = true) public class ValidateFamilyCommand extends BaseValidateCommand { @Override diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java index da2fcec8..a16486d4 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java @@ -18,6 +18,7 @@ @CommandLine.Command(name = "phenopacket", description = "Validate phenopackets using builtin and custom JSON Schemas.", mixinStandardHelpOptions = true) +@Deprecated(forRemoval = true) public class ValidatePhenopacketCommand extends BaseValidateCommand { @Override diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java index 31d6263f..08fdf591 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java @@ -3,27 +3,55 @@ import com.google.protobuf.MessageOrBuilder; import java.util.ArrayList; +import java.util.Collection; import java.util.List; /** - * The base builder for constructing {@link ValidationWorkflowRunner}. The builder keeps track of - * the syntax and semantic validators. + * The base builder for constructing {@link ValidationWorkflowRunner}. + * The builder keeps track of the validators and builds the {@link ValidationWorkflowRunner} at the end. * * @param type of the top-level element of the Phenopacket Schema. */ public abstract class ValidationWorkflowRunnerBuilder { + protected final List> validators = new ArrayList<>(); + /** + * @deprecated use {@link #validators} instead + */ + @Deprecated(forRemoval = true, since = "0.4.8") protected final List> syntaxValidators = new ArrayList<>(); + /** + * @deprecated use {@link #validators} instead + */ + @Deprecated(forRemoval = true, since = "0.4.8") protected final List> semanticValidators = new ArrayList<>(); + /** + * Add a validator to the end of the workflow. + */ + public ValidationWorkflowRunnerBuilder addValidator(PhenopacketValidator validator) { + this.validators.add(validator); + return this; + } + + /** + * Add validators to the end of the workflow. + */ + public ValidationWorkflowRunnerBuilder addValidators(Collection> validators) { + this.validators.addAll(validators); + return this; + } + /** * Add a syntax validator. 
* * @param syntaxValidator the syntax validator * @return the builder + * @deprecated use {@link #addValidator(PhenopacketValidator)} instead */ + @Deprecated(forRemoval = true, since = "0.4.8") public ValidationWorkflowRunnerBuilder addSyntaxValidator(PhenopacketValidator syntaxValidator) { - this.syntaxValidators.add(syntaxValidator); + this.validators.add(syntaxValidator); return this; } @@ -32,10 +60,12 @@ public ValidationWorkflowRunnerBuilder addSyntaxValidator(PhenopacketValidato * * @param validators the syntax validators * @return the builder + * @deprecated use {@link #addValidators(Collection)} instead */ + @Deprecated(forRemoval = true, since = "0.4.8") public ValidationWorkflowRunnerBuilder addAllSyntaxValidators(List> validators) { // A slightly more efficient implementation comparing to the default method on the interface. - this.syntaxValidators.addAll(validators); + this.validators.addAll(validators); return this; } @@ -44,9 +74,11 @@ public ValidationWorkflowRunnerBuilder addAllSyntaxValidators(List addSemanticValidator(PhenopacketValidator semanticValidator) { - this.semanticValidators.add(semanticValidator); + this.validators.add(semanticValidator); return this; } @@ -55,10 +87,12 @@ public ValidationWorkflowRunnerBuilder addSemanticValidator(PhenopacketValida * * @param validators the semantic validators * @return the builder + * @deprecated use {@link #addValidators(Collection)} instead */ + @Deprecated(forRemoval = true, since = "0.4.8") public ValidationWorkflowRunnerBuilder addAllSemanticValidators(List> validators) { // A slightly more efficient implementation comparing to the default method on the interface. - this.semanticValidators.addAll(validators); + this.validators.addAll(validators); return this; } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java index c82ea30b..831206b2 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java @@ -35,7 +35,5 @@ *
 * {@link org.phenopackets.phenopackettools.validator.core.ValidatorInfo} describes
 * the {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}.
- * <p>
- * {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel} */ package org.phenopackets.phenopackettools.validator.core; \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java index d21ec40c..b4211b1c 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java @@ -33,8 +33,7 @@ public JsonSchemaValidationWorkflowRunner build() { return new JsonSchemaValidationWorkflowRunner<>(getFormatConverter(), getBaseRequirementsValidator(), requirementValidators, - syntaxValidators, - semanticValidators); + validators); } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index 3bfd5bdf..a7193c7d 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -17,6 +17,7 @@ import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.stream.Stream; /** * Validates if given top-level element satisfies the following criteria: @@ -25,9 +26,8 @@ *
 * <ul>
 *     <li>basic Phenopacket schema syntax requirements - the requirements described by the reference documentation.
 *     Absence of a required field is an {@link ValidationLevel#ERROR} and absence of a recommended field is
 *     a {@link ValidationLevel#WARNING},</li>
- *     <li>custom syntax requirements - requirements provided in a JSON schema document(s) provided by the user,</li>
- *     <li>syntax requirements - requirements checked by the provided ad hoc {@link PhenopacketValidator}s,</li>
- *     <li>semantic requirements - requirements checked by the provided {@link PhenopacketValidator}s.</li>
+ *     <li>custom requirements - requirements provided in a JSON schema document(s) provided by the user or
+ *     provided as ad hoc {@link PhenopacketValidator}s.</li>
 * </ul>
 * <p>
    * The validation is performed in the order as outlined above. Note that the data format validation must @@ -46,8 +46,7 @@ public class JsonSchemaValidationWorkflowRunner impl private final PhenopacketFormatConverter converter; private final JsonSchemaValidator baseValidator; private final Collection requirementValidators; - private final Collection> syntaxValidators; - private final Collection> semanticValidators; + private final Collection> validators; private final List validatorInfos; /** @@ -77,19 +76,29 @@ public static JsonSchemaValidationWorkflowRunnerBuilder cohortB JsonSchemaValidationWorkflowRunner(PhenopacketFormatConverter converter, JsonSchemaValidator baseValidator, Collection requirementValidators, - Collection> syntaxValidators, - Collection> semanticValidators) { + Collection> validators) { this.converter = Objects.requireNonNull(converter); this.baseValidator = Objects.requireNonNull(baseValidator); this.requirementValidators = Objects.requireNonNull(requirementValidators); - this.syntaxValidators = Objects.requireNonNull(syntaxValidators); - this.semanticValidators = Objects.requireNonNull(semanticValidators); - this.validatorInfos = summarizeValidatorInfos(baseValidator, requirementValidators, semanticValidators); + this.validators = Objects.requireNonNull(validators); + this.validatorInfos = summarizeValidatorInfos(baseValidator, requirementValidators, validators); + } + + /** + * @deprecated use the other constructor + */ + @Deprecated(forRemoval = true, since = "0.4.8") + JsonSchemaValidationWorkflowRunner(PhenopacketFormatConverter converter, + JsonSchemaValidator baseValidator, + Collection requirementValidators, + Collection> syntaxValidators, + Collection> semanticValidators) { + this(converter, baseValidator, requirementValidators, Stream.concat(syntaxValidators.stream(), semanticValidators.stream()).toList()); } private static List summarizeValidatorInfos(JsonSchemaValidator base, Collection requirements, - Collection> semantics) { + Collection> validators) { List infos = new ArrayList<>(); infos.add(base.validatorInfo()); @@ -97,7 +106,7 @@ private static List summarizeValidat infos.add(validator.validatorInfo()); } - for (PhenopacketValidator validator : semantics) { + for (PhenopacketValidator validator : validators) { infos.add(validator.validatorInfo()); } @@ -135,13 +144,7 @@ public ValidationResults validate(String json) { } try { - validateSyntax(json, builder); - } catch (ConversionException e) { - return wrapUpValidation(e, builder); - } - - try { - validateSemantic(json, builder); + convertAndRunValidation(json, builder); } catch (ConversionException e) { return wrapUpValidation(e, builder); } @@ -162,10 +165,8 @@ public ValidationResults validate(T item) { return wrapUpValidation(e, builder); } - validateSyntax(item, builder); - // No conversion necessary, hence no need to guard against the `ConversionException`. 
- validateSemantic(item, builder); + runValidation(item, builder); return builder.build(); } @@ -203,35 +204,23 @@ private void validateRequirements(String json, ValidationResults.Builder builder } } - private void validateSyntax(String item, ValidationResults.Builder builder) throws ConversionException { - T component = converter.toItem(item); - - validateSyntax(component, builder); - } - - private void validateSyntax(T component, ValidationResults.Builder builder) { - for (PhenopacketValidator validator : syntaxValidators) { - builder.addResults(validator.validatorInfo(), validator.validate(component)); - } - } - /** - * Validate semantic requirements using {@link #semanticValidators}. + * Convert the {@code item} into {@link T} and validate the requirements. * * @throws ConversionException if {@code item} cannot be mapped into {@link T} */ - private void validateSemantic(String item, ValidationResults.Builder builder) throws ConversionException { + private void convertAndRunValidation(String item, ValidationResults.Builder builder) throws ConversionException { T component = converter.toItem(item); - validateSemantic(component, builder); + runValidation(component, builder); } /** - * Validate semantic requirements using {@link #semanticValidators}. Unlike {@link #validateSemantic(String, ValidationResults.Builder)}, + * Validate semantic requirements using {@link #validators}. Unlike {@link #convertAndRunValidation(String, ValidationResults.Builder)}, * this method does not throw {@link ConversionException}. */ - private void validateSemantic(T component, ValidationResults.Builder builder) { - for (PhenopacketValidator validator : semanticValidators) + private void runValidation(T component, ValidationResults.Builder builder) { + for (PhenopacketValidator validator : validators) builder.addResults(validator.validatorInfo(), validator.validate(component)); } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java index a66a059a..55a0f872 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java @@ -13,8 +13,9 @@ /** * A builder for {@link JsonSchemaValidationWorkflowRunner}. *
    - * Build the {@link JsonSchemaValidationWorkflowRunner} by providing JSON schema documents - * either as {@link Path} or {@link URL}s, and {@link PhenopacketValidator}s for performing semantic validation. + * Build the {@link JsonSchemaValidationWorkflowRunner} for running base validation, and additional JSON schema-based + * validation (provide either {@link Path}s or {@link URL}s to JSON schema documents), + * and {@link PhenopacketValidator}s for performing additional validation. * * @param one of top-level elements of the Phenopacket schema. */ @@ -27,7 +28,7 @@ protected JsonSchemaValidationWorkflowRunnerBuilder() { } /** - * Register a JSON schema present at a given {@code path} to be used as a syntax validator. The {@code path} + * Register a JSON schema present at a given {@code path} to be used as a validator. The {@code path} * will be interpreted as a {@link URL}. * * @param path path to the JSON schema document @@ -39,7 +40,7 @@ public JsonSchemaValidationWorkflowRunnerBuilder addJsonSchema(Path path) thr } /** - * Register a JSON schema present at a given {@code url} to be used as a syntax validator. + * Register a JSON schema present at a given {@code url} to be used as a validator. * * @param url url to the JSON schema document * @return the builder From 4d0dae53a510abff8fc5f9001b17d76a8da804b1 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 17 Nov 2022 11:50:55 -0800 Subject: [PATCH 10/38] Removing space from CURIE See #145 --- .../org/phenopackets/phenopackettools/cli/examples/Marfan.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java index e401c7ed..336d5449 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java @@ -14,7 +14,7 @@ public class Marfan implements PhenopacketExample { private final Phenopacket phenopacket; public Marfan() { - var marfan = DiseaseBuilder.of("OMIM:154700 ", "Marfan syndrome"); + var marfan = DiseaseBuilder.of("OMIM:154700", "Marfan syndrome"); var individual = IndividualBuilder.builder(PROBAND_ID).female().ageAtLastEncounter("P27Y").build(); var losartan = ontologyClass("DrugCentral:1610", "losartan"); var mg = ontologyClass("UO:0000022", "milligram"); From 7c382ddeecf092f865ffc9f0e7f45902f551981f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 17 Nov 2022 16:05:06 -0500 Subject: [PATCH 11/38] Complete the tutorial. Signed-off-by: Daniel Danis --- docs/index.rst | 19 +++--- docs/tutorial.rst | 128 +++++++++++++++++++++---------------- docs/tutorial_examples.rst | 22 +++---- 3 files changed, 94 insertions(+), 75 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index abd116ae..df719147 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,3 +1,4 @@ +============================================= Welcome to phenopacket-tools's documentation! ============================================= @@ -17,19 +18,21 @@ the basis of defined relationships between diagnoses, lab measurements, and geno The *phenopacket-tools* library was written as a modular Java 17 library and has three main goals. 
-- To simplify creating phenopackets with Java code using streamlined builders and predefined building blocks such +- To simplify *creating* phenopackets with Java code using streamlined builders and predefined building blocks such as units, anatomical organs, and clinical modifiers. -- To provide a validation framework for checking phenopackets for syntactical and semantic correctness +- To *convert* phenopackets from the obsoleted version 1 to the version 2 (current) of the Schema. +- To provide a *validation* framework for checking phenopackets for syntactical and semantic correctness and to enable developers to extend the validation framework to encode the specific requirements of consortia or projects using either JSON schema or programmatic tools. -- To convert/lift phenopackets from the obsoleted version 1 to the version 2 (current) of the Schema. On top of the library, we provide a standalone command-line interface (CLI) application for -conversion and validation. The following sections first describe the library and the last section instructs -how to use the CLI application on your system. +conversion and validation. -.. - TODO - review the three points and sync them with the manuscript. +The following sections describe phenopacket-tools library and CLI application. +We start with :ref:`rsttutorial` to provide a quick overview of the CLI application functionality. +We follow with the comprehensive :ref:`rstcli` description. +The rest of the documentation offers an in-depth user guide for using the library functionality +in a JVM-based application. .. toctree:: :maxdepth: 1 @@ -44,7 +47,7 @@ how to use the CLI application on your system. .. figure:: https://onlinelibrary.wiley.com/cms/asset/1cc0a141-da65-45a3-b7b0-6316b7b02069/ggn2202200016-fig-0002-m.jpg - :alt: GA4GH Phenopacket + :alt: GA4GH Phenopacket Schema :width: 800px Overview of the GA4GH Phenopacket Schema. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 29cd4ec9..0ce9dfc8 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -4,25 +4,29 @@ Tutorial ======== -This tutorial walks through the installation of *phenopacket-tools* and provides an overview and an intended usage -of the command-line interface. The tutorial sections point to the parts of documentations which offer more detail. +This tutorial walks through the installation of *phenopacket-tools* command-line interface application +and provides an overview of the *conversion* of phenopackets from `v1` to the current `v2` format and +*validation* functionality, including custom validation rules. Setup ===== -*Phenopacket-tools* is distributed as a ZIP archive that contains an executable JAR file -and several resource files for running this tutorial. Let's check that Java is installed on the machine, +*Phenopacket-tools* is written in Java 17 and requires Java 17 or better to run. +We distribute the CLI application as a ZIP archive with an executable Java Archive (JAR) file +and several examples for running this tutorial. + +As a prerequisite, Java 17 or newer is installed on the machine, download the distribution ZIP and set up an alias as a shortcut for running the *phenopacket-tools*. Prerequisites ^^^^^^^^^^^^^ -*Phenopacket-tools* is written in Java 17 and requires Java 17 or better to run. An appropriate Java executable -must be present on your ``$PATH``. Run the following to determine the availability and version of Java on your machine:: +Java 17 or newer must be present on your ``$PATH``. 
Run the following to check the availability +and version of Java on your machine:: java -version -which prints a similar output for Java 17:: +The command should print a similar output:: openjdk version "17" 2021-09-14 OpenJDK Runtime Environment (build 17+35-2724) @@ -35,7 +39,7 @@ A prebuilt distribution ZIP file is available for download from `phenopacket-tools release section `_ of the GitHub repository. -Download and unpack the ZIP file from the releases section: +Download and unpack the ZIP file of the latest release |release| from the release assets: .. parsed-literal:: @@ -46,16 +50,20 @@ Download and unpack the ZIP file from the releases section: Set up alias ^^^^^^^^^^^^ -In general, Java command line applications are invoked as ``java -jar executable.jar``. However, this is just -too verbose and we can shorten the command by defining an alias. +In general, Java command line applications are invoked as ``java -jar executable.jar``. However, such incantation is +a bit too verbose and we can shorten it a bit by defining an alias. Let's define an alias for *phenopacket-tools*. Assuming the distribution ZIP was unpacked into -phenopacket-tools-cli-|release| directory, run the following to set up the alias and to check that the alias works: +phenopacket-tools-cli-|release| directory, run the following to set up the alias and to check that the new alias works: .. parsed-literal:: alias pxf="java -jar $(pwd)/phenopacket-tools-cli-\ |release|\ /phenopacket-tools-cli-|release|.jar" pxf --help +.. note:: + From now on, we will use the ``pxf`` alias instead of the longer form. However, feel free to choose whichever + you like more. + Convert ======= @@ -63,16 +71,17 @@ Convert Version 1 of the GA4GH Phenopacket schema was released in 2019 to elicit community feedback. In response to this feedback, the schema was extended and refined and version 2 was released in 2021 and published in 2022 by the International Standards Organization (ISO). +The `convert` command of *phenopacket-tools* converts version 1 phenopackets into version 2. -The `convert` command of *phenopacket-tools* converts version 1 phenopackets into version 2. In this tutorial, -we will first convert an example v1 phenopacket and then 384 v1 phenopackets published by Robinson et al., 2020\ [1]_. +For the purpose of this tutorial, we will first convert a single v1 phenopacket +and then 384 v1 phenopackets published by Robinson et al., 2020\ [1]_. -A toy example -^^^^^^^^^^^^^ + +Convert single phenopacket +^^^^^^^^^^^^^^^^^^^^^^^^^^ We will convert a phenopacket ``Schreckenbach-2014-TPM3-II.2.json`` that is bundled -in the *phenopacket-tools* distribution ZIP file. -The phenopacket can be found in `examples/convert` folder next to the executable JAR file. +in the distribution ZIP archive in `examples/convert` folder next to the executable JAR file. .. note:: See :ref:`rsttutorialexamples` for detailed info of the example phenopackets. @@ -87,13 +96,14 @@ CLI option and the conversion can be done iff the *v1* phenopacket has one `Dise Let's convert the phenopacket by running:: - pxf convert -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json > Schreckenbach-2014-TPM3-II.2.v2.json + cat ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json | pxf convert > Schreckenbach-2014-TPM3-II.2.v2.json -The phenopacket represents a case report with several variants that are causal with respect to the disease. +The example phenopacket represents a case report with several variants that are causal with respect to the disease. 
Therefore, we can use ``--convert-variants`` to convert `Variant`\ s into v2 `Interpretation` element:: - pxf convert --convert-variants \ - -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json > Schreckenbach-2014-TPM3-II.2.v2-with-variants.json + cat ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json |\ + pxf convert --convert-variants \ + > Schreckenbach-2014-TPM3-II.2.v2-with-variants.json A real-life example @@ -109,7 +119,7 @@ a folder named as ``v1``:: curl -o phenopackets.v1.zip https://zenodo.org/record/3905420/files/phenopackets.zip unzip -d v1 phenopackets.v1.zip -Let's convert all *v1* phenopackets and store the results in JSON format in a new folder ``v2``:: +Now, we convert all *v1* phenopackets and store the results in JSON format in a new folder ``v2``:: # Make the folder for converted phenopackets. mkdir -p v2 @@ -122,19 +132,20 @@ Let's convert all *v1* phenopackets and store the results in JSON format in a ne printf "Converted %s phenopackets\n" $(ls v2/ | wc -l) -We converted 384 phenopackets into *v2* format and stored the JSON files in the ``v2`` folder. +.. note:: + We use ``-i`` instead of the standard input. The ``-i`` can be provided multiple times to convert multiple + phenopackets in bulk. See the :ref:`rstcli` reference for more details. + +After a while, phenopackets in the *v2* format are stored as JSON files in the ``v2`` folder. Validate ======== The `validate` command of *phenopacket-tools* validates correctness of phenopackets, families and cohorts. -This section focuses on the *off-the-shelf* phenopacket validators. - -.. note:: - See the :ref:`rstvalidation` and the `Java Documentation`_ to learn how to implement a custom validator. +This section outlines usage opf the *off-the-shelf* validators available in the CLI application. -We will work with a suite of phenopackets that are bundled in the *phenopacket-tools* distribution ZIP file. -The phenopackets are located in `examples` folder next to the executable JAR file: +In this tutorial section, we will work with a suite of phenopackets that are bundled in the distribution ZIP archive. +The phenopackets are located in `examples/validate` folder next to the executable JAR file: .. parsed-literal:: examples=$(pwd)/phenopacket-tools-cli-\ |release|\ /examples @@ -164,11 +175,11 @@ All phenopackets, regardless of their aim or scope must pass this requirement to All required fields must be present ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The `BaseValidator` checks that all required fields are not empty:: +The `BaseValidator` checks that all required fields are present:: pxf validate -i ${examples}/validate/base/missing-fields.json -The validator emits 3 lines with the following issues: +The validator will find 3 errors and emit 3 CSV lines with the following issues: .. csv-table:: :header: "Validation error", "Solution" @@ -178,14 +189,19 @@ The validator emits 3 lines with the following issues: 'subject.id' is missing but it is required, Add the subject ID 'phenotypicFeatures[0].type.label' is missing but it is required, Add the `label` attribute into the `type` of the first phenotypic feature +.. note:: + The ``validate`` command reports errors in CSV format the validation results can be easily stored in a CSV file by + using output stream redirection. Use the ``-H | --include-header`` option to include a header + with validation metadata. 
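+
+    For instance, the following stores the report, including the header, in a CSV file
+    (``validation-results.csv`` is just an example name)::
+
+        pxf validate -H -i ${examples}/validate/base/missing-fields.json > validation-results.csv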
-All ontologies are defined -~~~~~~~~~~~~~~~~~~~~~~~~~~ +All ontologies are well-defined +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Phenopacket Schema relies heavily on use of ontologies and ontology concepts. `MetaData` element lists -the ontologies used in the particular phenopacket. +the ontologies used in the particular phenopacket. To ensure data traceability, Phenopacket Schema requires +phenopacket to contain a `Resource` with ontology metadata such as version and IRI for each used ontology concept. -The `MetaDataValidator` checks if the `MetaData` has an ontology `Resource` for all concepts used in the phenopacket:: +The `MetaDataValidator` checks if the `MetaData` has an ontology `Resource` for all used ontology concepts:: pxf validate -i ${examples}/validate/base/missing-resources.json @@ -209,7 +225,8 @@ For instance, a rare disease project may require presence of several elements th 3. Time at last encounter (sub-element of subject), representing the age of the proband *Phenopacket-tools* ships with a JSON schema for enforcing the above requirements. -The schema is located at ``examples/custom-json-schema/hpo-rare-disease-schema.json``. +The schema is located next to phenopacket examples for this section +at ``examples/custom-json-schema/hpo-rare-disease-schema.json``. Using the custom JSON schema via ``--require`` option will point out issues in the 4 example phenopackets:: @@ -237,12 +254,13 @@ Using the custom JSON schema via ``--require`` option will point out issues in t Phenotype validation ^^^^^^^^^^^^^^^^^^^^ -*Phenopacket-tools* offers a validator for checking logical consistency of phenotypic features in the phenopacket. -The phenotype validation requires the Human Phenotype Ontology (HPO) file to work. +*Phenopacket-tools* offers a validator for checking logical consistency of clinical abnormalities in the phenopacket. +The validator assumes Human Phenotype Ontology (HPO) is used to represent the clinical abnormalities and +the phenotype validation requires the HPO file to work. .. note:: The examples below assume that the latest HPO in JSON format has been downloaded to ``hp.json``. - The HPO file can be downloaded from `HPO releases`_. + Get the HPO JSON from `HPO releases`_. .. note:: See :ref:`rstphenotypevalidation` for more details. @@ -251,7 +269,7 @@ The phenotype validation requires the Human Phenotype Ontology (HPO) file to wor Phenopackets use non-obsolete term IDs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The `HpoPhenotypeValidator` checks if the phenopacket contains obsolete HPO terms:: +The `HpoPhenotypeValidator` points out if the phenopacket contains obsolete HPO terms:: pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.obsolete-term.json @@ -268,14 +286,20 @@ the primary ``HP:0002616`` for *Aortic root aneurysm*: The annotation-propagation rule is not violated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Due to annotation propagation rule, it is a logical error to use both a term and its ancestor -(e.g. *Arachnodactyly* and *Abnormality of finger*). +Due to the annotation propagation rule, it is a logical error to use both a term and its ancestor +(e.g. *Arachnodactyly* and *Abnormality of finger*) for annotation of a single item. When choosing HPO terms for phenotypic features, the *most* specific terms should be used for the *observed* clinical features. In contrary, the *least* specific terms should be used for the *excluded* clinical features. 
+There is one exception to these rules: a term and its ancestor can co-exist in the phenopacket if the parent term +is *observed* and the child term is *excluded* (e.g. phenopacket with present *Aortic aneurysm* +but excluded *Aortic root aneurysm*, see ``marfan.valid.json``). The `HpoAncestryValidator` checks that the annotation propagation rule is not violated:: - pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.annotation-propagation-rule.json + pxf validate --hpo hp.json \ + -i ${examples}/validate/phenotype-validation/marfan.annotation-propagation-rule.json \ + -i ${examples}/validate/phenotype-validation/marfan.valid.json + .. csv-table:: :header: "Validation error", "Solution" @@ -283,11 +307,6 @@ The `HpoAncestryValidator` checks that the annotation propagation rule is not vi "Phenotypic features of id-C must not contain both an observed term (Aortic root aneurysm, HP:0002616) and an observed ancestor (Aortic aneurysm, HP:0004942)", Remove the ancestor term -.. note:: - Presence of excluded descendant and observed ancestor does not violate the annotation propagation rule. - A phenopacket with excluded *Aortic root aneurysm* and present *Aortic aneurysm* is valid, - see ``marfan.valid.json``. - Annotation of organ systems ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -310,7 +329,7 @@ in 3 phenopackets of toy `Marfan syndrome Date: Thu, 17 Nov 2022 17:08:55 -0500 Subject: [PATCH 12/38] Describe autocompletion, check `convert` command. Signed-off-by: Daniel Danis --- docs/cli.rst | 125 +++++++++++------- docs/tutorial.rst | 2 + .../phenopackettools/cli/Main.java | 8 +- 3 files changed, 86 insertions(+), 49 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 12c6bbe4..d9ef9215 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -4,30 +4,33 @@ Command-line interface ====================== -*Phenopacket-tools* command-line interface (CLI) provides functionality for viewing, conversion and validation -of the top-level elements of Phenopacket schema. This document describes how to set up the CLI application +*Phenopacket-tools* command-line interface (CLI) provides functionality for conversion and validation +of the top-level elements of Phenopacket Schema. Here we describe how to set up the CLI application on Linux, Mac and Windows environments. .. note:: - *Phenopacket-tools* is written in Java 17 and requires Java 17 or newer to run. + *phenopacket-tools* is written in Java 17 and requires Java 17 or newer to run. + +We distribute *phenopacket-tools* in a ZIP archive. The application requires no special installation procedure +if Java 17 or better is available in your environment. -*Phenopacket-tools* is distributed as a standalone executable Java Archive (JAR) file. The application requires -no special installation procedure if Java 17 or better is available in your environment. Setup -~~~~~ +===== -Most users should *download* the distribution ZIP file with precompiled JAR file from *Phenopacket-tools* release page. +Most users should *download* the distribution ZIP file with precompiled JAR file from *phenopacket-tools* release page. However, it is also possible to *build* the JAR from sources. + Download ^^^^^^^^ -*Phenopacket-tools* JAR is provided in the distribution ZIP file as part of *Phenopacket-tools*' release schedule +*phenopacket-tools* JAR is provided in the distribution ZIP file as part of *phenopacket-tools*' release schedule from `Releases `_. 
-The ZIP archive contains the executable JAR file along with README and example phenopackets required to run the setup -and the tutorial. +The ZIP archive contains the executable JAR file along with a `README` file and example phenopackets required +to run the setup and the tutorial. + Build from source code ^^^^^^^^^^^^^^^^^^^^^^ @@ -36,26 +39,68 @@ The source code is available in our `GitHub repository `_ for more details regarding - setting up JDK and ``$JAVA_HOME`` on your system. -* *Phenopacket-tools* leverages several open-source Java libraries and a **working internet connection** + to JDK's location. See `Setting JAVA_HOME `_ + for more details regarding setting up ``$JAVA_HOME`` on Windows, Mac, and Linux. +* *phenopacket-tools* uses several open-source Java libraries and a **working internet connection** is required to download the libraries. -Run the following commands to check out the stable source code and to build the application:: +Run the following commands to check out the source code and to build the application: + +.. parsed-literal:: + git clone https://github.com/phenopackets/phenopacket-tools + cd phenopacket-tools + git checkout tags/|release| + ./mvnw -Prelease package + +If the build completes, a ZIP archive "phenopacket-tools-cli-|release|-distribution.zip" +is created in the ``phenopacket-tools-cli/target`` directory. Use the archive in the same way as the archive +downloaded from *phenopacket-tools* releases. + +Set up alias and autocompletion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this *optional* step, we set up an alias and autocompletion for *phenopacket-tools* command-line application. +The autocompletion works thanks to the awesome `Picocli `_ library and it works +on Bash or ZSH Unix shells. + +Let's set up the alias first. To reiterate the tutorial :ref:`rstsetupaliastutorial` section, +Java command line applications are invoked as ``java -jar executable.jar``. However, such incantation is +a bit too verbose and we can shorten it a bit by defining an alias. + +Assuming the distribution ZIP was unpacked into phenopacket-tools-cli-|release| directory, let's run the following +to set up the alias: + +.. parsed-literal:: + alias pxf="java -jar $(pwd)/phenopacket-tools-cli-\ |release|\ /phenopacket-tools-cli-|release|.jar" + pxf --help - $ git clone https://github.com/phenopackets/phenopacket-tools - $ cd phenopacket-tools - $ ./mvnw -Prelease package +Now the autocompletion. The autocompletion can simplify using the CLI options by completing the command +or option after pressing the `TAB` key. +To enable the autocompletion, make sure the alias for `pxf` is set up correctly and run: -After a successful build, a distribution ZIP file "phenopacket-tools-cli-|release|-distribution.zip" -will be created in the ``phenopacket-tools-cli/target`` directory. Use the ZIP archive in the same way as the archive -downloaded from *Phenopacket-tools* releases. +.. parsed-literal:: + source <(pxf generate-completion) + +The ``pxf generate-completion`` command generates the autocompletion script and ``source`` uses it to set up +the completion. However, the autocompletion will last only for the duration of the current shell session. + +To make the autocompletion permanent, store the script file and add the alias and and sourcing into your `.bashrc` +or `.bash_profile` file: + +.. 
parsed-literal:: + echo "### Install phenopacket-tools" >> .bashrc + echo alias pxf="java -jar $(pwd)/phenopacket-tools-cli-\ |release|\ /phenopacket-tools-cli-|release|.jar" >> .bashrc + pxf generate-completion > pxf-completion.sh + echo source $(pwd)/pxf-completion.sh >> .bashrc + +.. warning:: + The autocompletion only works if the alias is set to `pxf`. Other alias values will *not* work. Commands -~~~~~~~~ +======== -*Phenopacket-tools* CLI provides the following commands: +The command-line interface provides the following commands: * ``examples`` - generate examples of the top-level elements * ``convert`` - convert top-level elements from *v1* to *v2* format @@ -66,13 +111,9 @@ into the provided directory. The ``convert`` and ``validate`` commands, despite a similar manner. The parts shared by the both command are be described in greater detail in the ``convert`` command section. -In the next sections, we will run *Phenopacket-tools* by using the following alias:: - - $ alias pxf="java -jar phenopacket-tools-cli-${project.version}.jar" - .. note:: - The commands report warnings and errors by default. Use `-v` to increase the verbosity and see what's - going on under the hood. The `-v` can be specified multiple times (e.g. `-vvv`). + The commands only report warnings and errors by default. Use `-v` to increase the verbosity and see what's + going on under the hood. The `-v` option can be specified multiple times (e.g. `-vvv`). *examples* - generate examples of the top-level elements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -90,7 +131,7 @@ into the current directory. The following command writes the examples into the ``path/to/examples`` directory:: - $ pxf examples -o path/to/examples + pxf examples -o path/to/examples *convert* - convert top-level elements from *v1* to *v2* format @@ -106,7 +147,7 @@ and cohort ``cohort.v1.json``. We can convert a *v1* phenopacket into *v2* by running:: - $ cat phenopacket.v1.json | pxf convert > phenopacket.v2.json + cat phenopacket.v1.json | pxf convert > phenopacket.v2.json @@ -114,11 +155,11 @@ We can convert a *v1* phenopacket into *v2* by running:: The guessing is, however, naive and can fail in parsing e.g. gzipped *JSON* file. Turn of the format guessing by providing the ``-f | --format`` option:: - $ # Explicit JSON input - $ cat phenopacket.v1.json | pxf convert -f json > phenopacket.v2.json - $ - $ # Explicit protobuf input - $ cat phenopacket.v1.pb | pxf convert -f protobuf > phenopacket.v2.pb + # Explicit JSON input + cat phenopacket.v1.json | pxf convert -f json > phenopacket.v2.json + + # Explicit protobuf input + cat phenopacket.v1.pb | pxf convert -f protobuf > phenopacket.v2.pb The ``-f | --format`` option accepts one of the following 3 values: ``{json, pb, yaml}``. @@ -127,7 +168,7 @@ The ``-f | --format`` option accepts one of the following 3 values: ``{json, pb, By default, the output is written in the format of the input data. However, we can override this by using ``--output-format`` option:: - $ cat phenopacket.v1.json | pxf convert --output-format pb > phenopacket.v2.pb + cat phenopacket.v1.json | pxf convert --output-format pb > phenopacket.v2.pb The ``--output-format`` option takes the same values as ``--format``: ``{json, pb, yaml}``. @@ -136,8 +177,8 @@ The ``convert`` command expects to receive a phenopacket by default. However, it top-level elements of the Phenopacket schema: family and cohort. 
Use the ``-e | --element`` option to indicate if the input is a ``family`` or a ``cohort``:: - $ cat family.v1.json | pxf convert -e family > family.v2.json - $ cat cohort.v1.json | pxf convert -e cohort > cohort.v2.json + cat family.v1.json | pxf convert -e family > family.v2.json + cat cohort.v1.json | pxf convert -e cohort > cohort.v2.json We can convert one or more item at the time by using the ``-i | --input`` option. If the ``-i`` option is used only once, the STDIN is ignored and the conversion proceeds in the same way as in the examples above. However, ``-i`` option can @@ -147,7 +188,7 @@ are written into a directory supplied via the ``-O | --output-directory`` option For instance:: - $ pxf convert -i phenopacket.a.v1.json -i phenopacket.b.v1.json -O converted + pxf convert -i phenopacket.a.v1.json -i phenopacket.b.v1.json -O converted converts the input phenopackets and stores the results in the ``converted`` folder. The converted files will be stored under the same names. @@ -178,9 +219,3 @@ A row with column names follows the header, and then the individual validation r .. TODO - check the validation description. -Set up autocompletion -~~~~~~~~~~~~~~~~~~~~~ - -.. TODO - write the section - -TODO - write diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0ce9dfc8..0a868adf 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -47,6 +47,8 @@ Download and unpack the ZIP file of the latest release |release| from the releas curl -o phenopacket-tools-cli-|release|-distribution.zip ${URL} unzip phenopacket-tools-cli-|release|-distribution.zip +.. _rstsetupaliastutorial: + Set up alias ^^^^^^^^^^^^ diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java index 6041658a..ae40c6f7 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java @@ -10,7 +10,7 @@ import static picocli.CommandLine.Help.Ansi.Style.*; -@CommandLine.Command(name = "phenopacket-tools", +@CommandLine.Command(name = "pxf", header = Main.HEADER, version = Main.VERSION, mixinStandardHelpOptions = true, @@ -25,8 +25,8 @@ footer = Main.FOOTER) public class Main { - public static final String HEADER = "phenopacket-tools\nAn application for creating, converting and validating GA4GH phenopackets.\n"; - public static final String VERSION = "phenopacket-tools v0.4.7"; + public static final String HEADER = "phenopacket-tools\nAn application and library for building, conversion, and validation of GA4GH Phenopackets.\n"; + public static final String VERSION = "phenopacket-tools v0.4.8-SNAPSHOT"; // Maximum number of characters in line of the usage message. 
public static final int USAGE_WIDTH = 120; @@ -38,7 +38,7 @@ public class Main { .optionParams(italic) .build(); - public static final String FOOTER = "\nSee the full documentation at https://phenopacket-tools.readthedocs.io\n"; + public static final String FOOTER = "\nSee the full documentation at http://phenopackets.org/phenopacket-tools\n"; private Main() { // private no-op From 8cedcbae79783f148969feaca1dc2377e16c865c Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Tue, 22 Nov 2022 14:42:02 -0500 Subject: [PATCH 13/38] updating index --- docs/index.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index df719147..164a09ce 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,7 +16,8 @@ genetic information, diagnoses, and treatments. The Phenopacket schema supports Phenopackets are designed to be both human and machine-interpretable, enabling computing operations and validation on the basis of defined relationships between diagnoses, lab measurements, and genotypic information. -The *phenopacket-tools* library was written as a modular Java 17 library and has three main goals. +The *phenopacket-tools* library was written as a modular Java 17 library. It is available +at https://github.com/phenopackets/phenopacket-tools. It has three main goals: - To simplify *creating* phenopackets with Java code using streamlined builders and predefined building blocks such as units, anatomical organs, and clinical modifiers. @@ -44,6 +45,7 @@ in a JVM-based application. validation converting constants + examples .. figure:: https://onlinelibrary.wiley.com/cms/asset/1cc0a141-da65-45a3-b7b0-6316b7b02069/ggn2202200016-fig-0002-m.jpg From 3adb7a5ff494ba1cda8e8d99ba04d22381275524 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 22 Nov 2022 15:07:49 -0500 Subject: [PATCH 14/38] Update the docs index page. Signed-off-by: Daniel Danis --- docs/index.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 164a09ce..9e6e8bc3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,8 +16,8 @@ genetic information, diagnoses, and treatments. The Phenopacket schema supports Phenopackets are designed to be both human and machine-interpretable, enabling computing operations and validation on the basis of defined relationships between diagnoses, lab measurements, and genotypic information. -The *phenopacket-tools* library was written as a modular Java 17 library. It is available -at https://github.com/phenopackets/phenopacket-tools. It has three main goals: +*Phenopacket-tools* is an open-source Java library and command-line interface (CLI) application for working +with GA4GH phenopackets. The library has three main goals: - To simplify *creating* phenopackets with Java code using streamlined builders and predefined building blocks such as units, anatomical organs, and clinical modifiers. @@ -26,14 +26,14 @@ at https://github.com/phenopackets/phenopacket-tools. It has three main goals: and to enable developers to extend the validation framework to encode the specific requirements of consortia or projects using either JSON schema or programmatic tools. -On top of the library, we provide a standalone command-line interface (CLI) application for -conversion and validation. +On top of the library, we provide a standalone CLI application for +conversion and validation. The source code is available at https://github.com/phenopackets/phenopacket-tools. 
The following sections describe phenopacket-tools library and CLI application. -We start with :ref:`rsttutorial` to provide a quick overview of the CLI application functionality. -We follow with the comprehensive :ref:`rstcli` description. +We start with :ref:`rsttutorial` to provide a quick overview of the CLI functionality. +We follow with comprehensive description of the :ref:`rstcli`. The rest of the documentation offers an in-depth user guide for using the library functionality -in a JVM-based application. +in JVM-based applications. .. toctree:: :maxdepth: 1 From 27d86488f0a7b278dc9944b5aeb02b60a40208ec Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 22 Nov 2022 15:14:50 -0500 Subject: [PATCH 15/38] Update the docs index page. Signed-off-by: Daniel Danis --- docs/index.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 9e6e8bc3..eae02479 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -27,7 +27,8 @@ with GA4GH phenopackets. The library has three main goals: or projects using either JSON schema or programmatic tools. On top of the library, we provide a standalone CLI application for -conversion and validation. The source code is available at https://github.com/phenopackets/phenopacket-tools. +conversion and validation. The source code is available +from our `GitHub repository `_. The following sections describe phenopacket-tools library and CLI application. We start with :ref:`rsttutorial` to provide a quick overview of the CLI functionality. From d1a37dbaa27633cf07e0f45e48414f93f0900290 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 22 Nov 2022 17:22:03 -0500 Subject: [PATCH 16/38] Use positional parameters to pass inputs. Finalize tutorial and CLI user guide. Signed-off-by: Daniel Danis --- .github/workflows/pages.yml | 2 +- docs/cli.rst | 142 ++++++++++++++---- docs/tutorial.rst | 26 +++- .../phenopackettools/cli/Main.java | 10 +- .../cli/command/BaseIOCommand.java | 21 ++- .../cli/command/ConvertCommand.java | 6 +- .../cli/command/ExamplesCommand.java | 12 +- .../cli/command/ValidateCommand.java | 2 +- 8 files changed, 159 insertions(+), 62 deletions(-) diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 8cdba802..f23148cb 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -55,7 +55,7 @@ jobs: pxfcli="phenopacket-tools-cli" find ${pxfcli}/target \ -regex "^${pxfcli}/target/${pxfcli}-[0-9]\.[0-9]\.[0-9]\(-SNAPSHOT\)?\.jar$" \ - -exec java -jar {} examples --output gh-pages/examples \; + -exec java -jar {} examples --output-directory gh-pages/examples \; ## Build the docs # Generate the HTML pages and move the generated content into the target folder. diff --git a/docs/cli.rst b/docs/cli.rst index d9ef9215..d51f23d7 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -106,17 +106,32 @@ The command-line interface provides the following commands: * ``convert`` - convert top-level elements from *v1* to *v2* format * ``validate`` - validate semantic and syntactic correctness of top-level Phenopacket schema elements -The ``examples`` command is fairly simple; it writes a bunch of example phenopackets, cohorts and families -into the provided directory. The ``convert`` and ``validate`` commands, despite being a bit more elaborate, work in -a similar manner. The parts shared by the both command are be described in greater detail -in the ``convert`` command section. +Before we dive into the commands, let's discuss some common concepts shared by all CLI commands. 
+ +Common concepts +^^^^^^^^^^^^^^^ + +We designed the CLI with aim to make it as easy to use as possible. As a result, the *phenopacket-tools* commands +use several common design principles: + +* The input data can be provided either via the standard input *OR* as a list of positional parameters. +* The input *data format* is provided using ``-f | --format`` option. + *phenopacket-tools* supports phenopackets in `JSON`, `YAML`, or `protobuf` formats. + In absence of the explicit data format, *phenopacket-tools* makes an educated guess. +* The output is written in the input data format. +* The top-level *element type* of the data input is indicated by the ``-e | --element`` option. + According to the Phenopacket Schema, the commands supports `phenopacket`, `family`, or `cohort` elements. +* The output is written into the standard output stream. Progress, warnings, and errors are reported + into standard error. +* The CLI operates in a silent mode by default; only warnings and errors are reported. + Use ``-v`` to increase the verbosity; the ``-v`` option can be specified multiple times (e.g. ``-vvv``). -.. note:: - The commands only report warnings and errors by default. Use `-v` to increase the verbosity and see what's - going on under the hood. The `-v` option can be specified multiple times (e.g. `-vvv`). -*examples* - generate examples of the top-level elements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +We discuss the common concepts further at the relevant places of the next sections. + + +``examples`` - generate phenopacket examples +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``examples`` command writes example phenopackets (including family and cohort examples) into a provided base directory. Starting from a `base` directory, the examples are written into three sub-folders:: @@ -134,10 +149,10 @@ The following command writes the examples into the ``path/to/examples`` director pxf examples -o path/to/examples -*convert* - convert top-level elements from *v1* to *v2* format +``convert`` - convert top-level elements from *v1* to *v2* format ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``convert`` command converts a phenopacket, family, or a cohort from *v1* to *v2* format of Phenopacket schema. +The ``convert`` command converts a phenopacket, family, or a cohort from *v1* to *v2* format of Phenopacket Schema. Usage ##### @@ -151,9 +166,9 @@ We can convert a *v1* phenopacket into *v2* by running:: -*Phenopacket-tools* makes an educated guess to determine if the input is in *JSON*, *Protobuf*, or *YAML* format. -The guessing is, however, naive and can fail in parsing e.g. gzipped *JSON* file. Turn of the format guessing -by providing the ``-f | --format`` option:: +*Phenopacket-tools* makes an educated guess to determine if the input is in `JSON`, `protobuf`, or `YAML` format. +The current format guessing implementation is, however, naïve and can fail in parsing e.g. gzipped `JSON` file. +Turn the format guessing off by providing the ``-f | --format`` option:: # Explicit JSON input cat phenopacket.v1.json | pxf convert -f json > phenopacket.v2.json @@ -180,42 +195,103 @@ the input is a ``family`` or a ``cohort``:: cat family.v1.json | pxf convert -e family > family.v2.json cat cohort.v1.json | pxf convert -e cohort > cohort.v2.json -We can convert one or more item at the time by using the ``-i | --input`` option. 
If the ``-i`` option is used only once,
-the STDIN is ignored and the conversion proceeds in the same way as in the examples above. However, ``-i`` option can
-be provided more than once, to convert a collection of items in a single run. The results of the bulk processing
-are written into a directory supplied via the ``-O | --output-directory`` option (the option is mandatory if using
->1 ``-i``).
+We can convert one or more items at a time by passing the paths of the input files as positional parameters.
+If a single parameter is provided, STDIN is ignored and the conversion proceeds in the same way as in the examples
+above. The command can accept two or more files as positional parameters for bulk conversion. To perform
+the bulk conversion, the ``-O | --output-directory`` option must be provided to set the location of the directory
+for writing the converted phenopackets.
 
 For instance::
 
-    pxf convert -i phenopacket.a.v1.json -i phenopacket.b.v1.json -O converted
+    pxf convert -O converted phenopacket.a.v1.json phenopacket.b.v1.json
 
 converts the input phenopackets and stores the results in the ``converted`` folder. The converted files will be stored
 under the same names.
 
 
-*validate* - validate semantic and syntactic correctness
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+``validate`` - validate Phenopacket Schema elements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The ``validate`` command checks *syntactic* and *semantic* correctness of a *phenopacket*, *family*, or *cohort*.
+The ``validate`` command checks a *phenopacket*, *family*, or *cohort* for the *base* requirements imposed by
+the Phenopacket Schema as well as additional user-defined constraints.
 
-Briefly, to be syntactically correct, a phenopacket must be well formatted (valid Protobuf message, JSON document, etc.)
-and meet the requirements of the Phenopacket schema; all REQUIRED attributes are set (e.g. ``phenopacket.id`` and
+Briefly, to meet the base requirements, the phenopacket must be well formatted (valid Protobuf message, JSON document, etc.)
+and meet the requirements of the Phenopacket schema; all REQUIRED attributes are set (e.g. ``phenopacket.id`` and
 ``phenopacket.meta_data``), and ``MetaData`` includes a ``Resource`` for all ontology concepts.
 
-The *semantic* correctness ensures that the element, when taken as a whole, is ... TODO - finish
+The validation can include a number of additional steps, as required by a project or a consortium.
+*Phenopacket-tools* offers several off-the-shelf validators, and the CLI runs them in the validation workflow
+if the required resources are provided.
 
 Usage
 #####
 
-The ``validate`` command shares many CLI options with ``convert``.
+The ``validate`` command can validate one or more phenopacket files provided either via standard input or
+as positional parameters. Results are written into the standard output in CSV format, including an optional header
+containing the validation metadata. The header lines start with ``#`` and contain the *phenopacket-tools* version,
+date and time of validation, and the list of validators that were run.
+The header is followed by a row with column names, and the individual validation results.
+
+Base validation example
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Let's demonstrate the base validation with a few examples. 
A phenopacket can be validated on a stream::
+
+  cat phenopacket.json | pxf validate
+
+or as a positional parameter::
+
+  pxf validate phenopacket.json
+
+Use ``-H | --include-header`` to include the validation metadata in the output and store the results in a file::
+
+  pxf validate -H phenopacket.json > phenopacket.validation.csv
+
+
+Custom validation example
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On top of the base validation, *phenopacket-tools* supports validation using custom requirements.
+See the :ref:`rstcustomvalidation` section to learn how to define a custom JSON schema.
+
+The CLI can be provided with one or more JSON schema documents using the ``--require`` option::
+
+  pxf validate --require custom-schema.json phenopacket.json
+
+Phenotype validation
+~~~~~~~~~~~~~~~~~~~~
+
+*Phenopacket-tools* includes off-the-shelf validators for pointing out annotation errors in phenopackets that use
+Human Phenotype Ontology (HPO) to represent clinical findings of the subjects.
+The validators check for the presence of obsolete or unknown ontology concepts and for violations
+of the annotation propagation rule, based on an HPO file.
+
+The CLI will automatically add the phenotype validation steps into the validation workflow if a path to an HPO JSON file
+is provided via the ``--hpo`` option::
+
+  pxf validate --hpo hp.json phenopacket.json
+
+.. note::
+  The bulk validation, where phenopackets are provided as positional parameters, is much faster
+  since the HPO graph parsing, a computationally expensive operation, is done only once.
+
+Organ system validation
+~~~~~~~~~~~~~~~~~~~~~~~
+
+It can be desirable to check the annotation of specific organ systems in the phenopacket. *Phenopacket-tools* can validate
+the annotation of specific organ systems by using the corresponding top-level HPO concepts, such as
+`Eye `_,
+`Cardiovascular `_, or
+`Respiratory `_ organ systems.
+
+The organ systems are provided using the ``-s | --organ-system`` option::
+
+  pxf validate --hpo hp.json \
+    -s HP:0000478 \
+    -s HP:0001626 \
+    -s HP:0002086 \
+    phenopacket.json
+
+.. note::
+  The organ system validation requires an HPO file to run.
 
 
 diff --git a/docs/tutorial.rst b/docs/tutorial.rst
 index 0ce9dfc8..20e1823f 100644
 --- a/docs/tutorial.rst
 +++ b/docs/tutorial.rst
 @@ -8,6 +8,7 @@ This tutorial walks through the installation of *phenopacket-tools* command-line
  and provides an overview of the *conversion* of phenopackets from `v1` to the current `v2` format
  and *validation* functionality, including custom validation rules.
 
 +
 Setup
 =====
 
@@ -15,8 +16,6 @@ Setup
 We distribute the CLI application as a ZIP archive with an executable Java Archive (JAR) file
 and several examples for running this tutorial.
 
-As a prerequisite, Java 17 or newer is installed on the machine,
-download the distribution ZIP and set up an alias as a shortcut for running the *phenopacket-tools*. 
Prerequisites ^^^^^^^^^^^^^ @@ -32,11 +31,12 @@ The command should print a similar output:: OpenJDK Runtime Environment (build 17+35-2724) OpenJDK 64-Bit Server VM (build 17+35-2724, mixed mode, sharing) + Download *phenopacket-tools* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A prebuilt distribution ZIP file is available for download from -`phenopacket-tools release section `_ +A prebuilt distribution ZIP file is available for download from the +`release section `_ of the GitHub repository. Download and unpack the ZIP file of the latest release |release| from the release assets: @@ -49,6 +49,7 @@ Download and unpack the ZIP file of the latest release |release| from the releas .. _rstsetupaliastutorial: + Set up alias ^^^^^^^^^^^^ @@ -67,6 +68,21 @@ phenopacket-tools-cli-|release| directory, run the following to set up the alias you like more. +Set up autocompletion +^^^^^^^^^^^^^^^^^^^^^ + +As a quick way to increase the user convenience, *phenopacket-tools* offers autocompletion for completing the command +or options after pressing the `TAB` key on Bash or ZSH Unix shells. + +Run the following to enable the autocompletion for the tutorial session: + +.. parsed-literal:: + source <(pxf generate-completion) + +.. note:: + See the :ref:`rstcli` for setting up the autocompletion to last beyond the current shell session. + + Convert ======= @@ -144,7 +160,7 @@ Validate ======== The `validate` command of *phenopacket-tools* validates correctness of phenopackets, families and cohorts. -This section outlines usage opf the *off-the-shelf* validators available in the CLI application. +This section outlines usage of the off-the-shelf validators available in the CLI application. In this tutorial section, we will work with a suite of phenopackets that are bundled in the distribution ZIP archive. The phenopackets are located in `examples/validate` folder next to the executable JAR file: diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java index ae40c6f7..4de96603 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java @@ -11,8 +11,11 @@ import static picocli.CommandLine.Help.Ansi.Style.*; @CommandLine.Command(name = "pxf", - header = Main.HEADER, - version = Main.VERSION, + header = { + "phenopacket-tools (pxf)", + "An application and library for building, conversion, and validation of GA4GH Phenopackets.\n" + }, + version = "v0.4.8-SNAPSHOT", mixinStandardHelpOptions = true, subcommands = { // see https://picocli.info/autocomplete.html @@ -25,9 +28,6 @@ footer = Main.FOOTER) public class Main { - public static final String HEADER = "phenopacket-tools\nAn application and library for building, conversion, and validation of GA4GH Phenopackets.\n"; - public static final String VERSION = "phenopacket-tools v0.4.8-SNAPSHOT"; - // Maximum number of characters in line of the usage message. 
public static final int USAGE_WIDTH = 120; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java index 49b2c4eb..35714a9e 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java @@ -33,11 +33,6 @@ public abstract class BaseIOCommand extends BaseCommand { public InputSection inputSection = new InputSection(); public static class InputSection { - @CommandLine.Option(names = {"-i", "--input"}, - arity = "0..*", - description = "Input phenopacket(s).%nLeave empty for STDIN") - public List inputs = null; - // The format will be sniffed if it is not provided. @CommandLine.Option(names = {"-f", "--format"}, description = {"Phenopacket format.", @@ -51,6 +46,16 @@ public static class InputSection { public PhenopacketElement element = null; } + + @CommandLine.Parameters( + paramLabel = "phenopacket file(s)", + description = { + "Input phenopacket(s).", + "Leave empty for STDIN" + } + ) + public List inputs = null; + protected BaseIOCommand() { parserFactory = PhenopacketParserFactory.getInstance(); } @@ -65,7 +70,7 @@ protected BaseIOCommand() { */ protected List readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) { PhenopacketParser parser = parserFactory.forFormat(schemaVersion); - if (inputSection.inputs == null) { + if (inputs == null) { // The user did not set `-i | --input` option, assuming a single input is coming from STDIN. InputStream is = System.in; try { @@ -82,10 +87,10 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem // Assuming a one or more input are provided via `-i | --input`. // // Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied. - assert !inputSection.inputs.isEmpty(); + assert !inputs.isEmpty(); List messages = new ArrayList<>(); - for (Path input : inputSection.inputs) { + for (Path input : inputs) { try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { setFormatAndElement(is, schemaVersion); Message message = parser.parse(inputSection.format, inputSection.element, is); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java index 2681098d..a4b578ec 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java @@ -87,14 +87,14 @@ protected Integer execute() { * Return {@code true} if CLI argument combination makes sense or {@code false} if the app should abort. */ private boolean checkInputArgumentsAreOk() { - if (inputSection.inputs == null) { + if (inputs == null) { if (convertSection.outputDirectory != null) LOGGER.warn("Output directory was provided but the input is coming from STDIN. The output will be written to STDOUT"); } else { - if (inputSection.inputs.isEmpty()) { + if (inputs.isEmpty()) { throw new RuntimeException("Input list should never be empty!"); // A bug guard. 
} else { - if (inputSection.inputs.size() > 1) { + if (inputs.size() > 1) { if (convertSection.outputDirectory == null) { LOGGER.error("Output directory (-O | --output-directory) must be provided when processing >1 inputs"); return false; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java index 2685e4d9..8203a989 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java @@ -21,9 +21,9 @@ description = "Write example phenopackets to a directory.") public class ExamplesCommand extends BaseCommand { - @CommandLine.Option(names = {"-o", "--output"}, - description = "Output directory (default: ${DEFAULT-VALUE})") - public Path output = Path.of("."); + @CommandLine.Option(names = {"-O", "--output-directory"}, + description = "Path to output directory") + public Path outputDirectory = Path.of("."); private final PhenopacketPrinter jsonPrinter; private final PhenopacketPrinter yamlPrinter; @@ -37,9 +37,9 @@ public ExamplesCommand() { @Override protected Integer execute() { try { - Path phenopacketDir = createADirectoryIfDoesNotExist(output.resolve("phenopackets")); - Path familyDir = createADirectoryIfDoesNotExist(output.resolve("families")); - Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); + Path phenopacketDir = createADirectoryIfDoesNotExist(outputDirectory.resolve("phenopackets")); + Path familyDir = createADirectoryIfDoesNotExist(outputDirectory.resolve("families")); + Path cohortDir = createADirectoryIfDoesNotExist(outputDirectory.resolve("cohorts")); // Phenopackets printJsonAndYaml(new AtaxiaWithVitaminEdeficiency().getPhenopacket(), phenopacketDir, "AVED"); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index 394bbed2..a13def62 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -34,7 +34,7 @@ import java.util.stream.Collectors; @Command(name = "validate", - description = "Validate top-level elements of the Phenopacket schema.", + description = "Validate top-level elements of the Phenopacket Schema.", sortOptions = false, mixinStandardHelpOptions = true) public class ValidateCommand extends BaseIOCommand { From 21500d869fe96dfb0ae22eff3977efd52e1b213f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 11:05:27 -0500 Subject: [PATCH 17/38] Add ECO, OMIM, and NCT resources. 
Signed-off-by: Daniel Danis --- .../builder/builders/Resources.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index c527b41c..0e02a7cb 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -35,6 +35,13 @@ private Resources() { .setUrl("http://www.ebi.ac.uk/efo/efo.owl") .setIriPrefix("http://purl.obolibrary.org/obo/EFO_"); + private static final Resource.Builder ECO_BUILDER = Resource.newBuilder() + .setId("eco") + .setName("Evidence & Conclusion Ontology (ECO)") + .setNamespacePrefix("ECO") + .setUrl("http://purl.obolibrary.org/obo/eco.owl") + .setIriPrefix("http://purl.obolibrary.org/obo/ECO_"); + private static final Resource.Builder CL_BUILDER = Resource.newBuilder() .setId("cl") .setName("Cell Ontology") @@ -115,6 +122,20 @@ private Resources() { .setUrl("https://drugcentral.org/") .setIriPrefix("https://drugcentral.org/drugcard/"); + private static final Resource.Builder OMIM_BUILDER = Resource.newBuilder() + .setId("omim") + .setName("An Online Catalog of Human Genes and Genetic Disorders") + .setNamespacePrefix("OMIM") + .setUrl("https://www.omim.org") + .setIriPrefix("https://www.omim.org/entry/"); + + private static final Resource.Builder NCT_BUILDER = Resource.newBuilder() + .setId("nct") + .setName("ClinicalTrials.gov") + .setNamespacePrefix("NCT") + .setUrl("https://clinicaltrials.gov") + .setIriPrefix("https://clinicaltrials.gov/show/NCT"); + public static Resource hgncVersion(String version) { return HGNC_BUILDER.setVersion(version).build(); } public static Resource hpoVersion(String version) { @@ -133,6 +154,10 @@ public static Resource efoVersion(String version) { return EFO_BUILDER.setVersion(version).build(); } + public static Resource ecoVersion(String version) { + return ECO_BUILDER.setVersion(version).build(); + } + public static Resource clVersion(String version) { return CL_BUILDER.setVersion(version).build(); } @@ -176,4 +201,12 @@ public static Resource loincVersion(String version) { public static Resource drugCentralVersion(String version) { return DRUG_CENTRAL_BUILDER.setVersion(version).build(); } + + public static Resource omimVersion(String version) { + return OMIM_BUILDER.setVersion(version).build(); + } + + public static Resource nctVersion(String version) { + return NCT_BUILDER.setVersion(version).build(); + } } From 71e9c0c5bc1401f8861568e44b7c0e576ab9479f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 11:05:48 -0500 Subject: [PATCH 18/38] Do not validate metadata if missing or empty. 
Signed-off-by: Daniel Danis --- .../metadata/CohortMetaDataValidator.java | 13 ----- .../metadata/FamilyMetaDataValidator.java | 13 ----- ...aValidator.java => MetaDataValidator.java} | 50 +++++++++++++++++-- .../core/metadata/MetaDataValidators.java | 6 +-- .../PhenopacketMetaDataValidator.java | 13 ----- 5 files changed, 49 insertions(+), 46 deletions(-) delete mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/CohortMetaDataValidator.java delete mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/FamilyMetaDataValidator.java rename phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/{BaseMetaDataValidator.java => MetaDataValidator.java} (67%) delete mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/PhenopacketMetaDataValidator.java diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/CohortMetaDataValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/CohortMetaDataValidator.java deleted file mode 100644 index aca9ff6d..00000000 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/CohortMetaDataValidator.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.phenopackets.phenopackettools.validator.core.metadata; - -import org.phenopackets.schema.v2.CohortOrBuilder; -import org.phenopackets.schema.v2.core.MetaData; - -class CohortMetaDataValidator extends BaseMetaDataValidator { - - @Override - protected MetaData getMetadata(CohortOrBuilder message) { - return message.getMetaData(); - } - -} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/FamilyMetaDataValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/FamilyMetaDataValidator.java deleted file mode 100644 index 3341fae4..00000000 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/FamilyMetaDataValidator.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.phenopackets.phenopackettools.validator.core.metadata; - -import org.phenopackets.schema.v2.FamilyOrBuilder; -import org.phenopackets.schema.v2.core.MetaData; - -class FamilyMetaDataValidator extends BaseMetaDataValidator { - - @Override - protected MetaData getMetadata(FamilyOrBuilder message) { - return message.getMetaData(); - } - -} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidator.java similarity index 67% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidator.java index 8a053cec..851e9673 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java +++ 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidator.java @@ -4,17 +4,21 @@ import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.phenopackets.schema.v2.CohortOrBuilder; +import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; import org.phenopackets.schema.v2.core.MetaData; import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.Resource; import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; -abstract class BaseMetaDataValidator implements PhenopacketValidator { +abstract class MetaDataValidator implements PhenopacketValidator { private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "MetaDataValidator", @@ -29,8 +33,12 @@ public ValidatorInfo validatorInfo() { @Override public List validate(T component) { // Validate that these fields use ontology prefixes that are represented in the MetaData section. - MetaData metaData = getMetadata(component); - Set validOntologyPrefixes = getOntologyNamespacePrefixes(metaData); + Optional metaData = getMetadata(component); + if (metaData.isEmpty()) + // No need to run MetaData validation if there is no metadata! + return List.of(); + + Set validOntologyPrefixes = getOntologyNamespacePrefixes(metaData.get()); return streamOfAllInstancesOfType(component, OntologyClass.class).sequential() .flatMap(oc -> { @@ -59,8 +67,10 @@ public List validate(T component) { /** * A hook for getting {@link MetaData} from Phenopacket schema top-level element. + * The {@code Optional} is empty if {@link MetaData} is not initialized or + * is equal to {@link MetaData#getDefaultInstance()} (hence not useful for validation). */ - protected abstract MetaData getMetadata(T message); + protected abstract Optional getMetadata(T message); private static Set getOntologyNamespacePrefixes(MetaData metaData) { return metaData.getResourcesList() @@ -108,4 +118,36 @@ private static void findAllInstances(Object o, Class clz, Stream.Builder< } } + static class PhenopacketMetaDataValidator extends MetaDataValidator { + + @Override + protected Optional getMetadata(PhenopacketOrBuilder message) { + return !message.getMetaData().isInitialized() || message.getMetaData().equals(MetaData.getDefaultInstance()) + ? Optional.empty() + : Optional.of(message.getMetaData()); + } + + } + + static class FamilyMetaDataValidator extends MetaDataValidator { + + @Override + protected Optional getMetadata(FamilyOrBuilder message) { + return !message.getMetaData().isInitialized() || message.getMetaData().equals(MetaData.getDefaultInstance()) + ? Optional.empty() + : Optional.of(message.getMetaData()); + } + + } + + static class CohortMetaDataValidator extends MetaDataValidator { + + @Override + protected Optional getMetadata(CohortOrBuilder message) { + return !message.getMetaData().isInitialized() || message.getMetaData().equals(MetaData.getDefaultInstance()) + ? 
Optional.empty() + : Optional.of(message.getMetaData()); + } + + } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidators.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidators.java index 5c90edb2..4de372d5 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidators.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/MetaDataValidators.java @@ -29,7 +29,7 @@ public static PhenopacketValidator phenopacketValidator() if (PHENOPACKET_VALIDATOR == null) { synchronized (MetaDataValidators.class) { if (PHENOPACKET_VALIDATOR == null) - PHENOPACKET_VALIDATOR = new PhenopacketMetaDataValidator(); + PHENOPACKET_VALIDATOR = new MetaDataValidator.PhenopacketMetaDataValidator(); } } return PHENOPACKET_VALIDATOR; @@ -47,7 +47,7 @@ public static PhenopacketValidator familyValidator() { if (FAMILY_VALIDATOR == null) { synchronized (MetaDataValidators.class) { if (FAMILY_VALIDATOR == null) - FAMILY_VALIDATOR = new FamilyMetaDataValidator(); + FAMILY_VALIDATOR = new MetaDataValidator.FamilyMetaDataValidator(); } } return FAMILY_VALIDATOR; @@ -65,7 +65,7 @@ public static PhenopacketValidator cohortValidator() { if (COHORT_VALIDATOR == null) { synchronized (MetaDataValidators.class) { if (COHORT_VALIDATOR == null) - COHORT_VALIDATOR = new CohortMetaDataValidator(); + COHORT_VALIDATOR = new MetaDataValidator.CohortMetaDataValidator(); } } return COHORT_VALIDATOR; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/PhenopacketMetaDataValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/PhenopacketMetaDataValidator.java deleted file mode 100644 index 0beb9383..00000000 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/PhenopacketMetaDataValidator.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.phenopackets.phenopackettools.validator.core.metadata; - -import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import org.phenopackets.schema.v2.core.MetaData; - -class PhenopacketMetaDataValidator extends BaseMetaDataValidator { - - @Override - protected MetaData getMetadata(PhenopacketOrBuilder message) { - return message.getMetaData(); - } - -} From c0c11ad323b7091937f31ba1e607d9090f8ed06d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 11:13:58 -0500 Subject: [PATCH 19/38] Run MetaDataValidator as part of the base validation. 
Signed-off-by: Daniel Danis --- .../BaseValidationWorkflowRunnerBuilder.java | 19 ++ .../JsonSchemaValidationWorkflowRunner.java | 58 ++-- ...sonSchemaValidationWorkflowRunnerTest.java | 103 ++++---- .../jsonschema/bethlem-myopathy.json | 114 ++++++-- .../validator/jsonschema/example-cohort.json | 114 ++++++-- .../validator/jsonschema/example-family.json | 249 +++++++++++------- .../validator/jsonschema/retinoblastoma.json | 87 +++--- 7 files changed, 501 insertions(+), 243 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java index b4211b1c..806a6af7 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java @@ -3,6 +3,8 @@ import com.google.protobuf.MessageOrBuilder; import org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter; import org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverters; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.metadata.MetaDataValidators; import org.phenopackets.phenopackettools.validator.jsonschema.impl.JsonSchemaValidator; import org.phenopackets.phenopackettools.validator.jsonschema.v2.JsonSchemaValidatorConfigurer; import org.phenopackets.schema.v2.CohortOrBuilder; @@ -32,6 +34,7 @@ public JsonSchemaValidationWorkflowRunner build() { List requirementValidators = readRequirementValidators(jsonSchemaUrls); return new JsonSchemaValidationWorkflowRunner<>(getFormatConverter(), getBaseRequirementsValidator(), + getMetadataValidator(), requirementValidators, validators); } @@ -39,6 +42,7 @@ public JsonSchemaValidationWorkflowRunner build() { protected abstract PhenopacketFormatConverter getFormatConverter(); protected abstract JsonSchemaValidator getBaseRequirementsValidator(); + protected abstract PhenopacketValidator getMetadataValidator(); private List readRequirementValidators(List schemaUrls) { @@ -67,6 +71,11 @@ protected PhenopacketFormatConverter getFormatConverter() protected JsonSchemaValidator getBaseRequirementsValidator() { return JsonSchemaValidatorConfigurer.getBasePhenopacketValidator(); } + + @Override + protected PhenopacketValidator getMetadataValidator() { + return MetaDataValidators.phenopacketValidator(); + } } static class FamilyWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @@ -79,6 +88,11 @@ protected PhenopacketFormatConverter getFormatConverter() { protected JsonSchemaValidator getBaseRequirementsValidator() { return JsonSchemaValidatorConfigurer.getBaseFamilyValidator(); } + + @Override + protected PhenopacketValidator getMetadataValidator() { + return MetaDataValidators.familyValidator(); + } } @@ -92,5 +106,10 @@ protected PhenopacketFormatConverter getFormatConverter() { protected JsonSchemaValidator getBaseRequirementsValidator() { return JsonSchemaValidatorConfigurer.getBaseCohortValidator(); } + + @Override + protected PhenopacketValidator getMetadataValidator() { + return MetaDataValidators.cohortValidator(); + } } } diff --git 
a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index a7193c7d..6a2b23a1 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -17,7 +17,6 @@ import java.util.Collection; import java.util.List; import java.util.Objects; -import java.util.stream.Stream; /** * Validates if given top-level element satisfies the following criteria: @@ -45,6 +44,7 @@ public class JsonSchemaValidationWorkflowRunner impl private final ObjectMapper objectMapper = new ObjectMapper(); private final PhenopacketFormatConverter converter; private final JsonSchemaValidator baseValidator; + private final PhenopacketValidator metadataValidator; private final Collection requirementValidators; private final Collection> validators; private final List validatorInfos; @@ -75,27 +75,17 @@ public static JsonSchemaValidationWorkflowRunnerBuilder cohortB JsonSchemaValidationWorkflowRunner(PhenopacketFormatConverter converter, JsonSchemaValidator baseValidator, + PhenopacketValidator metadataValidator, Collection requirementValidators, Collection> validators) { this.converter = Objects.requireNonNull(converter); this.baseValidator = Objects.requireNonNull(baseValidator); + this.metadataValidator = Objects.requireNonNull(metadataValidator); this.requirementValidators = Objects.requireNonNull(requirementValidators); this.validators = Objects.requireNonNull(validators); this.validatorInfos = summarizeValidatorInfos(baseValidator, requirementValidators, validators); } - /** - * @deprecated use the other constructor - */ - @Deprecated(forRemoval = true, since = "0.4.8") - JsonSchemaValidationWorkflowRunner(PhenopacketFormatConverter converter, - JsonSchemaValidator baseValidator, - Collection requirementValidators, - Collection> syntaxValidators, - Collection> semanticValidators) { - this(converter, baseValidator, requirementValidators, Stream.concat(syntaxValidators.stream(), semanticValidators.stream()).toList()); - } - private static List summarizeValidatorInfos(JsonSchemaValidator base, Collection requirements, Collection> validators) { @@ -137,19 +127,16 @@ public ValidationResults validate(byte[] payload) { public ValidationResults validate(String json) { ValidationResults.Builder builder = ValidationResults.builder(); + T component; try { - validateRequirements(json, builder); - } catch (ConversionException e) { - return wrapUpValidation(e, builder); - } - - try { - convertAndRunValidation(json, builder); + component = converter.toItem(json); + validateRequirements(json, component, builder); } catch (ConversionException e) { + // We must not proceed with semantic validation with item that does not meet the requirements. 
return wrapUpValidation(e, builder); } - return builder.build(); + return runValidation(component, builder); } @Override @@ -159,16 +146,14 @@ public ValidationResults validate(T item) { String json = converter.toJson(item); try { - validateRequirements(json, builder); + validateRequirements(json, item, builder); } catch (ConversionException e) { // We must not proceed with semantic validation with item that does not meet the requirements. return wrapUpValidation(e, builder); } // No conversion necessary, hence no need to guard against the `ConversionException`. - runValidation(item, builder); - - return builder.build(); + return runValidation(item, builder); } private String parseToString(byte[] payload) throws ConversionException { @@ -184,11 +169,11 @@ private String parseToString(byte[] payload) throws ConversionException { } /** - * Validate requirements using {@link #baseValidator} and all {@link #requirementValidators}. + * Validate the requirements using {@link #baseValidator} and all {@link #requirementValidators}. * * @throws ConversionException if {@code json} cannot be mapped into {@link JsonNode} */ - private void validateRequirements(String json, ValidationResults.Builder builder) throws ConversionException { + private void validateRequirements(String json, T component, ValidationResults.Builder builder) throws ConversionException { JsonNode jsonNode; try { jsonNode = objectMapper.readTree(json); @@ -198,6 +183,7 @@ private void validateRequirements(String json, ValidationResults.Builder builder } builder.addResults(baseValidator.validatorInfo(), baseValidator.validate(jsonNode)); + builder.addResults(metadataValidator.validatorInfo(), metadataValidator.validate(component)); for (JsonSchemaValidator validator : requirementValidators) { builder.addResults(validator.validatorInfo(), validator.validate(jsonNode)); @@ -205,23 +191,13 @@ private void validateRequirements(String json, ValidationResults.Builder builder } /** - * Convert the {@code item} into {@link T} and validate the requirements. - * - * @throws ConversionException if {@code item} cannot be mapped into {@link T} - */ - private void convertAndRunValidation(String item, ValidationResults.Builder builder) throws ConversionException { - T component = converter.toItem(item); - - runValidation(component, builder); - } - - /** - * Validate semantic requirements using {@link #validators}. Unlike {@link #convertAndRunValidation(String, ValidationResults.Builder)}, - * this method does not throw {@link ConversionException}. + * Validate the requirements by applying {@link #validators}. 
*/ - private void runValidation(T component, ValidationResults.Builder builder) { + private ValidationResults runValidation(T component, ValidationResults.Builder builder) { for (PhenopacketValidator validator : validators) builder.addResults(validator.validatorInfo(), validator.validate(component)); + + return builder.build(); } private static ValidationResults wrapUpValidation(ConversionException e, ValidationResults.Builder builder) { diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index 6ade9cf6..b3e2271b 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -19,6 +19,8 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collection; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -58,7 +60,7 @@ public void setUp() { "/metaData, DELETE, 'metaData' is missing but it is required", }) public void checkTopLevelPhenopacketConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -69,7 +71,7 @@ public void checkTopLevelPhenopacketConstraints(String path, String action, Stri "/subject/id, DELETE, 'subject.id' is missing but it is required" }) public void checkSubjectConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -81,7 +83,7 @@ public void checkSubjectConstraints(String path, String action, String expected) "/subject/vitalStatus/status, DELETE, 'subject.vitalStatus.status' is missing but it is required" }) public void checkVitalStatusConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -93,7 +95,7 @@ public void checkVitalStatusConstraints(String path, String action, String expec "/phenotypicFeatures[1]/type, DELETE, 'phenotypicFeatures[1].type' is missing but it is required" }) public void checkPhenotypicFeatureConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -117,7 +119,7 @@ public void checkPhenotypicFeatureConstraints(String path, String action, String "/phenotypicFeatures[5]/onset/interval/end, DELETE, 'phenotypicFeatures[5].onset.interval.end' is missing but it is required", }) public void checkTimeElementConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -128,7 +130,7 @@ public 
void checkTimeElementConstraints(String path, String action, String expec "/phenotypicFeatures[0]/evidence[0]/evidenceCode, DELETE, 'phenotypicFeatures[0].evidence[0].evidenceCode' is missing but it is required", }) public void checkEvidenceConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -142,7 +144,7 @@ public void checkEvidenceConstraints(String path, String action, String expected "/measurements[1]/complexValue, DELETE, 'measurements[1].value' is missing but it is required|'measurements[1].complexValue' is missing but it is required", }) public void checkMeasurementConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -153,7 +155,7 @@ public void checkMeasurementConstraints(String path, String action, String expec "/biosamples[0]/id, DELETE, 'biosamples[0].id' is missing but it is required", }) public void checkBiosampleConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -166,7 +168,7 @@ public void checkBiosampleConstraints(String path, String action, String expecte "/interpretations[0]/progressStatus, DELETE, 'interpretations[0].progressStatus' is missing but it is required", }) public void checkInterpretationConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -177,7 +179,7 @@ public void checkInterpretationConstraints(String path, String action, String ex "/interpretations[0]/diagnosis/disease, DELETE, 'interpretations[0].diagnosis.disease' is missing but it is required", }) public void checkDiagnosisConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -194,7 +196,7 @@ public void checkDiagnosisConstraints(String path, String action, String expecte "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene' is missing but it is required|'interpretations[0].diagnosis.genomicInterpretations[1].variantInterpretation' is missing but it is required", }) public void checkGenomicInterpretationConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -207,7 +209,7 @@ public void checkGenomicInterpretationConstraints(String path, String action, St "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/symbol, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene.symbol' is missing but it is required", }) public void checkGeneDescriptorConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -221,7 +223,7 @@ public 
void checkGeneDescriptorConstraints(String path, String action, String ex "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor' is missing but it is required", }) public void checkVariantInterpretationConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } @ParameterizedTest @@ -230,7 +232,7 @@ public void checkVariantInterpretationConstraints(String path, String action, St "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/moleculeContext, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.moleculeContext' is missing but it is required", }) public void checkVariationDescriptorConstraints(String path, String action, String expected) { - testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expected); + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expected, true); } /** @@ -250,7 +252,7 @@ public void removingAOneOfFieldFromVariationProducesValidationError(String subPa String validationMessagePrefix = "interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.variation"; String expectedValidationMessage = subExpected.replaceAll("REPLACE", validationMessagePrefix); - testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expectedValidationMessage); + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expectedValidationMessage, true); } /** @@ -261,7 +263,7 @@ public void removingAOneOfFieldFromVariationProducesValidationError(String subPa "/diseases[0]/term, DELETE, 'diseases[0].term' is missing but it is required", }) public void checkDiseaseConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -275,7 +277,7 @@ public void checkDiseaseConstraints(String path, String action, String expected) "/medicalActions[3]/therapeuticRegimen, DELETE, 'medicalActions[3].procedure' is missing but it is required|'medicalActions[3].treatment' is missing but it is required|'medicalActions[3].radiationTherapy' is missing but it is required|'medicalActions[3].therapeuticRegimen' is missing but it is required", }) public void checkMedicalActionConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -286,7 +288,7 @@ public void checkMedicalActionConstraints(String path, String action, String exp "/medicalActions[0]/procedure/code, DELETE, 'medicalActions[0].procedure.code' is missing but it is required" }) public void checkProcedureConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -297,7 +299,7 @@ public void checkProcedureConstraints(String path, String action, String expecte "/medicalActions[1]/treatment/agent, DELETE, 'medicalActions[1].treatment.agent' is missing but it is 
required" }) public void checkTreatmentConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -312,7 +314,7 @@ public void checkTreatmentConstraints(String path, String action, String expecte "/medicalActions[2]/radiationTherapy/fractions, DELETE, 'medicalActions[2].radiationTherapy.fractions' is missing but it is required" }) public void checkRadiationTherapyConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -326,7 +328,7 @@ public void checkRadiationTherapyConstraints(String path, String action, String "/medicalActions[3]/therapeuticRegimen/regimenStatus, DELETE, 'medicalActions[3].therapeuticRegimen.regimenStatus' is missing but it is required" }) public void checkTherapeuticRegimenConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -338,7 +340,7 @@ public void checkTherapeuticRegimenConstraints(String path, String action, Strin "/files[0]/uri, DELETE, 'files[0].uri' is missing but it is required", }) public void checkFileConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } /** @@ -347,13 +349,13 @@ public void checkFileConstraints(String path, String action, String expected) { */ @ParameterizedTest @CsvSource({ - "/metaData/created, DELETE, 'metaData.created' is missing but it is required", - "/metaData/createdBy, DELETE, 'metaData.createdBy' is missing but it is required", - "/metaData/resources[*], DELETE, 'metaData.resources' there must be a minimum of 1 items in the array", - "/metaData/phenopacketSchemaVersion, DELETE, 'metaData.phenopacketSchemaVersion' is missing but it is required", + "/metaData/created, DELETE, true, 'metaData.created' is missing but it is required", + "/metaData/createdBy, DELETE, true, 'metaData.createdBy' is missing but it is required", + "/metaData/resources[*], DELETE, false, 'metaData.resources' there must be a minimum of 1 items in the array", + "/metaData/phenopacketSchemaVersion, DELETE, true, 'metaData.phenopacketSchemaVersion' is missing but it is required", }) - public void checkMetaDataConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + public void checkMetaDataConstraints(String path, String action, boolean validateCount, String expected) { + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, validateCount); } /** @@ -362,15 +364,15 @@ public void checkMetaDataConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/metaData/resources[0]/id, DELETE, 'metaData.resources[0].id' is missing but it is required", - "/metaData/resources[0]/name, DELETE, 'metaData.resources[0].name' is missing but it is required", - "/metaData/resources[0]/namespacePrefix, DELETE, 'metaData.resources[0].namespacePrefix' is missing but it is required", - "/metaData/resources[0]/url, DELETE, 'metaData.resources[0].url' is missing but it is required", - 
"/metaData/resources[0]/version, DELETE, 'metaData.resources[0].version' is missing but it is required", - "/metaData/resources[0]/iriPrefix, DELETE, 'metaData.resources[0].iriPrefix' is missing but it is required", + "/metaData/resources[0]/id, DELETE, true, 'metaData.resources[0].id' is missing but it is required", + "/metaData/resources[0]/name, DELETE, true, 'metaData.resources[0].name' is missing but it is required", + "/metaData/resources[0]/namespacePrefix, DELETE, false, 'metaData.resources[0].namespacePrefix' is missing but it is required", + "/metaData/resources[0]/url, DELETE, true, 'metaData.resources[0].url' is missing but it is required", + "/metaData/resources[0]/version, DELETE, true, 'metaData.resources[0].version' is missing but it is required", + "/metaData/resources[0]/iriPrefix, DELETE, true, 'metaData.resources[0].iriPrefix' is missing but it is required", }) - public void checkResourceConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + public void checkResourceConstraints(String path, String action, boolean validateCount, String expected) { + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, validateCount); } /** @@ -382,7 +384,7 @@ public void checkResourceConstraints(String path, String action, String expected "/metaData/updates[0]/timestamp, DELETE, 'metaData.updates[0].timestamp' is missing but it is required", }) public void checkUpdateConstraints(String path, String action, String expected) { - testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); + testErrors(runner, readBethlemPhenopacketNode(), path, action, expected, true); } } @@ -442,7 +444,7 @@ public void validFamilyYieldsNoErrors() { "/metaData, DELETE, 'metaData' is missing but it is required", }) public void absenceOfTopLevelFamilyElementsYieldsErrors(String path, String action, String expected) { - testErrors(runner, readExampleFamilyNode(), path, action, expected); + testErrors(runner, readExampleFamilyNode(), path, action, expected, true); } @ParameterizedTest @@ -451,7 +453,7 @@ public void absenceOfTopLevelFamilyElementsYieldsErrors(String path, String acti "/pedigree/persons[*], DELETE, 'pedigree.persons' there must be a minimum of 1 items in the array", }) public void emptyPedigreeYieldsError(String path, String action, String expected) { - testErrors(runner, readExampleFamilyNode(), path, action, expected); + testErrors(runner, readExampleFamilyNode(), path, action, expected, true); } @@ -496,7 +498,7 @@ public class RequiredFieldsTest { "/metaData, DELETE, 'metaData' is missing but it is required", }) public void checkCohortConstraints(String path, String action, String expected) { - testErrors(runner, readExampleCohortNode(), path, action, expected); + testErrors(runner, readExampleCohortNode(), path, action, expected, true); } } @@ -517,13 +519,18 @@ private static JsonNode readExampleCohortNode() { * to make the {@code invalid}. * Then we validate the invalid node with the {@code runner} and check we receive the expected {@code errors}. *

    + * Sometimes, absence of an element leads to other errors. For instance, absence of a namespace prefix in the HP resource + * triggers a missing ontology class definition error for all HPO terms used in the phenotypic features. + * To check only the presence of a specific error instead of matching all errors, we can set {@code validateCount} to {@code false}. + *

    * This is what is done in this method. */ private static void testErrors(ValidationWorkflowRunner runner, JsonNode node, String path, String action, - String errors) { + String errors, + boolean validateCount) { JsonNode tampered = TAMPERER.tamper(node, path, Action.valueOf(action)); ValidationResults results = runner.validate(tampered.toPrettyString()); @@ -532,10 +539,18 @@ private static void testErrors(ValidationWorkflowRu // .map(ValidationResult::message) // .forEach(System.err::println); - String[] tokens = errors.split("\\|"); - assertThat(results.validationResults(), hasSize(tokens.length)); + Collection tokens = Arrays.asList(errors.split("\\|")); + if (validateCount) { + assertThat(results.validationResults(), hasSize(tokens.size())); + // All messages must be present + assertThat(results.validationResults().stream().map(ValidationResult::message).toList(), containsInAnyOrder(tokens.toArray())); + } else { + // At least one message must be present + assertThat(results.validationResults().stream().map(ValidationResult::message).anyMatch(tokens::contains), is(true)); + } + + // Finally, the method is testing errors, so all levels must be errors. assertThat(results.validationResults().stream().allMatch(r -> r.level().isError()), is(true)); - assertThat(results.validationResults().stream().map(ValidationResult::message).toList(), containsInAnyOrder(tokens)); } diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json index 6be501a1..14bf5d61 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json @@ -252,13 +252,13 @@ "value": { "quantity": { "unit": { - "id": "UO:0000316", + "id": "UCUM:{cells}/uL", "label": "cells per microliter" }, "value": 24000.0, "referenceRange": { "unit": { - "id": "UO:0000316", + "id": "UCUM:{cells}/uL", "label": "cells per microliter" }, "low": 150000.0, @@ -271,7 +271,7 @@ }, "procedure": { "code": { - "id": "C173496", + "id": "NCIT:C173496", "label": "Peripheral Blood" }, "performed": { @@ -282,7 +282,7 @@ { "description": "Customized blood pressure measurement description.", "assay": { - "id": "55417-0", + "id": "LOINC:55417-0", "label": "Short blood pressure panel" }, "complexValue": { @@ -428,7 +428,7 @@ { "procedure": { "code": { - "id": "C15396", + "id": "NCIT:C15396", "label": "Radical Cystectomy" }, "bodySite": { @@ -446,11 +446,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -472,7 +472,7 @@ { "quantity": { "unit": { - "id": "UO:0000022", + "id": "UCUM:mg", "label": "milligram" }, "value": 30.0 @@ -490,7 +490,7 @@ "drugType": "PRESCRIPTION", "cumulativeDose": { "unit": { - "id": "UO:0000022", + "id": "UCUM:mg", "label": "milligram" }, "value": 43800.0 @@ -515,11 +515,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete 
Remission" }, "adverseEvents": [{ @@ -551,11 +551,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -566,8 +566,8 @@ { "therapeuticRegimen": { "ontologyClass": { - "id": "NCT04576091", - "label": "ABCD" + "id": "NCT:04576091", + "label": "Testing the Addition of an Anti-cancer Drug, BAY 1895344, With Radiation Therapy to the Usual Pembrolizumab Treatment for Recurrent Head and Neck Cancer" }, "startTime": { "age": { @@ -586,11 +586,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -630,6 +630,86 @@ "version": "2020-03-08", "namespacePrefix": "GENO", "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, + { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "VERSION", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, + { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "VERSION", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, + { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "VERSION", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, + { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "VERSION", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, + { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "VERSION", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org" + }, + { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "VERSION", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard" + }, + { + "id": "omim", + "name": "An Online Catalog of Human Genes and Genetic Disorders", + "url": "https://www.omim.org", + "version": "VERSION", + "namespacePrefix": "OMIM", + "iriPrefix": "https://www.omim.org/entry/" + }, + { + "id": "eco", + "name": "Evidence & Conclusion Ontology (ECO)", + "url": "http://purl.obolibrary.org/obo/eco.owl", + "version": "VERSION", + "namespacePrefix": "ECO", + "iriPrefix": "http://purl.obolibrary.org/obo/ECO_" + }, + { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }, + { + "id": "nct", + "name": "ClinicalTrials.gov", + "url": "https://clinicaltrials.gov", + "version": "VERSION", + "namespacePrefix": "NCT", + "iriPrefix": "https://clinicaltrials.gov/show/NCT" } ], "updates": [ diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json index c55001a7..5f2fcbd7 100644 --- 
a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json @@ -252,13 +252,13 @@ "value": { "quantity": { "unit": { - "id": "UO:0000316", + "id": "UCUM:{cells}/uL", "label": "cells per microliter" }, "value": 24000.0, "referenceRange": { "unit": { - "id": "UO:0000316", + "id": "UCUM:{cells}/uL", "label": "cells per microliter" }, "low": 150000.0, @@ -271,7 +271,7 @@ }, "procedure": { "code": { - "id": "C173496", + "id": "NCIT:C173496", "label": "Peripheral Blood" }, "performed": { @@ -282,7 +282,7 @@ { "description": "Customized blood pressure measurement description.", "assay": { - "id": "55417-0", + "id": "LOINC:55417-0", "label": "Short blood pressure panel" }, "complexValue": { @@ -428,7 +428,7 @@ { "procedure": { "code": { - "id": "C15396", + "id": "NCIT:C15396", "label": "Radical Cystectomy" }, "bodySite": { @@ -446,11 +446,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -472,7 +472,7 @@ { "quantity": { "unit": { - "id": "UO:0000022", + "id": "UCUM:mg", "label": "milligram" }, "value": 30.0 @@ -490,7 +490,7 @@ "drugType": "PRESCRIPTION", "cumulativeDose": { "unit": { - "id": "UO:0000022", + "id": "UCUM:mg", "label": "milligram" }, "value": 43800.0 @@ -515,11 +515,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -551,11 +551,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -566,8 +566,8 @@ { "therapeuticRegimen": { "ontologyClass": { - "id": "NCT04576091", - "label": "ABCD" + "id": "NCT:04576091", + "label": "Testing the Addition of an Anti-cancer Drug, BAY 1895344, With Radiation Therapy to the Usual Pembrolizumab Treatment for Recurrent Head and Neck Cancer" }, "startTime": { "age": { @@ -586,11 +586,11 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, "adverseEvents": [{ @@ -676,6 +676,86 @@ "version": "2020-03-08", "namespacePrefix": "GENO", "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, + { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "VERSION", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, + { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "VERSION", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, + { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "VERSION", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, + { + "id": "ncbitaxon", + "name": "NCBI 
organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "VERSION", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, + { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "VERSION", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org" + }, + { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "VERSION", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard" + }, + { + "id": "omim", + "name": "An Online Catalog of Human Genes and Genetic Disorders", + "url": "https://www.omim.org", + "version": "VERSION", + "namespacePrefix": "OMIM", + "iriPrefix": "https://www.omim.org/entry/" + }, + { + "id": "eco", + "name": "Evidence & Conclusion Ontology (ECO)", + "url": "http://purl.obolibrary.org/obo/eco.owl", + "version": "VERSION", + "namespacePrefix": "ECO", + "iriPrefix": "http://purl.obolibrary.org/obo/ECO_" + }, + { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }, + { + "id": "nct", + "name": "ClinicalTrials.gov", + "url": "https://clinicaltrials.gov", + "version": "VERSION", + "namespacePrefix": "NCT", + "iriPrefix": "https://clinicaltrials.gov/show/NCT" } ], "phenopacketSchemaVersion": "2.0", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json index 6c30d25f..7a260067 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json @@ -251,13 +251,13 @@ "value": { "quantity": { "unit": { - "id": "UO:0000316", + "id": "UCUM:{cells}/uL", "label": "cells per microliter" }, "value": 24000.0, "referenceRange": { "unit": { - "id": "UO:0000316", + "id": "UCUM:{cells}/uL", "label": "cells per microliter" }, "low": 150000.0, @@ -270,7 +270,7 @@ }, "procedure": { "code": { - "id": "C173496", + "id": "NCIT:C173496", "label": "Peripheral Blood" }, "performed": { @@ -281,7 +281,7 @@ { "description": "Customized blood pressure measurement description.", "assay": { - "id": "55417-0", + "id": "LOINC:55417-0", "label": "Short blood pressure panel" }, "complexValue": { @@ -427,7 +427,7 @@ { "procedure": { "code": { - "id": "C15396", + "id": "NCIT:C15396", "label": "Radical Cystectomy" }, "bodySite": { @@ -445,17 +445,19 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, - "adverseEvents": [{ - "id": "HP:0012587", - "label": "Macroscopic hematuria" - }] + "adverseEvents": [ + { + "id": "HP:0012587", + "label": "Macroscopic hematuria" + } + ] }, { "treatment": { @@ -471,7 +473,7 @@ { "quantity": { "unit": { - "id": "UO:0000022", + "id": "UCUM:mg", "label": "milligram" }, "value": 30.0 @@ -489,7 +491,7 @@ "drugType": "PRESCRIPTION", "cumulativeDose": { "unit": { - "id": "UO:0000022", + "id": "UCUM:mg", "label": 
"milligram" }, "value": 43800.0 @@ -514,17 +516,19 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, - "adverseEvents": [{ - "id": "HP:0020060", - "label": "Decreased red blood cell count" - }] + "adverseEvents": [ + { + "id": "HP:0020060", + "label": "Decreased red blood cell count" + } + ] }, { "therapeuticRegimen": { @@ -550,23 +554,25 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, - "adverseEvents": [{ - "id": "HP:0020060", - "label": "Decreased red blood cell count" - }] + "adverseEvents": [ + { + "id": "HP:0020060", + "label": "Decreased red blood cell count" + } + ] }, { "therapeuticRegimen": { "ontologyClass": { - "id": "NCT04576091", - "label": "ABCD" + "id": "NCT:04576091", + "label": "Testing the Addition of an Anti-cancer Drug, BAY 1895344, With Radiation Therapy to the Usual Pembrolizumab Treatment for Recurrent Head and Neck Cancer" }, "startTime": { "age": { @@ -585,17 +591,19 @@ "label": "Infiltrating Urothelial Carcinoma" }, "treatmentIntent": { - "id": "C62220", + "id": "NCIT:C62220", "label": "Cure" }, "responseToTreatment": { - "id": "C4870", + "id": "NCIT:C4870", "label": "Complete Remission" }, - "adverseEvents": [{ - "id": "HP:0020060", - "label": "Decreased red blood cell count" - }] + "adverseEvents": [ + { + "id": "HP:0020060", + "label": "Decreased red blood cell count" + } + ] } ], "files": [ @@ -613,24 +621,6 @@ "metaData": { "created": "2021-05-14T10:35:00Z", "createdBy": "anonymous biocurator", - "resources": [ - { - "id": "hp", - "name": "human phenotype ontology", - "url": "http://purl.obolibrary.org/obo/hp.owl", - "version": "2021-08-02", - "namespacePrefix": "HP", - "iriPrefix": "http://purl.obolibrary.org/obo/HP_" - }, - { - "id": "geno", - "name": "Genotype Ontology", - "url": "http://purl.obolibrary.org/obo/geno.owl", - "version": "2020-03-08", - "namespacePrefix": "GENO", - "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" - } - ], "updates": [ { "timestamp": "2022-02-22T00:12:34.123456789Z", @@ -684,49 +674,120 @@ "created": "2021-07-01T19:32:35Z", "createdBy": "anonymous biocurator", "submittedBy": "anonymous submitter", - "resources": [{ - "id": "hp", - "name": "Human Phenotype Ontology", - "url": "http://purl.obolibrary.org/obo/hp.owl", - "version": "2021-08-02", - "namespacePrefix": "HP", - "iriPrefix": "http://purl.obolibrary.org/obo/HP_" - }, { - "id": "hp", - "name": "Human Phenotype Ontology", - "url": "http://purl.obolibrary.org/obo/hp.owl", - "version": "2021-08-02", - "namespacePrefix": "HP", - "iriPrefix": "http://purl.obolibrary.org/obo/HP_" - }, { - "id": "mondo", - "name": "Mondo Disease Ontology", - "url": "http://purl.obolibrary.org/obo/mondo.owl", - "version": "2021-09-01", - "namespacePrefix": "Mondo", - "iriPrefix": "http://purl.obolibrary.org/obo/mondo_" - }, { - "id": "hp", - "name": "Human Phenotype Ontology", - "url": "http://purl.obolibrary.org/obo/hp.owl", - "version": "2021-08-02", - "namespacePrefix": "HP", - "iriPrefix": "http://purl.obolibrary.org/obo/HP_" - }, { - "id": "hp", - "name": "Human Phenotype Ontology", - "url": "http://purl.obolibrary.org/obo/hp.owl", - "version": "2021-08-02", - "namespacePrefix": "HP", - "iriPrefix": 
"http://purl.obolibrary.org/obo/HP_" - }, { - "id": "mondo", - "name": "Mondo Disease Ontology", - "url": "http://purl.obolibrary.org/obo/mondo.owl", - "version": "2021-09-01", - "namespacePrefix": "Mondo", - "iriPrefix": "http://purl.obolibrary.org/obo/mondo_" - }], + "resources": [ + { + "id": "hp", + "name": "Human Phenotype Ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, + { + "id": "mondo", + "name": "Mondo Disease Ontology", + "url": "http://purl.obolibrary.org/obo/mondo.owl", + "version": "2021-09-01", + "namespacePrefix": "Mondo", + "iriPrefix": "http://purl.obolibrary.org/obo/mondo_" + }, + { + "id": "hp", + "name": "Human Phenotype Ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, + { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "2020-03-08", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, + { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "VERSION", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, + { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "VERSION", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, + { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "VERSION", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, + { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "VERSION", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, + { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "VERSION", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org" + }, + { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "VERSION", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard" + }, + { + "id": "omim", + "name": "An Online Catalog of Human Genes and Genetic Disorders", + "url": "https://www.omim.org", + "version": "VERSION", + "namespacePrefix": "OMIM", + "iriPrefix": "https://www.omim.org/entry/" + }, + { + "id": "eco", + "name": "Evidence & Conclusion Ontology (ECO)", + "url": "http://purl.obolibrary.org/obo/eco.owl", + "version": "VERSION", + "namespacePrefix": "ECO", + "iriPrefix": "http://purl.obolibrary.org/obo/ECO_" + }, + { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }, + { + "id": "nct", + "name": "ClinicalTrials.gov", + "url": "https://clinicaltrials.gov", + "version": "VERSION", + "namespacePrefix": "NCT", + "iriPrefix": "https://clinicaltrials.gov/show/NCT" + } + ], "phenopacketSchemaVersion": "2.0", "externalReferences": [{ "id": "PMID:20842687", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json 
b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json index ca1fabdb..25cc6574 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json @@ -81,7 +81,7 @@ "value": 25.0, "referenceRange": { "unit": { - "id": "56844-4", + "id": "LOINC:56844-4", "label": "Intraocular pressure of Eye" }, "low": 10.0, @@ -108,7 +108,7 @@ "value": 15.0, "referenceRange": { "unit": { - "id": "56844-4", + "id": "LOINC:56844-4", "label": "Intraocular pressure of Eye" }, "low": 10.0, @@ -430,34 +430,61 @@ "created": "2021-05-14T10:35:00Z", "createdBy": "anonymous biocurator", "resources": [{ - "id": "ncit", - "name": "NCI Thesaurus", - "url": "http://purl.obolibrary.org/obo/ncit.owl", - "version": "21.05d", - "namespacePrefix": "NCIT", - "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" - }, { - "id": "efo", - "name": "Experimental Factor Ontology", - "url": "http://www.ebi.ac.uk/efo/efo.owl", - "version": "3.34.0", - "namespacePrefix": "EFO", - "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" - }, { - "id": "uberon", - "name": "Uber-anatomy ontology", - "url": "http://purl.obolibrary.org/obo/uberon.owl", - "version": "2021-07-27", - "namespacePrefix": "UBERON", - "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" - }, { - "id": "ncbitaxon", - "name": "NCBI organismal classification", - "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", - "version": "2021-06-10", - "namespacePrefix": "NCBITaxon", - "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" - }], + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, + { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "2020-03-08", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, + { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "VERSION", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, + { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "VERSION", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, + { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "VERSION", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org" + }, + { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "VERSION", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard" + }, + { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }], "phenopacketSchemaVersion": "2.0.0" } } \ No newline at end of file From 1d00a3beb7bbeca6f614bf66245b6ebd232e1de0 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 16:46:01 -0500 Subject: [PATCH 20/38] Finalize validation user guide. 
Signed-off-by: Daniel Danis --- docs/tutorial_examples.rst | 2 + docs/validation.rst | 433 ++++++++++++++---- .../pom.xml | 12 + .../JsonSchemaValidationWorkflowRunner.java | 4 +- ...sonSchemaValidationWorkflowRunnerTest.java | 155 ++++++- 5 files changed, 520 insertions(+), 86 deletions(-) diff --git a/docs/tutorial_examples.rst b/docs/tutorial_examples.rst index 8a0637ab..e930763f 100644 --- a/docs/tutorial_examples.rst +++ b/docs/tutorial_examples.rst @@ -51,6 +51,8 @@ the validation that **any phenopacket must pass**. missing-resources-valid.json, A valid version of the above phenopacket with the `Resource` for describing `NCBITaxon`. +.. _rstcustomjsonschematutorialexample: + ``custom-json-schema`` - validate custom requirements ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/validation.rst b/docs/validation.rst index b9b5ac62..404ba3f9 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -8,137 +8,395 @@ Validating Phenopackets Phenopackets schema uses protobuf, an exchange format developed in 2008 by Google. We refer readers to the excellent `Wikipedia page `_ on Protobuf and to `Google’s documentation `_ for details. -In Protobuf (version 3, which is what the Phenopacket Schema uses), all fields are optional. -However, the Phenopacket Schema defines certain fields to be optional +In Protobuf (version 3, which is what the Phenopacket Schema uses), all fields are *optional*. +However, the Phenopacket Schema defines certain fields as *required*. (See `documentation `_ for details). Moreover, projects and consortia can require application of specific constraints and requirements for the phenopackets. -*Phenopacket-tools* provides a functionality for validating phenopackets. +*Phenopacket-tools* provides an extensible API for validation of all schema components, +including a model of validation workflow and validation results. -This document provides a comprehensive description of the functionality of the *off-the-shelf* validators -as well as the validation workflow API. +This document outlines the validation workflow API and demonstrates how to use the off-the-shelf validators present +in the *phenopacket-tools* library. Validation workflow ^^^^^^^^^^^^^^^^^^^ -*Phenopacket-tools* defines an API for phenopacket validation workflow. The workflow is consists of -a list of validation steps. There are two types of steps: *syntax* and *semantic*. The syntax steps check syntax -and cardinality of each component separately. The semantic validators are run after syntax checks and validate -the components in the context of the entire phenopacket. +The validation workflow consists of a list of steps. The workflow includes a mandatory *base* validation step that +validates syntax and cardinality of each component, to verify the basic requirements of Phenopacket Schema, +such as presence of identifier fields and metadata. The base validation is implemented using JSON schema +and Java code (``MetaDataValidator``). + +The workflow can be extended by any number of validation steps for checking specific logical or semantic requirements. +*Phenopacket-tools* offers an API for the validation steps to allow encoding custom validation logic as well +as several off-the-shelf validators. + +The central element of the validation API is ``PhenopacketValidator`` that represents +a single validation step. The validator is generic over ``T`` where ``T`` must be a top-level element +of the Phenopacket Schema: ``Phenopacket``, ``Family``, or ``Cohort``. 
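+Implementing this interface directly is one way to add a project-specific check. The sketch below outlines the
+shape of such a validator; the ``validatorInfo()`` and ``validate()`` methods are the ones used throughout this
+guide, and the types come from the ``validator-core`` module and the Phenopacket Schema bindings. Note that the
+``ValidatorInfo.of(...)`` and ``ValidationResult.error(...)`` factory calls, the generic bound
+(``PhenopacketOrBuilder``), and the exact return type of ``validate()`` are assumptions; consult the Javadoc of
+the ``validator-core`` module for the actual signatures.
+
+.. code-block:: java
+
+    // A sketch of a custom validation step that reports an error for a phenopacket without an id.
+    class PhenopacketIdValidator implements PhenopacketValidator<PhenopacketOrBuilder> {
+
+        @Override
+        public ValidatorInfo validatorInfo() {
+            // Hypothetical factory method; the real API may differ.
+            return ValidatorInfo.of("phenopacket-id", "Phenopacket id check",
+                    "Check that the phenopacket has a non-empty id");
+        }
+
+        @Override
+        public List<ValidationResult> validate(PhenopacketOrBuilder component) {
+            if (component.getId().isEmpty()) {
+                // Hypothetical factory method for an error-level result.
+                return List.of(ValidationResult.error(validatorInfo(), "missing-id",
+                        "The phenopacket id must not be empty"));
+            }
+            return List.of();
+        }
+    }
+
+Such a validator can then be plugged into the workflow with the builder's ``addValidator()`` method shown later
+in this guide.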
+The validator is identified by ``ValidationInfo`` with the name, type and description of the validation functionality. +The validation reports any errors as ``ValidationResult`` objects, one result per error. +The execution of the workflow is orchestrated by the ``ValidationWorkflowRunner``. The runner applies the validators +in the correct order, ensuring that the base validation is done first, and gathers the results into +a ``ValidationResults`` container. The container represents the results of the validation as immutable value objects, +``ValidatorInfo``, ``ValidationResult``, suitable for reporting back to the user. -There is one mandatory syntax validation step that is always run first: the *base* validation. The base validation -ensures the phenopacket message meets the requirements of the Phenopacket Schema. +.. _rstbasevalidation: -The results of the validation are aggregated into a container object that consists -of immutable value objects that describe the performed validations and the validation results suitable -for reporting back to the user. +Base validation workflow +~~~~~~~~~~~~~~~~~~~~~~~~ -.. Additional constraints and requirements may be made for phenopackets that are used in a specific - project or for a specific collaboration or consortium. For instance, a rare-disease consortium - may require that all phenotypic features be recorded using valid HPO terms. An example class is - provided that checks all ``PhenotypicFeature`` elements, ensures that they use HPO terms with valid - (i.e., primary) id's, and checks whether both a term and an ancestor of the term are used - if so - a warning is emitted, because an annotation with a specific HPO term - (e.g., `Perimembranous ventricular septal defect `_) - implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has - `Ventricular septal defect `_). +Let's demonstrate setting up of the base validation workflow for phenopacket validation. -**API** +.. + The code below is at + org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerTest.DocumentationTest.simpleValidationWorkflowRunner -See the ``TODO - add JavaDoc link`` for the API documentation. +.. code-block:: java -.. TODO - refer to org.phenopackets.phenopackettools.validator.jsonschema module -.. Describe validation workflow in general + ValidationWorkflowRunner runner = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); -*Off-the-shelf* validators -^^^^^^^^^^^^^^^^^^^^^^^^^^ +The `JsonSchemaValidationWorkflowRunner` provides factory methods for getting a workflow runner builder +for phenopacket, family, and cohort. The validation workflow can be extended by calling builder methods. +However, in this case we are only interested in the base validation, hence we conclude the building +by calling the `build()` method. -.. TODO - continue +As a convenience, the `runner` can validate phenopacket in several different input types: -TODO - describe *off-the-shelf* validators in great detail. +.. code-block:: java -.. _rstbasevalidation: + // A path + Path path = Path.of("bethlem-myopathy.json"); + ValidationResults results = runner.validate(path); -Base validation -~~~~~~~~~~~~~~~ + // An input stream + try (InputStream is = new FileInputStream(path.toFile())) { + results = runner.validate(is); + } -All phenopackets should be tested against the base JSON Schema (analogously for all ``Family`` and ``Cohort`` messages). -In code, this can be implemented as follows. 
+ // A byte array + try (InputStream is = new FileInputStream(path.toFile())) { + results = runner.validate(is.readAllBytes()); + } + + // A JSON/YAML string + try (BufferedReader reader = Files.newBufferedReader(path)) { + String jsonString = reader.lines().collect(Collectors.joining(System.lineSeparator())); + results = runner.validate(jsonString); + } + + // Or a phenopacket. + PhenopacketParser parser = PhenopacketParserFactory.getInstance() + .forFormat(PhenopacketSchemaVersion.V2); + Phenopacket phenopacket = (Phenopacket) parser.parse(path); + results = runner.validate(phenopacket); + +``JsonSchemaValidationWorkflowRunner`` provides static factory method for getting builders for all top-level elements +of Phenopacket Schema: + +.. + The code below is at + org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerTest.DocumentationTest.availableBuilders .. code-block:: java - JsonSchemaValidator validator = PhenopacketWorkflowRunnerBuilder.getBaseRequirementsValidator(); - Path phenopacketPath = ...; // get Path to a JSON file representing a GA4GH phenopacket + ValidationWorkflowRunner phenopacket = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); - try (InputStream is = Files.newInputStream(phenopacketPath)) { - ValidationResults results = validator.validate(is); - List validationResults = results.validationResults(); - if (validationResults.isEmpty()) { - System.out.printf("%s - OK%n", fileName); - } else { - for (ValidationResult result : validationResults) { - System.out.printf("%s [%s] - %s: %s%n", fileName, result.level(), result.category(), result.message()); - } - } - } catch (IOException e) - System.out.println("Error opening the phenopacket: " + e); - } + ValidationWorkflowRunner family = JsonSchemaValidationWorkflowRunner.familyBuilder() + .build(); -**API** + ValidationWorkflowRunner cohort = JsonSchemaValidationWorkflowRunner.cohortBuilder() + .build(); -See the ``TODO - add JavaDoc link`` for the API documentation. -.. TODO - refer to ... and to org/phenopackets/phenopackettools/validator/core/metadata +To validation workflow can be introspected by calling `validators()` method: -.. _rstphenotypevalidation: +.. + The code below is at + org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerTest.DocumentationTest.workflowIntrospection -Phenotype validation -~~~~~~~~~~~~~~~~~~~~ +.. code-block:: java + + ValidationWorkflowRunner runner = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); + + List validators = runner.validators(); + +`ValidationResults` +~~~~~~~~~~~~~~~~~~~ + +The validation returns `ValidationResults`, a container object that aggregates issues discovered by all validators +of the workflow. + +.. + The code below is at + org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerTest.DocumentationTest.validationResults + +.. code-block:: java + + ValidationResults results = runner.validate(path); + +The `results.isValid()` returns `true` if no validation errors were discovered: + +.. code-block:: java + + assert results.isValid(); + +The `results` has information regarding the applied validation checks: + +.. code-block:: java + + List validators = results.validators(); + +and the discovered issues (if any): + +.. code-block:: java + + List issues = results.validationResults(); + + +`ValidationResult` +~~~~~~~~~~~~~~~~~~ + +`ValidationResult` represents a single validation issue. 
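+In a typical application the discovered issues are simply reported back to the user.
+A minimal sketch, reusing the ``runner`` and ``path`` created above and the accessors described below:
+
+.. code-block:: java
+
+    ValidationResults results = runner.validate(path);
+    if (!results.isValid()) {
+        for (ValidationResult issue : results.validationResults()) {
+            System.err.printf("[%s] %s: %s%n", issue.level(), issue.category(), issue.message());
+        }
+    }
+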
The object is a simple POJO/value object with several attributes: + +.. + The code below is at + org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerTest.DocumentationTest.validationResults + +.. code-block:: java + + ValidationResult issue = issues.get(0); + + // The validator that pointed out the issue. + ValidatorInfo validatorInfo = issue.validatorInfo(); + + // The issue severity (warning or error). + ValidationLevel level = issue.level(); + + // Category of the issue, useful for grouping the issues. + // One validator can produce issues with different categories. + String category = issue.category(); -TODO - write -.. TODO - continue + // A message targeted for the user. + String message = issue.message(); -**API** -See the ``TODO - add JavaDoc link`` for the API documentation. +The API documentation of the core validation API can be found in the +`org.phenopackets.phenopackettools.validator.core `_ +module. + + +.. _rstofftheshelfvalidation: + +*Off-the-shelf* validators +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Additional constraints and requirements may be made for phenopackets that are used in a specific +project or for a specific collaboration or consortium. + +For instance, one may want to require that phenopackets made for rare-disease diagnostics include age of the proband +and use Human Phenotype Ontology (HPO) terms to represent phenotypic features. Additionally, one may want +to enforce requirements that are difficult to encode using JSON Schema, such as that only a valid term id is used +(currently, the HPO has over 16,000 terms), or that the phenopacket does not encode both a term +and a parent or ancestor of the term, or that the phenopacket annotates or excludes abnormality +in selected organ systems. + + +Here we use a bunch of *off-the-shelf* validators from *phenopacket-tools* to show a validation workflow for checking +the above requirements. Let's start by creating a validation workflow runner builder: + +.. code-block:: java + + JsonSchemaValidationWorkflowRunnerBuilder builder = JsonSchemaValidationWorkflowRunner.phenopacketBuilder(); -.. TODO - refer to org/phenopackets/phenopackettools/validator/core/phenotype .. _rstcustomvalidation: Custom validation ~~~~~~~~~~~~~~~~~ -TODO - write -.. TODO - continue +*Phenopacket-tools* offers a validator for enforcing custom requirements encoded as JSON schema. +In the context of the example above, a project may require including age of the proband +and use HPO terms to represent phenotypic features, as well as presence of a certain number +of phenotypic features. + +Let's write a JSON schema document ``hpo-rare-disease-schema.json`` to enforce the requirements. + +Custom JSON schema header +######################### + +To use a JSON schema with *phenopacket-tools*, the schema header must include the following elements:: + + { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://example.com/hpo-rare-disease-validator", + "title": "HPO Rare Disease Phenopacket Schema", + "description": "An example JSON schema for validating a phenopacket in context of the rare-disease research", + "type": "object" + } + +The elements are used for the following purpose: + +* `$schema` - *phenopacket-tools* uses draft `2019-09` JSON schema specification. The `$schema` element MUST be present + in the document and it MUST use the `2019-09` specification. +* `$id` - identifier of the schema, used at `validator.validatorInfo().validatorId()`. 
+* `title` - name of the schema for human consumption, used in `validator.validatorInfo().validatorName()`. +* `description` - a short description of the validation check, used in `validator.validatorInfo().description()`. +* `type` - JSON schema and JSON allows several data types, e.g. `number`, `string`, `object`, `boolean`, etc. + Here we are writing a JSON schema for validating a phenopacket, family, or cohort. Therefore, the `type` + must be an `object`. + +Custom JSON schema body +####################### + +Now, let's constrain the schema to first check that the time at last encounter is specified in the phenopacket subject. +To encode this in JSON schema, we require presence of a `subject` property in phenopacket. +The snippet follows the JSON schema header and instructs the JSON schema validator to check presence of +the `subject` field. The `subject` is an `object` and we can add a `description` to self-document the schema. +Phenopacket Schema does not require presence of the `subject`. However, here we make `subject` a required field, +hence presence of the `required` clause:: + + { + // The header ... + "properties": { + "subject": { + "type": "object", + "description": "The subject element is required for a rare-disease Phenopacket" + } + }, + "required": [ "subject" ] + } + + +Next, we add a constraint to the `subject` field to require presence of the time at last encounter. +We do this by requiring `timeAtLastEncounter` in the `subject`:: + + { + // The header ... + "properties": { + "subject": { + "type": "object", + "description": "The subject element is required for a rare-disease Phenopacket", + "properties": { + "timeAtLastEncounter": { + "type": "object", + "description": "The time at last encounter is required for a rare-disease phenopacket" + } + }, + "required": [ "timeAtLastEncounter" ] + } + }, + "required": [ "subject" ] + } + +We can encode additional checks using `JSON schema `_ syntax. +See the JSON schema document described in :ref:`rstcustomjsonschematutorialexample` section for additional examples. + + +Use custom JSON schema +###################### + +*Phenopacket-tools* validation API supports including custom JSON schema in the validation workflow. +The custom schema can be added into the `builder` created in the previous section (:ref:`rstofftheshelfvalidation`) +by: + +.. code-block:: + + Path customSchema = Path.of("hpo-rare-disease-schema.json"); + builder.addJsonSchema(customSchema); + +The `addJsonSchema()` adds a step for using the JSON schema in the validation workflow. -**API** +.. _rstphenotypevalidation: -See the ``TODO - add JavaDoc link`` for the API documentation. +Phenotype validators +~~~~~~~~~~~~~~~~~~~~ -.. TODO - refer to TODO - somewhere in JSON-schema validation +The validation we discussed until now was fairly simple; it included checking presence, absence, or formatting +of Phenopacket Schema components. However, sometimes we may need to check relationships between individual components. +For instance, in the context of rare-disease research and diagnostics, we may want to check if all phenotypic features +are encoded using valid HPO terms and if the phenotypic annotations are logically consistent. For instance, using both a term +(e.g. `Perimembranous ventricular septal defect `_) and +its ancestor (e.g. `Ventricular septal defect `_ ) is a logical error, +because an annotation with a specific HPO term (e.g., a patient with perimembranous VSD by necessity also has VSD). -.. 
_rstorgsysvalidation: +In this section, we describe validators that use HPO file to perform several checks that can be useful in many contexts. +The API documentation and the corresponding Java classes can be found in +`org.phenopackets.phenopackettools.validator.core.phenotype `_ +package. -Organ system validation -~~~~~~~~~~~~~~~~~~~~~~~ -TODO - write -.. TODO - continue +.. _rstprimaryphenotypevalidation: + +Primary validation +################## + +The `HpoPhenotypeValidator` checks if the HPO terms used by a phenopacket are *valid* - well-formatted and present +in the given HPO file, and *current* - not obsolete. If an obsolete term is found, the validator suggests a replacement +with the current term. + +In code, we add the primary validation into the validation workflow by running: + +.. code-block:: java + + Ontology hpo = OntologyLoader.loadOntology(new File("hp.json")); + PhenopacketValidator primary = HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(hpo); + builder.addValidator(primary); + +The validator requires an HPO `Ontology` object. We use `Phenol `_ +library to parse the HPO JSON file. The `OntologyLoader` is part of +`phenol-io `_ module, you may need to add +an appropriate dependency into your build file. + + +.. _rstancestryphenotypevalidation: + +Ancestry validation +################### + +The `HpoAncestryValidator` checks if the HPO terms are logically consistent; the phenotype features do not include both +a term and its ancestor. -We can validate presence of annotation for specific organ systems in a phenopacket. +Apart from a mere id of the phenotype feature, Phenopacket Schema also models observation status of a feature. +A feature can be either present/observed, or absent/excluded in the subject. The ancestry validator takes +the observation status into the account, which leads to several possible outcomes regarding validity of a term combination: -As an example, we work with toy phenopackets that represent patients with -`Marfan syndrome `_. Due to the nature of the Marfan syndrome, -we may require annotation of three organ systems: +.. csv-table:: + :header: "`Tonic seizure `_ (Term)", "`Seizure `_ (Ancestor)", "Is valid", "Explanation" + + observed, observed, No, "*Tonic seizure* is a type of *Seizure*. Use the *most* specific term (Tonic seizure)." + excluded, excluded, No, "Absence of a *Seizure* implies absence of the *Tonic seizure*. Use the *less* specific term (Seizure)." + observed, excluded, No, "Absence of a *Seizure* implies absence of the *Tonic seizure*. Keep one of the terms depending on the case context." + excluded, observed, Yes, "A valid phenotype term combination. A subject can be annotated with a term and having a sub-type excluded at the same time." + + +Using ancestry validator in the validator workflow is fairly straightforward if we can get ahold of a HPO `Ontology` object: + +.. code-block:: java + + PhenopacketValidator ancestry = HpoPhenotypeValidators.Ancestry.phenopacketHpoAncestryValidator(hpo); + builder.addValidator(ancestry); + + +.. _rstorgsysvalidation: + +Organ system validation +####################### + +In some cases it may be desirable to ensure presence of annotation for specific organ systems. 
+For instance, phenopackets that represent patients with +`Marfan syndrome `_ may require annotation of three organ systems: * Eye * Cardiovascular system * Respiratory system -The annotation is done either by *excluding* the corresponding top-level HPO term or by adding a descendent term: +To annotate organ system, we either *exclude* the corresponding top-level HPO term or by adding a descendent term: .. list-table:: :header-rows: 1 @@ -156,10 +414,25 @@ The annotation is done either by *excluding* the corresponding top-level HPO ter - `Abnormality of the respiratory system `_ - `Pneumothorax `_ -**API** +The `HpoOrganSystemValidator` requires HPO `Ontology` and a list of top-level HPO terms: + +.. code-block:: java + + List organSystemIds = List.of(HpoOrganSystems.EYE, HpoOrganSystems.CARDIOVASCULAR, HpoOrganSystems.RESPIRATORY); + PhenopacketValidator organSystem = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, organSystemIds); + builder.addValidator(organSystem); -See the ``TODO - add JavaDoc link`` for the API documentation. +*Phenopacket-tools* includes a convenience class `HpoOrganSystems` with IDs of the commonly-used top-level HPO terms. +However, any valid term ID can be used for the organ system validation. +For instance, ``List.of(TermId.of("HP:0001250"))`` validates annotation +of a `Seizure (HP:0001250) `_. -.. TODO - refer to org/phenopackets/phenopackettools/validator/core/phenotype/orgsys +See also +^^^^^^^^ +Check out the +`org.phenopackets.phenopackettools.validator.core `_ +and +`org.phenopackets.phenopackettools.validator.jsonschema `_ +modules for more information regarding the public validation API. diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index e6ada963..8e8660db 100644 --- a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -52,6 +52,18 @@ com.fasterxml.jackson.core jackson-databind + + + org.phenopackets.phenopackettools + phenopacket-tools-io + ${project.parent.version} + test + + + org.monarchinitiative.phenol + phenol-io + test + diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index 6a2b23a1..be5c8aa7 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -83,15 +83,17 @@ public static JsonSchemaValidationWorkflowRunnerBuilder cohortB this.metadataValidator = Objects.requireNonNull(metadataValidator); this.requirementValidators = Objects.requireNonNull(requirementValidators); this.validators = Objects.requireNonNull(validators); - this.validatorInfos = summarizeValidatorInfos(baseValidator, requirementValidators, validators); + this.validatorInfos = summarizeValidatorInfos(baseValidator, metadataValidator, requirementValidators, validators); } private static List summarizeValidatorInfos(JsonSchemaValidator base, + PhenopacketValidator metadataValidator, Collection requirements, Collection> validators) { List infos = new ArrayList<>(); infos.add(base.validatorInfo()); + 
infos.add(metadataValidator.validatorInfo()); for (JsonSchemaValidator validator : requirements) { infos.add(validator.validatorInfo()); } diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index b3e2271b..4a43788d 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -4,23 +4,32 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.protobuf.MessageOrBuilder; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; -import org.phenopackets.phenopackettools.validator.core.ValidationResult; -import org.phenopackets.phenopackettools.validator.core.ValidationResults; -import org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner; +import org.monarchinitiative.phenol.io.OntologyLoader; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; +import org.phenopackets.phenopackettools.validator.core.*; +import org.phenopackets.phenopackettools.validator.core.phenotype.HpoOrganSystems; +import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; import org.phenopackets.schema.v2.CohortOrBuilder; import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.Phenopacket; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import java.io.IOException; -import java.io.InputStream; +import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -30,6 +39,31 @@ public class JsonSchemaValidationWorkflowRunnerTest { private static final ObjectMapper MAPPER = new ObjectMapper(); private static final JsonTamperer TAMPERER = new JsonTamperer(); + /** + * Tests that all validation workflow runners (phenopacket, family, and cohort) must pass. 
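+     * In particular, each runner built with default settings is expected to expose the base (syntax) and
+     * metadata validators in its validators() list.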
+ */ + @Nested + public class GeneralTest { + + @Test + public void baseValidationWorkflowHasExpectedValidators() { + JsonSchemaValidationWorkflowRunner phenopacket = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); + List actual = phenopacket.validators().stream().map(ValidatorInfo::validatorId).toList(); + assertThat(actual, hasItems("BaseValidator", "MetaDataValidator")); + + JsonSchemaValidationWorkflowRunner family = JsonSchemaValidationWorkflowRunner.familyBuilder() + .build(); + actual = family.validators().stream().map(ValidatorInfo::validatorId).toList(); + assertThat(actual, hasItems("BaseValidator", "MetaDataValidator")); + + JsonSchemaValidationWorkflowRunner cohort = JsonSchemaValidationWorkflowRunner.cohortBuilder() + .build(); + actual = cohort.validators().stream().map(ValidatorInfo::validatorId).toList(); + assertThat(actual, hasItems("BaseValidator", "MetaDataValidator")); + } + } + /** * Check required and recommended phenopacket fields. */ @@ -513,6 +547,117 @@ private static JsonNode readExampleCohortNode() { } + /** + * The tests that are part of the user guide. Ensure that the user guide is updated if the tests do not compile. + * The tests do not need to be run, just to compile (hence @Disabled). + */ + @Nested + @Disabled + public class DocumentationTest { + + @Test + public void baseValidationWorkflowRunner() throws Exception { + // Prepare the runner + ValidationWorkflowRunner runner = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); + + // A path + Path path = Path.of("bethlem-myopathy.json"); + ValidationResults results = runner.validate(path); + + // An input stream + try (InputStream is = new FileInputStream(path.toFile())) { + results = runner.validate(is); + } + + // A byte array + try (InputStream is = new FileInputStream(path.toFile())) { + results = runner.validate(is.readAllBytes()); + } + + // A JSON or YAML string + try (BufferedReader reader = Files.newBufferedReader(path)) { + String jsonString = reader.lines().collect(Collectors.joining(System.lineSeparator())); + results = runner.validate(jsonString); + } + + // Or a phenopacket. + PhenopacketParser parser = PhenopacketParserFactory.getInstance() + .forFormat(PhenopacketSchemaVersion.V2); + Phenopacket phenopacket = (Phenopacket) parser.parse(path); + results = runner.validate(phenopacket); + } + + @Test + public void availableBuilders() { + ValidationWorkflowRunner phenopacket = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); + + ValidationWorkflowRunner family = JsonSchemaValidationWorkflowRunner.familyBuilder() + .build(); + + ValidationWorkflowRunner cohort = JsonSchemaValidationWorkflowRunner.cohortBuilder() + .build(); + } + + @Test + public void workflowIntrospection() { + ValidationWorkflowRunner runner = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); + + List validators = runner.validators(); + } + + @Test + public void validationResults() throws Exception { + // Prepare the runner + ValidationWorkflowRunner runner = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .build(); + + // A path + Path path = Path.of("bethlem-myopathy.json"); + ValidationResults results = runner.validate(path); + + assert results.isValid(); + List validators = results.validators(); + List issues = results.validationResults(); + + ValidationResult issue = issues.get(0); + + // The validator that pointed out the issue. + ValidatorInfo validatorInfo = issue.validatorInfo(); + + // The issue severity (warning or error). 
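+            // (Presumably, ERROR-level issues indicate violated requirements, while WARNING-level issues are advisory.)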
+ ValidationLevel level = issue.level(); + + // Category of the issue, useful for grouping the issues. + // One validator can produce issues with different categories. + String category = issue.category(); + + // A message targeted for the user. + String message = issue.message(); + } + + @Test + public void elaborateExample() throws Exception { + JsonSchemaValidationWorkflowRunnerBuilder builder = JsonSchemaValidationWorkflowRunner.phenopacketBuilder(); + + Path customSchema = Path.of("hpo-rare-disease-schema.json"); + builder.addJsonSchema(customSchema); + + Ontology hpo = OntologyLoader.loadOntology(new File("hp.json")); + PhenopacketValidator primary = HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(hpo); + builder.addValidator(primary); + + PhenopacketValidator ancestry = HpoPhenotypeValidators.Ancestry.phenopacketHpoAncestryValidator(hpo); + builder.addValidator(ancestry); + + List organSystemIds = List.of(HpoOrganSystems.EYE, HpoOrganSystems.CARDIOVASCULAR, HpoOrganSystems.RESPIRATORY); + PhenopacketValidator organSystem = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, organSystemIds); + builder.addValidator(organSystem); + } + } + /** * In principle, we do the same kind of testing in all parameterized tests. * We use the {@link #TAMPERER} to tamper with a {@code node}, performing a certain {@code action} on a {@code path} From 3aa087af748985e751b2b11e41dce40de0691735 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 16:52:53 -0500 Subject: [PATCH 21/38] Check count of validators in the base validation. Signed-off-by: Daniel Danis --- .../JsonSchemaValidationWorkflowRunnerTest.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index 4a43788d..b6e45e08 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -47,20 +47,24 @@ public class GeneralTest { @Test public void baseValidationWorkflowHasExpectedValidators() { + String[] expected = {"BaseValidator", "MetaDataValidator"}; JsonSchemaValidationWorkflowRunner phenopacket = JsonSchemaValidationWorkflowRunner.phenopacketBuilder() .build(); List actual = phenopacket.validators().stream().map(ValidatorInfo::validatorId).toList(); - assertThat(actual, hasItems("BaseValidator", "MetaDataValidator")); + assertThat(actual, hasItems(expected)); + assertThat(actual, hasSize(2)); JsonSchemaValidationWorkflowRunner family = JsonSchemaValidationWorkflowRunner.familyBuilder() .build(); actual = family.validators().stream().map(ValidatorInfo::validatorId).toList(); - assertThat(actual, hasItems("BaseValidator", "MetaDataValidator")); + assertThat(actual, hasItems(expected)); + assertThat(actual, hasSize(2)); JsonSchemaValidationWorkflowRunner cohort = JsonSchemaValidationWorkflowRunner.cohortBuilder() .build(); actual = cohort.validators().stream().map(ValidatorInfo::validatorId).toList(); - assertThat(actual, hasItems("BaseValidator", "MetaDataValidator")); + assertThat(actual, 
hasItems(expected)); + assertThat(actual, hasSize(2)); } } From ef45594af588fd39fffdd13fc70eac412c910f7f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 16:57:00 -0500 Subject: [PATCH 22/38] Do not use NCT resource. Signed-off-by: Daniel Danis --- .../phenopackettools/builder/builders/Resources.java | 10 ---------- .../validator/jsonschema/bethlem-myopathy.json | 12 ++---------- .../validator/jsonschema/example-cohort.json | 12 ++---------- .../validator/jsonschema/example-family.json | 12 ++---------- 4 files changed, 6 insertions(+), 40 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index 0e02a7cb..15b1c89d 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -129,13 +129,6 @@ private Resources() { .setUrl("https://www.omim.org") .setIriPrefix("https://www.omim.org/entry/"); - private static final Resource.Builder NCT_BUILDER = Resource.newBuilder() - .setId("nct") - .setName("ClinicalTrials.gov") - .setNamespacePrefix("NCT") - .setUrl("https://clinicaltrials.gov") - .setIriPrefix("https://clinicaltrials.gov/show/NCT"); - public static Resource hgncVersion(String version) { return HGNC_BUILDER.setVersion(version).build(); } public static Resource hpoVersion(String version) { @@ -206,7 +199,4 @@ public static Resource omimVersion(String version) { return OMIM_BUILDER.setVersion(version).build(); } - public static Resource nctVersion(String version) { - return NCT_BUILDER.setVersion(version).build(); - } } diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json index 14bf5d61..4f822a89 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json @@ -566,8 +566,8 @@ { "therapeuticRegimen": { "ontologyClass": { - "id": "NCT:04576091", - "label": "Testing the Addition of an Anti-cancer Drug, BAY 1895344, With Radiation Therapy to the Usual Pembrolizumab Treatment for Recurrent Head and Neck Cancer" + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" }, "startTime": { "age": { @@ -702,14 +702,6 @@ "version": "2.1", "namespacePrefix": "UCUM", "iriPrefix": "https://ucum.org/" - }, - { - "id": "nct", - "name": "ClinicalTrials.gov", - "url": "https://clinicaltrials.gov", - "version": "VERSION", - "namespacePrefix": "NCT", - "iriPrefix": "https://clinicaltrials.gov/show/NCT" } ], "updates": [ diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json index 5f2fcbd7..2be00814 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json +++ 
b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json @@ -566,8 +566,8 @@ { "therapeuticRegimen": { "ontologyClass": { - "id": "NCT:04576091", - "label": "Testing the Addition of an Anti-cancer Drug, BAY 1895344, With Radiation Therapy to the Usual Pembrolizumab Treatment for Recurrent Head and Neck Cancer" + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" }, "startTime": { "age": { @@ -748,14 +748,6 @@ "version": "2.1", "namespacePrefix": "UCUM", "iriPrefix": "https://ucum.org/" - }, - { - "id": "nct", - "name": "ClinicalTrials.gov", - "url": "https://clinicaltrials.gov", - "version": "VERSION", - "namespacePrefix": "NCT", - "iriPrefix": "https://clinicaltrials.gov/show/NCT" } ], "phenopacketSchemaVersion": "2.0", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json index 7a260067..7b87cbad 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json @@ -571,8 +571,8 @@ { "therapeuticRegimen": { "ontologyClass": { - "id": "NCT:04576091", - "label": "Testing the Addition of an Anti-cancer Drug, BAY 1895344, With Radiation Therapy to the Usual Pembrolizumab Treatment for Recurrent Head and Neck Cancer" + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" }, "startTime": { "age": { @@ -778,14 +778,6 @@ "version": "2.1", "namespacePrefix": "UCUM", "iriPrefix": "https://ucum.org/" - }, - { - "id": "nct", - "name": "ClinicalTrials.gov", - "url": "https://clinicaltrials.gov", - "version": "VERSION", - "namespacePrefix": "NCT", - "iriPrefix": "https://clinicaltrials.gov/show/NCT" } ], "phenopacketSchemaVersion": "2.0", From 7cc02563955a697e597573c931d64c51e16c4b67 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 17:12:42 -0500 Subject: [PATCH 23/38] Reorder sections, update the `creating` and `constants` guide. Signed-off-by: Daniel Danis --- constants/create_rtd.py | 17 +++++++++++------ docs/constants.rst | 11 ++++++++--- docs/creating.rst | 24 +++++++++++++++++++----- docs/index.rst | 4 ++-- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/constants/create_rtd.py b/constants/create_rtd.py index d5620605..c75f155a 100644 --- a/constants/create_rtd.py +++ b/constants/create_rtd.py @@ -60,12 +60,17 @@ def create_csv_table(entry, fh): RTD_HEADER = """.. _rstconstants: +.. + DO NOT EDIT MANUALLY! + The document is generated by the `phenopacket-tools/constants/create_rtd.py` script. + ========= Constants ========= -The phenopacket-tools library offers a selection of recommended and predefined OntologyClass objects for commonly used concepts. -For instance, this is the code one would need to write using the native Protobuf frameworkto get an OntologyClass instance that represents the modifier ``Left``. +The *phenopacket-tools* library offers a selection of recommended and predefined `OntologyClass` objects +for commonly used concepts. For instance, this is the code one would need to write using the native Protobuf framework +to get an `OntologyClass` instance that represents the modifier ``Left``: .. 
code-block:: java @@ -75,7 +80,7 @@ def create_csv_table(entry, fh): .build(); -In contrast, this is the code required with phenopacket-tools (omitting import statements in both cases) +In contrast, this is the code required with *phenopacket-tools* (omitting import statements in both cases): .. code-block:: java @@ -87,6 +92,6 @@ def create_csv_table(entry, fh): """ with open(RTD_PATH, "wt") as fh: - fh.write(RTD_HEADER) - for e in entries: - create_csv_table(e, fh=fh) + fh.write(RTD_HEADER) + for e in entries: + create_csv_table(e, fh=fh) diff --git a/docs/constants.rst b/docs/constants.rst index f2b11cce..ce39c126 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -1,11 +1,16 @@ .. _rstconstants: +.. + DO NOT EDIT MANUALLY! + The document is generated by the `phenopacket-tools/constants/create_rtd.py` script. + ========= Constants ========= -The phenopacket-tools library offers a selection of recommended and predefined OntologyClass objects for commonly used concepts. -For instance, this is the code one would need to write using the native Protobuf frameworkto get an OntologyClass instance that represents the modifier ``Left``. +The *phenopacket-tools* library offers a selection of recommended and predefined `OntologyClass` objects +for commonly used concepts. For instance, this is the code one would need to write using the native Protobuf framework +to get an `OntologyClass` instance that represents the modifier ``Left``: .. code-block:: java @@ -15,7 +20,7 @@ For instance, this is the code one would need to write using the native Protobuf .build(); -In contrast, this is the code required with phenopacket-tools (omitting import statements in both cases) +In contrast, this is the code required with *phenopacket-tools* (omitting import statements in both cases): .. code-block:: java diff --git a/docs/creating.rst b/docs/creating.rst index 599d90e7..918acb6d 100644 --- a/docs/creating.rst +++ b/docs/creating.rst @@ -6,11 +6,15 @@ Creating Phenopackets ===================== Google's `Protocol Buffer (protobuf)`_ framework automatically generates -Java code for building and working with Phenopackets. However, the code can be unwieldy. Additionally, many users -of the phenopacket framework will want to use a recommended set of ontology terms for specific kinds of data, and thus -the *phenopacket-tools* library provides terms and constants that are more convenient to use than manually creating +Java bindings for building and working with Phenopackets. However, the code can be unwieldy. Additionally, many users +of the phenopacket framework will want to use a recommended set of ontology terms for specific kinds of data. +*Phenopacket-tools* library provides terms and constants that are more convenient to use than manually creating the equivalent message. +This section exemplifies usage of `PhenotypicFeatureBuilder`, one of many builders provided by the +`org.phenopackets.phenopackettools.builder `_ +module. + `phenopacket-tools` builder pattern =================================== @@ -80,7 +84,17 @@ Both code snippets generate identical phenopacket code. } } -Several detailed examples are available in the ``phenopackets-tools-cli`` module in the `examples`_ package. -.. 
_examples: https://github.com/phenopackets/phenopacket-tools/tree/main/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples +See also +======== + +See the API documentation of the +`org.phenopackets.phenopackettools.builder `_ +module for a comprehensive list of ontology constants, convenience methods, and builders provided +by the *phenopacket-tools* library. + +Several detailed examples are available in the ``phenopackets-tools-cli`` module in the +`org.phenopackets.phenopackettools.cli.examples `_ +package. + .. _Protocol Buffer (protobuf): https://developers.google.com/protocol-buffers diff --git a/docs/index.rst b/docs/index.rst index eae02479..a7144954 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -42,10 +42,10 @@ in JVM-based applications. tutorial cli + constants creating - validation converting - constants + validation examples From f83c6af08750d5ad465643a3c378dbe7182861f5 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 17:23:20 -0500 Subject: [PATCH 24/38] Update `converting` docs. Signed-off-by: Daniel Danis --- docs/converting.rst | 44 ++++++++++--------- .../converters/V1ToV2ConverterTest.java | 23 ++++++++++ 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/docs/converting.rst b/docs/converting.rst index ff9191c7..c1f5842b 100644 --- a/docs/converting.rst +++ b/docs/converting.rst @@ -2,43 +2,47 @@ ========================== -Converting V1 Phenopackets +Converting v1 Phenopackets ========================== -The Phenopacket is a Global Alliance for Genomics and Health (GA4GH) standard for sharing disease and phenotype information. -To build the standard, requirements and specifications were established through a community effort. +The requirements and specifications for building the GA4GH Phenopacket Schema were established incrementally +through a community effort. Version 1 of the GA4GH standard was released in 2019 to elicit feedback from the community. Version 2 was developed on the basis of this feedback and should be used henceforth. Version 2 has many additional fields compared to version 1, but the fields used for reporting phenotype ontology terms are nearly identical and version 1 can easily be converted to version 2. Version 1 had fields for reporting variants but did not specify how the variants related to disease diagnoses that were reported. -The conversion methods provided by the *phenopacket-tools* library does not convert the variants by default. -The variants can be converted under the assumption that only one disease was specified in the ``diseases`` field -of the version 1 phenopacket and that the reported variants are interpreted to be causal for the disease. +The conversion methods provided by the *phenopacket-tools* library do not convert the `Variant`\ s by default. +The variants can be converted under the assumption that only one `Disease` was specified in the ``diseases`` field +of the version 1 phenopacket and that the reported variants are interpreted to be *causal* for the disease. If this is not the case, then users would need to write new code to perform the conversion according to the logic of their application. +Example conversion +^^^^^^^^^^^^^^^^^^ -To use library code for converting a phenopacket, adapt the following. +To use library code for converting a phenopacket, adapt the following code: -.. TODO - point to JavaDoc +.. + The code is at + org.phenopackets.phenopackettools.converter.converters.V1ToV2ConverterTest.DocumentationTest.converterWorks .. 
code-block:: java - boolean convertVariants = true; // or false, as desired - V1ToV2Converter converter = V1ToV2Converter.of(convertVariants); - Phenopacket v2 = converter.convertPhenopacket(v1Phenopacket); + // Set up the converter + boolean convertVariants = true; // or false, as desired + V1ToV2Converter converter = V1ToV2Converter.of(convertVariants); + // Get v1 phenopacket. + org.phenopackets.schema.v1.Phenopacket v1 = TestData.V1.comprehensivePhenopacket(); -Alternatively, use the ``convert`` command of the command-line interface. -Both of the following commands print output to the standard output. + // Convert to v2 phenopacket. + org.phenopackets.schema.v2.Phenopacket v2 = converter.convertPhenopacket(v1); -.. code-block:: bash +See also +^^^^^^^^ - pfx convert -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json - pfx convert --convert-variants -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json - -.. note:: - The commands above assume `pfx` is an alias and ``${examples}`` points ot location of examples folder, - both set up in :ref:`rsttutorial`. +The API documentation of the conversion functionality is located in the +`org.phenopackets.phenopackettools.converter `_ +module. diff --git a/phenopacket-tools-converter/src/test/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterTest.java b/phenopacket-tools-converter/src/test/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterTest.java index b1140e3b..5e6d8dc8 100644 --- a/phenopacket-tools-converter/src/test/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterTest.java +++ b/phenopacket-tools-converter/src/test/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterTest.java @@ -2,6 +2,7 @@ import org.ga4gh.vrsatile.v1.Expression; import org.ga4gh.vrsatile.v1.VariationDescriptor; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -284,4 +285,26 @@ private static void assertProcedureIsEqual(org.phenopackets.schema.v1.core.Proce public class PhenopacketConverterTest { } + /** + * Tests with code used in the user guide. The tests must compile, otherwise the documentation must be updated. + * However, the tests need not pass, hence Disabled. + */ + @Nested + @Disabled + public class DocumentationTest { + + @Test + public void converterWorks() { + // Set up the converter + boolean convertVariants = true; // or false, as desired + V1ToV2Converter converter = V1ToV2Converter.of(convertVariants); + + // Get v1 phenopacket. + org.phenopackets.schema.v1.Phenopacket v1 = TestData.V1.comprehensivePhenopacket(); + + // Convert to v2 phenopacket. + org.phenopackets.schema.v2.Phenopacket v2 = converter.convertPhenopacket(v1); + } + } + } \ No newline at end of file From 0a84db4409788a3fcedfdffcd9d9483edf5b24df Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 17:24:51 -0500 Subject: [PATCH 25/38] Fix typo in `examples`. 
Signed-off-by: Daniel Danis --- docs/examples.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index cef98ed9..ea17439a 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -4,8 +4,8 @@ Phenopackets Examples ===================== -Example YAML files can be viewed for the following: +Example JSON and YAML files can be viewed for the following: * `Phenopackets `_ -* `Familes `_ +* `Families `_ * `Cohorts `_ From 24212e036e166a2ad797291ad1d5ce96ed220d95 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 17:37:33 -0500 Subject: [PATCH 26/38] Use uniform UCUM encoding, preferring dot notation. Fix #131 Signed-off-by: Daniel Danis --- constants/Unit.tsv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/constants/Unit.tsv b/constants/Unit.tsv index 8f8b0d57..47e1cbbe 100644 --- a/constants/Unit.tsv +++ b/constants/Unit.tsv @@ -2,24 +2,24 @@ ontology.id ontology.label variable.name function.name UCUM:degree degree (plane angle) DEGREE degreeOfAngle UCUM:[diop] diopter DIOPTER diopter UCUM:g gram GRAM gram -UCUM:g/kg gram per kilogram GRAM_PER_KG gramPerKilogram +UCUM:g.kg-1 gram per kilogram GRAM_PER_KG gramPerKilogram UCUM:kg kilogram KILOGRAM kilogram UCUM:L liter LITER liter UCUM:m meter METER meter UCUM:ug microgram MICROGRAM microgram -UCUM:ug/dL microgram per deciliter MICROGRAM_PER_DECILITER microgramPerDeciliter -UCUM:ug/L microgram per liter MICROGRAM_PER_LITER microgramPerLiter +UCUM:ug.dL-1 microgram per deciliter MICROGRAM_PER_DECILITER microgramPerDeciliter +UCUM:ug.L-1 microgram per liter MICROGRAM_PER_LITER microgramPerLiter UCUM:uL microliter MICROLITER microliter UCUM:um micrometer MICROMETER micrometer UCUM:mg milligram MILLIGRAM milligram -UCUM:mg/dL milligram per day MILLIGRAM_PER_DAY milligramPerDay -UCUM:mg/dL milligram per deciliter MILLIGRAM_PER_DL milligramPerDeciliter +UCUM:mg.d-1 milligram per day MILLIGRAM_PER_DAY milligramPerDay +UCUM:mg.dL-1 milligram per deciliter MILLIGRAM_PER_DL milligramPerDeciliter UCUM:mg.kg-1 milligram per kilogram MILLIGRAM_PER_KG mgPerKg UCUM:mL milliliter MILLILITER milliliter UCUM:mm millimeter MILLIMETER millimeter UCUM:mm[Hg] millimetres of mercury MILLIMETRES_OF_MERCURY mmHg UCUM:mmol millimole MILLIMOLE millimole UCUM:mol mole MOLE mole -UCUM:mol/L mole per liter MOLE_PER_LITER molePerLiter -UCUM:mol/mL mole per milliliter MOLE_PER_MILLILITER molePerMilliliter -UCUM:U/L enzyme unit per liter ENZYME_UNIT_PER_LITER enzymeUnitPerLiter +UCUM:mol.L-1 mole per liter MOLE_PER_LITER molePerLiter +UCUM:mol.mL-1 mole per milliliter MOLE_PER_MILLILITER molePerMilliliter +UCUM:U.L-1 enzyme unit per liter ENZYME_UNIT_PER_LITER enzymeUnitPerLiter From 4536b8f1e8ae59929c86e2c947c820500628658e Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 17:39:35 -0500 Subject: [PATCH 27/38] Update code and user guide. Signed-off-by: Daniel Danis --- docs/constants.rst | 16 ++++++++-------- .../phenopackettools/builder/constants/Unit.java | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/constants.rst b/docs/constants.rst index ce39c126..5433ac33 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -431,26 +431,26 @@ With some exceptions, terms from the `The Unified Code for Units of Measure Date: Wed, 23 Nov 2022 17:46:25 -0500 Subject: [PATCH 28/38] No need to explicitly add the metadata validator in the `validate` command. 
Signed-off-by: Daniel Danis --- .../phenopackettools/cli/command/ValidateCommand.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index a13def62..9ef0ee7b 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -9,7 +9,6 @@ import org.phenopackets.phenopackettools.core.PhenopacketElement; import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.validator.core.*; -import org.phenopackets.phenopackettools.validator.core.metadata.MetaDataValidators; import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; import org.phenopackets.phenopackettools.validator.core.writer.ValidationResultsAndPath; import org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner; @@ -100,7 +99,6 @@ private ValidationWorkflowRunner prepareWorkflowRunner() { List> validators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.phenopacketBuilder() .addAllJsonSchemaUrls(customJsonSchemas) - .addValidator(MetaDataValidators.phenopacketValidator()) .addValidators(validators) .build(); } @@ -108,7 +106,6 @@ private ValidationWorkflowRunner prepareWorkflowRunner() { List> validators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.familyBuilder() .addAllJsonSchemaUrls(customJsonSchemas) - .addValidator(MetaDataValidators.familyValidator()) .addValidators(validators) .build(); } @@ -116,7 +113,6 @@ private ValidationWorkflowRunner prepareWorkflowRunner() { List> validators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.cohortBuilder() .addAllJsonSchemaUrls(customJsonSchemas) - .addValidator(MetaDataValidators.cohortValidator()) .addValidators(validators) .build(); } From b038d7c68fcf6cce7983135411d7a1bfecd30fa0 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 23 Nov 2022 18:40:20 -0500 Subject: [PATCH 29/38] Add CHEBI resource builder, fix resource-related errors in the example phenopackets - #142 #145 Signed-off-by: Daniel Danis --- .../phenopackettools/builder/builders/Resources.java | 11 +++++++++++ .../cli/examples/AtaxiaWithVitaminEdeficiency.java | 6 +++++- .../cli/examples/BethlehamMyopathy.java | 2 ++ .../phenopackettools/cli/examples/Covid.java | 12 +++++++++--- .../cli/examples/DuchenneExon51Deletion.java | 3 ++- .../cli/examples/FamilyWithPedigree.java | 1 + .../cli/examples/Holoprosencephaly5.java | 2 ++ .../phenopackettools/cli/examples/Marfan.java | 8 ++++++-- .../cli/examples/NemalineMyopathyPrenatal.java | 3 ++- .../cli/examples/Pseudoexfoliation.java | 9 +++++++-- .../cli/examples/Retinoblastoma.java | 7 ++++++- .../SevereStatinInducedAutoimmuneMyopathy.java | 8 ++++++-- .../cli/examples/UrothelialCancer.java | 1 + 13 files changed, 60 insertions(+), 13 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index 15b1c89d..5bcb3ed0 100644 --- 
a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -129,6 +129,13 @@ private Resources() { .setUrl("https://www.omim.org") .setIriPrefix("https://www.omim.org/entry/"); + private static final Resource.Builder CHEBI_BUILDER = Resource.newBuilder() + .setId("chebi") + .setName("Chemical Entities of Biological Interest") + .setNamespacePrefix("CHEBI") + .setUrl("https://www.ebi.ac.uk/chebi") + .setIriPrefix("https://purl.obolibrary.org/obo/CHEBI_"); + public static Resource hgncVersion(String version) { return HGNC_BUILDER.setVersion(version).build(); } public static Resource hpoVersion(String version) { @@ -199,4 +206,8 @@ public static Resource omimVersion(String version) { return OMIM_BUILDER.setVersion(version).build(); } + public static Resource chebiVersion(String version) { + return CHEBI_BUILDER.setVersion(version).build(); + } + } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java index 9cbeb206..75772968 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java @@ -35,8 +35,12 @@ public AtaxiaWithVitaminEdeficiency() { var metadata = MetaDataBuilder.builder("2022-04-21T10:35:00Z", "anonymous biocurator") .addResource(Resources.ncitVersion("21.05d")) .addResource(Resources.hpoVersion("2022-06-11")) - .addResource(Resources.mondoVersion("v2022-09-06")) + .addResource(Resources.mondoVersion("2022-04-04")) .addResource(Resources.uberonVersion("2021-07-27")) + .addResource(Resources.loincVersion("2.73")) + .addResource(Resources.ucum()) + .addResource(Resources.genoVersion("2022-03-05")) + .addResource(Resources.drugCentralVersion("2022-08-22")) .addExternalReference(externalRef) .build(); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java index e4683464..96762beb 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java @@ -23,6 +23,8 @@ public BethlehamMyopathy() { var metaData = MetaDataBuilder.builder("2021-05-14T10:35:00Z", "anonymous biocurator") .addResource(Resources.hpoVersion("2021-08-02")) .addResource(Resources.genoVersion("2020-03-08")) + .addResource(Resources.ecoVersion("2022-08-05")) + .addResource(Resources.omimVersion("2022-11-23")) .addExternalReference(authorAssertion.getReference()) .build(); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java index d107a384..e174e951 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java @@ -2,6 +2,7 @@ import 
org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; +import org.phenopackets.phenopackettools.builder.constants.Unit; import org.phenopackets.schema.v2.Phenopacket; import org.phenopackets.schema.v2.core.*; @@ -52,8 +53,13 @@ public Covid() { .description("The Imperfect Cytokine Storm: Severe COVID-19 With ARDS in a Patient on Durable LVAD Support") .build(); var metaData = MetaDataBuilder.builder("2021-08-17T00:00:00Z", "anonymous biocurator") - .addResource(Resources.ncitVersion("2019-11-26")) - .addResource(Resources.mondoVersion("2021-11-26")) + .addResource(Resources.hpoVersion("2021-08-02")) + .addResource(Resources.ncitVersion("21.05d")) + .addResource(Resources.mondoVersion("2022-04-04")) + .addResource(Resources.loincVersion("2.73")) + .addResource(Resources.patoVersion("2022-08-31")) + .addResource(Resources.chebiVersion("2022-11-23")) + .addResource(Resources.ucum()) .addExternalReference(externalRef) .build(); @@ -195,7 +201,7 @@ private MedicalAction tocilizumabAdministered() { private MedicalAction dexamethasone() { // ten days, 6 mg once a day - Quantity quantity = QuantityBuilder.of("UO:0000022", "milligram", 6); + Quantity quantity = QuantityBuilder.of(Unit.milligram(), 6); OntologyClass onceDaily = ontologyClass("NCIT:C125004", "Once Daily"); var doseInterval = DoseIntervalBuilder.of(quantity, onceDaily, "2020-03-20", "2020-03-30"); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java index 4ef12c92..c85b550d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java @@ -33,7 +33,8 @@ public class DuchenneExon51Deletion implements PhenopacketExample { public DuchenneExon51Deletion() { var metadata = MetaDataBuilder.builder("2021-05-14T10:35:00Z", "anonymous biocurator") .addResource(Resources.hpoVersion("2022-06-11")) - .addResource(Resources.mondoVersion("v2022-09-06")) + .addResource(Resources.mondoVersion("2022-04-04")) + .addResource(Resources.genoVersion("2022-03-05")) .build(); Individual proband = IndividualBuilder.builder(PROBAND_ID). ageAtLastEncounter("P10Y"). 
diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java index 9c23f284..e0157407 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java @@ -28,6 +28,7 @@ public class FamilyWithPedigree { public FamilyWithPedigree() { FamilyBuilder builder = FamilyBuilder.create(FAMILY_ID); var metadata = MetaDataBuilder.builder("2022-04-17T10:35:00Z", "biocurator") + .addResource(Resources.hpoVersion("2022-06-11")) .build(); builder.metaData(metadata); builder.pedigree(pedigree()); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java index aeb86e0f..bd86fda7 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java @@ -38,6 +38,8 @@ public Holoprosencephaly5() { var metaData = MetaDataBuilder.builder("2021-05-14T10:35:00Z", "anonymous biocurator") .addResource(Resources.hpoVersion("2021-08-02")) .addResource(Resources.genoVersion("2020-03-08")) + .addResource(Resources.ecoVersion("2022-08-05")) + .addResource(Resources.omimVersion("2022-11-23")) .addExternalReference(authorAssertion.getReference()) .build(); var vcfRecord = VcfRecordBuilder.of("GRCh38", diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java index e401c7ed..aff8b880 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java @@ -2,6 +2,7 @@ import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; +import org.phenopackets.phenopackettools.builder.constants.Unit; import org.phenopackets.schema.v2.Phenopacket; import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; @@ -17,10 +18,9 @@ public Marfan() { var marfan = DiseaseBuilder.of("OMIM:154700 ", "Marfan syndrome"); var individual = IndividualBuilder.builder(PROBAND_ID).female().ageAtLastEncounter("P27Y").build(); var losartan = ontologyClass("DrugCentral:1610", "losartan"); - var mg = ontologyClass("UO:0000022", "milligram"); var aorticAneurysm = PhenotypicFeatureBuilder.of("HP:0002616", "Aortic root aneurysm"); - var quantity = QuantityBuilder.of(mg, 30.0); + var quantity = QuantityBuilder.of(Unit.milligram(), 30.0); var administration = ontologyClass("NCIT:C38288", "Oral Route of Administration"); var bid = ontologyClass("NCIT:C64496", "Twice Daily"); var interval = TimeIntervalBuilder.of("2019-03-20T00:00:00Z", "2021-03-20T00:00:00Z"); @@ -33,6 +33,10 @@ public Marfan() { var medicalAction = MedicalActionBuilder.treatment(losartanTreatment); var metaData = MetaDataBuilder.builder("2021-05-14T10:35:00Z", "anonymous biocurator") .addResource(Resources.hpoVersion("2021-08-02")) + 
.addResource(Resources.omimVersion("2022-11-23")) + .addResource(Resources.drugCentralVersion("2022-08-22")) + .addResource(Resources.ncitVersion("21.05d")) + .addResource(Resources.ucum()) .build(); phenopacket = PhenopacketBuilder.create(PHENOPACKET_ID, metaData) .individual(individual) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java index eb7a00e6..5e0268d8 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java @@ -31,8 +31,9 @@ public NemalineMyopathyPrenatal() { var metadata = MetaDataBuilder.builder("2021-05-14T10:35:00Z", "anonymous biocurator") .addResource(Resources.ncitVersion("21.05d")) .addResource(Resources.hpoVersion("2022-02")) - .addResource(Resources.mondoVersion("v2022-04-04")) + .addResource(Resources.mondoVersion("2022-04-04")) .addResource(Resources.uberonVersion("2021-07-27")) + .addResource(Resources.genoVersion("2022-03-05")) .build(); var vitalStatus = VitalStatusBuilder.deceased().causeOfDeath(NEMALINE_MYOPATHY_8).build(); var individual = IndividualBuilder.builder(PROBAND_ID) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java index b1036f4c..2da468ea 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java @@ -46,6 +46,11 @@ public Pseudoexfoliation() { .addResource(Resources.uberonVersion("2022-08-19")) .addResource(Resources.ncitVersion("22.07d")) .addResource(Resources.hpoVersion("2022-06-11")) + .addResource(Resources.ucum()) + .addResource(Resources.loincVersion("2.73")) + .addResource(Resources.mondoVersion("2022-04-04")) + .addResource(Resources.drugCentralVersion("2022-08-22")) + .build(); Individual proband = IndividualBuilder.builder(PROBAND_ID). ageAtLastEncounter("P70Y"). @@ -346,11 +351,11 @@ List getPhenotypicFeatures() { .excluded() .build(); PhenotypicFeature excludedpupilabnormality = PhenotypicFeatureBuilder. - builder(" HP:0007686", "Abnormal pupillary function") + builder("HP:0007686", "Abnormal pupillary function") .excluded() .build(); PhenotypicFeature monovision = PhenotypicFeatureBuilder. - builder(" SCTID: 414775001", "monovision")// alternative to snomed? + builder("SCTID:414775001", "monovision")// alternative to snomed? 
.excluded() .build(); return List.of(emmetropia, myopia, iopi, excludedpupilabnormality, excludedPhacodonesis, monovision); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java index 0a28729d..1505c568 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java @@ -33,6 +33,11 @@ public Retinoblastoma() { .addResource(Resources.efoVersion("3.34.0")) .addResource(Resources.uberonVersion("2021-07-27")) .addResource(Resources.ncbiTaxonVersion("2021-06-10")) + .addResource(Resources.hpoVersion("2022-06-11")) + .addResource(Resources.loincVersion("2.73")) + .addResource(Resources.ucum()) + .addResource(Resources.genoVersion("2022-03-05")) + .addResource(Resources.drugCentralVersion("2022-08-22")) .build(); Individual proband = IndividualBuilder.builder(PROBAND_ID). ageAtLastEncounter("P6M"). @@ -311,7 +316,7 @@ List getPhenotypicFeatures() { measured with the Perkins tonometer. */ List getMeasurements() { - OntologyClass iop = ontologyClass("56844-4","Intraocular pressure of Eye"); + OntologyClass iop = ontologyClass("LOINC:56844-4","Intraocular pressure of Eye"); ReferenceRange ref = ReferenceRangeBuilder.of(iop, 10, 21); OntologyClass leftEyeIop = OntologyClassBuilder.ontologyClass("LOINC:79893-4", "Left eye Intraocular pressure"); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java index 25255a24..0245dc91 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java @@ -32,8 +32,12 @@ public SevereStatinInducedAutoimmuneMyopathy() { "Severe statin-induced autoimmune myopathy successfully treated with intravenous immunoglobulin"); var metadata = MetaDataBuilder.builder("2022-04-21T10:35:00Z", "anonymous biocurator") - .addResource(Resources.ncitVersion("22.03d")) - .addResource(Resources.hpoVersion("2021-08-02")) + .addResource(Resources.ncitVersion("21.05d")) + .addResource(Resources.hpoVersion("2022-06-11")) + .addResource(Resources.loincVersion("2.73")) + .addResource(Resources.ucum()) + .addResource(Resources.mondoVersion("2022-04-04")) + .addResource(Resources.drugCentralVersion("2022-08-22")) .addExternalReference(externalRef) .build(); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java index a5adac4a..bc3beaf6 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java @@ -28,6 +28,7 @@ public UrothelialCancer() { .addResource(Resources.efoVersion("3.34.0")) .addResource(Resources.uberonVersion("2021-07-27")) .addResource(Resources.ncbiTaxonVersion("2021-06-10")) + 
.addResource(Resources.hpoVersion("2022-06-11")) .build(); phenopacket = PhenopacketBuilder.create(PHENOPACKET_ID, metadata) .individual(individual) From 1ef18542f8867d9e3f3ea65078cc46d159c1678c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 12:59:32 -0500 Subject: [PATCH 30/38] Update UCUM IRI prefix in `Resources`. Signed-off-by: Daniel Danis --- .../phenopackettools/builder/builders/Resources.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index 5bcb3ed0..4614a42a 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -106,7 +106,7 @@ private Resources() { .setName("Unified Code for Units of Measure") .setNamespacePrefix("UCUM") .setUrl("https://ucum.org") - .setIriPrefix("https://ucum.org/"); + .setIriPrefix("https://units-of-measurement.org/"); private static final Resource.Builder LOINC_BUILDER = Resource.newBuilder() .setId("loinc") From f1340441a249daed06c13a1f035dfa1bcf87c00c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 17:40:52 -0500 Subject: [PATCH 31/38] Ensure all default enum fields are parsed/printed to/from JSON format. Use `PhenopacketPrintUtil` instead of using `JsonFormat`. Signed-off-by: Daniel Danis --- phenopacket-tools-io/pom.xml | 8 -- .../src/main/java/module-info.java | 2 - .../phenopackettools/io/JsonPrinter.java | 6 +- .../phenopackettools/io/NaiveYamlPrinter.java | 6 +- .../io/base/BasePhenopacketParser.java | 4 +- .../io/base/NaiveYamlParser.java | 6 +- phenopacket-tools-util/pom.xml | 8 ++ .../src/main/java/module-info.java | 5 + .../util/print/PhenopacketPrintUtil.java | 97 +++++++++++++++++++ .../util/print/package-info.java | 7 ++ phenopacket-tools-validator-core/pom.xml | 2 +- .../src/main/java/module-info.java | 4 +- .../validator/core/convert/BaseConverter.java | 6 +- .../src/main/java/module-info.java | 1 - 14 files changed, 132 insertions(+), 30 deletions(-) create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/package-info.java diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml index 205f3a08..f72e0021 100644 --- a/phenopacket-tools-io/pom.xml +++ b/phenopacket-tools-io/pom.xml @@ -29,14 +29,6 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml - - com.google.protobuf - protobuf-java - - - com.google.protobuf - protobuf-java-util - org.phenopackets.phenopackettools diff --git a/phenopacket-tools-io/src/main/java/module-info.java b/phenopacket-tools-io/src/main/java/module-info.java index d5534857..b6788718 100644 --- a/phenopacket-tools-io/src/main/java/module-info.java +++ b/phenopacket-tools-io/src/main/java/module-info.java @@ -5,8 +5,6 @@ requires org.phenopackets.phenopackettools.util; requires org.phenopackets.schema; - requires com.google.protobuf; - requires com.google.protobuf.util; requires com.fasterxml.jackson.databind; requires com.fasterxml.jackson.dataformat.yaml; requires org.slf4j; diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java 
b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java index 3e6825d6..5f3313c3 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java @@ -1,7 +1,7 @@ package org.phenopackets.phenopackettools.io; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.util.print.PhenopacketPrintUtil; import java.io.BufferedWriter; import java.io.IOException; @@ -10,8 +10,6 @@ class JsonPrinter implements PhenopacketPrinter { - private static final JsonFormat.Printer PRINTER = JsonFormat.printer(); - private static final JsonPrinter INSTANCE = new JsonPrinter(); static JsonPrinter getInstance() { @@ -24,7 +22,7 @@ private JsonPrinter() { @Override public void print(Message message, OutputStream os) throws IOException { BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - PRINTER.appendTo(message, writer); + PhenopacketPrintUtil.getPrinter().appendTo(message, writer); writer.flush(); } } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java index 9ebc7a0b..547ff33b 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java @@ -6,7 +6,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.google.protobuf.Message; import com.google.protobuf.MessageOrBuilder; -import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.util.print.PhenopacketPrintUtil; import java.io.IOException; import java.io.OutputStream; @@ -19,8 +19,6 @@ */ class NaiveYamlPrinter implements PhenopacketPrinter { - private static final JsonFormat.Printer PB_PRINTER = JsonFormat.printer(); - private static final NaiveYamlPrinter INSTANCE = new NaiveYamlPrinter(); static NaiveYamlPrinter getInstance() { @@ -39,7 +37,7 @@ private NaiveYamlPrinter() { @Override public void print(Message message, OutputStream os) throws IOException { - String jsonString = PB_PRINTER.print(message); + String jsonString = PhenopacketPrintUtil.getPrinter().print(message); JsonNode jsonNode = jsonMapper.readTree(jsonString); yamlMapper.writeValue(os, jsonNode); } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java index 4383228d..5ffe3c76 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java @@ -1,10 +1,10 @@ package org.phenopackets.phenopackettools.io.base; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.io.PhenopacketParser; import org.phenopackets.phenopackettools.core.PhenopacketElement; import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.util.print.PhenopacketPrintUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +41,7 @@ private Message readJsonMessage(PhenopacketElement element, 
InputStream is) thro // Not closing the BufferedReader as the InputStream should be closed. BufferedReader reader = new BufferedReader(new InputStreamReader(is)); Message.Builder builder = prepareBuilder(element); - JsonFormat.parser().merge(reader, builder); + PhenopacketPrintUtil.getParser().merge(reader, builder); return builder.build(); } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java index c84a0028..3a0f0348 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java @@ -4,7 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.util.print.PhenopacketPrintUtil; import java.io.IOException; import java.io.InputStream; @@ -15,8 +15,6 @@ */ class NaiveYamlParser { - private static final JsonFormat.Parser JSON_PARSER = JsonFormat.parser(); - static final NaiveYamlParser INSTANCE = new NaiveYamlParser(); private final ObjectMapper yamlMapper; private final ObjectMapper jsonMapper; @@ -28,6 +26,6 @@ private NaiveYamlParser() { void deserializeYamlMessage(InputStream is, Message.Builder builder) throws IOException { JsonNode node = yamlMapper.readTree(is); String jsonString = jsonMapper.writeValueAsString(node); - JSON_PARSER.merge(jsonString, builder); + PhenopacketPrintUtil.getParser().merge(jsonString, builder); } } diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index e43e07df..46911b44 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -17,6 +17,14 @@ phenopacket-tools-core ${project.parent.version} + + org.phenopackets + phenopacket-schema + + + com.google.protobuf + protobuf-java-util + \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/module-info.java b/phenopacket-tools-util/src/main/java/module-info.java index 6c8793a5..58d13cbe 100644 --- a/phenopacket-tools-util/src/main/java/module-info.java +++ b/phenopacket-tools-util/src/main/java/module-info.java @@ -3,7 +3,12 @@ */ module org.phenopackets.phenopackettools.util { requires transitive org.phenopackets.phenopackettools.core; + requires com.google.protobuf; + // The `print` package exposes `JsonFormat.Printer`, hence the transitive export. 
+ requires transitive com.google.protobuf.util; + requires org.phenopackets.schema; requires org.slf4j; exports org.phenopackets.phenopackettools.util.format; + exports org.phenopackets.phenopackettools.util.print; } \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java new file mode 100644 index 00000000..9174a713 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java @@ -0,0 +1,97 @@ +package org.phenopackets.phenopackettools.util.print; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.util.JsonFormat; +import org.phenopackets.schema.v2.Cohort; +import org.phenopackets.schema.v2.Family; +import org.phenopackets.schema.v2.Phenopacket; + +import java.util.HashSet; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A static utility class for obtaining {@linkplain JsonFormat.Parser} and {@linkplain JsonFormat.Printer} + * configured for printing Phenopacket Schema elements. + */ +public class PhenopacketPrintUtil { + + // There are no special requirements for the parser as of now. + // However, we keep it here for consistency. + private static final JsonFormat.Parser PARSER = JsonFormat.parser(); + + private static final JsonFormat.Printer PRINTER = JsonFormat.printer() + .includingDefaultValueFields(defaultValueFields()); + + private static Set defaultValueFields() { + /* + We must ensure that we print the enum values even if the value is the default value. + Otherwise, the base validation will fail due to absence of a required field. + + The set that we create below includes all enum descriptors declared in protobuf files. + */ + return Stream.of( + findEnumDescriptors(Phenopacket.getDescriptor()), + findEnumDescriptors(Family.getDescriptor()), + findEnumDescriptors(Cohort.getDescriptor()) + ) + .flatMap(Function.identity()) + .collect(Collectors.toSet()); + } + + /** + * Get a parser configured for parsing Phenopacket Schema elements from JSON format. + */ + public static JsonFormat.Parser getParser() { + return PARSER; + } + + /** + * Get a printer configured for printing Phenopacket Schema elements into JSON while respecting + * the special requirements of the schema. + *
    + * Currently, the special requirements include printing all enum field values, including the default values + * whose presence is implied in absence of the JSON field by Protobuf. + *
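+     * <p>
+     * For instance, an illustrative sketch of serializing a phenopacket with the configured printer:
+     * <pre>{@code
+     * Phenopacket phenopacket = Phenopacket.newBuilder().setId("example-phenopacket").build();
+     * // print() may throw InvalidProtocolBufferException if the message cannot be serialized
+     * String json = PhenopacketPrintUtil.getPrinter().print(phenopacket);
+     * }</pre>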
    + * + * @return the printer + */ + public static JsonFormat.Printer getPrinter() { + return PRINTER; + } + + /** + * Find recursively all enum field descriptors, starting from {@code base}. + */ + private static Stream findEnumDescriptors(Descriptors.Descriptor base) { + Stream.Builder builder = Stream.builder(); + Set visited = new HashSet<>(); + allEnumDescriptors(base, builder, visited); + + return builder.build(); + } + + private static void allEnumDescriptors(Descriptors.Descriptor descriptor, + Stream.Builder builder, + Set visited) { + for (Descriptors.FieldDescriptor field : descriptor.getFields()) { + if (field.getJavaType().equals(Descriptors.FieldDescriptor.JavaType.ENUM)) + builder.add(field); + + if (field.getJavaType().equals(Descriptors.FieldDescriptor.JavaType.MESSAGE)) { + if (visited.contains(field.getMessageType())) + continue; + + visited.add(field.getMessageType()); + allEnumDescriptors(field.getMessageType(), builder, visited); + } + } + } + + private PhenopacketPrintUtil() { + // static no-op + } + +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/package-info.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/package-info.java new file mode 100644 index 00000000..264a4d61 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/package-info.java @@ -0,0 +1,7 @@ +/** + * The package includes utilities for parsing and printing Phenopacket Schema elements from/to JSON format. + *
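+ * <p>
+ * As an illustrative sketch, a typical round trip parses JSON into a builder with
+ * {@code PhenopacketPrintUtil.getParser().merge(json, Phenopacket.newBuilder())} and serializes the built message
+ * back with {@code PhenopacketPrintUtil.getPrinter().print(phenopacket)}.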
    + * See {@link org.phenopackets.phenopackettools.util.print.PhenopacketPrintUtil} for more information. + *
    + */ +package org.phenopackets.phenopackettools.util.print; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index d81c6ca6..1898f1bd 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -18,7 +18,7 @@ org.phenopackets.phenopackettools - phenopacket-tools-core + phenopacket-tools-util ${project.parent.version} diff --git a/phenopacket-tools-validator-core/src/main/java/module-info.java b/phenopacket-tools-validator-core/src/main/java/module-info.java index 0c5824ee..75fe7dc8 100644 --- a/phenopacket-tools-validator-core/src/main/java/module-info.java +++ b/phenopacket-tools-validator-core/src/main/java/module-info.java @@ -10,11 +10,13 @@ exports org.phenopackets.phenopackettools.validator.core.writer; requires org.phenopackets.phenopackettools.core; + requires org.phenopackets.phenopackettools.util; requires org.monarchinitiative.phenol.core; requires org.phenopackets.schema; + // There are many places where the protobuf classes are part of the API, e.g. as type parameter + // of PhenopacketFormatConverter. requires transitive com.google.protobuf; - requires com.google.protobuf.util; requires org.slf4j; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java index 5713bb52..09bfca4d 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java @@ -3,18 +3,18 @@ import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.MessageOrBuilder; import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.util.print.PhenopacketPrintUtil; import org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter; import org.phenopackets.phenopackettools.validator.core.except.PhenopacketValidatorRuntimeException; abstract class BaseConverter implements PhenopacketFormatConverter { - protected static final JsonFormat.Parser parser = JsonFormat.parser(); - protected static final JsonFormat.Printer printer = JsonFormat.printer(); + protected static final JsonFormat.Parser parser = PhenopacketPrintUtil.getParser(); @Override public String toJson(T item) { try { - return printer.print(item); + return PhenopacketPrintUtil.getPrinter().print(item); } catch (InvalidProtocolBufferException e) { throw new PhenopacketValidatorRuntimeException(e); } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java index df165e96..9d2de689 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java @@ -12,7 +12,6 @@ requires org.phenopackets.phenopackettools.util; requires transitive org.phenopackets.phenopackettools.validator.core; requires org.phenopackets.schema; - requires com.google.protobuf.util; requires com.fasterxml.jackson.databind; requires json.schema.validator; requires org.slf4j; From 45e708eb65a62d15edfd814f6230638681eafb1c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 17:53:52 -0500 Subject: [PATCH 
32/38] Prevent creating `VariationDescriptorBuilder` without variant ID. Signed-off-by: Daniel Danis --- .../builder/builders/VariationDescriptorBuilder.java | 7 +++++++ .../cli/examples/DuchenneExon51Deletion.java | 2 +- .../phenopackettools/cli/examples/Retinoblastoma.java | 2 +- .../converter/converters/V1ToV2ConverterImpl.java | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariationDescriptorBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariationDescriptorBuilder.java index 0fb3693c..8214a95c 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariationDescriptorBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariationDescriptorBuilder.java @@ -14,6 +14,7 @@ public class VariationDescriptorBuilder { /** * Constructor if no identifier is to be used */ + @Deprecated(forRemoval = true) private VariationDescriptorBuilder() { builder = VariationDescriptor.newBuilder(); } @@ -25,6 +26,12 @@ private VariationDescriptorBuilder(String id) { builder = VariationDescriptor.newBuilder().setId(id); } + /** + * + * @deprecated use {@link #builder(String)} instead. The id is a required field and it should not be possible + * to create a builder without an id. + */ + @Deprecated(forRemoval = true, since = "0.4.8") public static VariationDescriptorBuilder builder() { return new VariationDescriptorBuilder(); } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java index c85b550d..097e0f13 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java @@ -91,7 +91,7 @@ private GenomicInterpretation dmdDeletion() { abuilder.alleleLocation("refseq:NC_000023.11",31774144, 31785736);//VRS uses inter-residue coordinates abuilder.oneCopy(); - VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder(); + VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("variant-id"); vbuilder.variation(abuilder.buildVariation()) .genomic() .hemizygous() diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java index 1505c568..c46590b0 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java @@ -130,7 +130,7 @@ GenomicInterpretation germlineRb1Deletion() { .setCopyNumber(cnv) .build(); - VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder(); + VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("variant-id"); vbuilder.variation(variation); vbuilder.mosaicism(40.0); VariantInterpretationBuilder vibuilder = VariantInterpretationBuilder.builder(vbuilder); diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java 
b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java index 14934cfe..bc76ec72 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java @@ -180,7 +180,7 @@ private static Function toVariationDescriptor() { vcfAllele.getRef(), vcfAllele.getAlt()) .build(); - yield VariationDescriptorBuilder.builder() + yield VariationDescriptorBuilder.builder(vcfAllele.getId()) .vcfRecord(vcfRecord) .genomic() .zygosity(v2zygosity) From 0a343ca3bcf3762a9fc3410a490d912246800a24 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 18:12:01 -0500 Subject: [PATCH 33/38] Use derived sequence expression instead of allele in `CopyNumberBuilder`. Signed-off-by: Daniel Danis --- .../builder/builders/CopyNumberBuilder.java | 10 ++++++---- .../cli/examples/DuchenneExon51Deletion.java | 11 +++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java index 8ca0ad0b..2d7711af 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java @@ -23,10 +23,12 @@ public CopyNumberBuilder copyNumberId(String id) { * residues, possibly with length zero, and specified using “0-start, half-open” coordinates. 
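+     * <p>
+     * For example (illustrative, mirroring the Duchenne example phenopacket updated in this patch), a one-copy
+     * deletion can be described with {@code alleleLocation("refseq:NC_000023.11", 31774144, 31785736)} followed
+     * by {@code oneCopy()}.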
*/ public CopyNumberBuilder alleleLocation(String contig, int interbaseStartPos, int interbaseEndPos) { - AlleleBuilder abuilder = AlleleBuilder.builder() - .sequenceId(contig) - .interbaseStartEnd(interbaseStartPos, interbaseEndPos); - builder.setAllele(abuilder.build()); + builder.setDerivedSequenceExpression(DerivedSequenceExpression.newBuilder() + .setLocation(SequenceLocation.newBuilder() + .setSequenceId(contig) + .setSequenceInterval(SequenceInterval.newBuilder() + .setStartNumber(Number.newBuilder().setValue(interbaseStartPos)) + .setEndNumber(Number.newBuilder().setValue(interbaseEndPos))))); return this; } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java index 097e0f13..310ab09b 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java @@ -84,15 +84,14 @@ Interpretation interpretation() { } private GenomicInterpretation dmdDeletion() { - CopyNumberBuilder abuilder = CopyNumberBuilder.builder(); // NC_000023.11:g.31774144_31785736del // NM_004006.3:c.7310-11543_7359del // This deletion removed the 5' part of exon 51 (of NM_004006.3) - - abuilder.alleleLocation("refseq:NC_000023.11",31774144, 31785736);//VRS uses inter-residue coordinates - abuilder.oneCopy(); - VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("variant-id"); - vbuilder.variation(abuilder.buildVariation()) + CopyNumberBuilder cnvBuilder = CopyNumberBuilder.builder() + .alleleLocation("refseq:NC_000023.11", 31774144, 31785736) //VRS uses inter-residue coordinates + .oneCopy(); + VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("variant-id") + .variation(cnvBuilder.buildVariation()) .genomic() .hemizygous() .geneContext(GeneDescriptorBuilder.of("HGNC:2928", "DMD")) From f8f6fd70b0c5d7b9fe3319c307590cde60651c9d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 21:04:43 -0500 Subject: [PATCH 34/38] Also serialize default boolean values. Signed-off-by: Daniel Danis --- .../util/print/PhenopacketPrintUtil.java | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java index 9174a713..6f31a384 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/print/PhenopacketPrintUtil.java @@ -32,10 +32,15 @@ private static Set defaultValueFields() { The set that we create below includes all enum descriptors declared in protobuf files. 
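+
+         With this change the set below also contains all boolean field descriptors, so that default values of both
+         kinds (the 0-index enum constant and boolean `false`) are written to the JSON output instead of being
+         omitted by `JsonFormat`.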
*/ + Set serializeDefaultValues = Set.of( + Descriptors.FieldDescriptor.JavaType.ENUM, + Descriptors.FieldDescriptor.JavaType.BOOLEAN + ); + return Stream.of( - findEnumDescriptors(Phenopacket.getDescriptor()), - findEnumDescriptors(Family.getDescriptor()), - findEnumDescriptors(Cohort.getDescriptor()) + findDescriptors(Phenopacket.getDescriptor(), serializeDefaultValues), + findDescriptors(Family.getDescriptor(), serializeDefaultValues), + findDescriptors(Cohort.getDescriptor(), serializeDefaultValues) ) .flatMap(Function.identity()) .collect(Collectors.toSet()); @@ -63,29 +68,31 @@ public static JsonFormat.Printer getPrinter() { } /** - * Find recursively all enum field descriptors, starting from {@code base}. + * Find recursively all enum field descriptors with given {@code targetTypes}, + * starting from {@code base}. */ - private static Stream findEnumDescriptors(Descriptors.Descriptor base) { + private static Stream findDescriptors(Descriptors.Descriptor base, + Set targetTypes) { Stream.Builder builder = Stream.builder(); Set visited = new HashSet<>(); - allEnumDescriptors(base, builder, visited); + findDescriptors(base, targetTypes, builder, visited); return builder.build(); } - private static void allEnumDescriptors(Descriptors.Descriptor descriptor, - Stream.Builder builder, - Set visited) { + private static void findDescriptors(Descriptors.Descriptor descriptor, + Set targetTypes, + Stream.Builder builder, + Set visited) { for (Descriptors.FieldDescriptor field : descriptor.getFields()) { - if (field.getJavaType().equals(Descriptors.FieldDescriptor.JavaType.ENUM)) + if (targetTypes.contains(field.getJavaType())) builder.add(field); if (field.getJavaType().equals(Descriptors.FieldDescriptor.JavaType.MESSAGE)) { - if (visited.contains(field.getMessageType())) - continue; - - visited.add(field.getMessageType()); - allEnumDescriptors(field.getMessageType(), builder, visited); + if (!visited.contains(field.getMessageType())) { + visited.add(field.getMessageType()); + findDescriptors(field.getMessageType(), targetTypes, builder, visited); + } } } } From c6ed0fe081a95dd00d756486b4d705f44b449983 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 22:22:06 -0500 Subject: [PATCH 35/38] Remove Maven wrapper files. 
Signed-off-by: Daniel Danis --- .../.mvn/wrapper/maven-wrapper.properties | 1 - phenopacket-tools-validator-core/mvnw | 225 ------------------ phenopacket-tools-validator-core/mvnw.cmd | 143 ----------- 3 files changed, 369 deletions(-) delete mode 100644 phenopacket-tools-validator-core/.mvn/wrapper/maven-wrapper.properties delete mode 100644 phenopacket-tools-validator-core/mvnw delete mode 100644 phenopacket-tools-validator-core/mvnw.cmd diff --git a/phenopacket-tools-validator-core/.mvn/wrapper/maven-wrapper.properties b/phenopacket-tools-validator-core/.mvn/wrapper/maven-wrapper.properties deleted file mode 100644 index 0061e751..00000000 --- a/phenopacket-tools-validator-core/.mvn/wrapper/maven-wrapper.properties +++ /dev/null @@ -1 +0,0 @@ -distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.6.0/apache-maven-3.6.0-bin.zip \ No newline at end of file diff --git a/phenopacket-tools-validator-core/mvnw b/phenopacket-tools-validator-core/mvnw deleted file mode 100644 index 5bf251c0..00000000 --- a/phenopacket-tools-validator-core/mvnw +++ /dev/null @@ -1,225 +0,0 @@ -#!/bin/sh -# ---------------------------------------------------------------------------- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---------------------------------------------------------------------------- - -# ---------------------------------------------------------------------------- -# Maven2 Start Up Batch script -# -# Required ENV vars: -# ------------------ -# JAVA_HOME - location of a JDK home dir -# -# Optional ENV vars -# ----------------- -# M2_HOME - location of maven2's installed home dir -# MAVEN_OPTS - parameters passed to the Java VM when running Maven -# e.g. to debug Maven itself, use -# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 -# MAVEN_SKIP_RC - flag to disable loading of mavenrc files -# ---------------------------------------------------------------------------- - -if [ -z "$MAVEN_SKIP_RC" ] ; then - - if [ -f /etc/mavenrc ] ; then - . /etc/mavenrc - fi - - if [ -f "$HOME/.mavenrc" ] ; then - . "$HOME/.mavenrc" - fi - -fi - -# OS specific support. $var _must_ be set to either true or false. 
-cygwin=false; -darwin=false; -mingw=false -case "`uname`" in - CYGWIN*) cygwin=true ;; - MINGW*) mingw=true;; - Darwin*) darwin=true - # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home - # See https://developer.apple.com/library/mac/qa/qa1170/_index.html - if [ -z "$JAVA_HOME" ]; then - if [ -x "/usr/libexec/java_home" ]; then - export JAVA_HOME="`/usr/libexec/java_home`" - else - export JAVA_HOME="/Library/Java/Home" - fi - fi - ;; -esac - -if [ -z "$JAVA_HOME" ] ; then - if [ -r /etc/gentoo-release ] ; then - JAVA_HOME=`java-config --jre-home` - fi -fi - -if [ -z "$M2_HOME" ] ; then - ## resolve links - $0 may be a link to maven's home - PRG="$0" - - # need this for relative symlinks - while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG="`dirname "$PRG"`/$link" - fi - done - - saveddir=`pwd` - - M2_HOME=`dirname "$PRG"`/.. - - # make it fully qualified - M2_HOME=`cd "$M2_HOME" && pwd` - - cd "$saveddir" - # echo Using m2 at $M2_HOME -fi - -# For Cygwin, ensure paths are in UNIX format before anything is touched -if $cygwin ; then - [ -n "$M2_HOME" ] && - M2_HOME=`cygpath --unix "$M2_HOME"` - [ -n "$JAVA_HOME" ] && - JAVA_HOME=`cygpath --unix "$JAVA_HOME"` - [ -n "$CLASSPATH" ] && - CLASSPATH=`cygpath --path --unix "$CLASSPATH"` -fi - -# For Migwn, ensure paths are in UNIX format before anything is touched -if $mingw ; then - [ -n "$M2_HOME" ] && - M2_HOME="`(cd "$M2_HOME"; pwd)`" - [ -n "$JAVA_HOME" ] && - JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" - # TODO classpath? -fi - -if [ -z "$JAVA_HOME" ]; then - javaExecutable="`which javac`" - if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then - # readlink(1) is not available as standard on Solaris 10. - readLink=`which readlink` - if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then - if $darwin ; then - javaHome="`dirname \"$javaExecutable\"`" - javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" - else - javaExecutable="`readlink -f \"$javaExecutable\"`" - fi - javaHome="`dirname \"$javaExecutable\"`" - javaHome=`expr "$javaHome" : '\(.*\)/bin'` - JAVA_HOME="$javaHome" - export JAVA_HOME - fi - fi -fi - -if [ -z "$JAVACMD" ] ; then - if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" - else - JAVACMD="$JAVA_HOME/bin/java" - fi - else - JAVACMD="`which java`" - fi -fi - -if [ ! -x "$JAVACMD" ] ; then - echo "Error: JAVA_HOME is not defined correctly." >&2 - echo " We cannot execute $JAVACMD" >&2 - exit 1 -fi - -if [ -z "$JAVA_HOME" ] ; then - echo "Warning: JAVA_HOME environment variable is not set." 
-fi - -CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher - -# traverses directory structure from process work directory to filesystem root -# first directory with .mvn subdirectory is considered project base directory -find_maven_basedir() { - - if [ -z "$1" ] - then - echo "Path not specified to find_maven_basedir" - return 1 - fi - - basedir="$1" - wdir="$1" - while [ "$wdir" != '/' ] ; do - if [ -d "$wdir"/.mvn ] ; then - basedir=$wdir - break - fi - # workaround for JBEAP-8937 (on Solaris 10/Sparc) - if [ -d "${wdir}" ]; then - wdir=`cd "$wdir/.."; pwd` - fi - # end of workaround - done - echo "${basedir}" -} - -# concatenates all lines of a file -concat_lines() { - if [ -f "$1" ]; then - echo "$(tr -s '\n' ' ' < "$1")" - fi -} - -BASE_DIR=`find_maven_basedir "$(pwd)"` -if [ -z "$BASE_DIR" ]; then - exit 1; -fi - -export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} -echo $MAVEN_PROJECTBASEDIR -MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" - -# For Cygwin, switch paths to Windows format before running java -if $cygwin; then - [ -n "$M2_HOME" ] && - M2_HOME=`cygpath --path --windows "$M2_HOME"` - [ -n "$JAVA_HOME" ] && - JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` - [ -n "$CLASSPATH" ] && - CLASSPATH=`cygpath --path --windows "$CLASSPATH"` - [ -n "$MAVEN_PROJECTBASEDIR" ] && - MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` -fi - -WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain - -exec "$JAVACMD" \ - $MAVEN_OPTS \ - -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ - "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ - ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/phenopacket-tools-validator-core/mvnw.cmd b/phenopacket-tools-validator-core/mvnw.cmd deleted file mode 100644 index 019bd74d..00000000 --- a/phenopacket-tools-validator-core/mvnw.cmd +++ /dev/null @@ -1,143 +0,0 @@ -@REM ---------------------------------------------------------------------------- -@REM Licensed to the Apache Software Foundation (ASF) under one -@REM or more contributor license agreements. See the NOTICE file -@REM distributed with this work for additional information -@REM regarding copyright ownership. The ASF licenses this file -@REM to you under the Apache License, Version 2.0 (the -@REM "License"); you may not use this file except in compliance -@REM with the License. You may obtain a copy of the License at -@REM -@REM http://www.apache.org/licenses/LICENSE-2.0 -@REM -@REM Unless required by applicable law or agreed to in writing, -@REM software distributed under the License is distributed on an -@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -@REM KIND, either express or implied. See the License for the -@REM specific language governing permissions and limitations -@REM under the License. -@REM ---------------------------------------------------------------------------- - -@REM ---------------------------------------------------------------------------- -@REM Maven2 Start Up Batch script -@REM -@REM Required ENV vars: -@REM JAVA_HOME - location of a JDK home dir -@REM -@REM Optional ENV vars -@REM M2_HOME - location of maven2's installed home dir -@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands -@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending -@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven -@REM e.g. 
to debug Maven itself, use -@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 -@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files -@REM ---------------------------------------------------------------------------- - -@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' -@echo off -@REM enable echoing my setting MAVEN_BATCH_ECHO to 'on' -@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% - -@REM set %HOME% to equivalent of $HOME -if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") - -@REM Execute a user defined script before this one -if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre -@REM check for pre script, once with legacy .bat ending and once with .cmd ending -if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" -if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" -:skipRcPre - -@setlocal - -set ERROR_CODE=0 - -@REM To isolate internal variables from possible post scripts, we use another setlocal -@setlocal - -@REM ==== START VALIDATION ==== -if not "%JAVA_HOME%" == "" goto OkJHome - -echo. -echo Error: JAVA_HOME not found in your environment. >&2 -echo Please set the JAVA_HOME variable in your environment to match the >&2 -echo location of your Java installation. >&2 -echo. -goto error - -:OkJHome -if exist "%JAVA_HOME%\bin\java.exe" goto init - -echo. -echo Error: JAVA_HOME is set to an invalid directory. >&2 -echo JAVA_HOME = "%JAVA_HOME%" >&2 -echo Please set the JAVA_HOME variable in your environment to match the >&2 -echo location of your Java installation. >&2 -echo. -goto error - -@REM ==== END VALIDATION ==== - -:init - -@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". -@REM Fallback to current working directory if not found. - -set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% -IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir - -set EXEC_DIR=%CD% -set WDIR=%EXEC_DIR% -:findBaseDir -IF EXIST "%WDIR%"\.mvn goto baseDirFound -cd .. -IF "%WDIR%"=="%CD%" goto baseDirNotFound -set WDIR=%CD% -goto findBaseDir - -:baseDirFound -set MAVEN_PROJECTBASEDIR=%WDIR% -cd "%EXEC_DIR%" -goto endDetectBaseDir - -:baseDirNotFound -set MAVEN_PROJECTBASEDIR=%EXEC_DIR% -cd "%EXEC_DIR%" - -:endDetectBaseDir - -IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig - -@setlocal EnableExtensions EnableDelayedExpansion -for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! 
%%a -@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% - -:endReadAdditionalConfig - -SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" - -set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" -set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain - -%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* -if ERRORLEVEL 1 goto error -goto end - -:error -set ERROR_CODE=1 - -:end -@endlocal & set ERROR_CODE=%ERROR_CODE% - -if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost -@REM check for post script, once with legacy .bat ending and once with .cmd ending -if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" -if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" -:skipRcPost - -@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' -if "%MAVEN_BATCH_PAUSE%" == "on" pause - -if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% - -exit /B %ERROR_CODE% From 0baf233d4a40c01ce377547397c9e84e12aff31f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 22:28:38 -0500 Subject: [PATCH 36/38] Update CHANGELOG.rst Signed-off-by: Daniel Danis --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a8f40949..20d79f57 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,14 @@ Changelog ========= +1.0.0 +----- + +* Update predefined constants, convenience functions and builders +* Improve user guide, tutorial, and documentation +* Run `MetaDataValidator` during base validation +* Fix example phenopackets + 0.4.7 ----- From 1b49582aacd8be02c35c3e019249b69928d4b0b2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 22:29:23 -0500 Subject: [PATCH 37/38] Make release `v1.0.0-RC1`. Signed-off-by: Daniel Danis --- docs/conf.py | 4 ++-- phenopacket-tools-builder/pom.xml | 2 +- phenopacket-tools-cli/pom.xml | 2 +- phenopacket-tools-converter/pom.xml | 2 +- phenopacket-tools-core/pom.xml | 2 +- phenopacket-tools-io/pom.xml | 2 +- phenopacket-tools-test/pom.xml | 2 +- phenopacket-tools-util/pom.xml | 2 +- phenopacket-tools-validator-core/pom.xml | 2 +- phenopacket-tools-validator-jsonschema/pom.xml | 2 +- pom.xml | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 54d465f2..172f4a9a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,9 +31,9 @@ # built documents. # # The short X.Y version. -version = '0.4' +version = '1.0' # The full version, including alpha/beta/rc tags. 
-release = '0.4.8-SNAPSHOT' +release = '1.0.0-RC1' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/phenopacket-tools-builder/pom.xml b/phenopacket-tools-builder/pom.xml index 11c05303..463bd680 100644 --- a/phenopacket-tools-builder/pom.xml +++ b/phenopacket-tools-builder/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-builder diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 0ad46fbc..d282e75a 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-cli diff --git a/phenopacket-tools-converter/pom.xml b/phenopacket-tools-converter/pom.xml index 8a2a17cc..7f1f1779 100644 --- a/phenopacket-tools-converter/pom.xml +++ b/phenopacket-tools-converter/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-converter diff --git a/phenopacket-tools-core/pom.xml b/phenopacket-tools-core/pom.xml index 7e08e4bb..5145cf83 100644 --- a/phenopacket-tools-core/pom.xml +++ b/phenopacket-tools-core/pom.xml @@ -6,7 +6,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-core diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml index f72e0021..468a452d 100644 --- a/phenopacket-tools-io/pom.xml +++ b/phenopacket-tools-io/pom.xml @@ -6,7 +6,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-io diff --git a/phenopacket-tools-test/pom.xml b/phenopacket-tools-test/pom.xml index 6f457067..e734b9d6 100644 --- a/phenopacket-tools-test/pom.xml +++ b/phenopacket-tools-test/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.8-SNAPSHOT + 1.0.0-RC1 4.0.0 diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index 46911b44..ca6753df 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.8-SNAPSHOT + 1.0.0-RC1 4.0.0 diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index 1898f1bd..9a5f879e 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-validator-core diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index 8e8660db..6fb1d814 100644 --- a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.8-SNAPSHOT + 1.0.0-RC1 phenopacket-tools-validator-jsonschema diff --git a/pom.xml b/pom.xml index 6d5e34c8..42c0829d 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.8-SNAPSHOT + 1.0.0-RC1 Phenopacket-tools An app and library for building, conversion and validation of GA4GH Phenopackets From a56fc59d3d17c95046a3dc1fe412ccdf280b37a0 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 28 Nov 2022 22:33:23 -0500 Subject: [PATCH 38/38] Update version hardcoded in CLI. 
Signed-off-by: Daniel Danis --- .../main/java/org/phenopackets/phenopackettools/cli/Main.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java index 4de96603..b2fe0c29 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java @@ -15,7 +15,7 @@ "phenopacket-tools (pxf)", "An application and library for building, conversion, and validation of GA4GH Phenopackets.\n" }, - version = "v0.4.8-SNAPSHOT", + version = "v1.0.0-RC1", mixinStandardHelpOptions = true, subcommands = { // see https://picocli.info/autocomplete.html