diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 00000000..a8f40949 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,23 @@ +========= +Changelog +========= + +0.4.7 +----- + +* Add more predefined constants into the builder module, update the code for generating constants +* Write a tutorial with examples for validation and conversion functionalities +* Add I/O module, implement YAML parser and printer +* Add organ system validator +* Finalize VRS-like validation +* Let the user choose the CLI verbosity + +v0.4.6 +------ + +* Validate metadata +* Convert v1 variants +* Extend phenopacket examples +* Update documentation +* Improve CLI and extend CLI documentation + diff --git a/README.md b/README.md index 9d2e4fd6..7dabba6f 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,7 @@ The cli application works in a standard UNIX-like manner. ```shell cd phenopacket-tools ./mvnw package -PXF_VERSION="0.4.6" -alias pfx-tools="java -jar $(pwd)/phenopacket-tools-cli/target/phenopacket-tools-cli-${PXF_VERSION}.jar" +alias pfx-tools="java -jar $(pwd)/phenopacket-tools-cli/target/phenopacket-tools-cli-@project.version@.jar" pfx-tools --help ``` @@ -51,6 +50,5 @@ pfx-tools validate family ~/phenopacket-examples/families/*.json pfx-tools convert phenopacket.json ``` - -see this for VRS -- https://github.com/ga4gh/vrs/blob/76542a903b913110e67811885a8958625bbc3aae/schema/vrs.json -import it like vrsatile \ No newline at end of file +### API +A Javadoc description of the API of phenopacket-tools is available [here](https://javadoc.io/doc/org.phenopackets.phenopackettools). 
\ No newline at end of file diff --git a/constants/AdministrationRoute.tsv b/constants/AdministrationRoute.tsv new file mode 100644 index 00000000..7d2dcf13 --- /dev/null +++ b/constants/AdministrationRoute.tsv @@ -0,0 +1,10 @@ +ontology.id ontology.label variable.name function.name +NCIT:C38276 Intravenous Route of Administration INTRAVENOUS_ROUTE intravenous +NCIT:C38222 Intraarterial Route of Administration INTRAARTERIAL_ROUTE intraarterial +NCIT:C183503 Administration via Wound Irrigation WOUND_IRRIGATION_ROUTE woundIrrigation +NCIT:C149695 Nebulizer Route of Administration NEBULIZER_ROUTE nebulizer +NCIT:C38288 Oral Route of Administration ORAL_ROUTE oral +NCIT:C38267 Intrathecal Route of Administration INTRATHECAL_ROUTE intrathecal +NCIT:C38677 Peridural Route of Administration PERIDURAL_ROUTE peridural +NCIT:C38304 Topical Route of Administration TOPICAL_ROUTE topical +NCIT:C38305 Transdermal Route of Administration TRANSDERMAL transdermal diff --git a/constants/Assays.tsv b/constants/Assays.tsv deleted file mode 100644 index c0bc6d28..00000000 --- a/constants/Assays.tsv +++ /dev/null @@ -1,2 +0,0 @@ -ontology.id ontology.label variable.name function.name -LOINC:2157-6 Creatine kinase [Enzymatic activity/volume] in Serum or Plasma CREATINE_KINASE creatineKinaseActivity diff --git a/constants/BiospecimenType.tsv b/constants/BiospecimenType.tsv new file mode 100644 index 00000000..43ff2c58 --- /dev/null +++ b/constants/BiospecimenType.tsv @@ -0,0 +1,10 @@ +ontology.id ontology.label variable.name function.name +NCIT:C133261 Bone Marrow Aspirate BONE_MARROW_ASPIRATE boneMarrowAspirate +NCIT:C158416 Blood DNA BLOOD_DNA bloodDNA +NCIT:C185194 Cerebrospinal Fluid Sample CSF_SAMPLE cerebrospinalFluidSample +NCIT:C156435 Formalin-Fixed Paraffin-Embedded DNA FORMALIN_FIXED_PARAFIN_DNA formalinFixedParaffinEmbeddedDNA +NCIT:C13195 Bronchoalveolar Lavage Fluid BAL_FLUID bronchoalveolarLavageFluid +NCIT:C187062 Pericardial Fluid Specimen PERICARDIAL_FLUID_SAMPLE 
pericardialFluidSpecimen +NCIT:C185197 Peritoneal Fluid Sample PERTONIAL_FLUID_SAMPLE peritonealFluidSample +NCIT:C163995 Total RNA TOTAL_RNA totalRNA +NCIT:C18009 Tumor Tissue TUMOR_TISSUE tumorTissue diff --git a/constants/DiseaseGrade.tsv b/constants/DiseaseGrade.tsv new file mode 100644 index 00000000..aaa2c73d --- /dev/null +++ b/constants/DiseaseGrade.tsv @@ -0,0 +1,7 @@ +ontology.id ontology.label variable.name function.name +NCIT:C28077 Grade 1 GRADE_1 grade1 +NCIT:C28078 Grade 2 GRADE_2 grade2 +NCIT:C28079 Grade 3 GRADE_3 grade3 +NCIT:C28080 Grade 3a GRADE_3A grade3a +NCIT:C28081 Grade 3b GRADE_3B grade3b +NCIT:C28082 Grade 4 GRADE_4 grade4 diff --git a/constants/DiseaseStage.tsv b/constants/DiseaseStage.tsv new file mode 100644 index 00000000..32aafab0 --- /dev/null +++ b/constants/DiseaseStage.tsv @@ -0,0 +1,11 @@ +ontology.id ontology.label variable.name function.name +NCIT:C28051 Stage 0 STAGE_0 stage0 +NCIT:C27966 Stage I STAGE_I stageI +NCIT:C28054 Stage II STAGE_II stageII +NCIT:C27970 Stage III STAGE_III stageIII +NCIT:C27971 Stage IV STAGE_IV stageIV +NCIT:C66904 New York Heart Association Class I NYHA_I nyhaClassI +NCIT:C66905 New York Heart Association Class II NYHA_II nyhaClassII +NCIT:C66907 New York Heart Association Class III NYHA_III nyhaClassIII +NCIT:C7922 New York Heart Association Class III/IV NYHA_III_IV nyhaClassIII_or_IV +NCIT:C66908 New York Heart Association Class IV NYHA_IV nyhaClassIV diff --git a/constants/Evidence.tsv b/constants/Evidence.tsv new file mode 100644 index 00000000..d2bb6db6 --- /dev/null +++ b/constants/Evidence.tsv @@ -0,0 +1,6 @@ +ontology.id ontology.label variable.name function.name +ECO:0006016 author statement from published clinical study AUTHOR_STATEMENT_FROM_PCS authorStatementFromPublishedClinicalStudy +ECO:0007539 author statement from published clinical study used in automatic assertion AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC authorStatementFromPublishedClinicalStudyAutomaticAssertion +ECO:0006017 author 
statement from published clinical study used in manual assertion AUTHOR_STATEMENT_FROM_PCS_MANUAL authorStatementFromPublishedClinicalStudyManualAssertion +ECO:0000033 author statement supported by traceable reference AUTHOR_STATEMENT_TRACEABLE_REFERENCE authorStatementSupportedByTraceableReference +ECO:0006154 self-reported patient statement evidence SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE selfReportedPatientStatementEvidence \ No newline at end of file diff --git a/constants/MaterialSample.tsv b/constants/MaterialSample.tsv new file mode 100644 index 00000000..69b01290 --- /dev/null +++ b/constants/MaterialSample.tsv @@ -0,0 +1,3 @@ +ontology.id ontology.label variable.name function.name +EFO:0009655 abnormal sample ABNORMAL_SAMPLE abnormalSample +EFO:0009654 reference sample REFERENCE_SAMPLE referenceSample diff --git a/constants/MedicalActions.tsv b/constants/MedicalActions.tsv index 0f1577e3..a8c483ef 100644 --- a/constants/MedicalActions.tsv +++ b/constants/MedicalActions.tsv @@ -1,10 +1,10 @@ ontology.id ontology.label variable.name function.name NCIT:C41331 Adverse Event ADVERSE_EVENT adverseEvent -NCIT:C64530 Four Times Daily FOUR_TIMES_DAILY fourtimesDaily +NCIT:C64530 Four Times Daily FOUR_TIMES_DAILY fourTimesDaily NCIT:C38222 Intraarterial Route of Administration INTRA_ARTERIAL intraArterialAdministration NCIT:C38276 Intravenous Route of Administration IV_ADMINISTRATION intravenousAdministration NCIT:C38288 Oral Route of Administration ORAL_ADMINISTRATION oralAdministration NCIT:C64576 Once ONCE once NCIT:C125004 Once Daily ONCE_DAILY onceDaily -NCIT:C64527 Three Times Daily THREE_TIMES_DAILY threetimesDaily +NCIT:C64527 Three Times Daily THREE_TIMES_DAILY threeTimesDaily NCIT:C64496 Twice Daily TWICE_DAILY twiceDaily diff --git a/constants/PathologicalTnm.tsv b/constants/PathologicalTnm.tsv new file mode 100644 index 00000000..4f4573cd --- /dev/null +++ b/constants/PathologicalTnm.tsv @@ -0,0 +1,37 @@ +ontology.id ontology.label variable.name 
function.name +NCIT:C48740 pM0 Stage Finding PM0_STAGE_FINDING pM0StageFinding +NCIT:C48741 pM1 Stage Finding PM1_STAGE_FINDING pM1StageFinding +NCIT:C48742 pM1a Stage Finding PM1A_STAGE_FINDING pM1aStageFinding +NCIT:C48743 pM1b Stage Finding PM1B_STAGE_FINDING pM1bStageFinding +NCIT:C48744 pM1c Stage Finding PM1C_STAGE_FINDING pM1cStageFinding +NCIT:C48745 pN0 Stage Finding PN0_STAGE_FINDING pN0StageFinding +NCIT:C48746 pN1 Stage Finding PN1_STAGE_FINDING pN1StageFinding +NCIT:C48747 pN1a Stage Finding PN1A_STAGE_FINDING pN1aStageFinding +NCIT:C48748 pN1b Stage Finding PN1B_STAGE_FINDING pN1bStageFinding +NCIT:C48749 pN1c Stage Finding PN1C_STAGE_FINDING pN1cStageFinding +NCIT:C48750 pN2 Stage Finding PN2_STAGE_FINDING pN2StageFinding +NCIT:C48751 pN2a Stage Finding PN2A_STAGE_FINDING pN2aStageFinding +NCIT:C48752 pN2b Stage Finding PN2B_STAGE_FINDING pN2bStageFinding +NCIT:C48753 pN2c Stage Finding PN2C_STAGE_FINDING pN2cStageFinding +NCIT:C48754 pN3 Stage Finding PN3_STAGE_FINDING pN3StageFinding +NCIT:C48755 pN3a Stage Finding PN3A_STAGE_FINDING pN3aStageFinding +NCIT:C48756 pN3b Stage Finding PN3B_STAGE_FINDING pN3bStageFinding +NCIT:C48757 pN3c Stage Finding PN3C_STAGE_FINDING pN3cStageFinding +NCIT:C48758 pT0 Stage Finding PT0_STAGE_FINDING pT0StageFinding +NCIT:C48759 pT1 Stage Finding PT1_STAGE_FINDING pT1StageFinding +NCIT:C48760 pT1a Stage Finding PT1A_STAGE_FINDING pT1aStageFinding +NCIT:C48761 pT1b Stage Finding PT1B_STAGE_FINDING pT1bStageFinding +NCIT:C48763 pT1c Stage Finding PT1C_STAGE_FINDING pT1cStageFinding +NCIT:C48764 pT2 Stage Finding PT2_STAGE_FINDING pT2StageFinding +NCIT:C48765 pT2a Stage Finding PT2A_STAGE_FINDING pT2aStageFinding +NCIT:C48766 pT2b Stage Finding PT2B_STAGE_FINDING pT2bStageFinding +NCIT:C48767 pT2c Stage Finding PT2C_STAGE_FINDING pT2cStageFinding +NCIT:C48768 pT3 Stage Finding PT3_STAGE_FINDING pT3StageFinding +NCIT:C48769 pT3a Stage Finding PT3A_STAGE_FINDING pT3aStageFinding +NCIT:C48770 pT3b Stage Finding 
PT3B_STAGE_FINDING pT3bStageFinding +NCIT:C48771 pT3c Stage Finding PT3C_STAGE_FINDING pT3cStageFinding +NCIT:C48772 pT4 Stage Finding PT4_STAGE_FINDING pT4StageFinding +NCIT:C48773 pT4a Stage Finding PT4A_STAGE_FINDING pT4aStageFinding +NCIT:C48774 pT4b Stage Finding PT4B_STAGE_FINDING pT4bStageFinding +NCIT:C48775 pT4c Stage Finding PT4C_STAGE_FINDING pT4cStageFinding +NCIT:C48776 pT4d Stage Finding PT4D_STAGE_FINDING pT4dStageFinding diff --git a/constants/Response.tsv b/constants/Response.tsv index f2f9b3b9..ae2fd96e 100644 --- a/constants/Response.tsv +++ b/constants/Response.tsv @@ -1,3 +1,14 @@ ontology.id ontology.label variable.name function.name -NCIT:C102560 Favorable FAVORABLE favorable -NCIT:C102561 Unfavorable UNFAVORABLE unfavorable +NCIT:C123584 Favorable Response FAVORABLE_RESPONSE favorableResponse +NCIT:C123617 Unfavorable Response UNFAVORABLE_RESPONSE unfavorableResponse +NCIT:C123600 No Response NO_RESPONSE noResponse +NCIT:C123614 Stringent Complete Response STRINGENT_COMPLETE_RESPONSE stringentCompleteResponse +NCIT:C123598 Minimal Response MINIMAL_RESPONSE minimalResponse +NCIT:C4870 Complete Remission COMPLETE_REMISSION completeRemission +NCIT:C18058 Partial Remission PARTIAL_REMISSION partialRemission +NCIT:C70604 Primary Refractory PRIMARY_REFRACTORY primaryRefractory +NCIT:C142357 iRECIST Complete Response iRECIST_COMPLETE_RESPONSE iRECISTCompleteResponse +NCIT:C142356 iRECIST Confirmed Progressive Disease iRECIST_CONFIRMED_PROGRESSIVE_DISEASE iRECISTConfirmedProgressiveDisease +NCIT:C142358 iRECIST Partial Response iRECIST_PARTIAL_RESPONSE iRECISTPartialResponse +NCIT:C142359 iRECIST Stable Disease iRECIST_STABLE_DISEASE iRECISTStableDisease +NCIT:C142360 iRECIST Unconfirmed Progressive Disease iRECIST_UNCONFIRMED_PROGRESSIVE_DISEASE iRECISTUnconfirmedProgressiveDisease diff --git a/constants/Severity.tsv b/constants/Severity.tsv new file mode 100644 index 00000000..56cf75ce --- /dev/null +++ b/constants/Severity.tsv @@ -0,0 +1,6 @@ 
+ontology.id ontology.label variable.name function.name +HP:0012827 Borderline BORDERLINE borderline +HP:0012825 Mild MILD mild +HP:0012826 Moderate MODERATE moderate +HP:0012828 Severe SEVERE severe +HP:0012829 Profound PROFOUND profound diff --git a/constants/SpatialPattern.tsv b/constants/SpatialPattern.tsv index ad430e2b..687a256f 100644 --- a/constants/SpatialPattern.tsv +++ b/constants/SpatialPattern.tsv @@ -31,4 +31,4 @@ HP:0012840 Proximal PROXIMAL proximal HP:0033820 Apical APICAL apical HP:0030650 Focal FOCAL focal HP:0030651 Multifocal MULTIFOCAL multifocal -HP:0032540 Jointflexorsurfacelocalization JOINT_FLEXOR_SURFACE_LOCALIZATION jointFlexorSurfaceLocalization +HP:0032540 Joint flexor surface localization JOINT_FLEXOR_SURFACE_LOCALIZATION jointFlexorSurfaceLocalization diff --git a/constants/TreatmentTermination.tsv b/constants/TreatmentTermination.tsv new file mode 100644 index 00000000..d64c1a6e --- /dev/null +++ b/constants/TreatmentTermination.tsv @@ -0,0 +1,5 @@ +ontology.id ontology.label variable.name function.name +NCIT:C105740 Treatment Completed as Prescribed TREATMENT_COMPLETED_AS_PRESCRIBED treatmentCompletedAsPrescribed +NCIT:C105741 Treatment Terminated Due to Toxicity TREATMENT_TERMINATED_TOXICITY treatmentTerminatedDueToToxicity +NCIT:C106470 Treatment on Hold TREATMENT_ON_HOLD treatmentOnHold +NCIT:C41331 Adverse Event ADVERSE_EVENT adverseEvent diff --git a/constants/TumorProgression.tsv b/constants/TumorProgression.tsv new file mode 100644 index 00000000..613fa925 --- /dev/null +++ b/constants/TumorProgression.tsv @@ -0,0 +1,4 @@ +ontology.id ontology.label variable.name function.name +NCIT:C8509 Primary Neoplasm PRIMARY_NEOPLASM primaryNeoplasm +NCIT:C3261 Metastatic Neoplasm METASTATIC_NEOPLASM metastaticNeoplasm +NCIT:C4798 Recurrent Neoplasm RECURRENT_NEOPLASM recurrentNeoplasm diff --git a/constants/Unit.tsv b/constants/Unit.tsv index 4a544c6f..8f8b0d57 100644 --- a/constants/Unit.tsv +++ b/constants/Unit.tsv @@ -3,7 +3,7 @@ 
UCUM:degree degree (plane angle) DEGREE degreeOfAngle UCUM:[diop] diopter DIOPTER diopter UCUM:g gram GRAM gram UCUM:g/kg gram per kilogram GRAM_PER_KG gramPerKilogram -UCUM:kg kiligram KILIGRAM kilogram +UCUM:kg kilogram KILOGRAM kilogram UCUM:L liter LITER liter UCUM:m meter METER meter UCUM:ug microgram MICROGRAM microgram diff --git a/constants/create_classes.py b/constants/create_classes.py index ffd62bfd..49e79251 100644 --- a/constants/create_classes.py +++ b/constants/create_classes.py @@ -54,6 +54,9 @@ def name(self): def items(self): return self._constant_items + def __repr__(self): + return f"Entry name={self._name} {len(self._constant_items)} items" + def parse_csv(fname): if not isfile(fname): @@ -72,31 +75,22 @@ def parse_csv(fname): def create_java_class(entry): java_file_name = entry.name + ".java" # LATER adjust path java_file_path = join(JAVA_DIR_PATH, java_file_name) - fh = open(java_file_path, 'wt') - fh.write("package org.phenopackets.phenopackettools.builder.constants;\n\n") - fh.write("import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder;\n") - fh.write("import org.phenopackets.schema.v2.core.OntologyClass;\n\n") - fh.write(f"public class {entry.name} {{\n\n") - items = entry.items - for item in items: - # e.g., private static final OntologyClass HETEROZYGOUS = OntologyClassBuilder.ontologyClass("GENO:0000135", "heterozygous"); - fh.write(f" private static final OntologyClass {item.variable_name} = OntologyClassBuilder.ontologyClass(") - fh.write(f"\"{item.ontology_id}\", \"{item.ontology_label}\");\n") - fh.write("\n\n") - for item in items: - # e.g., public static OntologyClass heterozygous() {return HETEROZYGOUS; } - fh.write(f" public static OntologyClass {item.function_name}() {{ return {item.variable_name}; }}\n") - fh.write("\n}\n") - fh.close() - - - - - - - - - + with open(java_file_path, 'wt') as fh: + fh.write("// Generated by phenopacket-tools/constants/create_classes.py. 
DO NOT EDIT!\n") + fh.write("package org.phenopackets.phenopackettools.builder.constants;\n\n") + fh.write("import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder;\n") + fh.write("import org.phenopackets.schema.v2.core.OntologyClass;\n\n") + fh.write(f"public class {entry.name} {{\n\n") + items = entry.items + for item in items: + # e.g., private static final OntologyClass HETEROZYGOUS = OntologyClassBuilder.ontologyClass("GENO:0000135", "heterozygous"); + fh.write(f" private static final OntologyClass {item.variable_name} = OntologyClassBuilder.ontologyClass(") + fh.write(f"\"{item.ontology_id}\", \"{item.ontology_label}\");\n") + fh.write("\n\n") + for item in items: + # e.g., public static OntologyClass heterozygous() {return HETEROZYGOUS; } + fh.write(f" public static OntologyClass {item.function_name}() {{ return {item.variable_name}; }}\n") + fh.write("\n}\n") entries = [] diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index cb6771ce..9546fb50 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -9,4 +9,15 @@ SpatialPattern|Modifier terms from the `HPO `_ are use Unit|With some exceptions, terms from the `The Unified Code for Units of Measure `_ are used to denote units. Response|These codes from `NCI Thesaurus `_ can be used to code the overall response of a patient to treatment. Favorable and Unfavorble can be used for general purposes and the remaining codes are intended to be used for oncology. Assays|If possible, `LOINC `_ codes should be used to specify laboratory test assays. -Gender|`LOINC `_ codes should be used to specify self-reported gender. \ No newline at end of file +Gender|`LOINC `_ codes should be used to specify self-reported gender. +PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). 
+DiseaseStage|These codes from `NCI Thesaurus `_ can be used to denote the clinical stage of cancer or heart failure. Other codes should be used for specific diseases with their own clinical stage systems. +Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. +Evidence|Terms from the `Evidence and Conclusion Ontology `_ are used to specify evidence categories. +BiospecimenType|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. +TumorProgression|Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. +TumorGrade|Terms from the `NCI Thesaurus `_ to describe microscopic appearance of tumor. Grade 1: Well differentiated (low grade); Grade 2: Moderately differentiated (intermediate grade); Grade 3: Poorly differentiated (high grade); Grade 4: Undifferentiated (high grade). +DiseaseGrade|Terms from the `NCI Thesaurus `_ to represent the tumor grade. +MaterialSample|Terms from the `EFO `_ to specify the status of the sample. +AdministrationRoute|Terms from the `NCI Thesaurus `_ to represent the way in which a medicinal product is introduced into the body. +TreatmentTermination|Terms from the `NCI Thesaurus `_ to represent the reason that the treatment was completed or stopped early. \ No newline at end of file diff --git a/docs/cli.rst b/docs/cli.rst index ef383414..af4ae476 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -17,15 +17,18 @@ no special installation procedure if Java 17 or better is available in your envi Setup ~~~~~ -Most users should *download* the precompiled JAR file from *phenopacket-tools* release page. +Most users should *download* the distribution ZIP file with the precompiled JAR file from the *phenopacket-tools* release page. However, it is also possible to *build* the JAR from sources. 
Download ^^^^^^^^ -*phenopacket-tools* JAR is provided as part of *phenopacket-tools*' release schedule +*phenopacket-tools* JAR is provided in the distribution ZIP file as part of *phenopacket-tools*' release schedule from `Releases `_. +The ZIP archive contains the executable JAR file along with README and example phenopackets required to run the setup +and the tutorial. + Build from source code ^^^^^^^^^^^^^^^^^^^^^^ @@ -44,12 +47,9 @@ Run the following commands to check out the stable source code and to build the $ cd phenopacket-tools $ ./mvnw -Prelease package -After a successful build, a file ``phenopacket-tools-cli-${project.version}.jar`` will be created in -the ``phenopacket-tools-cli/target`` directory. Use the JAR file in the same way as the JAR downloaded -from *phenopacket-tools* releases. - -.. note:: - Replace ``${project.version}`` with a given version (e.g. ``0.4.6``). +After a successful build, a distribution ZIP file "phenopacket-tools-cli-|release|-distribution.zip" +will be created in the ``phenopacket-tools-cli/target`` directory. Use the ZIP archive in the same way as the archive +downloaded from *phenopacket-tools* releases. Commands @@ -70,6 +70,10 @@ In the next sections, we will run *phenopacket-tools* by using the following ali $ alias pxf="java -jar phenopacket-tools-cli-${project.version}.jar" +.. note:: + The commands report warnings and errors by default. Use `-v` to increase the verbosity and see what's + going on under the hood. The `-v` can be specified multiple times (e.g. `-vvv`). + *examples* - generate examples of the top-level elements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -172,11 +176,11 @@ Results are written into STDOUT in CSV/TSV format. The CSV output has a header, The header contains phenopacket-tools version, date time of validation, and list of validators that were run. A row with column names follows the header, and then the individual validation results. -.. 
- TODO - check the validation description. +.. TODO - check the validation description. Set up autocompletion ~~~~~~~~~~~~~~~~~~~~~ -TODO - write the section +.. TODO - write the section +TODO - write diff --git a/docs/conf.py b/docs/conf.py index 7c34361a..56357ef7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,8 +23,17 @@ ############## project = 'phenopacket-tools' -copyright = '2022, Peter Robinson' -author = 'Peter Robinson' +copyright = '2022, Daniel Danis, Peter Robinson' +author = u'Daniel Danis, Peter Robinson' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.4' +# The full version, including alpha/beta/rc tags. +release = '0.4.7' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/constants.rst b/docs/constants.rst index 31dbd431..f2b11cce 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -25,6 +25,26 @@ In contrast, this is the code required with phenopacket-tools (omitting import s The following tables present the available static functions with predefined concepts. +AdministrationRoute +^^^^^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ to represent the way in which a medicinal product is introduced into the body. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C38276", "Intravenous Route of Administration", "intravenous()" + "NCIT:C38222", "Intraarterial Route of Administration", "intraarterial()" + "NCIT:C183503", "Administration via Wound Irrigation", "woundIrrigation()" + "NCIT:C149695", "Nebulizer Route of Administration", "nebulizer()" + "NCIT:C38288", "Oral Route of Administration", "oral()" + "NCIT:C38267", "Intrathecal Route of Administration", "intrathecal()" + "NCIT:C38677", "Peridural Route of Administration", "peridural()" + "NCIT:C38304", "Topical Route of Administration", "topical()" + "NCIT:C38305", "Transdermal Route of Administration", "transdermal()" + + AllelicState ^^^^^^^^^^^^ @@ -39,16 +59,78 @@ Terms from the `GENE ontology `_ are "GENO:0000134", "hemizygous", "hemizygous()" -Assays -^^^^^^ +BiospecimenType +^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C133261", "Bone Marrow Aspirate", "boneMarrowAspirate()" + "NCIT:C158416", "Blood DNA", "bloodDNA()" + "NCIT:C185194", "Cerebrospinal Fluid Sample", "cerebrospinalFluidSample()" + "NCIT:C156435", "Formalin-Fixed Paraffin-Embedded DNA", "formalinFixedParaffinEmbeddedDNA()" + "NCIT:C13195", "Bronchoalveolar Lavage Fluid", "bronchoalveolarLavageFluid()" + "NCIT:C187062", "Pericardial Fluid Specimen", "pericardialFluidSpecimen()" + "NCIT:C185197", "Peritoneal Fluid Sample", "peritonealFluidSample()" + "NCIT:C163995", "Total RNA", "totalRNA()" + "NCIT:C18009", "Tumor Tissue", "tumorTissue()" -If possible, `LOINC `_ codes should be used to specify laboratory test assays. + +DiseaseGrade +^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ to represent the tumor grade. .. 
csv-table:: :header: "id", "label", "function name" :widths: 30, 200, 200 - "LOINC:2157-6", "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma", "creatineKinaseActivity()" + "NCIT:C28077", "Grade 1", "grade1()" + "NCIT:C28078", "Grade 2", "grade2()" + "NCIT:C28079", "Grade 3", "grade3()" + "NCIT:C28080", "Grade 3a", "grade3a()" + "NCIT:C28081", "Grade 3b", "grade3b()" + "NCIT:C28082", "Grade 4", "grade4()" + + +DiseaseStage +^^^^^^^^^^^^ + +These codes from `NCI Thesaurus `_ can be used to denote the clinical stage of cancer or heart failure. Other codes should be used for specific diseases with their own clinical stage systems. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C28051", "Stage 0", "stage0()" + "NCIT:C27966", "Stage I", "stageI()" + "NCIT:C28054", "Stage II", "stageII()" + "NCIT:C27970", "Stage III", "stageIII()" + "NCIT:C27971", "Stage IV", "stageIV()" + "NCIT:C66904", "New York Heart Association Class I", "nyhaClassI()" + "NCIT:C66905", "New York Heart Association Class II", "nyhaClassII()" + "NCIT:C66907", "New York Heart Association Class III", "nyhaClassIII()" + "NCIT:C7922", "New York Heart Association Class III/IV", "nyhaClassIII_or_IV()" + "NCIT:C66908", "New York Heart Association Class IV", "nyhaClassIV()" + + +Evidence +^^^^^^^^ + +Terms from the `Evidence and Conclusion Ontology `_ are used to specify evidence categories. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "ECO:0006016", "author statement from published clinical study", "authorStatementFromPublishedClinicalStudy()" + "ECO:0007539", "author statement from published clinical study used in automatic assertion", "authorStatementFromPublishedClinicalStudyAutomaticAssertion()" + "ECO:0006017", "author statement from published clinical study used in manual assertion", "authorStatementFromPublishedClinicalStudyManualAssertion()" + "ECO:0000033", "author statement supported by traceable reference", "authorStatementSupportedByTraceableReference()" + "ECO:0006154", "self-reported patient statement evidence", "selfReportedPatientStatementEvidence()" Gender @@ -84,6 +166,19 @@ Modifier terms from the `HPO `_ are used to describe l "HP:0012832", "Bilateral", "bilateral()" +MaterialSample +^^^^^^^^^^^^^^ + +Terms from the `EFO `_ to specify the status of the sample. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "EFO:0009655", "abnormal sample", "abnormalSample()" + "EFO:0009654", "reference sample", "referenceSample()" + + MedicalActions ^^^^^^^^^^^^^^ @@ -94,13 +189,13 @@ Terms from the `NCI Thesaurus `_ are :widths: 30, 200, 200 "NCIT:C41331", "Adverse Event", "adverseEvent()" - "NCIT:C64530", "Four Times Daily", "fourtimesDaily()" + "NCIT:C64530", "Four Times Daily", "fourTimesDaily()" "NCIT:C38222", "Intraarterial Route of Administration", "intraArterialAdministration()" "NCIT:C38276", "Intravenous Route of Administration", "intravenousAdministration()" "NCIT:C38288", "Oral Route of Administration", "oralAdministration()" "NCIT:C64576", "Once", "once()" "NCIT:C125004", "Once Daily", "onceDaily()" - "NCIT:C64527", "Three Times Daily", "threetimesDaily()" + "NCIT:C64527", "Three Times Daily", "threeTimesDaily()" "NCIT:C64496", "Twice Daily", "twiceDaily()" @@ -159,6 +254,53 @@ Terms from the `UBERON ontology `_ "UBERON:0002370", "thymus", "thymus()" 
+PathologicalTnm +^^^^^^^^^^^^^^^ + +TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C48740", "pM0 Stage Finding", "pM0StageFinding()" + "NCIT:C48741", "pM1 Stage Finding", "pM1StageFinding()" + "NCIT:C48742", "pM1a Stage Finding", "pM1aStageFinding()" + "NCIT:C48743", "pM1b Stage Finding", "pM1bStageFinding()" + "NCIT:C48744", "pM1c Stage Finding", "pM1cStageFinding()" + "NCIT:C48745", "pN0 Stage Finding", "pN0StageFinding()" + "NCIT:C48746", "pN1 Stage Finding", "pN1StageFinding()" + "NCIT:C48747", "pN1a Stage Finding", "pN1aStageFinding()" + "NCIT:C48748", "pN1b Stage Finding", "pN1bStageFinding()" + "NCIT:C48749", "pN1c Stage Finding", "pN1cStageFinding()" + "NCIT:C48750", "pN2 Stage Finding", "pN2StageFinding()" + "NCIT:C48751", "pN2a Stage Finding", "pN2aStageFinding()" + "NCIT:C48752", "pN2b Stage Finding", "pN2bStageFinding()" + "NCIT:C48753", "pN2c Stage Finding", "pN2cStageFinding()" + "NCIT:C48754", "pN3 Stage Finding", "pN3StageFinding()" + "NCIT:C48755", "pN3a Stage Finding", "pN3aStageFinding()" + "NCIT:C48756", "pN3b Stage Finding", "pN3bStageFinding()" + "NCIT:C48757", "pN3c Stage Finding", "pN3cStageFinding()" + "NCIT:C48758", "pT0 Stage Finding", "pT0StageFinding()" + "NCIT:C48759", "pT1 Stage Finding", "pT1StageFinding()" + "NCIT:C48760", "pT1a Stage Finding", "pT1aStageFinding()" + "NCIT:C48761", "pT1b Stage Finding", "pT1bStageFinding()" + "NCIT:C48763", "pT1c Stage Finding", "pT1cStageFinding()" + "NCIT:C48764", "pT2 Stage Finding", "pT2StageFinding()" + "NCIT:C48765", "pT2a Stage Finding", "pT2aStageFinding()" + "NCIT:C48766", "pT2b Stage Finding", "pT2bStageFinding()" + "NCIT:C48767", "pT2c Stage Finding", "pT2cStageFinding()" + "NCIT:C48768", "pT3 Stage Finding", "pT3StageFinding()" + "NCIT:C48769", "pT3a Stage Finding", "pT3aStageFinding()" + "NCIT:C48770", "pT3b 
Stage Finding", "pT3bStageFinding()" + "NCIT:C48771", "pT3c Stage Finding", "pT3cStageFinding()" + "NCIT:C48772", "pT4 Stage Finding", "pT4StageFinding()" + "NCIT:C48773", "pT4a Stage Finding", "pT4aStageFinding()" + "NCIT:C48774", "pT4b Stage Finding", "pT4bStageFinding()" + "NCIT:C48775", "pT4c Stage Finding", "pT4cStageFinding()" + "NCIT:C48776", "pT4d Stage Finding", "pT4dStageFinding()" + + Response ^^^^^^^^ @@ -168,8 +310,35 @@ These codes from `NCI Thesaurus `_ ca :header: "id", "label", "function name" :widths: 30, 200, 200 - "NCIT:C102560", "Favorable", "favorable()" - "NCIT:C102561", "Unfavorable", "unfavorable()" + "NCIT:C123584", "Favorable Response", "favorableResponse()" + "NCIT:C123617", "Unfavorable Response", "unfavorableResponse()" + "NCIT:C123600", "No Response", "noResponse()" + "NCIT:C123614", "Stringent Complete Response", "stringentCompleteResponse()" + "NCIT:C123598", "Minimal Response", "minimalResponse()" + "NCIT:C4870", "Complete Remission", "completeRemission()" + "NCIT:C18058", "Partial Remission", "partialRemission()" + "NCIT:C70604", "Primary Refractory", "primaryRefractory()" + "NCIT:C142357", "iRECIST Complete Response", "iRECISTCompleteResponse()" + "NCIT:C142356", "iRECIST Confirmed Progressive Disease", "iRECISTConfirmedProgressiveDisease()" + "NCIT:C142358", "iRECIST Partial Response", "iRECISTPartialResponse()" + "NCIT:C142359", "iRECIST Stable Disease", "iRECISTStableDisease()" + "NCIT:C142360", "iRECIST Unconfirmed Progressive Disease", "iRECISTUnconfirmedProgressiveDisease()" + + +Severity +^^^^^^^^ + +Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "HP:0012827", "Borderline", "borderline()" + "HP:0012825", "Mild", "mild()" + "HP:0012826", "Moderate", "moderate()" + "HP:0012828", "Severe", "severe()" + "HP:0012829", "Profound", "profound()" SpatialPattern @@ -213,7 +382,36 @@ Modifier terms from the `HPO `_ are used to describe s "HP:0033820", "Apical", "apical()" "HP:0030650", "Focal", "focal()" "HP:0030651", "Multifocal", "multifocal()" - "HP:0032540", "Jointflexorsurfacelocalization", "jointFlexorSurfaceLocalization()" + "HP:0032540", "Joint flexor surface localization", "jointFlexorSurfaceLocalization()" + + +TreatmentTermination +^^^^^^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ to represent the reason that the treatment was completed or stopped early. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C105740", "Treatment Completed as Prescribed", "treatmentCompletedAsPrescribed()" + "NCIT:C105741", "Treatment Terminated Due to Toxicity", "treatmentTerminatedDueToToxicity()" + "NCIT:C106470", "Treatment on Hold", "treatmentOnHold()" + "NCIT:C41331", "Adverse Event", "adverseEvent()" + + +TumorProgression +^^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C8509", "Primary Neoplasm", "primaryNeoplasm()" + "NCIT:C3261", "Metastatic Neoplasm", "metastaticNeoplasm()" + "NCIT:C4798", "Recurrent Neoplasm", "recurrentNeoplasm()" Unit @@ -229,7 +427,7 @@ With some exceptions, terms from the `The Unified Code for Units of Measure `_ * `Familes `_ -* `Cohorts `_ \ No newline at end of file +* `Cohorts `_ diff --git a/docs/index.rst b/docs/index.rst index c158c4e3..d83e40aa 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,6 +35,7 @@ how to use the CLI application on your system. 
:maxdepth: 1 :caption: Contents: + tutorial creating validation converting diff --git a/docs/tutorial.rst b/docs/tutorial.rst new file mode 100644 index 00000000..c1af15f0 --- /dev/null +++ b/docs/tutorial.rst @@ -0,0 +1,328 @@ +.. _rsttutorial: + +======== +Tutorial +======== + +This tutorial walks through the installation of *phenopacket-tools* and provides an overview and an intended usage +of the command-line interface. The tutorial sections point to the parts of documentations which offer more detail. + +Setup +===== + +*Phenopacket-tools* is distributed as a ZIP archive that contains an executable JAR file +and several resource files for running this tutorial. Let's check that Java is installed on the machine, +download the distribution ZIP and set up an alias as a shortcut for running the *phenopacket-tools*. + +Prerequisites +^^^^^^^^^^^^^ + +*Phenopacket-tools* is written in Java 17 and requires Java 17 or better to run. An appropriate Java executable +must be present on your ``$PATH``. Run the following to determine the availability and version of Java on your machine:: + + java -version + +which prints a similar output for Java 17:: + + openjdk version "17" 2021-09-14 + OpenJDK Runtime Environment (build 17+35-2724) + OpenJDK 64-Bit Server VM (build 17+35-2724, mixed mode, sharing) + +Download *phenopacket-tools* +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A prebuilt distribution ZIP file is available for download from +`phenopacket-tools release section `_ +of the GitHub repository. + +Download and unpack the ZIP file from the releases section: + +.. parsed-literal:: + + URL=https://github.com/phenopackets/phenopacket-tools/releases/download/v\ |release|\ /phenopacket-tools-cli-|release|-distribution.zip + curl -o phenopacket-tools-cli-|release|-distribution.zip ${URL} + unzip phenopacket-tools-cli-|release|-distribution.zip + +Set up alias +^^^^^^^^^^^^ + +In general, Java command line applications are invoked as ``java -jar executable.jar``. 
However, this is just +too verbose and we can shorten the command by defining an alias. + +Let's define an alias for *phenopacket-tools*. Assuming the distribution ZIP was unpacked into +phenopacket-tools-cli-|release| directory, run the following to set up the alias and to check that the alias works: + +.. parsed-literal:: + alias pxf="java -jar $(pwd)/phenopacket-tools-cli-\ |release|\ /phenopacket-tools-cli-|release|.jar" + pxf --help + + +Convert +======= + +Version 1 of the GA4GH Phenopacket schema was released in 2019 to elicit community feedback. +In response to this feedback, the schema was extended and refined and version 2 was released in 2021 +and published in 2022 by the International Organization for Standardization (ISO). + +The `convert` command of *phenopacket-tools* converts version 1 phenopackets into version 2. In this tutorial, +we will first convert an example v1 phenopacket and then 384 v1 phenopackets published by Robinson et al., 2020\ [1]_. + +A toy example +^^^^^^^^^^^^^ + +We will convert a phenopacket ``Schreckenbach-2014-TPM3-II.2.json`` that is bundled +in the *phenopacket-tools* distribution ZIP file. +The phenopacket can be found in `examples/convert` folder next to the executable JAR file. + +.. note:: + See :ref:`rsttutorialexamples` for detailed info of the example phenopackets. + +Due to differences between version 1 and 2, there are two ways to convert *v1* phenopackets into *v2*. +Briefly, the conversion either assumes that the `Variant`\ s are *causal* with respect to a `Disease` of the +v1 phenopacket, or skips conversion of `Variant`\ s altogether. The logic is controlled with ``--convert-variants`` +CLI option and the conversion can be done iff the *v1* phenopacket has one `Disease`. + +.. note:: + See the :ref:`rstconverting` section for more information. 
+ +Let's convert the phenopacket by running:: + + pxf convert -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json > Schreckenbach-2014-TPM3-II.2.v2.json + +The phenopacket represents a case report with several variants that are causal with respect to the disease. +Therefore, we can use ``--convert-variants`` to convert `Variant`\ s into v2 `Interpretation` element:: + + pxf convert --convert-variants \ + -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json > Schreckenbach-2014-TPM3-II.2.v2-with-variants.json + + +A real-life example +^^^^^^^^^^^^^^^^^^^ + +Let's convert 384 individuals described in published case reports with Human Phenotype Ontology terms, +causal genetic variants, and OMIM disease identifiers. + +Let's start by downloading and unpacking the phenopacket dataset. +The phenopacket dataset is available for download from Zenodo\ [2]_. Then, we extract the archive content into +a folder named as ``v1``:: + + curl -o phenopackets.v1.zip https://zenodo.org/record/3905420/files/phenopackets.zip + unzip -d v1 phenopackets.v1.zip + +Let's convert all *v1* phenopackets and store the results in JSON format in a new folder ``v2``:: + + # Make the folder for converted phenopackets. + mkdir -p v2 + + # Convert the phenopackets. + for pp in $(find v1 -name "*.json"); do + pp_name=$(basename ${pp}) + pxf convert --convert-variants -i ${pp} > v2/${pp_name} + done + + printf "Converted %s phenopackets\n" $(ls v2/ | wc -l) + +We converted 384 phenopackets into *v2* format and stored the JSON files in the ``v2`` folder. + +Validate +======== + +The `validate` command of *phenopacket-tools* validates correctness of phenopackets, families and cohorts. +This section focuses on the *off-the-shelf* phenopacket validators. + +.. note:: + See the :ref:`rstvalidation` and the `Java Documentation`_ to learn how to implement a custom validator. + +We will work with a suite of phenopackets that are bundled in the *phenopacket-tools* distribution ZIP file. 
+The phenopackets are located in `examples` folder next to the executable JAR file: + +.. parsed-literal:: + examples=$(pwd)/phenopacket-tools-cli-\ |release|\ /examples + +.. note:: + See :ref:`rsttutorialexamples` for detailed info of the example phenopackets. + +We will describe each validation and show example validation errors and a proposed solution in a table. + + +The validation examples use `Phenopacket`\ s, but the validation functionality is available for all top-level Phenopacket Schema +elements, including `Cohort` and `Family`. + +The validation is implemented for *v2* phenopackets only. The *v1* phenopackets must be converted to *v2* prior to +running validation. + + +Base validation +^^^^^^^^^^^^^^^ + +First, let's check if the phenopackets meet the base requirements, as described by the Phenopacket Schema. +All phenopackets, regardless of their aim or scope, must pass this requirement to be valid. + +.. note:: + See :ref:`rstbasevalidation` for more details. + +All required fields must be present +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `BaseValidator` checks that all required fields are not empty:: + + pxf validate -i ${examples}/validate/base/missing-fields.json + +The validator emits 3 lines with the following issues: + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + 'id' is missing but it is required, Add the phenopacket ID + 'subject.id' is missing but it is required, Add the subject ID + 'phenotypicFeatures[0].type.label' is missing but it is required, Add the `label` attribute into the `type` of the first phenotypic feature + + +All ontologies are defined +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Phenopacket Schema relies heavily on use of ontologies and ontology concepts. `MetaData` element lists +the ontologies used in the particular phenopacket. 
+ +The `MetaDataValidator` checks if the `MetaData` has an ontology `Resource` for all concepts used in the phenopacket:: + + pxf validate -i ${examples}/validate/base/missing-resources.json + +The validator points out the absence of `NCBITaxon` definition: + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + No ontology corresponding to ID 'NCBITaxon:9606' found in MetaData, Add a `Resource` element with `NCBITaxon` definition into `MetaData` + + +Custom validation rules +^^^^^^^^^^^^^^^^^^^^^^^ + +Projects or consortia can enforce specific requirements by designing a custom JSON schema. +For instance, a rare disease project may require presence of several elements that are not required by the default schema: + +1. Subject (proband being investigated) +2. At least one `PhenotypicFeature` element and using HPO terms for phenotypic features +3. Time at last encounter (sub-element of subject), representing the age of the proband + +*Phenopacket-tools* ships with a JSON schema for enforcing the above requirements. +The schema is located at ``examples/validate/custom-json-schema/hpo-rare-disease-schema.json``. + +Using the custom JSON schema via ``--require`` option will point out issues in the 4 example phenopackets:: + + pxf validate --require ${examples}/validate/custom-json-schema/hpo-rare-disease-schema.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-subject.invalid.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-phenotype.invalid.json \ + -i ${examples}/validate/custom-json-schema/marfan.not-hpo.invalid.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json + +.. 
csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + 'subject' is missing but it is required, Add the `Subject` element + 'phenotypicFeatures' is missing but it is required, Add at least one `PhenotypicFeature` + 'phenotypicFeatures[0].type.id' does not match the regex pattern ``^HP:\d{7}$``, Use Human Phenotype Ontology in `PhenotypicFeature`\ s + 'subject.timeAtLastEncounter' is missing but it is required, Add the time at last encounter field + +.. note:: + See :ref:`rstcustomvalidation` for more details. + + +.. _rstphenotypevalidationtutorial: + +Phenotype validation +^^^^^^^^^^^^^^^^^^^^ + +*Phenopacket-tools* offers a validator for checking logical consistency of phenotypic features in the phenopacket. +The phenotype validation requires the Human Phenotype Ontology (HPO) file to work. + +.. note:: + The examples below assume that the latest HPO in JSON format has been downloaded to ``hp.json``. + The HPO file can be downloaded from `HPO releases`_. + +.. note:: + See :ref:`rstphenotypevalidation` for more details. + + +Phenopackets use non-obsolete term IDs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `HpoPhenotypeValidator` checks if the phenopacket contains obsolete HPO terms:: + + pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.obsolete-term.invalid.json + +It turns out that ``marfan.obsolete-term.invalid.json`` uses an obsolete ``HP:0002631`` instead of +the primary ``HP:0002616`` for *Aortic root aneurysm*: + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + Using obsolete id (HP:0002631) instead of current primary id (HP:0002616) in id-C, Replace the obsolete ID with the primary ID + + +The annotation-propagation rule is not violated +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Due to annotation propagation rule, it is a logical error to use both a term and its ancestor +(e.g. *Arachnodactyly* and *Abnormality of finger*). 
+When choosing HPO terms for phenotypic features, the *most* specific terms should be used for the *observed* clinical features. +In contrast, the *least* specific terms should be used for the *excluded* clinical features. + +The `HpoAncestryValidator` checks that the annotation propagation rule is not violated:: + + pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + "Phenotypic features of id-C must not contain both an observed term (Aortic root aneurysm, HP:0002616) and an observed ancestor (Aortic aneurysm, HP:0004942)", Remove the less specific term + +.. note:: + Presence of excluded descendant and observed ancestor does not violate the annotation propagation rule. + A phenopacket with excluded *Aortic root aneurysm* and present *Aortic aneurysm* is valid, + see ``marfan.valid.json``. + + +Annotation of organ systems +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We can validate presence of annotation for specific organ systems in a phenopacket. + +Using the term IDs of the top-level HPO terms, we can validate annotation of +`Eye `_, +`Cardiovascular `_, and +`Respiratory `_ organ systems +in 3 phenopackets of toy `Marfan syndrome `_ patients:: + + pxf validate --hpo hp.json \ + --organ-system HP:0000478 --organ-system HP:0001626 --organ-system HP:0002086 \ + -i ${examples}/validate/organ-systems/marfan.all-organ-system-annotated.valid.json \ + -i ${examples}/validate/organ-systems/marfan.missing-eye-annotation.invalid.json \ + -i ${examples}/validate/organ-systems/marfan.no-abnormalities.valid.json + +.. note:: + Organ system validation requires HPO ontology. See the :ref:`rstphenotypevalidationtutorial` for more details about getting + the HPO file. + +The `HpoOrganSystemValidator` will point out one error in the `marfan.missing-eye-annotation.invalid.json` phenopacket: + +.. 
csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + Missing annotation for Abnormality of the eye [HP:0000478] in id-C, Annotate the eye or exclude any abnormality. + +.. note:: + See :ref:`rstorgsysvalidation` for more details. + + +.. [1] https://pubmed.ncbi.nlm.nih.gov/32755546 +.. [2] https://zenodo.org/record/3905420 +.. _Java Documentation: https://javadoc.io/doc/org.phenopackets.phenopackettools/phenopacket-tools-validator-core/latest/org.phenopackets.phenopackettools.validator.core/module-summary.html +.. _HPO releases: https://hpo.jax.org/app/data/ontology \ No newline at end of file diff --git a/docs/tutorial_examples.rst b/docs/tutorial_examples.rst new file mode 100644 index 00000000..17f600b5 --- /dev/null +++ b/docs/tutorial_examples.rst @@ -0,0 +1,114 @@ +.. _rsttutorialexamples: + +==================== +Example phenopackets +==================== + +A set of example phenopackets is distributed with the *phenopacket-tools* binary. The example files should be used +to demonstrate the tools' functionality. + +The files are grouped in sub-folders by the target command:: + + examples + ├── convert + └── validate + ├── base + ├── custom-json-schema + ├── organ-systems + └── phenotype-validation + + +Convert +^^^^^^^ + +The ``convert`` folder contains one v1 phenopacket to demonstrate the conversion functionality:: + + Schreckenbach-2014-TPM3-II.2.json + +The phenopacket describes a case report of a 45 years-old female diagnosed with +`NEMALINE MYOPATHY 1; NEM1 `_ +caused by heterozygous mutation in `TPM3 `_. + + +Validate +^^^^^^^^ + +The ``validate`` directory contains files for demonstrating *off-the-shelf* phenopacket validation functionalities. + + +``base`` - base validation functionality +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The folder contains a few phenopackets for demonstrating the base validation functionality of *phenopacket-tools*; +the validation that **any phenopacket must pass**. + +.. 
csv-table:: + :header: "File name", "Description" + + missing-fields.json, "An invalid phenopacket with missing `id`, `subject.id` and `phenotypicFeatures[0].type.label` attributes." + missing-fields-valid.json, A valid version of the above phenopacket with IDs and the label. + missing-resources.json, An invalid phenopacket with missing `Resource` for the `NCBITaxon:9606` ontology concept used to represent organism of the subject. + missing-resources-valid.json, A valid version of the above phenopacket with the `Resource` for describing `NCBITaxon`. + + +``custom-json-schema`` - validate custom requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A bunch of phenopackets for showing how a custom JSON schema can be used to validate user-specific requirements. + +.. csv-table:: + :header: "File name", "Description" + + hpo-rare-disease-schema.json, A custom JSON schema for enforcing user-specific requirements. + marfan.no-phenotype.invalid.json, The phenopacket is *invalid* since it contains no phenotypic features. + marfan.no-subject.invalid.json, The phenopacket is *invalid* since the `subject` is missing. + marfan.no-time-at-last-encounter.invalid.json, The phenopacket is *invalid* due to missing time at last encounter. + marfan.not-hpo.invalid.json, The phenopacket is *invalid* because HPO terms are not used to represent phenotypic features. + marfan.valid.json, A phenopacket that meets the custom requirements. + + +``organ-systems`` - validate annotation of organ systems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Phenopackets for showing organ system validation. As an example, we work with phenopackets of patients with +`Marfan syndrome `_ and we require annotation +of +`Eye `_, +`Cardiovascular `_, and +`Respiratory `_ organ systems \ +either by *excluding* the corresponding top-level HPO term or by adding a descendent term. + +The phenopackets include + +.. 
list-table:: + :header-rows: 1 + + * - File name + - Description + * - marfan.no-abnormalities.valid.json + - A valid phenopacket of a proband with no abnormalities of the target organ systems. + * - marfan.all-organ-system-annotated.valid.json + - A valid phenopacket of a proband who had an abnormality of eye and cardiovascular systems but + no abnormality of respiratory system. Note that it is OK to have phenotypic feature of other organ system, + such as Arachnodactyly in this case. + * - marfan.missing-eye-annotation.invalid.json + - An invalid phenopacket of a proband without any annotation of the eye. + +``phenotype-validation`` - validate custom requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Phenopackets for demonstrating ontology-based validation. + +.. list-table:: + :header-rows: 1 + + * - File name + - Description + * - marfan.annotation-propagation-rule.invalid.json + - | Invalid phenopacket due to logical inconsistency in phenotypic features. The phenopacket contains + | both *Aortic root aneurysm* and its ancestor *Aortic aneurysm*. Only the more specific term should be used. + * - marfan.obsolete-term.invalid.json + - The phenopacket is *invalid* because it contains an obsolete HPO term. + * - marfan.valid.json + - A phenopacket that meets the phenotype validation requirements. + diff --git a/docs/validation.rst b/docs/validation.rst index 1c87acb6..b9b5ac62 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -1,47 +1,67 @@ -.. _rstvalidating: +.. _rstvalidation: ======================= Validating Phenopackets ======================= +Phenopackets schema uses protobuf, an exchange format developed in 2008 by Google. We refer readers to the excellent +`Wikipedia page `_ +on Protobuf and to `Google’s documentation `_ for details. +In Protobuf (version 3, which is what the Phenopacket Schema uses), all fields are optional. 
+However, the Phenopacket Schema defines certain fields to be required +(See `documentation `_ for details). +Moreover, projects and consortia can require application of specific constraints and requirements for the phenopackets. +*Phenopacket-tools* provides a functionality for validating phenopackets. +This document provides a comprehensive description of the functionality of the *off-the-shelf* validators +as well as the validation workflow API. -Protobuf -^^^^^^^^ -Phenopackets schema uses protobuf, an exchange format developed -in 2008 by Google. We refer readers to the excellent -`Wikipedia page `_ -on Protobuf and to `Google’s documentation `_ -for details. -In Protobuf (version 3, which is what the Phenopacket Schema uses), -all fields are optional. However, the Phenopacket Schema defines -certain fields to be optional -(See `documentation `_ for details). -Also, a phenopacket message can be represented in native protobuf (binary) format, JSON, YAML, and -other formats. +Validation workflow +^^^^^^^^^^^^^^^^^^^ + +*Phenopacket-tools* defines an API for phenopacket validation workflow. The workflow consists of +a list of validation steps. There are two types of steps: *syntax* and *semantic*. The syntax steps check syntax +and cardinality of each component separately. The semantic validators are run after syntax checks and validate +the components in the context of the entire phenopacket. + +There is one mandatory syntax validation step that is always run first: the *base* validation. The base validation +ensures the phenopacket message meets the requirements of the Phenopacket Schema. + +The results of the validation are aggregated into a container object that consists +of immutable value objects that describe the performed validations and the validation results suitable +for reporting back to the user. + +.. 
Additional constraints and requirements may be made for phenopackets that are used in a specific + project or for a specific collaboration or consortium. For instance, a rare-disease consortium + may require that all phenotypic features be recorded using valid HPO terms. An example class is + provided that checks all ``PhenotypicFeature`` elements, ensures that they use HPO terms with valid + (i.e., primary) id's, and checks whether both a term and an ancestor of the term are used - if so + a warning is emitted, because an annotation with a specific HPO term + (e.g., `Perimembranous ventricular septal defect `_) + implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has + `Ventricular septal defect `_). + +**API** + +See the ``TODO - add JavaDoc link`` for the API documentation. -Validation -^^^^^^^^^^ +.. TODO - refer to org.phenopackets.phenopackettools.validator.jsonschema module +.. Describe validation workflow in general -The phenopacket-tools library offers JSON-Schema-based and semantic validations. The syntactic validation -is done using JSON schema. Additionally, an interface is provided to perform arbitrary kinds of validation. -This validation should be performed for all phenophenopackets. +*Off-the-shelf* validators +^^^^^^^^^^^^^^^^^^^^^^^^^^ -Additional constraints and requirements may be made for phenopackets that are used in a specific -project or for a specific collaboration or consortium. For instance, a rare-disease consortium -may require that all phenotypic features be recorded using valid HPO terms. 
An example class is -provided that checks all ``PhenotypicFeature`` elements, ensures that they use HPO terms with valid -(i.e., primary) id's, and checks whether both a term and an ancestor of the term are used - if so -a wanring is emitted, because an annotation with a specific HPO term -(e.g., `Perimembranous ventricular septal defect `_) -implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has -`Ventricular septal defect `_). +.. TODO - continue +TODO - describe *off-the-shelf* validators in great detail. + +.. _rstbasevalidation: Base validation -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ + All phenopackets should be tested against the base JSON Schema (analogously for all ``Family`` and ``Cohort`` messages). In code, this can be implemented as follows. @@ -64,6 +84,82 @@ In code, this can be implemented as follows. System.out.println("Error opening the phenopacket: " + e); } +**API** + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to ... and to org/phenopackets/phenopackettools/validator/core/metadata + +.. _rstphenotypevalidation: + +Phenotype validation +~~~~~~~~~~~~~~~~~~~~ + +TODO - write +.. TODO - continue + +**API** + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to org/phenopackets/phenopackettools/validator/core/phenotype + +.. _rstcustomvalidation: + +Custom validation +~~~~~~~~~~~~~~~~~ + +TODO - write +.. TODO - continue + + +**API** + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to TODO - somewhere in JSON-schema validation + + +.. _rstorgsysvalidation: + +Organ system validation +~~~~~~~~~~~~~~~~~~~~~~~ + +TODO - write +.. TODO - continue + +We can validate presence of annotation for specific organ systems in a phenopacket. + +As an example, we work with toy phenopackets that represent patients with +`Marfan syndrome `_. 
Due to the nature of the Marfan syndrome, +we may require annotation of three organ systems: + +* Eye +* Cardiovascular system +* Respiratory system + +The annotation is done either by *excluding* the corresponding top-level HPO term or by adding a descendent term: + +.. list-table:: + :header-rows: 1 + + * - Organ system + - Top-level HPO term + - Example descendent + * - Eye + - `Abnormality of the eye `_ + - `Ectopia lentis `_ + * - Cardiovascular system + - `Abnormality of the cardiovascular system `_ + - `Mitral regurgitation `_ + * - Respiratory system + - `Abnormality of the respiratory system `_ + - `Pneumothorax `_ + +**API** + +See the ``TODO - add JavaDoc link`` for the API documentation. +.. TODO - refer to org/phenopackets/phenopackettools/validator/core/phenotype/orgsys diff --git a/phenopacket-tools-builder/pom.xml b/phenopacket-tools-builder/pom.xml index f403a9ae..5570812c 100644 --- a/phenopacket-tools-builder/pom.xml +++ b/phenopacket-tools-builder/pom.xml @@ -7,12 +7,17 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7 phenopacket-tools-builder + + org.phenopackets.phenopackettools + phenopacket-tools-core + ${project.parent.version} + com.google.protobuf protobuf-java diff --git a/phenopacket-tools-builder/src/main/java/module-info.java b/phenopacket-tools-builder/src/main/java/module-info.java index 9ca32aca..123b5a56 100644 --- a/phenopacket-tools-builder/src/main/java/module-info.java +++ b/phenopacket-tools-builder/src/main/java/module-info.java @@ -1,4 +1,10 @@ +/** + * A module with pre-defined ontology constants, convenience methods, and concise builders + * to simplify phenopacket construction. + */ module org.phenopackets.phenopackettools.builder { + // No need to make it transitive since we only use runtime exceptions. + requires org.phenopackets.phenopackettools.core; requires transitive org.phenopackets.schema; // Required due to `TimestampBuilder`. 
//noinspection requires-transitive-automatic @@ -7,5 +13,4 @@ exports org.phenopackets.phenopackettools.builder; exports org.phenopackets.phenopackettools.builder.builders; exports org.phenopackets.phenopackettools.builder.constants; - exports org.phenopackets.phenopackettools.builder.exceptions; } \ No newline at end of file diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java index 9f8a5654..22c1256f 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java @@ -1,6 +1,6 @@ package org.phenopackets.phenopackettools.builder; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v2.Family; import org.phenopackets.schema.v2.Phenopacket; import org.phenopackets.schema.v2.core.File; @@ -57,11 +57,11 @@ public static FamilyBuilder create(String familyId) { public Family build() { if (! builder.hasMetaData()) { - throw new PhenotoolsRuntimeException("MetaData element missing from Family"); + throw new PhenopacketToolsRuntimeException("MetaData element missing from Family"); } else if (! builder.hasPedigree()) { - throw new PhenotoolsRuntimeException("Pedigree element missing from Family"); + throw new PhenopacketToolsRuntimeException("Pedigree element missing from Family"); } else if (! 
builder.hasProband()) { - throw new PhenotoolsRuntimeException("Proband Phenopacket element missing from Family"); + throw new PhenopacketToolsRuntimeException("Proband Phenopacket element missing from Family"); } return builder.build(); diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java index 4711e498..f063b3df 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java @@ -1,6 +1,6 @@ package org.phenopackets.phenopackettools.builder.builders; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v2.core.Age; import org.phenopackets.schema.v2.core.AgeRange; import org.phenopackets.schema.v2.core.GestationalAge; @@ -17,7 +17,7 @@ public static Age age(String iso8601duration) { try { Period.parse(iso8601duration); } catch (DateTimeParseException ex) { - throw new PhenotoolsRuntimeException("Invalid iso8601 age (period) string: \"" + iso8601duration + "\"."); + throw new PhenopacketToolsRuntimeException("Invalid iso8601 age (period) string: \"" + iso8601duration + "\"."); } return Age.newBuilder().setIso8601Duration(iso8601duration).build(); } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java index 9c91fd27..8ca0ad0b 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java +++ 
b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java @@ -2,7 +2,7 @@ import org.ga4gh.vrs.v1.*; import org.ga4gh.vrs.v1.Number; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; public class CopyNumberBuilder { @@ -50,7 +50,7 @@ public CopyNumberBuilder threeCopies() { public CopyNumberBuilder nCopies(int n) { if (n < 0) { - throw new PhenotoolsRuntimeException("Negative copy numbers are not allowed"); + throw new PhenopacketToolsRuntimeException("Negative copy numbers are not allowed"); } builder.setNumber(Number.newBuilder().setValue(n)); return this; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java index 6c281190..2eb52c61 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java @@ -8,16 +8,27 @@ public class GeneDescriptorBuilder { private final GeneDescriptor.Builder builder; - private GeneDescriptorBuilder(String identifier, String symbol) { - builder = GeneDescriptor.newBuilder().setValueId(identifier).setSymbol(symbol); + private GeneDescriptorBuilder(String valueId, String symbol) { + builder = GeneDescriptor.newBuilder().setValueId(valueId).setSymbol(symbol); } - public static GeneDescriptor of(String identifier, String symbol) { - return GeneDescriptor.newBuilder().setValueId(identifier).setSymbol(symbol).build(); + /** + * @param valueId Official identifier of the gene, e.g., HGNC:3603 + * @param symbol Official gene symbol, e.g., FBN1 + * @return completely built {@link 
GeneDescriptor} object + */ + public static GeneDescriptor of(String valueId, String symbol) { + return GeneDescriptor.newBuilder().setValueId(valueId).setSymbol(symbol).build(); } - public static GeneDescriptorBuilder builder(String identifier, String symbol) { - return new GeneDescriptorBuilder(identifier, symbol); + + /** + * @param valueId Official identifier of the gene, e.g., HGNC:3603 + * @param symbol Official gene symbol, e.g., FBN1 + * @return GeneDescriptorBuilder that can be used to set additional field values + */ + public static GeneDescriptorBuilder builder(String valueId, String symbol) { + return new GeneDescriptorBuilder(valueId, symbol); } public GeneDescriptorBuilder description(String desc) { diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java index dcd434ac..d1efb9ea 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java @@ -45,10 +45,10 @@ public PhenotypicFeatureBuilder onset(TimeElement time) { } /** - * @param isoISO8601 A string such as P10Y4M2D representing the age of onset/observation + * @param iso8601 A string such as P10Y4M2D representing the age of onset/observation */ - public PhenotypicFeatureBuilder isoISO8601onset(String isoISO8601) { - builder.setOnset(TimeElements.age(isoISO8601)); + public PhenotypicFeatureBuilder iso8601onset(String iso8601) { + builder.setOnset(TimeElements.age(iso8601)); return this; } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java 
b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index aa686383..c527b41c 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -77,6 +77,9 @@ private Resources() { .setUrl("http://purl.obolibrary.org/obo/so.owl") .setIriPrefix("http://purl.obolibrary.org/obo/SO_"); + /** this is the version of the UCUM schema that has been valid since 2017-11-21 */ + private static final String DEFAULT_UCUM_VERSION = "2.1"; + private static final Resource.Builder UO_BUILDER = Resource.newBuilder() .setId("uo") .setName("Units of measurement ontology") @@ -84,6 +87,36 @@ private Resources() { .setUrl("http://purl.obolibrary.org/obo/uo.owl") .setIriPrefix("http://purl.obolibrary.org/obo/UO_"); + private static final Resource.Builder HGNC_BUILDER = Resource.newBuilder() + .setId("hgnc") + .setName("HUGO Gene Nomenclature Committee") + .setNamespacePrefix("HGNC") + .setUrl("https://www.genenames.org") + .setIriPrefix("https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/"); + + private static final Resource.Builder UCUM_BUILDER = Resource.newBuilder() + .setId("ucum") + .setName("Unified Code for Units of Measure") + .setNamespacePrefix("UCUM") + .setUrl("https://ucum.org") + .setIriPrefix("https://ucum.org/"); + + private static final Resource.Builder LOINC_BUILDER = Resource.newBuilder() + .setId("loinc") + .setName("Logical Observation Identifiers Names and Codes") + .setNamespacePrefix("LOINC") + .setUrl("https://loinc.org") + .setIriPrefix("https://loinc.org/"); + + private static final Resource.Builder DRUG_CENTRAL_BUILDER = Resource.newBuilder() + .setId("drugcentral") + .setName("Drug Central") + .setNamespacePrefix("DrugCentral") + .setUrl("https://drugcentral.org/") + .setIriPrefix("https://drugcentral.org/drugcard/"); + + public 
static Resource hgncVersion(String version) { return HGNC_BUILDER.setVersion(version).build(); } + public static Resource hpoVersion(String version) { return HPO_BUILDER.setVersion(version).build(); } @@ -127,4 +160,20 @@ public static Resource soVersion(String version) { public static Resource uoVersion(String version) { return UO_BUILDER.setVersion(version).build(); } + + public static Resource ucumVersion(String version) { + return UCUM_BUILDER.setVersion(version).build(); + } + + public static Resource ucum() { + return ucumVersion(DEFAULT_UCUM_VERSION); + } + + public static Resource loincVersion(String version) { + return LOINC_BUILDER.setVersion(version).build(); + } + + public static Resource drugCentralVersion(String version) { + return DRUG_CENTRAL_BUILDER.setVersion(version).build(); + } } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java new file mode 100644 index 00000000..e493d218 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with convenience methods and builders for creating Phenopacket Schema building blocks. + */ +package org.phenopackets.phenopackettools.builder.builders; \ No newline at end of file diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java new file mode 100644 index 00000000..bf0849fb --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java @@ -0,0 +1,30 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
+package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class AdministrationRoute { + + private static final OntologyClass INTRAVENOUS_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38276", "Intravenous Route of Administration"); + private static final OntologyClass INTRAARTERIAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38222", "Intraarterial Route of Administration"); + private static final OntologyClass WOUND_IRRIGATION_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C183503", "Administration via Wound Irrigation"); + private static final OntologyClass NEBULIZER_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C149695", "Nebulizer Route of Administration"); + private static final OntologyClass ORAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38288", "Oral Route of Administration"); + private static final OntologyClass INTRATHECAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38267", "Intrathecal Route of Administration"); + private static final OntologyClass PERIDURAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38677", "Peridural Route of Administration"); + private static final OntologyClass TOPICAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38304", "Topical Route of Administration"); + private static final OntologyClass TRANSDERMAL = OntologyClassBuilder.ontologyClass("NCIT:C38305", "Transdermal Route of Administration"); + + + public static OntologyClass intravenous() { return INTRAVENOUS_ROUTE; } + public static OntologyClass intraarterial() { return INTRAARTERIAL_ROUTE; } + public static OntologyClass woundIrrigation() { return WOUND_IRRIGATION_ROUTE; } + public static OntologyClass nebulizer() { return NEBULIZER_ROUTE; } + public static OntologyClass oral() { return ORAL_ROUTE; } + public static OntologyClass intrathecal() { return INTRATHECAL_ROUTE; } + public static 
OntologyClass peridural() { return PERIDURAL_ROUTE; } + public static OntologyClass topical() { return TOPICAL_ROUTE; } + public static OntologyClass transdermal() { return TRANSDERMAL; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java index 1f5882bf..4258c054 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java index da0f8d66..66f73070 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java new file mode 100644 index 00000000..75bd27b5 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java @@ -0,0 +1,30 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class BiospecimenType { + + private static final OntologyClass BONE_MARROW_ASPIRATE = OntologyClassBuilder.ontologyClass("NCIT:C133261", "Bone Marrow Aspirate"); + private static final OntologyClass BLOOD_DNA = OntologyClassBuilder.ontologyClass("NCIT:C158416", "Blood DNA"); + private static final OntologyClass CSF_SAMPLE = OntologyClassBuilder.ontologyClass("NCIT:C185194", "Cerebrospinal Fluid Sample"); + private static final OntologyClass FORMALIN_FIXED_PARAFIN_DNA = OntologyClassBuilder.ontologyClass("NCIT:C156435", "Formalin-Fixed Paraffin-Embedded DNA"); + private static final OntologyClass BAL_FLUID = OntologyClassBuilder.ontologyClass("NCIT:C13195", "Bronchoalveolar Lavage Fluid"); + private static final OntologyClass PERICARDIAL_FLUID_SAMPLE = OntologyClassBuilder.ontologyClass("NCIT:C187062", "Pericardial Fluid Specimen"); + private static final OntologyClass PERTONIAL_FLUID_SAMPLE = OntologyClassBuilder.ontologyClass("NCIT:C185197", "Peritoneal Fluid Sample"); + private static final OntologyClass TOTAL_RNA = OntologyClassBuilder.ontologyClass("NCIT:C163995", "Total RNA"); + private static final 
OntologyClass TUMOR_TISSUE = OntologyClassBuilder.ontologyClass("NCIT:C18009", "Tumor Tissue"); + + + public static OntologyClass boneMarrowAspirate() { return BONE_MARROW_ASPIRATE; } + public static OntologyClass bloodDNA() { return BLOOD_DNA; } + public static OntologyClass cerebrospinalFluidSample() { return CSF_SAMPLE; } + public static OntologyClass formalinFixedParaffinEmbeddedDNA() { return FORMALIN_FIXED_PARAFIN_DNA; } + public static OntologyClass bronchoalveolarLavageFluid() { return BAL_FLUID; } + public static OntologyClass pericardialFluidSpecimen() { return PERICARDIAL_FLUID_SAMPLE; } + public static OntologyClass peritonealFluidSample() { return PERTONIAL_FLUID_SAMPLE; } + public static OntologyClass totalRNA() { return TOTAL_RNA; } + public static OntologyClass tumorTissue() { return TUMOR_TISSUE; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java new file mode 100644 index 00000000..aa15befb --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java @@ -0,0 +1,24 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
+package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class DiseaseGrade { + + private static final OntologyClass GRADE_1 = OntologyClassBuilder.ontologyClass("NCIT:C28077", "Grade 1"); + private static final OntologyClass GRADE_2 = OntologyClassBuilder.ontologyClass("NCIT:C28078", "Grade 2"); + private static final OntologyClass GRADE_3 = OntologyClassBuilder.ontologyClass("NCIT:C28079", "Grade 3"); + private static final OntologyClass GRADE_3A = OntologyClassBuilder.ontologyClass("NCIT:C28080", "Grade 3a"); + private static final OntologyClass GRADE_3B = OntologyClassBuilder.ontologyClass("NCIT:C28081", "Grade 3b"); + private static final OntologyClass GRADE_4 = OntologyClassBuilder.ontologyClass("NCIT:C28082", "Grade 4"); + + + public static OntologyClass grade1() { return GRADE_1; } + public static OntologyClass grade2() { return GRADE_2; } + public static OntologyClass grade3() { return GRADE_3; } + public static OntologyClass grade3a() { return GRADE_3A; } + public static OntologyClass grade3b() { return GRADE_3B; } + public static OntologyClass grade4() { return GRADE_4; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java new file mode 100644 index 00000000..15c1551f --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java @@ -0,0 +1,32 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
+package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class DiseaseStage { + + private static final OntologyClass STAGE_0 = OntologyClassBuilder.ontologyClass("NCIT:C28051", "Stage 0"); + private static final OntologyClass STAGE_I = OntologyClassBuilder.ontologyClass("NCIT:C27966", "Stage I"); + private static final OntologyClass STAGE_II = OntologyClassBuilder.ontologyClass("NCIT:C28054", "Stage II"); + private static final OntologyClass STAGE_III = OntologyClassBuilder.ontologyClass("NCIT:C27970", "Stage III"); + private static final OntologyClass STAGE_IV = OntologyClassBuilder.ontologyClass("NCIT:C27971", "Stage IV"); + private static final OntologyClass NYHA_I = OntologyClassBuilder.ontologyClass("NCIT:C66904", "New York Heart Association Class I"); + private static final OntologyClass NYHA_II = OntologyClassBuilder.ontologyClass("NCIT:C66905", "New York Heart Association Class II"); + private static final OntologyClass NYHA_III = OntologyClassBuilder.ontologyClass("NCIT:C66907", "New York Heart Association Class III"); + private static final OntologyClass NYHA_III_IV = OntologyClassBuilder.ontologyClass("NCIT:C7922", "New York Heart Association Class III/IV"); + private static final OntologyClass NYHA_IV = OntologyClassBuilder.ontologyClass("NCIT:C66908", "New York Heart Association Class IV"); + + + public static OntologyClass stage0() { return STAGE_0; } + public static OntologyClass stageI() { return STAGE_I; } + public static OntologyClass stageII() { return STAGE_II; } + public static OntologyClass stageIII() { return STAGE_III; } + public static OntologyClass stageIV() { return STAGE_IV; } + public static OntologyClass nyhaClassI() { return NYHA_I; } + public static OntologyClass nyhaClassII() { return NYHA_II; } + public static OntologyClass nyhaClassIII() { return NYHA_III; } + public static 
OntologyClass nyhaClassIII_or_IV() { return NYHA_III_IV; } + public static OntologyClass nyhaClassIV() { return NYHA_IV; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java new file mode 100644 index 00000000..06f5cf6b --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java @@ -0,0 +1,22 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class Evidence { + + private static final OntologyClass AUTHOR_STATEMENT_FROM_PCS = OntologyClassBuilder.ontologyClass("ECO:0006016", "author statement from published clinical study"); + private static final OntologyClass AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC = OntologyClassBuilder.ontologyClass("ECO:0007539", "author statement from published clinical study used in automatic assertion"); + private static final OntologyClass AUTHOR_STATEMENT_FROM_PCS_MANUAL = OntologyClassBuilder.ontologyClass("ECO:0006017", "author statement from published clinical study used in manual assertion"); + private static final OntologyClass AUTHOR_STATEMENT_TRACEABLE_REFERENCE = OntologyClassBuilder.ontologyClass("ECO:0000033", "author statement supported by traceable reference"); + private static final OntologyClass SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE = OntologyClassBuilder.ontologyClass("ECO:0006154", "self-reported patient statement evidence"); + + + public static OntologyClass authorStatementFromPublishedClinicalStudy() { return AUTHOR_STATEMENT_FROM_PCS; } + public static OntologyClass authorStatementFromPublishedClinicalStudyAutomaticAssertion() { return 
AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC; } + public static OntologyClass authorStatementFromPublishedClinicalStudyManualAssertion() { return AUTHOR_STATEMENT_FROM_PCS_MANUAL; } + public static OntologyClass authorStatementSupportedByTraceableReference() { return AUTHOR_STATEMENT_TRACEABLE_REFERENCE; } + public static OntologyClass selfReportedPatientStatementEvidence() { return SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java index d3174f70..1def073d 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java index 59c993e1..447637ef 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java new file mode 100644 index 00000000..024190e0 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java @@ -0,0 +1,16 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class MaterialSample { + + private static final OntologyClass ABNORMAL_SAMPLE = OntologyClassBuilder.ontologyClass("EFO:0009655", "abnormal sample"); + private static final OntologyClass REFERENCE_SAMPLE = OntologyClassBuilder.ontologyClass("EFO:0009654", "reference sample"); + + + public static OntologyClass abnormalSample() { return ABNORMAL_SAMPLE; } + public static OntologyClass referenceSample() { return REFERENCE_SAMPLE; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java index f47229ed..0324e45a 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; @@ -17,13 +18,13 @@ public class MedicalActions { public static OntologyClass adverseEvent() { return ADVERSE_EVENT; } - public static OntologyClass fourtimesDaily() { return FOUR_TIMES_DAILY; } + public static OntologyClass fourTimesDaily() { return FOUR_TIMES_DAILY; } public static OntologyClass intraArterialAdministration() { return INTRA_ARTERIAL; } public static OntologyClass intravenousAdministration() { return IV_ADMINISTRATION; } public static OntologyClass oralAdministration() { return ORAL_ADMINISTRATION; } public static OntologyClass once() { return ONCE; } public static OntologyClass onceDaily() { return ONCE_DAILY; } - public static OntologyClass threetimesDaily() { return THREE_TIMES_DAILY; } + public static OntologyClass threeTimesDaily() { return THREE_TIMES_DAILY; } public static OntologyClass twiceDaily() { return TWICE_DAILY; } } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java index e15d4ed8..5dcbfd7e 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java index e888c262..a2ecc900 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java new file mode 100644 index 00000000..d6954e45 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java @@ -0,0 +1,84 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
+package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class PathologicalTnm { + + private static final OntologyClass PM0_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48740", "pM0 Stage Finding"); + private static final OntologyClass PM1_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48741", "pM1 Stage Finding"); + private static final OntologyClass PM1A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48742", "pM1a Stage Finding"); + private static final OntologyClass PM1B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48743", "pM1b Stage Finding"); + private static final OntologyClass PM1C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48744", "pM1c Stage Finding"); + private static final OntologyClass PN0_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48745", "pN0 Stage Finding"); + private static final OntologyClass PN1_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48746", "pN1 Stage Finding"); + private static final OntologyClass PN1A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48747", "pN1a Stage Finding"); + private static final OntologyClass PN1B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48748", "pN1b Stage Finding"); + private static final OntologyClass PN1C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48749", "pN1c Stage Finding"); + private static final OntologyClass PN2_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48750", "pN2 Stage Finding"); + private static final OntologyClass PN2A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48751", "pN2a Stage Finding"); + private static final OntologyClass PN2B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48752", "pN2b Stage Finding"); + private static final OntologyClass PN2C_STAGE_FINDING = 
OntologyClassBuilder.ontologyClass("NCIT:C48753", "pN2c Stage Finding"); + private static final OntologyClass PN3_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48754", "pN3 Stage Finding"); + private static final OntologyClass PN3A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48755", "pN3a Stage Finding"); + private static final OntologyClass PN3B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48756", "pN3b Stage Finding"); + private static final OntologyClass PN3C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48757", "pN3c Stage Finding"); + private static final OntologyClass PT0_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48758", "pT0 Stage Finding"); + private static final OntologyClass PT1_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48759", "pT1 Stage Finding"); + private static final OntologyClass PT1A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48760", "pT1a Stage Finding"); + private static final OntologyClass PT1B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48761", "pT1b Stage Finding"); + private static final OntologyClass PT1C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48763", "pT1c Stage Finding"); + private static final OntologyClass PT2_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48764", "pT2 Stage Finding"); + private static final OntologyClass PT2A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48765", "pT2a Stage Finding"); + private static final OntologyClass PT2B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48766", "pT2b Stage Finding"); + private static final OntologyClass PT2C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48767", "pT2c Stage Finding"); + private static final OntologyClass PT3_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48768", "pT3 Stage Finding"); + private static final OntologyClass PT3A_STAGE_FINDING = 
OntologyClassBuilder.ontologyClass("NCIT:C48769", "pT3a Stage Finding"); + private static final OntologyClass PT3B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48770", "pT3b Stage Finding"); + private static final OntologyClass PT3C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48771", "pT3c Stage Finding"); + private static final OntologyClass PT4_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48772", "pT4 Stage Finding"); + private static final OntologyClass PT4A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48773", "pT4a Stage Finding"); + private static final OntologyClass PT4B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48774", "pT4b Stage Finding"); + private static final OntologyClass PT4C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48775", "pT4c Stage Finding"); + private static final OntologyClass PT4D_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48776", "pT4d Stage Finding"); + + + public static OntologyClass pM0StageFinding() { return PM0_STAGE_FINDING; } + public static OntologyClass pM1StageFinding() { return PM1_STAGE_FINDING; } + public static OntologyClass pM1aStageFinding() { return PM1A_STAGE_FINDING; } + public static OntologyClass pM1bStageFinding() { return PM1B_STAGE_FINDING; } + public static OntologyClass pM1cStageFinding() { return PM1C_STAGE_FINDING; } + public static OntologyClass pN0StageFinding() { return PN0_STAGE_FINDING; } + public static OntologyClass pN1StageFinding() { return PN1_STAGE_FINDING; } + public static OntologyClass pN1aStageFinding() { return PN1A_STAGE_FINDING; } + public static OntologyClass pN1bStageFinding() { return PN1B_STAGE_FINDING; } + public static OntologyClass pN1cStageFinding() { return PN1C_STAGE_FINDING; } + public static OntologyClass pN2StageFinding() { return PN2_STAGE_FINDING; } + public static OntologyClass pN2aStageFinding() { return PN2A_STAGE_FINDING; } + public static OntologyClass pN2bStageFinding() { return 
PN2B_STAGE_FINDING; } + public static OntologyClass pN2cStageFinding() { return PN2C_STAGE_FINDING; } + public static OntologyClass pN3StageFinding() { return PN3_STAGE_FINDING; } + public static OntologyClass pN3aStageFinding() { return PN3A_STAGE_FINDING; } + public static OntologyClass pN3bStageFinding() { return PN3B_STAGE_FINDING; } + public static OntologyClass pN3cStageFinding() { return PN3C_STAGE_FINDING; } + public static OntologyClass pT0StageFinding() { return PT0_STAGE_FINDING; } + public static OntologyClass pT1StageFinding() { return PT1_STAGE_FINDING; } + public static OntologyClass pT1aStageFinding() { return PT1A_STAGE_FINDING; } + public static OntologyClass pT1bStageFinding() { return PT1B_STAGE_FINDING; } + public static OntologyClass pT1cStageFinding() { return PT1C_STAGE_FINDING; } + public static OntologyClass pT2StageFinding() { return PT2_STAGE_FINDING; } + public static OntologyClass pT2aStageFinding() { return PT2A_STAGE_FINDING; } + public static OntologyClass pT2bStageFinding() { return PT2B_STAGE_FINDING; } + public static OntologyClass pT2cStageFinding() { return PT2C_STAGE_FINDING; } + public static OntologyClass pT3StageFinding() { return PT3_STAGE_FINDING; } + public static OntologyClass pT3aStageFinding() { return PT3A_STAGE_FINDING; } + public static OntologyClass pT3bStageFinding() { return PT3B_STAGE_FINDING; } + public static OntologyClass pT3cStageFinding() { return PT3C_STAGE_FINDING; } + public static OntologyClass pT4StageFinding() { return PT4_STAGE_FINDING; } + public static OntologyClass pT4aStageFinding() { return PT4A_STAGE_FINDING; } + public static OntologyClass pT4bStageFinding() { return PT4B_STAGE_FINDING; } + public static OntologyClass pT4cStageFinding() { return PT4C_STAGE_FINDING; } + public static OntologyClass pT4dStageFinding() { return PT4D_STAGE_FINDING; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java 
b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java index dec48018..7f4cd19b 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; @@ -5,11 +6,33 @@ public class Response { - private static final OntologyClass FAVORABLE = OntologyClassBuilder.ontologyClass("NCIT:C102560", "Favorable"); - private static final OntologyClass UNFAVORABLE = OntologyClassBuilder.ontologyClass("NCIT:C102561", "Unfavorable"); + private static final OntologyClass FAVORABLE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123584", "Favorable Response"); + private static final OntologyClass UNFAVORABLE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123617", "Unfavorable Response"); + private static final OntologyClass NO_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123600", "No Response"); + private static final OntologyClass STRINGENT_COMPLETE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123614", "Stringent Complete Response"); + private static final OntologyClass MINIMAL_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123598", "Minimal Response"); + private static final OntologyClass COMPLETE_REMISSION = OntologyClassBuilder.ontologyClass("NCIT:C4870", "Complete Remission"); + private static final OntologyClass PARTIAL_REMISSION = OntologyClassBuilder.ontologyClass("NCIT:C18058", "Partial Remission"); + private static final OntologyClass PRIMARY_REFRACTORY = OntologyClassBuilder.ontologyClass("NCIT:C70604", "Primary Refractory"); + private static final OntologyClass iRECIST_COMPLETE_RESPONSE = 
OntologyClassBuilder.ontologyClass("NCIT:C142357", "iRECIST Complete Response"); + private static final OntologyClass iRECIST_CONFIRMED_PROGRESSIVE_DISEASE = OntologyClassBuilder.ontologyClass("NCIT:C142356", "iRECIST Confirmed Progressive Disease"); + private static final OntologyClass iRECIST_PARTIAL_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C142358", "iRECIST Partial Response"); + private static final OntologyClass iRECIST_STABLE_DISEASE = OntologyClassBuilder.ontologyClass("NCIT:C142359", "iRECIST Stable Disease"); + private static final OntologyClass iRECIST_UNCONFIRMED_PROGRESSIVE_DISEASE = OntologyClassBuilder.ontologyClass("NCIT:C142360", "iRECIST Unconfirmed Progressive Disease"); - public static OntologyClass favorable() { return FAVORABLE; } - public static OntologyClass unfavorable() { return UNFAVORABLE; } + public static OntologyClass favorableResponse() { return FAVORABLE_RESPONSE; } + public static OntologyClass unfavorableResponse() { return UNFAVORABLE_RESPONSE; } + public static OntologyClass noResponse() { return NO_RESPONSE; } + public static OntologyClass stringentCompleteResponse() { return STRINGENT_COMPLETE_RESPONSE; } + public static OntologyClass minimalResponse() { return MINIMAL_RESPONSE; } + public static OntologyClass completeRemission() { return COMPLETE_REMISSION; } + public static OntologyClass partialRemission() { return PARTIAL_REMISSION; } + public static OntologyClass primaryRefractory() { return PRIMARY_REFRACTORY; } + public static OntologyClass iRECISTCompleteResponse() { return iRECIST_COMPLETE_RESPONSE; } + public static OntologyClass iRECISTConfirmedProgressiveDisease() { return iRECIST_CONFIRMED_PROGRESSIVE_DISEASE; } + public static OntologyClass iRECISTPartialResponse() { return iRECIST_PARTIAL_RESPONSE; } + public static OntologyClass iRECISTStableDisease() { return iRECIST_STABLE_DISEASE; } + public static OntologyClass iRECISTUnconfirmedProgressiveDisease() { return iRECIST_UNCONFIRMED_PROGRESSIVE_DISEASE; 
} } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java index 9a0fb7e4..50566eed 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java @@ -1,54 +1,22 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; import org.phenopackets.schema.v2.core.OntologyClass; -import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; - - public class Severity { - private Severity() { - } - - private static final OntologyClass BORDERLINE = ontologyClass("HP:0012827", "Borderline"); - private static final OntologyClass MILD = ontologyClass("HP:0012825", "Mild"); - private static final OntologyClass MODERATE = ontologyClass("HP:0012826", "Moderate"); - private static final OntologyClass SEVERE = ontologyClass("HP:0012828", "Severe"); - private static final OntologyClass PROFOUND = ontologyClass("HP:0012829", "Profound"); - - /** - * Having a minor degree of severity that is considered to be on the boundary between the normal and the abnormal ranges. For quantitative traits, a deviation of that is less than two standard deviations from the appropriate population mean. - */ - public static OntologyClass borderline() { - return BORDERLINE; - } - - /** - * Having a relatively minor degree of severity. For quantitative traits, a deviation of between two and three standard deviations from the appropriate population mean. - */ - public static OntologyClass mild() { - return MILD; - } - - /** - * Having a medium degree of severity. 
For quantitative traits, a deviation of between three and four standard deviations from the appropriate population mean. - */ - public static OntologyClass moderate() { - return MODERATE; - } + private static final OntologyClass BORDERLINE = OntologyClassBuilder.ontologyClass("HP:0012827", "Borderline"); + private static final OntologyClass MILD = OntologyClassBuilder.ontologyClass("HP:0012825", "Mild"); + private static final OntologyClass MODERATE = OntologyClassBuilder.ontologyClass("HP:0012826", "Moderate"); + private static final OntologyClass SEVERE = OntologyClassBuilder.ontologyClass("HP:0012828", "Severe"); + private static final OntologyClass PROFOUND = OntologyClassBuilder.ontologyClass("HP:0012829", "Profound"); - /** - * Having a high degree of severity. For quantitative traits, a deviation of between four and five standard deviations from the appropriate population mean. - */ - public static OntologyClass severe() { - return SEVERE; - } - /** - * Having an extremely high degree of severity. For quantitative traits, a deviation of more than five standard deviations from the appropriate population mean. 
- */ - public static OntologyClass profound() { - return PROFOUND; - } + public static OntologyClass borderline() { return BORDERLINE; } + public static OntologyClass mild() { return MILD; } + public static OntologyClass moderate() { return MODERATE; } + public static OntologyClass severe() { return SEVERE; } + public static OntologyClass profound() { return PROFOUND; } } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java index d3b58138..b83f9f82 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; @@ -37,7 +38,7 @@ public class SpatialPattern { private static final OntologyClass APICAL = OntologyClassBuilder.ontologyClass("HP:0033820", "Apical"); private static final OntologyClass FOCAL = OntologyClassBuilder.ontologyClass("HP:0030650", "Focal"); private static final OntologyClass MULTIFOCAL = OntologyClassBuilder.ontologyClass("HP:0030651", "Multifocal"); - private static final OntologyClass JOINT_FLEXOR_SURFACE_LOCALIZATION = OntologyClassBuilder.ontologyClass("HP:0032540", "Jointflexorsurfacelocalization"); + private static final OntologyClass JOINT_FLEXOR_SURFACE_LOCALIZATION = OntologyClassBuilder.ontologyClass("HP:0032540", "Joint flexor surface localization"); public static OntologyClass predominantSmallJointLocalization() { return PREDOMINANT_SMALL_JOINT_LOCALIZATION; } diff --git 
a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java new file mode 100644 index 00000000..ff792267 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java @@ -0,0 +1,20 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class TreatmentTermination { + + private static final OntologyClass TREATMENT_COMPLETED_AS_PRESCRIBED = OntologyClassBuilder.ontologyClass("NCIT:C105740", "Treatment Completed as Prescribed"); + private static final OntologyClass TREATMENT_TERMINATED_TOXICITY = OntologyClassBuilder.ontologyClass("NCIT:C105741", "Treatment Terminated Due to Toxicity"); + private static final OntologyClass TREATMENT_ON_HOLD = OntologyClassBuilder.ontologyClass("NCIT:C106470", "Treatment on Hold"); + private static final OntologyClass ADVERSE_EVENT = OntologyClassBuilder.ontologyClass("NCIT:C41331", "Adverse Event"); + + + public static OntologyClass treatmentCompletedAsPrescribed() { return TREATMENT_COMPLETED_AS_PRESCRIBED; } + public static OntologyClass treatmentTerminatedDueToToxicity() { return TREATMENT_TERMINATED_TOXICITY; } + public static OntologyClass treatmentOnHold() { return TREATMENT_ON_HOLD; } + public static OntologyClass adverseEvent() { return ADVERSE_EVENT; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java new file mode 100644 index 00000000..4acdbbcf --- 
/dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java @@ -0,0 +1,18 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class TumorProgression { + + private static final OntologyClass PRIMARY_NEOPLASM = OntologyClassBuilder.ontologyClass("NCIT:C8509", "Primary Neoplasm"); + private static final OntologyClass METASTATIC_NEOPLASM = OntologyClassBuilder.ontologyClass("NCIT:C3261", "Metastatic Neoplasm"); + private static final OntologyClass RECURRENT_NEOPLASM = OntologyClassBuilder.ontologyClass("NCIT:C4798", "Recurrent Neoplasm"); + + + public static OntologyClass primaryNeoplasm() { return PRIMARY_NEOPLASM; } + public static OntologyClass metastaticNeoplasm() { return METASTATIC_NEOPLASM; } + public static OntologyClass recurrentNeoplasm() { return RECURRENT_NEOPLASM; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java index 387276a5..45515c9c 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; @@ -9,7 +10,7 @@ public class Unit { private static final OntologyClass DIOPTER = OntologyClassBuilder.ontologyClass("UCUM:[diop]", "diopter"); private static final OntologyClass GRAM = OntologyClassBuilder.ontologyClass("UCUM:g", "gram"); private static final OntologyClass GRAM_PER_KG = OntologyClassBuilder.ontologyClass("UCUM:g/kg", "gram per kilogram"); - private static final OntologyClass KILIGRAM = OntologyClassBuilder.ontologyClass("UCUM:kg", "kiligram"); + private static final OntologyClass KILOGRAM = OntologyClassBuilder.ontologyClass("UCUM:kg", "kilogram"); private static final OntologyClass LITER = OntologyClassBuilder.ontologyClass("UCUM:L", "liter"); private static final OntologyClass METER = OntologyClassBuilder.ontologyClass("UCUM:m", "meter"); private static final OntologyClass MICROGRAM = OntologyClassBuilder.ontologyClass("UCUM:ug", "microgram"); @@ -35,7 +36,7 @@ public class Unit { public static OntologyClass diopter() { return DIOPTER; } public static OntologyClass gram() { return GRAM; } public static OntologyClass gramPerKilogram() { return GRAM_PER_KG; } - public static OntologyClass kilogram() { return KILIGRAM; } + public static OntologyClass kilogram() { return KILOGRAM; } public static OntologyClass liter() { return LITER; } public static OntologyClass meter() { return METER; } public static OntologyClass microgram() { return MICROGRAM; } diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java new file mode 100644 index 00000000..62cc2bf2 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java @@ -0,0 +1,5 @@ +/** + * The {@code 
org.phenopackets.phenopackettools.builder.constants} package provides pre-defined constants from + * the recommended ontologies. + */ +package org.phenopackets.phenopackettools.builder.constants; \ No newline at end of file diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java deleted file mode 100644 index 7735686d..00000000 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java +++ /dev/null @@ -1,6 +0,0 @@ -package org.phenopackets.phenopackettools.builder.exceptions; - -public class PhenotoolsRuntimeException extends RuntimeException { - public PhenotoolsRuntimeException() { super();} - public PhenotoolsRuntimeException(String m) { super(m);} -} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java new file mode 100644 index 00000000..42933cb3 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with builders for top-level elements of Phenopacket Schema. 
+ */ +package org.phenopackets.phenopackettools.builder; \ No newline at end of file diff --git a/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java new file mode 100644 index 00000000..657e6dc5 --- /dev/null +++ b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java @@ -0,0 +1,22 @@ +package org.phenopackets.phenopackettools.builder.builders; + +import org.junit.jupiter.api.Test; +import org.phenopackets.schema.v2.core.Disease; +import org.phenopackets.schema.v2.core.OntologyClass; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; + +public class OntologyClassBuilderTest { + + @Test + public void testBuilder() { + OntologyClass longPR1 = OntologyClass.newBuilder() + .setId("HP:0012248") + .setLabel("Prolonged PR interval") + .build(); + OntologyClass longPR2 = ontologyClass("HP:0012248", "Prolonged PR interval"); + assertThat(longPR1, equalTo(longPR2)); + } +} diff --git a/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java index f16bb59b..c91c4071 100644 --- a/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java +++ b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java @@ -3,7 +3,7 @@ import com.google.protobuf.Timestamp; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import 
org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.TimeElement; @@ -36,7 +36,7 @@ public void testValidIso8601Age() { public void testInvalidIso8601Age() { // B instead of Y -- invalid, should throw exception String iso8601 = "P31B3M2D"; - Assertions.assertThrows(PhenotoolsRuntimeException.class, () -> { + Assertions.assertThrows(PhenopacketToolsRuntimeException.class, () -> { TimeElement age = TimeElements.age(iso8601); }); } @@ -80,6 +80,7 @@ public void testTimestamp() { assertTrue(time.hasTimestamp()); assertEquals(timestamp, time.getTimestamp()); } + @Test public void testTimeInterval() { String time1 = "2020-03-17T00:00:00Z"; diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 1ac56d86..0622ae02 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7 phenopacket-tools-cli @@ -31,6 +31,11 @@ phenopacket-tools-validator-jsonschema ${project.parent.version} + + org.phenopackets.phenopackettools + phenopacket-tools-io + ${project.parent.version} + info.picocli picocli @@ -39,18 +44,6 @@ ch.qos.logback logback-classic - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - - - com.google.protobuf - protobuf-java-util - org.monarchinitiative.phenol phenol-core @@ -59,25 +52,53 @@ org.monarchinitiative.phenol phenol-io + + + org.yaml + snakeyaml + org.apache.commons commons-csv - - - - src/main/resources - true - - - - - org.springframework.boot - spring-boot-maven-plugin - - - + + + release + + + + src/main/resources + true + + + + + org.springframework.boot + spring-boot-maven-plugin + + + org.apache.maven.plugins + maven-assembly-plugin + + + 
src/assemble/distribution.xml + + + + + make-assembly + package + + single + + + + + + + + diff --git a/phenopacket-tools-cli/src/assemble/distribution.xml b/phenopacket-tools-cli/src/assemble/distribution.xml new file mode 100644 index 00000000..54f95946 --- /dev/null +++ b/phenopacket-tools-cli/src/assemble/distribution.xml @@ -0,0 +1,37 @@ + + distribution + + zip + + + + + ${project.parent.basedir} + ./ + true + + README.md + LICENSE + CHANGELOG.rst + + + + + ${project.build.directory} + ./ + + phenopacket-tools-cli-${version}.jar + + + + + ${project.basedir}/src/examples + ./examples + + **/** + + + + \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json b/phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json new file mode 100644 index 00000000..96a13fbe --- /dev/null +++ b/phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json @@ -0,0 +1,396 @@ +{ + "id": "PMID:24239060-Schreckenbach-2014-TPM3-II.2", + "subject": { + "id": "II.2", + "ageAtCollection": { + "age": "P45Y" + }, + "sex": "FEMALE", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002527", + "label": "Falls" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0001260", + "label": "Dysarthria" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003391", + "label": "Gowers sign" + }, 
+ "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0009046", + "label": "Difficulty running" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000347", + "label": "Micrognathia" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003691", + "label": "Scapular winging" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002913", + "label": "Myoglobinuria" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0001265", + "label": "Hyporeflexia" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 
mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000275", + "label": "Narrow face" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002650", + "label": "Scoliosis" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000651", + "label": "Diplopia" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002515", + "label": "Waddling gait" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002495", + "label": "Impaired vibratory sensation" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0010830", + "label": "Impaired tactile sensation" + }, + "negated": true, + "evidence": [{ + 
"evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003202", + "label": "Skeletal muscle atrophy" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002705", + "label": "High, narrow palate" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003326", + "label": "Myalgia" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0040129", + "label": "Abnormal nerve conduction velocity" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003701", + "label": "Proximal muscle weakness" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": 
"Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000508", + "label": "Ptosis" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }], + "genes": [{ + "id": "NCBIGene:7170", + "symbol": "TPM3" + }], + "variants": [{ + "vcfAllele": { + "genomeAssembly": "GRCh37", + "chr": "1", + "pos": 154145610, + "ref": "G", + "alt": "T" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:609284", + "label": "NEMALINE MYOPATHY 1; NEM1CAP MYOPATHY 1, INCLUDED; CAPM1, INCLUDED" + } + }], + "metaData": { + "createdBy": "Hpo Case Annotator : 1.0.13-SNAPSHOT", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "pato", + "name": "Phenotype And Trait Ontology", + "url": "http://purl.obolibrary.org/obo/pato.owl", + "version": "2018-03-28", + "namespacePrefix": "PATO", + "iriPrefix": "http://purl.obolibrary.org/obo/PATO_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2018-03-02", + "namespacePrefix": "NCBITaxon" + }, { + "id": "eco", + "name": "Evidence and Conclusion Ontology", + "url": "http://purl.obolibrary.org/obo/eco.owl", + "version": "2018-11-10", + "namespacePrefix": 
"ECO", + "iriPrefix": "http://purl.obolibrary.org/obo/ECO_" + }, { + "id": "omim", + "name": "Online Mendelian Inheritance in Man", + "url": "https://www.omim.org", + "namespacePrefix": "OMIM" + }], + "phenopacketSchemaVersion": "1.0.0-RC3", + "externalReferences": [{ + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json b/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json new file mode 100644 index 00000000..966d7b64 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json @@ -0,0 +1,498 @@ +{ + "id": "arbitrary.id", + "subject": { + "id": "proband A", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P6M" + } + }, + "sex": "FEMALE", + "karyotypicSex": "XX" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0030084", + "label": "Clinodactyly" + }, + "modifiers": [{ + "id": "HP:0012834", + "label": "Right" + }], + "onset": { + "age": { + "iso8601duration": "P3M" + } + } + }, { + "type": { + "id": "HP:0000555", + "label": "Leukocoria" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P4M" + } + } + }, { + "type": { + "id": "HP:0000486", + "label": "Strabismus" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P5M15D" + } + } + }, { + "type": { + "id": "HP:0000541", + "label": "Retinal detachment" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:79893-4", + "label": "Left eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 25.0, + "referenceRange": { + "unit": { + 
"id": "LOINC:56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }, { + "assay": { + "id": "LOINC:79892-6", + "label": "Right eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 15.0, + "referenceRange": { + "unit": { + "id": "LOINC:56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "biosamples": [{ + "id": "biosample.1", + "sampledTissue": { + "id": "UBERON:0000970", + "label": "eye" + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C35941", + "label": "Flexner-Wintersteiner Rosette Formation" + } + }, { + "type": { + "id": "NCIT:C132485", + "label": "Apoptosis and Necrosis" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:33728-7", + "label": "Size.maximum dimension in Tumor" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm", + "label": "millimeter" + }, + "value": 15.0 + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P8M2W" + } + } + }], + "tumorProgression": { + "id": "NCIT:C8509", + "label": "Primary Neoplasm" + }, + "pathologicalTnmFinding": [{ + "id": "NCIT:C140720", + "label": "Retinoblastoma pT3 TNM Finding v8" + }, { + "id": "NCIT:C140711", + "label": "Retinoblastoma pN0 TNM Finding v8" + }], + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "files": [{ + "uri": "file://data/fileSomaticWgs.vcf.gz", + "individualToFileIdentifiers": { + "biosample.1": "specimen.1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }] + }], + "interpretations": [{ + "id": "interpretation.id", + 
"progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "proband A", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "cnv-1", + "moleculeContext": "genomic", + "variation": { + "copyNumber": { + "derivedSequenceExpression": { + "location": { + "sequenceId": "refseq:NC_000013.14", + "sequenceInterval": { + "startNumber": { + "value": "25981249" + }, + "endNumber": { + "value": "61706822" + } + } + } + }, + "number": { + "value": "1" + } + } + }, + "extensions": [{ + "name": "mosaicism", + "value": "40.0%" + }] + } + } + }, { + "subjectOrBiosampleId": "biosample.1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "rs121913300", + "variation": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.11", + "sequenceInterval": { + "startNumber": { + "value": "48367511" + }, + "endNumber": { + "value": "48367512" + } + } + }, + "literalSequenceExpression": { + "sequence": "T" + } + } + }, + "label": "RB1 c.958C\u003eT (p.Arg320Ter)", + "geneContext": { + "valueId": "HGNC:9884", + "symbol": "RB1" + }, + "expressions": [{ + "syntax": "hgvs.c", + "value": "NM_000321.2:c.958C\u003eT" + }, { + "syntax": "transcript_reference", + "value": "NM_000321.2" + }], + "vcfRecord": { + "genomeAssembly": "GRCh38", + "chrom": "NC_000013.11", + "pos": "48367512", + "ref": "C", + "alt": "T" + }, + "extensions": [{ + "name": "allele-frequency", + "value": "25.0%" + }], + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "NCIT:C7541", + "label": 
"Retinoblastoma" + }, + "onset": { + "age": { + "iso8601duration": "P4M" + } + }, + "diseaseStage": [{ + "id": "LOINC:LA24739-7", + "label": "Group E" + }], + "clinicalTnmFinding": [{ + "id": "NCIT:C140678", + "label": "Retinoblastoma cM0 TNM Finding v8" + }], + "primarySite": { + "id": "UBERON:0004548", + "label": "left eye" + } + }], + "medicalActions": [{ + "treatment": { + "agent": { + "id": "DrugCentral:1678", + "label": "melphalan" + }, + "routeOfAdministration": { + "id": "NCIT:C38222", + "label": "Intraarterial Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UCUM:mg.kg-1", + "label": "milligram per kilogram" + }, + "value": 0.4 + }, + "scheduleFrequency": { + "id": "NCIT:C64576", + "label": "Once" + }, + "interval": { + "start": "2020-09-02T00:00:00Z", + "end": "2020-09-02T00:00:00Z" + } + }] + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + }, + "adverseEvents": [{ + "id": "HP:0025637", + "label": "Vasospasm" + }], + "treatmentTerminationReason": { + "id": "NCIT:C41331", + "label": "Adverse Event" + } + }, { + "therapeuticRegimen": { + "ontologyClass": { + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" + }, + "startTime": { + "age": { + "iso8601duration": "P7M" + } + }, + "endTime": { + "age": { + "iso8601duration": "P8M" + } + }, + "regimenStatus": "COMPLETED" + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }, { + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }], + "files": [{ + 
"uri": "file://data/germlineWgs.vcf.gz", + "individualToFileIdentifiers": { + "proband A": "sample1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "3.34.0", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2021-07-27", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2021-06-10", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "2.7.3", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org/" + }, { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "2022-03-05", + "namespacePrefix": "GENO", + "iriPrefix": 
"http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "08/22/2022", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard/" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/base/README.md b/phenopacket-tools-cli/src/examples/validate/base/README.md new file mode 100644 index 00000000..a6928e46 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/base/README.md @@ -0,0 +1,42 @@ +# README + +The folder contains a few phenopackets for demonstrating the base validation functionality of *phenopacket-tools*; +the validation that any phenopacket must pass. + +The validator will report the validation issues, one issue per line. The next sections show different types +of validation errors that can be found using *phenopacket-tools*. + +## `missing-fields.json` + +The `missing-fields.json` is a phenopacket where several required attributes are missing. Presence of all required +attributes is checked at the beginning of the validation, before any other checks. The *phenopacket-tools* validator +will point out the following issues: + +| Message | Solution | +|:------------------------------------------------------------------|:---------------------------------------------------------------------| +| `id` is missing but it is required. | Add phenopacket ID. | +| `subject.id` is missing but it is required. | Add subject ID. | +| `phenotypicFeatures[0].type.label` is missing but it is required. | Add the `label` attribute into `phenotypicFeatures[0].type`. 
| + +See `missing-fields-valid.json` for a valid version of the phenopacket: + +```shell +# Use UNIX diff to highlight differences between two files +diff missing-fields.json missing-fields-valid.json +``` + +## `missing-resources.json` + +The `missing-resources.json` is a phenopacket with no missing fields, so it passes the syntax validation. +However, the phenopacket is invalid because it uses ontologies that are not defined in the `metaData.resources` section. +The validator will point out the following issues: + +| Message | Solution | +|:--------------------------------------------------|:-----------------------------------------------------------| +| No ontology corresponding to ID 'NCBITaxon:9606' | Add a `Resource` for `NCBITaxon` into `metaData.resources` | + +See `missing-resources-valid.json` for a valid version of the phenopacket: + +```shell +diff missing-resources.json missing-resources-valid.json +``` diff --git a/phenopacket-tools-cli/src/examples/validate/base/missing-fields-valid.json b/phenopacket-tools-cli/src/examples/validate/base/missing-fields-valid.json new file mode 100644 index 00000000..5445fede --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/base/missing-fields-valid.json @@ -0,0 +1,108 @@ +{ + "id": "missing-fields-valid-phenopacket-id", + "subject": { + "id": "example-subject-id", + "dateOfBirth": "1998-01-01T00:00:00Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P3Y" + } + }, + "sex": "MALE", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001159", + "label": "Syndactyly" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0002090", + "label": "Pneumonia" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }, { + "type": { + "id": "HP:0000028", + "label": "Cryptorchidism" + }, + "onset": { + 
"ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0011109", + "label": "Chronic sinusitis" + }, + "severity": { + "id": "HP:0012828", + "label": "Severe" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003581", + "label": "Adult onset" + } + } + }], + "files": [{ + "uri": "file://data/file.vcf.gz", + "individualToFileIdentifiers": { + "kindred 1A": "SAME000234" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "vcf" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2020-07-13", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + } ], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [{ + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/base/missing-fields.json b/phenopacket-tools-cli/src/examples/validate/base/missing-fields.json new file mode 100644 index 00000000..7d474f13 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/base/missing-fields.json @@ -0,0 +1,105 @@ +{ + "subject": { + "dateOfBirth": "1998-01-01T00:00:00Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P3Y" + } + }, + "sex": "MALE", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001159" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0002090", + "label": "Pneumonia" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }, { + "type": { + "id": "HP:0000028", + "label": "Cryptorchidism" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0011109", + "label": "Chronic sinusitis" + }, + "severity": { + "id": "HP:0012828", + "label": "Severe" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003581", + "label": "Adult onset" + } + } + }], + "files": [{ + "uri": "file://data/file.vcf.gz", + "individualToFileIdentifiers": { + "kindred 1A": "SAME000234" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "vcf" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": 
"2020-07-13", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + } ], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [{ + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/base/missing-resources-valid.json b/phenopacket-tools-cli/src/examples/validate/base/missing-resources-valid.json new file mode 100644 index 00000000..61e09904 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/base/missing-resources-valid.json @@ -0,0 +1,43 @@ +{ + "id": "missing-resources-example", + "subject": { + "id": "subject-id", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001250", + "label": "Seizure" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2020-07-13", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [ + { + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within 
the proximal GCH1 promoter." + } + ] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/base/missing-resources.json b/phenopacket-tools-cli/src/examples/validate/base/missing-resources.json new file mode 100644 index 00000000..37851952 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/base/missing-resources.json @@ -0,0 +1,36 @@ +{ + "id": "missing-resources-example", + "subject": { + "id": "subject-id", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001250", + "label": "Seizure" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [ + { + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." 
+ } + ] + } +} \ No newline at end of file diff --git a/supplementary/hpo-rare-disease-schema.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/hpo-rare-disease-schema.json similarity index 51% rename from supplementary/hpo-rare-disease-schema.json rename to phenopacket-tools-cli/src/examples/validate/custom-json-schema/hpo-rare-disease-schema.json index d56a28fd..7ba15846 100644 --- a/supplementary/hpo-rare-disease-schema.json +++ b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/hpo-rare-disease-schema.json @@ -1,9 +1,8 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema#", - "$id": "example.hpo.jsonschema.validator", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://example.com/hpo-rare-disease-validator", "title": "HPO Rare Disease Phenopacket Schema", - "description": "HPO Rare Disease Schema for GA4GH Phenopacket", - "_comment": "Here we require the phenopacket to have the following elements that are not required by the default schema 1. subject (proband being investigated) 2. at least one phenotypicFeature element 3. time_at_last encounter (subelement of subject), representing the age of the proband. 
In addition, we require that Human Phenotype Ontology (HPO) terms are used to represent phenotypicFeature", + "description": "An example JSON schema for validating a phenopacket in context of the rare-disease research", "type": "object", "properties": { "subject": { @@ -25,13 +24,19 @@ { "type": "object", "properties": { - "id": { - "type": "string", - "pattern": "^HP:\\([0-9]{7}$" + "type": { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^HP:\\d{7}$" + } + } } } } - ] + ], + "minItems": 1 } }, "required": [ diff --git a/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-phenotype.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-phenotype.invalid.json new file mode 100644 index 00000000..095b1dc8 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-phenotype.invalid.json @@ -0,0 +1,24 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P27Y" + } + } + }, + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-subject.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-subject.invalid.json new file mode 100644 index 00000000..56ef72b1 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-subject.invalid.json @@ -0,0 +1,22 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": 
"2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json new file mode 100644 index 00000000..65b335d8 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json @@ -0,0 +1,25 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.not-hpo.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.not-hpo.invalid.json new file mode 100644 index 00000000..3281c3f4 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.not-hpo.invalid.json @@ -0,0 +1,30 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P27Y" + } + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C26697", + "label": "Aortic Aneurysm" + } + }], + "metaData": { + "created": 
"2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.valid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.valid.json new file mode 100644 index 00000000..f918d3bf --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.valid.json @@ -0,0 +1,30 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P27Y" + } + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.all-organ-system-annotated.valid.json b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.all-organ-system-annotated.valid.json new file mode 100644 index 00000000..c71e4e88 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.all-organ-system-annotated.valid.json @@ -0,0 +1,38 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001083", + "label": "Ectopia lentis" + } + }, { + "type": { + "id": "HP:0001653", + "label": "Mitral regurgitation" + } + }, { + "type": { + "id": "HP:0002086", + "label": 
"Abnormality of the respiratory system" + }, + "excluded": true + }, { + "type": { + "id": "HP:0001166", + "label": "Arachnodactyly" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json new file mode 100644 index 00000000..35687e3c --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json @@ -0,0 +1,32 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001653", + "label": "Mitral regurgitation" + } + }, { + "type": { + "id": "HP:0002107", + "label": "Pneumothorax" + } + }, { + "type": { + "id": "HP:0001166", + "label": "Arachnodactyly" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json new file mode 100644 index 00000000..a6280f56 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json @@ -0,0 +1,40 @@ +{ + "id": 
"id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0000478", + "label": "Abnormality of the eye" + }, + "excluded": true + }, { + "type": { + "id": "HP:0001626", + "label": "Abnormality of the cardiovascular system" + }, + "excluded": true + }, { + "type": { + "id": "HP:0002086", + "label": "Abnormality of the respiratory system" + }, + "excluded": true + }, { + "type": { + "id": "HP:0001166", + "label": "Arachnodactyly" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json new file mode 100644 index 00000000..3d3004a3 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json @@ -0,0 +1,27 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0004942", + "label": "Aortic aneurysm" + } + }, { + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.obsolete-term.invalid.json 
b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.obsolete-term.invalid.json new file mode 100644 index 00000000..000231e6 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.obsolete-term.invalid.json @@ -0,0 +1,22 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002631", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.valid.json b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.valid.json new file mode 100644 index 00000000..fc712eae --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.valid.json @@ -0,0 +1,28 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0004942", + "label": "Aortic aneurysm" + } + }, { + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + }, + "excluded": true + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/module-info.java b/phenopacket-tools-cli/src/main/java/module-info.java index dbb1b933..77e84ce8 100644 --- a/phenopacket-tools-cli/src/main/java/module-info.java +++ 
b/phenopacket-tools-cli/src/main/java/module-info.java @@ -1,5 +1,6 @@ module org.phenopackets.phenopackettools.cli { requires org.phenopackets.phenopackettools.util; + requires org.phenopackets.phenopackettools.io; requires org.phenopackets.phenopackettools.converter; requires org.phenopackets.phenopackettools.builder; requires org.phenopackets.phenopackettools.validator.jsonschema; @@ -7,13 +8,11 @@ requires org.monarchinitiative.phenol.core; requires org.monarchinitiative.phenol.io; - requires com.google.protobuf.util; - requires com.fasterxml.jackson.databind; - requires com.fasterxml.jackson.dataformat.yaml; - requires commons.csv; requires info.picocli; + requires commons.csv; requires org.slf4j; + requires logback.classic; - opens org.phenopackets.phenopackettools.command to info.picocli; - opens org.phenopackets.phenopackettools.command.validate to info.picocli; + opens org.phenopackets.phenopackettools.cli.command to info.picocli; + opens org.phenopackets.phenopackettools.cli.command.validate to info.picocli; } \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java similarity index 87% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java index ffb537f2..6041658a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java @@ -1,8 +1,8 @@ -package org.phenopackets.phenopackettools; +package org.phenopackets.phenopackettools.cli; -import org.phenopackets.phenopackettools.command.ValidateCommand; -import org.phenopackets.phenopackettools.command.ConvertCommand; -import org.phenopackets.phenopackettools.command.ExamplesCommand; +import 
org.phenopackets.phenopackettools.cli.command.ValidateCommand; +import org.phenopackets.phenopackettools.cli.command.ConvertCommand; +import org.phenopackets.phenopackettools.cli.command.ExamplesCommand; import picocli.AutoComplete; import picocli.CommandLine; @@ -26,7 +26,7 @@ public class Main { public static final String HEADER = "phenopacket-tools\nAn application for creating, converting and validating GA4GH phenopackets.\n"; - public static final String VERSION = "phenopacket-tools v0.4.6"; + public static final String VERSION = "phenopacket-tools v0.4.7"; // Maximum number of characters in line of the usage message. public static final int USAGE_WIDTH = 120; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseCommand.java similarity index 50% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseCommand.java index 929fd50a..75bce2a4 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseCommand.java @@ -1,8 +1,11 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; -import org.phenopackets.phenopackettools.Main; +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.LoggerContext; +import org.phenopackets.phenopackettools.cli.Main; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import picocli.CommandLine; import java.io.IOException; import java.io.InputStream; @@ -13,19 +16,48 @@ public abstract class BaseCommand implements Callable { private static final Logger LOGGER = LoggerFactory.getLogger(BaseCommand.class); - protected static final String 
BANNER = readBanner(); - protected static final Properties APPLICATION_PROPERTIES = readApplicationProperties(); protected static final String PHENOPACKET_TOOLS_VERSION = APPLICATION_PROPERTIES.getProperty("phenopacket-tools.version", "UNKNOWN-version"); - private static String readBanner() { - try (InputStream is = Main.class.getResourceAsStream("banner.txt")) { - return is == null ? "" : new String(is.readAllBytes()); - } catch (IOException e) { - LOGGER.error("Unable to read banner. Please report to the developers: {}", e.getMessage(), e); - return ""; + @CommandLine.Option(names = {"-v"}, description = {"Specify multiple -v options to increase verbosity.", + "For example, `-v -v -v` or `-vvv`"}) + public boolean[] verbosity = {}; + + @Override + public Integer call() { + // (0) Setup verbosity and print banner. + setupLoggingAndPrintBanner(); + + // (1) Run the command functionality. + return execute(); + } + + protected abstract Integer execute(); + + private void setupLoggingAndPrintBanner() { + Level level = parseVerbosityLevel(); + + LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); + context.getLogger(Logger.ROOT_LOGGER_NAME).setLevel(level); + + if (!(level.equals(Level.WARN) || level.equals(Level.ERROR))) + printBanner(); + } + + private Level parseVerbosityLevel() { + int verbosity = 0; + for (boolean a : this.verbosity) { + if (a) verbosity++; } + + return switch (verbosity) { + case 0 -> Level.WARN; + case 1 -> Level.INFO; + case 2 -> Level.DEBUG; + case 3 -> Level.TRACE; + default -> Level.ALL; + }; } private static Properties readApplicationProperties() { @@ -39,8 +71,17 @@ private static Properties readApplicationProperties() { return properties; } - protected static void printBanner() { - System.err.println(BANNER); + private static void printBanner() { + System.err.println(readBanner()); + } + + private static String readBanner() { + try (InputStream is = Main.class.getResourceAsStream("banner.txt")) { + return is == null 
? "" : new String(is.readAllBytes()); + } catch (IOException e) { + LOGGER.error("Unable to read banner. Please report to the developers: {}", e.getMessage(), e); + return ""; + } } } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java new file mode 100644 index 00000000..49b2c4eb --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java @@ -0,0 +1,145 @@ +package org.phenopackets.phenopackettools.cli.command; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.util.format.ElementSniffer; +import org.phenopackets.phenopackettools.util.format.FormatSniffer; +import org.phenopackets.phenopackettools.util.format.SniffException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +/** + * A command that provides routines for reading as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s + * for processing of a single top-level Phenopacket schema element. 
+ */ +public abstract class BaseIOCommand extends BaseCommand { + + private static final Logger LOGGER = LoggerFactory.getLogger(BaseIOCommand.class); + + private final PhenopacketParserFactory parserFactory; + + @CommandLine.ArgGroup(validate = false, heading = "Inputs:%n") + public InputSection inputSection = new InputSection(); + + public static class InputSection { + @CommandLine.Option(names = {"-i", "--input"}, + arity = "0..*", + description = "Input phenopacket(s).%nLeave empty for STDIN") + public List inputs = null; + + // The format will be sniffed if it is not provided. + @CommandLine.Option(names = {"-f", "--format"}, + description = {"Phenopacket format.", + "Choose from: {${COMPLETION-CANDIDATES}}"}) + public PhenopacketFormat format = null; + + @CommandLine.Option(names = {"-e", "--element"}, + description = {"Top-level element.", + "Choose from {${COMPLETION-CANDIDATES}}", + "Default: phenopacket"}) + public PhenopacketElement element = null; + + } + protected BaseIOCommand() { + parserFactory = PhenopacketParserFactory.getInstance(); + } + + /** + * Attempt to read the input in the provided {@code schemaVersion} and exit upon any failure. As a side effect, + * {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#format} + * and {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#element} + * fields are set after the function returns. + *

+ * Note that the function does not return if reading fails. + */ + protected List readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) { + PhenopacketParser parser = parserFactory.forFormat(schemaVersion); + if (inputSection.inputs == null) { + // The user did not set the `-i | --input` option, assuming a single input is coming from STDIN. + InputStream is = System.in; + try { + setFormatAndElement(is, schemaVersion); + Message message = parser.parse(inputSection.format, inputSection.element, is); + return List.of(new MessageAndPath(message, null)); + } catch (SniffException e) { + System.err.println("Unable to detect input format from STDIN.\nConsider using the `--format` option."); + } catch (IOException e) { + System.err.println("Unable to read STDIN: " + e.getMessage() + "\nPlease check the input format."); + } + System.exit(1); + } else { + // Assuming one or more inputs are provided via `-i | --input`. + // + // Picocli should ensure that `inputs` is never an empty list. `inputs` is `null` if no `-i` was supplied. + assert !inputSection.inputs.isEmpty(); + + List messages = new ArrayList<>(); + for (Path input : inputSection.inputs) { + try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { + setFormatAndElement(is, schemaVersion); + Message message = parser.parse(inputSection.format, inputSection.element, is); + messages.add(new MessageAndPath(message, input)); + } catch (SniffException e) { + System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); + System.exit(1); + } catch (IOException e) { + System.err.printf("Unable to read input file %s: %s\nPlease check the input format.%n", input.toAbsolutePath(), e.getMessage()); + System.exit(1); + } + } + return messages; + } + return null; // Cannot happen since System.exit() never returns, but to make the compiler happy... 
+ } + + /** + * Peek into the provided {@link InputStream} {@code is} to set {@link InputSection#format} + * and {@link InputSection#element} items, unless the user has already provided them. + * + * @throws IOException if an I/O error happens + * @throws SniffException if we cannot sniff the format + */ + private void setFormatAndElement(InputStream is, PhenopacketSchemaVersion schemaVersion) throws IOException, SniffException { + // Set format. + PhenopacketFormat fmt = FormatSniffer.sniff(is); + if (inputSection.format == null) { + LOGGER.info("Input format was not provided, making an educated guess.."); + LOGGER.info("The input looks like a {} file", fmt); + inputSection.format = fmt; + } else { + if (!inputSection.format.equals(fmt)) + // This can happen e.g. if processing multiple files at once but one turns out to be in a different format. + // We emit a warning because this is likely not what the user intended and the code will likely explode + // further downstream. + LOGGER.warn("Input format is set to {} but the current input looks like a {}", inputSection.format, fmt); + } + + // Set element. + PhenopacketElement element = ElementSniffer.sniff(is, schemaVersion, inputSection.format); + if (inputSection.element == null) { + LOGGER.info("Input element type (-e | --element) was not provided, making an educated guess.."); + LOGGER.info("The input looks like a {} ", element); + inputSection.element = element; + } +// else { + // TODO - enable once element sniffing is implemented +// if (!inputSection.element.equals(element)) +// Let's go the extra mile and check for the user. 
+// LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element); +// } + } + + protected record MessageAndPath(Message message, Path path) {} + +} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java similarity index 79% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java index 48a8528a..2681098d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java @@ -1,9 +1,11 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; -import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.io.PhenopacketPrinter; +import org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory; import org.phenopackets.schema.v1.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,10 +53,7 @@ public static class ConvertSection { } @Override - public Integer call() { - // (0) Print banner. - printBanner(); - + protected Integer execute() { if (!checkInputArgumentsAreOk()) return 1; @@ -77,14 +76,11 @@ public Integer call() { converted.add(new MessageAndPath(v2, mp.path())); } - // (3) Set the output format if necessary. 
- if (convertSection.outputFormat == null) { - LOGGER.info("Output format (--output-format) not provided, writing data in the input format `{}`", inputSection.format); - convertSection.outputFormat = inputSection.format; - } + // (3) Configure the output format. + PhenopacketPrinter printer = configurePhenopacketPrinter(); // (4) Write out the output(s). - return writeOutConverted(converted); + return writeOutConverted(converted, printer); } /** @@ -112,7 +108,19 @@ private boolean checkInputArgumentsAreOk() { return true; } - private int writeOutConverted(List converted) { + private PhenopacketPrinter configurePhenopacketPrinter() { + PhenopacketFormat format; + if (convertSection.outputFormat == null) { + LOGGER.info("Output format (--output-format) not provided, writing data in the input format `{}`", inputSection.format); + format = inputSection.format; + } else + format = convertSection.outputFormat; + + PhenopacketPrinterFactory factory = PhenopacketPrinterFactory.getInstance(); + return factory.forFormat(PhenopacketSchemaVersion.V2, format); + } + + private int writeOutConverted(List converted, PhenopacketPrinter printer) { if (converted.size() == 1) { // Writing out item, either from STDIN or from one `-i` options. MessageAndPath mp = converted.get(0); @@ -124,7 +132,7 @@ private int writeOutConverted(List converted) { } else { os = openOutputStream(mp.path()); } - writeMessage(mp.message(), convertSection.outputFormat, os); + printer.print(mp.message(), os); } catch (IOException e) { LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); return 1; @@ -141,7 +149,7 @@ private int writeOutConverted(List converted) { // Writing out >1 items provided by `-i` options. 
for (MessageAndPath mp : converted) { try (OutputStream os = openOutputStream(mp.path())) { - writeMessage(mp.message(), convertSection.outputFormat, os); + printer.print(mp.message(), os); } catch (IOException e) { LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); return 1; @@ -171,32 +179,4 @@ private BufferedOutputStream openOutputStream(Path inputPath) throws IOException return new BufferedOutputStream(Files.newOutputStream(output)); } - /** - * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. - *

- * Uses {@link } - * @param message message to be written out. - * @param format format to write out - * @param os where to write - * @throws IOException in case of I/O errors during the output - */ - protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { - switch (format) { - case PROTOBUF -> { - LOGGER.debug("Writing protobuf message"); - message.writeTo(os); - } - case JSON -> { - LOGGER.debug("Writing JSON message"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - JsonFormat.printer().appendTo(message, writer); - writer.flush(); - } - case YAML -> { - // TODO - implement - throw new RuntimeException("YAML printer is not yet implemented"); - } - } - } - } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java new file mode 100644 index 00000000..2685e4d9 --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java @@ -0,0 +1,102 @@ +package org.phenopackets.phenopackettools.cli.command; + +import com.google.protobuf.Message; + +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; +import org.phenopackets.phenopackettools.cli.examples.*; +import org.phenopackets.phenopackettools.io.PhenopacketPrinter; +import org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory; +import picocli.CommandLine; +import picocli.CommandLine.Command; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +@Command(name = "examples", + mixinStandardHelpOptions = true, + sortOptions = false, + description = "Write example phenopackets to a directory.") +public class 
ExamplesCommand extends BaseCommand { + + @CommandLine.Option(names = {"-o", "--output"}, + description = "Output directory (default: ${DEFAULT-VALUE})") + public Path output = Path.of("."); + + private final PhenopacketPrinter jsonPrinter; + private final PhenopacketPrinter yamlPrinter; + + public ExamplesCommand() { + PhenopacketPrinterFactory factory = PhenopacketPrinterFactory.getInstance(); + jsonPrinter = factory.forFormat(PhenopacketSchemaVersion.V2, PhenopacketFormat.JSON); + yamlPrinter = factory.forFormat(PhenopacketSchemaVersion.V2, PhenopacketFormat.YAML); + } + + @Override + protected Integer execute() { + try { + Path phenopacketDir = createADirectoryIfDoesNotExist(output.resolve("phenopackets")); + Path familyDir = createADirectoryIfDoesNotExist(output.resolve("families")); + Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); + + // Phenopackets + printJsonAndYaml(new AtaxiaWithVitaminEdeficiency().getPhenopacket(), phenopacketDir, "AVED"); + printJsonAndYaml(new BethlehamMyopathy().getPhenopacket(), phenopacketDir, "bethleham-myopathy"); + printJsonAndYaml(new Holoprosencephaly5().getPhenopacket(), phenopacketDir, "holoprosencephaly5"); + printJsonAndYaml(new Marfan().getPhenopacket(), phenopacketDir, "marfan"); + printJsonAndYaml(new NemalineMyopathyPrenatal().getPhenopacket(), phenopacketDir, "nemalineMyopathy"); + printJsonAndYaml(new Pseudoexfoliation().getPhenopacket(), phenopacketDir, "pseudoexfoliation"); + printJsonAndYaml(new DuchenneExon51Deletion().getPhenopacket(), phenopacketDir, "duchenne"); + printJsonAndYaml(new SquamousCellCancer().getPhenopacket(), phenopacketDir, "squamous-cell-esophageal-carcinoma"); + printJsonAndYaml(new UrothelialCancer().getPhenopacket(), phenopacketDir, "urothelial-cancer"); + printJsonAndYaml(new Covid().getPhenopacket(), phenopacketDir, "covid"); + printJsonAndYaml(new Retinoblastoma().getPhenopacket(), phenopacketDir, "retinoblastoma"); + printJsonAndYaml(new 
WarburgMicroSyndrome().getPhenopacket(), phenopacketDir, "warburg-micro-syndrome"); + printJsonAndYaml(new SevereStatinInducedAutoimmuneMyopathy().getPhenopacket(), phenopacketDir, "statin-myopathy"); + + // Families + printJsonAndYaml(new FamilyWithPedigree().getFamily(), familyDir, "family"); + + // Cohorts + // TODO - write a cohort + + } catch (Exception e) { + System.err.println(e.getMessage()); + return 1; + } + return 0; + } + + private static Path createADirectoryIfDoesNotExist(Path path) throws IOException { + return Files.exists(path) + ? path + : Files.createDirectories(path); + } + + private void printJsonAndYaml(Message message, Path outDir, String basename) { + Path jsonPath = outDir.resolve(basename + ".json"); + printJsonMessage(message, jsonPath); + + Path yamlPath = outDir.resolve(basename + ".yml"); + printYamlMessage(message, yamlPath); + } + + private void printJsonMessage(Message message, Path path) { + try { + jsonPrinter.print(message, path); + } catch (IOException e) { + throw new PhenopacketToolsRuntimeException(e); + } + } + + private void printYamlMessage(Message message, Path path) { + try { + yamlPrinter.print(message, path); + } catch (IOException e) { + throw new PhenopacketToolsRuntimeException(e); + } + } + +} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java similarity index 55% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index 843ad62e..ce2a34cf 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -1,15 +1,19 @@ -package 
org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.core.metadata.MetaDataValidators; import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; import org.phenopackets.phenopackettools.validator.core.writer.ValidationResultsAndPath; import org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner; -import org.phenopackets.phenopackettools.writer.CSVValidationResultsWriter; +import org.phenopackets.phenopackettools.cli.writer.CSVValidationResultsWriter; import org.phenopackets.schema.v2.CohortOrBuilder; import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.PhenopacketOrBuilder; @@ -25,6 +29,9 @@ import java.time.LocalDateTime; import java.util.ArrayList; import java.util.List; +import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; @Command(name = "validate", description = "Validate top-level elements of the Phenopacket schema.", @@ -38,21 +45,28 @@ public class ValidateCommand extends BaseIOCommand { public ValidateSection validateSection = new ValidateSection(); public static class ValidateSection { + @CommandLine.Option(names = {"-H", "--include-header"}, + description = {"Include header in the output", "Default: ${DEFAULT-VALUE}"}) + public boolean includeHeader = false; + @CommandLine.Option(names = {"--require"}, arity = "*", description = "Path to 
JSON schema with additional requirements to enforce.") - protected List requirements = List.of(); + public List requirements = List.of(); @CommandLine.Option(names = "--hpo", description = "Path to hp.json file") - protected Path hpJson; + public Path hpJson; + + @CommandLine.Option(names = {"-s", "--organ-system"}, + arity = "*", + description = {"Organ system HPO term IDs", + "Default: empty"}) + public List organSystems = List.of(); } @Override - public Integer call() { - // (0) Print banner. - printBanner(); - + protected Integer execute() { // (1) Read the input v2 message(s). List messages = readMessagesOrExit(PhenopacketSchemaVersion.V2); @@ -67,7 +81,10 @@ public Integer call() { // (4) Write out the validation results into STDOUT. try { - CSVValidationResultsWriter writer = new CSVValidationResultsWriter(System.out, PHENOPACKET_TOOLS_VERSION, LocalDateTime.now()); + CSVValidationResultsWriter writer = new CSVValidationResultsWriter(System.out, + PHENOPACKET_TOOLS_VERSION, + LocalDateTime.now(), + validateSection.includeHeader); writer.writeValidationResults(runner.validators(), results); return 0; } catch (IOException e) { @@ -129,36 +146,111 @@ private List prepareCustomSchemaUrls() { * Prepare semantic validators for given {@link T}. *

* Warning - it is important to request the {@link T} that is appropriate - * for the current {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element}. + * for the current {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#element}. * The app will crash and burn if e.g. {@link T} is {@link PhenopacketOrBuilder} - * while {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} - * is {@link org.phenopackets.phenopackettools.util.format.PhenopacketElement#FAMILY}. + * while {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#element} + * is {@link PhenopacketElement#FAMILY}. */ private List> configureSemanticValidators() { // Right now we only have one semantic validator, but we'll extend this in the future. LOGGER.debug("Configuring semantic validators"); List> validators = new ArrayList<>(); + Ontology hpo = null; if (validateSection.hpJson != null) { - LOGGER.debug("Reading HPO from '{}}'", validateSection.hpJson.toAbsolutePath()); - Ontology hpo = OntologyLoader.loadOntology(validateSection.hpJson.toFile()); + LOGGER.debug("Reading HPO from {}", validateSection.hpJson.toAbsolutePath()); + hpo = OntologyLoader.loadOntology(validateSection.hpJson.toFile()); // The entire logic of this command stands and falls on correct state of `element` and the read message(s). // This method requires an appropriate combination of `T` and `element`, as described in Javadoc. // We suppress warning and perform an unchecked cast here, assuming `T` and `element` are appropriate. // The app will crash and burn if this is not the case. 
- PhenopacketValidator validator = switch (inputSection.element) { + switch (inputSection.element) { + case PHENOPACKET -> { + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(hpo)); + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.phenopacketHpoAncestryValidator(hpo)); + } + case FAMILY -> { + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Primary.familyHpoPhenotypeValidator(hpo)); + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.familyHpoAncestryValidator(hpo)); + } + case COHORT -> { + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Primary.cohortHpoPhenotypeValidator(hpo)); + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.cohortHpoAncestryValidator(hpo)); + } + } + } + + if (!validateSection.organSystems.isEmpty()) { + PhenopacketValidator validator = prepareOrganSystemValidator(hpo, validateSection.organSystems, inputSection.element); + if (validator != null) + validators.add(validator); + + } + + LOGGER.debug("Configured {} semantic validator(s)", validators.size()); + return validators; + } + + private static PhenopacketValidator prepareOrganSystemValidator(Ontology hpo, + List organSystems, + PhenopacketElement element) { + // Organ system validation can only be done when HPO is provided. + if (hpo == null) { + LOGGER.warn("Terms for organ system validation were provided but the path to HPO is unset. Use --hpo option to enable organ system validation."); + return null; + } + + // Prepare organ system IDs. + List organSystemIds = prepareOrganSystemIds(organSystems); + + // Create the validator. 
+ if (!organSystemIds.isEmpty()) { + return switch (element) { case PHENOPACKET -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.phenopacketHpoPhenotypeValidator(hpo); + (PhenopacketValidator) HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, organSystemIds); case FAMILY -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.familyHpoPhenotypeValidator(hpo); + (PhenopacketValidator) HpoPhenotypeValidators.OrganSystem.familyHpoOrganSystemValidator(hpo, organSystemIds); case COHORT -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.cohortHpoPhenotypeValidator(hpo); + (PhenopacketValidator) HpoPhenotypeValidators.OrganSystem.cohortHpoOrganSystemValidator(hpo, organSystemIds); }; - validators.add(validator); } - LOGGER.debug("Configured {} semantic validator(s)", validators.size()); - return validators; + return null; + } + + private static List prepareOrganSystemIds(List organSystems) { + LOGGER.trace("Found {} organ system IDs: {}", organSystems.size(), organSystems.stream() + .collect(Collectors.joining(", ", "{", "}"))); + List organSystemIds = organSystems.stream() + .map(toTermId()) + .flatMap(Optional::stream) + .toList(); + LOGGER.trace("{} organ system IDs are valid term IDs: {}", organSystemIds.size(), + organSystemIds.stream() + .map(TermId::getValue) + .collect(Collectors.joining(", ", "{", "}"))); + return organSystemIds; + } + + /** + * @return a function that maps a {@link String} into a {@link TermId} or emits a warning if the value + * cannot be mapped. 
+ */ + private static Function> toTermId() { + return value -> { + try { + return Optional.of(TermId.of(value)); + } catch (PhenolRuntimeException e) { + LOGGER.warn("Invalid term ID {}", value); + return Optional.empty(); + } + }; } } \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/BaseValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/BaseValidateCommand.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/BaseValidateCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/BaseValidateCommand.java index 9cbb28a1..d4ecad43 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/BaseValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/BaseValidateCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import com.google.protobuf.MessageOrBuilder; import org.monarchinitiative.phenol.io.OntologyLoader; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateCohortCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java similarity index 95% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateCohortCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java index f04288da..cd9f5ece 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateCohortCommand.java +++ 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateFamilyCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java similarity index 95% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateFamilyCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java index edc7cd65..ba0ce5ef 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateFamilyCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidatePhenopacketCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java similarity index 96% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidatePhenopacketCommand.java rename to 
phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java index 65bce015..da2fcec8 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidatePhenopacketCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java similarity index 93% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java index 11219cf0..9cbeb206 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.ga4gh.vrsatile.v1.GeneDescriptor; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; @@ -10,7 +10,7 @@ import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; import static org.phenopackets.phenopackettools.builder.constants.Assays.creatineKinaseActivity; -import static 
org.phenopackets.phenopackettools.builder.constants.Response.favorable; +import static org.phenopackets.phenopackettools.builder.constants.Response.favorableResponse; import static org.phenopackets.phenopackettools.builder.constants.SpatialPattern.generalized; import static org.phenopackets.phenopackettools.builder.constants.Unit.*; @@ -179,30 +179,30 @@ private Interpretation aved() { private List getPhenotypicFeatures() { String iso8601age = "P16Y"; var pf1 = PhenotypicFeatureBuilder.builder("HP:0002066","Gait ataxia") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf2 = PhenotypicFeatureBuilder.builder("HP:0001308","Tongue fasciculations") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf3 = PhenotypicFeatureBuilder.builder("HP:0002080","Intention tremor") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf4 = PhenotypicFeatureBuilder.builder("HP:0002075","Dysdiadochokinesis") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf5 = PhenotypicFeatureBuilder.builder("HP:0001251","Ataxia") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf6 = PhenotypicFeatureBuilder.builder("HP:0001284","Areflexia") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf7 = PhenotypicFeatureBuilder.builder("HP:0011448","Ankle clonus") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf8 = PhenotypicFeatureBuilder.builder("HP:0003690","Limb muscle weakness") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf9 = PhenotypicFeatureBuilder.builder("HP:0003474","Somatic sensory dysfunction") .excluded() - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf10 = PhenotypicFeatureBuilder.builder("HP:0002599","Head titubation") .excluded() - .isoISO8601onset(iso8601age).build(); + 
.iso8601onset(iso8601age).build(); var pf11 = PhenotypicFeatureBuilder.builder("HP:0031910","Abnormal cranial nerve physiology") .excluded() - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); return List.of(pf1,pf2,pf3, pf4, pf5, pf6, pf7, pf8, pf9, pf10,pf11); } @@ -217,7 +217,7 @@ private MedicalAction vitaminEtreatment() { OntologyClass vitE = ontologyClass("DrugCentral:257", "Vitamin E"); TreatmentBuilder tbuilder = TreatmentBuilder.oralAdministration(vitE); return MedicalActionBuilder.builder(tbuilder.build()) - .responseToTreatment(favorable()) + .responseToTreatment(favorableResponse()) .build(); } @@ -227,11 +227,11 @@ private MedicalAction vitaminEtreatment() { private List getMedicalHistory() { String iso8601age = "P10Y"; var gaitDisturbance = PhenotypicFeatureBuilder.builder("HP:0001288", "Gait disturbance") - .isoISO8601onset(iso8601age) + .iso8601onset(iso8601age) .build(); var weakness = PhenotypicFeatureBuilder.builder("HP:0001324", "Muscle weakness") .addModifier(generalized()) - .isoISO8601onset(iso8601age) + .iso8601onset(iso8601age) .build(); return List.of(gaitDisturbance, weakness); } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java similarity index 90% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java index d7bb1d11..e4683464 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java @@ -1,10 +1,11 @@ -package org.phenopackets.phenopackettools.examples; +package 
org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.phenopackettools.builder.constants.Status; import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.GenomicInterpretation; import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; @@ -24,17 +25,9 @@ public BethlehamMyopathy() { .addResource(Resources.genoVersion("2020-03-08")) .addExternalReference(authorAssertion.getReference()) .build(); - var variationDescriptor = - VariationDescriptorBuilder.builder("variant id") - .heterozygous() - .hgvs("NM_001848.2:c.877G>A") - .build(); - var col6a1VariantInterpretation = - VariantInterpretationBuilder.of(variationDescriptor, Status.pathogenic()); - var genomicInterpretation = - GenomicInterpretationBuilder.builder(INTERPRETATION_ID) - .causative() - .variantInterpretation(col6a1VariantInterpretation).build(); + + var genomicInterpretation = COL6A1variant(); + var diagnosis = DiagnosisBuilder.builder(bethlehamMyopathy).addGenomicInterpretation(genomicInterpretation).build(); var interpretation = InterpretationBuilder.builder(INTERPRETATION_ID).completed(diagnosis); var ventricularSeptalDefect = @@ -100,6 +93,22 @@ public BethlehamMyopathy() { .build(); } + + private GenomicInterpretation COL6A1variant() { + var variationDescriptor = + VariationDescriptorBuilder.builder("variant id") + .heterozygous() + .hgvs("NM_001848.2:c.877G>A") + .geneContext(GeneDescriptorBuilder.of("HGNC:2211", "COL6A1")) + .vcfHg38("chr21",45989626, "G","A") + .build(); + var col6a1VariantInterpretation = + VariantInterpretationBuilder.of(variationDescriptor, Status.pathogenic()); + return GenomicInterpretationBuilder.builder(INTERPRETATION_ID) + .causative() + .variantInterpretation(col6a1VariantInterpretation).build(); + } + @Override public Phenopacket getPhenopacket() { 
return phenopacket; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Covid.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Covid.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java index 39590dac..d107a384 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Covid.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/DuchenneExon51Deletion.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/DuchenneExon51Deletion.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java index eb98ae33..4ef12c92 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/DuchenneExon51Deletion.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/FamilyWithPedigree.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/FamilyWithPedigree.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java index b00d2ca8..9c23f284 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/FamilyWithPedigree.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.FamilyBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Holoprosencephaly5.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Holoprosencephaly5.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java index 2b41fdf9..aeb86e0f 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Holoprosencephaly5.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.ga4gh.vrsatile.v1.GeneDescriptor; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Marfan.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java similarity index 97% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Marfan.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java index 728aa8f4..e401c7ed 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Marfan.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/NemalineMyopathyPrenatal.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/NemalineMyopathyPrenatal.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java index 8e7471cc..eb7a00e6 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/NemalineMyopathyPrenatal.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.ga4gh.vrs.v1.Variation; import org.ga4gh.vrsatile.v1.Expression; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PhenopacketExample.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PhenopacketExample.java similarity index 67% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PhenopacketExample.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PhenopacketExample.java index bef790d8..ac0c0934 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PhenopacketExample.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PhenopacketExample.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.schema.v2.Phenopacket; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PneumothoraxSecondaryToCOVID.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PneumothoraxSecondaryToCOVID.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PneumothoraxSecondaryToCOVID.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PneumothoraxSecondaryToCOVID.java index c063181c..d24fbeb8 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PneumothoraxSecondaryToCOVID.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PneumothoraxSecondaryToCOVID.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Pseudoexfoliation.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java 
similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Pseudoexfoliation.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java index ed544b26..b1036f4c 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Pseudoexfoliation.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.schema.v2.Phenopacket; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Retinoblastoma.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java similarity index 90% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Retinoblastoma.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java index c731cce2..0a28729d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Retinoblastoma.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java @@ -1,5 +1,7 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; +import org.ga4gh.vrs.v1.*; +import org.ga4gh.vrs.v1.Number; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.phenopackettools.builder.constants.Laterality; @@ -71,10 +73,10 @@ Interpretation interpretation() { * @return Genomic interpretation related to a somatic missense mutation in the RB1 gene. 
*/ GenomicInterpretation somaticRb1Missense() { - AlleleBuilder abuilder = AlleleBuilder.builder(); - abuilder.sequenceId("refseq:NC_000013.11"); - abuilder.interbaseStartEnd( 48367511, 48367512); - abuilder.altAllele("T"); + AlleleBuilder abuilder = AlleleBuilder.builder() + .sequenceId("refseq:NC_000013.11") + .interbaseStartEnd( 48367511, 48367512) + .altAllele("T"); VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("rs121913300") .variation(abuilder.buildVariation()) .genomic() @@ -100,15 +102,31 @@ GenomicInterpretation somaticRb1Missense() { GenomicInterpretation germlineRb1Deletion() { - CopyNumberBuilder abuilder = CopyNumberBuilder.builder(); + CopyNumber cnv = CopyNumber.newBuilder() + .setDerivedSequenceExpression(DerivedSequenceExpression.newBuilder() + .setLocation(SequenceLocation.newBuilder() + .setSequenceId("refseq:NC_000013.14") + .setSequenceInterval(SequenceInterval.newBuilder() + .setStartNumber(Number.newBuilder(). + setValue(25981249) + .build()) + .setEndNumber(Number.newBuilder() + .setValue(61706822) + .build()) + .build()) + .build()) + .build()) + .setNumber(Number.newBuilder().setValue(1).build()) + .build(); //abuilder.copyNumberId("ga4gh:VCN.AFfJws1M4Lg8w1O3XknmHYc9TU2hHYpp"); // original coordinates in paper were given as 13q12.13q21.2(26,555,387–62,280,955 for hg19 //chr13 25981249 61706822 -- lifted over to hg38 + Variation variation = Variation.newBuilder() + .setCopyNumber(cnv) + .build(); - abuilder.alleleLocation("refseq:NC_000013.14",25981249, 61706822);//VRS uses inter-residue coordinates - abuilder.oneCopy(); VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder(); - vbuilder.variation(abuilder.buildVariation()); + vbuilder.variation(variation); vbuilder.mosaicism(40.0); VariantInterpretationBuilder vibuilder = VariantInterpretationBuilder.builder(vbuilder); vibuilder.pathogenic(); diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java index 8947bdf3..25255a24 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; @@ -9,7 +9,7 @@ import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; import static org.phenopackets.phenopackettools.builder.constants.MedicalActions.*; -import static org.phenopackets.phenopackettools.builder.constants.Response.favorable; +import static org.phenopackets.phenopackettools.builder.constants.Response.favorableResponse; import static org.phenopackets.phenopackettools.builder.constants.Unit.*; /** @@ -86,7 +86,7 @@ List previousTreatments() { var metformin = ontologyClass( "DrugCentral:1725", "metformin"); var fiveHundredMg = QuantityBuilder.builder(milligram(), 500).build(); var metforminAction = MedicalActionBuilder - .oralAdministration(metformin, fiveHundredMg, threetimesDaily(), interval) + .oralAdministration(metformin, fiveHundredMg, threeTimesDaily(), interval) .build(); return List.of(atorvastatinAction, aspirinAction, 
ramiprilAction, metforminAction); } @@ -110,7 +110,7 @@ private MedicalAction treatment() { TimeInterval interval = TimeIntervalBuilder.of("2020-09-02", "2021-03-02"); return MedicalActionBuilder .intravenousAdministration(ivIg, quantity, everySixWeeks, interval) - .responseToTreatment(favorable()) + .responseToTreatment(favorableResponse()) .build(); } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SquamousCellCancer.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SquamousCellCancer.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SquamousCellCancer.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SquamousCellCancer.java index 1a205f55..83c94428 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SquamousCellCancer.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SquamousCellCancer.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/UrothelialCancer.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/UrothelialCancer.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java index c8ed23b5..a5adac4a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/UrothelialCancer.java +++ 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/WarburgMicroSyndrome.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/WarburgMicroSyndrome.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/WarburgMicroSyndrome.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/WarburgMicroSyndrome.java index 43881115..5e5115f2 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/WarburgMicroSyndrome.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/WarburgMicroSyndrome.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java similarity index 89% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java index 1040a24a..8676dacc 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java +++ 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.writer; +package org.phenopackets.phenopackettools.cli.writer; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; @@ -27,18 +27,21 @@ public class CSVValidationResultsWriter implements ValidationResultsWriter { private final OutputStream os; private final String phenopacketToolsVersion; private final LocalDateTime dateTime; + private final boolean printHeader; /** * Create the writer using a given {@link OutputStream}. Note that the {@link OutputStream} is not closed. * * @param os where to write to * @param phenopacketToolsVersion phenopacket tools version - * @param dateTime + * @param dateTime the time of validation + * @param printHeader print header into the output */ - public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime) { + public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime, boolean printHeader) { this.os = os; this.phenopacketToolsVersion = phenopacketToolsVersion; this.dateTime = dateTime; + this.printHeader = printHeader; } @Override @@ -50,7 +53,10 @@ public void writeValidationResults(List validators, List results, CSVPrinter printer) throws for (ValidatorInfo validator : results) { printer.printComment("validator_id=%s;validator_name=%s;description=%s".formatted(validator.validatorId(), validator.validatorName(), validator.description())); } + + // Print column names + printer.printRecord("PATH", "LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); } private static void printValidationResults(List results, CSVPrinter printer) throws IOException { - // Header - printer.printRecord("PATH", "LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); - // Validation results for (ValidationResultsAndPath rp : results) { String path = rp.path() == null ? 
"-" : rp.path().toAbsolutePath().toString(); for (ValidationResult result : rp.results().validationResults()) { diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java deleted file mode 100644 index 183f2013..00000000 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java +++ /dev/null @@ -1,180 +0,0 @@ -package org.phenopackets.phenopackettools.command; - -import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; -import org.phenopackets.phenopackettools.util.format.FormatSniffException; -import org.phenopackets.phenopackettools.util.format.FormatSniffer; -import org.phenopackets.phenopackettools.util.format.PhenopacketElement; -import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; -import org.phenopackets.schema.v1.Cohort; -import org.phenopackets.schema.v1.Family; -import org.phenopackets.schema.v1.Phenopacket; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import picocli.CommandLine; - -import java.io.*; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; - -/** - * A command that provides routines for reading as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s - * for processing of a single top-level Phenopacket schema element. 
- */ -public abstract class BaseIOCommand extends BaseCommand { - - private static final Logger LOGGER = LoggerFactory.getLogger(BaseIOCommand.class); - - @CommandLine.ArgGroup(validate = false, heading = "Inputs:%s") - public InputSection inputSection = new InputSection(); - - public static class InputSection { - @CommandLine.Option(names = {"-i", "--input"}, - arity = "0..*", - description = "Input phenopacket(s).%nLeave empty for STDIN") - public List inputs = null; - - // The format will be sniffed if it is uninitialized. - @CommandLine.Option(names = {"-f", "--format"}, - description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") - public PhenopacketFormat format = null; - - // TODO - is it too hard to implement element sniffing? - @CommandLine.Option(names = {"-e", "--element"}, - description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") - public PhenopacketElement element = null; - } - - /** - * Attempt to read the input in the provided {@code schemaVersion} and exit upon any failure. As a side effect, - * {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#format} - * and {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} - * fields are set after the function returns. - *

- * Note that the function does not return if reading fails. - */ - protected List readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) { - if (inputSection.inputs == null) { - // Assuming a single input is coming from STDIN - InputStream is = System.in; - try { - setFormatAndElement(is); - return List.of(new MessageAndPath(parseMessage(schemaVersion, is), null)); - } catch (FormatSniffException e) { - System.err.println("Unable to detect input format from STDIN.\nConsider using the `--format` option."); - System.exit(1); - } catch (IOException e) { - System.err.println("Unable to read STDIN: " + e.getMessage() + "\nPlease check the input format."); - System.exit(1); - } - } else { - // Assuming a one or more input are provided via `-i | --input`. - - // Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied. - assert !inputSection.inputs.isEmpty(); - - List messages = new ArrayList<>(); - for (Path input : inputSection.inputs) { - try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { - setFormatAndElement(is); - Message message = parseMessage(schemaVersion, is); - messages.add(new MessageAndPath(message, input)); - } catch (FormatSniffException e) { - System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); - System.exit(1); - } catch (IOException e) { - System.err.printf("Unable to read input file %s: %s\nPlease check the input format.%n", input.toAbsolutePath(), e.getMessage()); - System.exit(1); - } - } - return messages; - } - return null; // Cannot happen but to make the compiler happy... - } - - private void setFormatAndElement(InputStream is) throws IOException, FormatSniffException { - PhenopacketFormat sniffed = parseFormat(is); - if (inputSection.format == null) { - inputSection.format = sniffed; - } else { - if (!inputSection.format.equals(sniffed)) - // This can happen e.g. 
if processing multiple files at once but one turns out to be a different format. - // We emit warning because this is likely not what the user intended and the code will likely explode - // further downstream. - LOGGER.warn("Input format is set to {} but the current input looks like {}", inputSection.format, sniffed); - } - - if (inputSection.element == null) { - LOGGER.info("Input element type (-e | --element) was not provided, assuming phenopacket.."); - inputSection.element = PhenopacketElement.PHENOPACKET; - } - } - - private Message parseMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - return switch (inputSection.format) { - case PROTOBUF -> readProtobufMessage(schemaVersion, is); - case JSON -> readJsonMessage(schemaVersion, is); - // TODO - implement YAML parsing - case YAML -> throw new RuntimeException("YAML parser is not yet implemented"); - }; - } - - private Message readProtobufMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - LOGGER.debug("Reading protobuf message"); - return switch (schemaVersion) { - case V1 -> switch (inputSection.element) { - case PHENOPACKET -> Phenopacket.parseFrom(is); - case FAMILY -> Family.parseFrom(is); - case COHORT -> Cohort.parseFrom(is); - }; - case V2 -> switch (inputSection.element) { - - case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.parseFrom(is); - case FAMILY -> org.phenopackets.schema.v2.Family.parseFrom(is); - case COHORT -> org.phenopackets.schema.v2.Cohort.parseFrom(is); - }; - }; - } - - private Message readJsonMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - LOGGER.debug("Reading JSON message"); - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); - Message.Builder builder = prepareBuilder(schemaVersion, inputSection.element); - JsonFormat.parser().merge(reader, builder); - return builder.build(); - } - - private static Message.Builder 
prepareBuilder(PhenopacketSchemaVersion schemaVersion, PhenopacketElement element) { - return switch (schemaVersion) { - case V1 -> switch (element) { - case PHENOPACKET -> org.phenopackets.schema.v1.Phenopacket.newBuilder(); - case FAMILY -> org.phenopackets.schema.v1.Family.newBuilder(); - case COHORT -> org.phenopackets.schema.v1.Cohort.newBuilder(); - }; - case V2 -> switch (element) { - case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.newBuilder(); - case FAMILY -> org.phenopackets.schema.v2.Family.newBuilder(); - case COHORT -> org.phenopackets.schema.v2.Cohort.newBuilder(); - }; - }; - } - - private PhenopacketFormat parseFormat(InputStream is) throws IOException, FormatSniffException { - if (inputSection.format == null) { - LOGGER.info("Input format was not provided, making an educated guess.."); - PhenopacketFormat fmt = FormatSniffer.sniff(is); - LOGGER.info("The input looks like a {} file", fmt); - return fmt; - } - return inputSection.format; - } - - protected record MessageAndPath(Message message, Path path) {} - - protected enum PhenopacketSchemaVersion { - V1, - V2; - } -} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java deleted file mode 100644 index e46b69e3..00000000 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ /dev/null @@ -1,136 +0,0 @@ -package org.phenopackets.phenopackettools.command; - - - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; -import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; - -import 
org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; -import org.phenopackets.phenopackettools.examples.*; -import picocli.CommandLine; -import picocli.CommandLine.Command; - -import java.io.BufferedWriter; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.concurrent.Callable; - -@Command(name = "examples", - mixinStandardHelpOptions = true, - sortOptions = false, - description = "Write example phenopackets to a directory.") -public class ExamplesCommand extends BaseCommand { - - @CommandLine.Option(names = {"-o", "--output"}, - description = "Output directory (default: ${DEFAULT-VALUE})") - public Path output = Path.of("."); - - - @Override - public Integer call() throws Exception { - printBanner(); - - Path phenopacketDir = createADirectoryIfDoesNotExist(output.resolve("phenopackets")); - Path familyDir = createADirectoryIfDoesNotExist(output.resolve("families")); - Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); - - try { - // Phenopackets - output(new AtaxiaWithVitaminEdeficiency().getPhenopacket(), phenopacketDir, "AVED"); - output(new BethlehamMyopathy().getPhenopacket(), phenopacketDir, "bethleham-myopathy"); - output(new Holoprosencephaly5().getPhenopacket(), phenopacketDir, "holoprosencephaly5"); - output(new Marfan().getPhenopacket(), phenopacketDir, "marfan"); - output(new NemalineMyopathyPrenatal().getPhenopacket(), phenopacketDir, "nemalineMyopathy"); - output(new Pseudoexfoliation().getPhenopacket(), phenopacketDir,"pseudoexfoliation"); - output(new DuchenneExon51Deletion().getPhenopacket(), phenopacketDir, "duchenne"); - output(new SquamousCellCancer().getPhenopacket(), phenopacketDir, "squamous-cell-esophageal-carcinoma"); - output(new UrothelialCancer().getPhenopacket(), phenopacketDir, "urothelial-cancer"); - output(new Covid().getPhenopacket(), phenopacketDir, "covid"); - output(new 
Retinoblastoma().getPhenopacket(), phenopacketDir, "retinoblastoma"); - output(new WarburgMicroSyndrome().getPhenopacket(), phenopacketDir, "warburg-micro-syndrome"); - output(new SevereStatinInducedAutoimmuneMyopathy().getPhenopacket(), phenopacketDir, "statin-myopathy"); - - // Families - outputFamily(new FamilyWithPedigree().getFamily(), familyDir, "family"); - - // Cohorts - // TODO - write a cohort - - } catch (Exception e) { - System.err.println(e.getMessage()); - return 1; - } - return 0; - } - - private static Path createADirectoryIfDoesNotExist(Path path) throws IOException { - return Files.exists(path) - ? path - : Files.createDirectories(path); - } - - private static void output(Message phenopacket, Path outDir, String basename) { - String yamlName = basename + ".yml"; - outputYamlPhenopacket(phenopacket, outDir, yamlName); - String jsonName = basename + ".json"; - outputPhenopacket(phenopacket, outDir,jsonName); - } - - private static void outputPhenopacket(Message phenopacket, Path outdir,String fileName) { - outputJsonMessage(phenopacket, outdir, fileName); - } - - private static void outputYamlPhenopacket(Message phenopacket, Path outdir, String fileName) { - outputYamlMessage(phenopacket, outdir, fileName, "phenopacket"); - - } - - private static void outputFamily(Message family, Path outDir, String basename) { - String yamlName = basename + ".yml"; - outputYamlFamily(family, outDir, yamlName); - String jsonName = basename + ".json"; - outputJsonFamily(family, outDir,jsonName); - } - - private static void outputJsonFamily(Message family, Path outDir, String jsonName) { - outputJsonMessage(family, outDir, jsonName); - } - - private static void outputYamlFamily(Message family, Path outDir, String yamlName) { - outputYamlMessage(family, outDir, yamlName, "family"); - } - - private static void outputJsonMessage(Message message, Path outDir, String fileName) { - Path path = outDir.resolve(fileName); - try (BufferedWriter writer = 
Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { - String json = JsonFormat.printer().print(message); - writer.write(json); - } catch (IOException e) { - throw new PhenotoolsRuntimeException(e.getMessage()); - } - } - - private static void outputYamlMessage(Message family, Path outDir, String yamlName, String messageName) { - Path path = outDir.resolve(yamlName); - ObjectMapper mapper = new ObjectMapper(new YAMLFactory().disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER)); - try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { - String jsonString = JsonFormat.printer().print(family); - JsonNode jsonNodeTree = new ObjectMapper().readTree(jsonString); - JsonNode node = JsonNodeFactory.instance.objectNode().set(messageName, jsonNodeTree); - mapper.writeValue(writer, node); - } catch (IOException e) { - throw new PhenotoolsRuntimeException(e.getMessage()); - } - } - - - - -} diff --git a/phenopacket-tools-cli/src/main/resources/logback.xml b/phenopacket-tools-cli/src/main/resources/logback.xml index 7b5f00ec..6d55578e 100644 --- a/phenopacket-tools-cli/src/main/resources/logback.xml +++ b/phenopacket-tools-cli/src/main/resources/logback.xml @@ -3,19 +3,13 @@ - - INFO - System.err ${pattern} - - - - + \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/application.properties similarity index 100% rename from phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties rename to phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/application.properties diff --git a/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/banner.txt similarity index 100% rename from 
phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt rename to phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/banner.txt diff --git a/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java b/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/cli/command/BaseCommandTest.java similarity index 78% rename from phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java rename to phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/cli/command/BaseCommandTest.java index 9074843a..ca329296 100644 --- a/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java +++ b/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/cli/command/BaseCommandTest.java @@ -1,11 +1,11 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import org.junit.jupiter.api.Test; import static org.hamcrest.MatcherAssert.*; import static org.hamcrest.Matchers.*; -public class BasePTCommandTest { +public class BaseCommandTest { @Test public void markIsSupportedForStdin() { diff --git a/phenopacket-tools-converter/pom.xml b/phenopacket-tools-converter/pom.xml index c8628e2a..2d48914e 100644 --- a/phenopacket-tools-converter/pom.xml +++ b/phenopacket-tools-converter/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7 phenopacket-tools-converter @@ -22,10 +22,6 @@ org.phenopackets phenopacket-schema - - com.google.protobuf - protobuf-java - org.phenopackets.phenopackettools diff --git a/phenopacket-tools-converter/src/main/java/module-info.java b/phenopacket-tools-converter/src/main/java/module-info.java index 1eed0a0f..5d4a9fce 100644 --- a/phenopacket-tools-converter/src/main/java/module-info.java +++ b/phenopacket-tools-converter/src/main/java/module-info.java @@ 
-1,6 +1,11 @@ +/** + * A module for converting between the {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion}s. + */ module org.phenopackets.phenopackettools.converter { requires transitive org.phenopackets.schema; + requires org.phenopackets.phenopackettools.core; requires org.phenopackets.phenopackettools.builder; + requires org.slf4j; exports org.phenopackets.phenopackettools.converter.converters; } \ No newline at end of file diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java index adafb39e..397d6e29 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java @@ -12,7 +12,7 @@ * assuming all {@link org.phenopackets.schema.v1.core.Variant}s are * {@link org.phenopackets.schema.v2.core.GenomicInterpretation.InterpretationStatus#CAUSATIVE}. For this to work, * there must be exactly one {@link org.phenopackets.schema.v1.core.Disease} in the phenopacket, otherwise - * a {@link org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException} is thrown. + * a {@link org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException} is thrown. 
*/ public interface V1ToV2Converter { diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java index 7d9b9cd5..14934cfe 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java @@ -2,13 +2,15 @@ import org.ga4gh.vrsatile.v1.VariationDescriptor; import org.phenopackets.phenopackettools.builder.builders.*; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v1.core.Variant; import org.phenopackets.schema.v2.Cohort; import org.phenopackets.schema.v2.Family; import org.phenopackets.schema.v2.Phenopacket; import org.phenopackets.schema.v2.core.Interpretation; import org.phenopackets.schema.v2.core.OntologyClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; import java.util.function.Function; @@ -27,6 +29,8 @@ */ class V1ToV2ConverterImpl implements V1ToV2Converter { + private static final Logger LOGGER = LoggerFactory.getLogger(V1ToV2ConverterImpl.class); + private final boolean convertVariants; V1ToV2ConverterImpl(boolean convertVariants) { @@ -49,8 +53,11 @@ public Phenopacket convertPhenopacket(org.phenopackets.schema.v1.Phenopacket phe builder.addAllBiosamples(toBiosamples(phenopacket.getBiosamplesList())); } - if (convertVariants) - builder.addInterpretations(toV2Interpretation(phenopacket)); + if (convertVariants) { + Interpretation interpretation = toV2Interpretation(phenopacket); + if (!Interpretation.getDefaultInstance().equals(interpretation)) + 
builder.addInterpretations(interpretation); + } if (phenopacket.getDiseasesCount() > 0) { builder.addAllDiseases(toDiseases(phenopacket.getDiseasesList())); @@ -123,7 +130,16 @@ private static Interpretation toV2Interpretation(org.phenopackets.schema.v1.Phen so we will use the v1 phenopacket id for the interpretation id. */ if (v1.getDiseasesCount() != 1) { - throw new PhenotoolsRuntimeException("Can only convert variants if there is exactly one disease in v1 phenopacket!"); + if (v1.getVariantsCount() == 0) { + // If there are no variants then we do not care about having exactly one disease. + // We can still create a meaningful phenopacket, however, this may be not what the user intended, + // and we'll warn. + LOGGER.warn("Unable to convert disease and variant data since there are no variants in phenopacket '{}'", v1.getId()); + return Interpretation.getDefaultInstance(); + } else { + // Non-empty variant list but not a single disease, we throw. + throw new PhenopacketToolsRuntimeException("Can only convert variants if there is exactly one disease in v1 phenopacket!"); + } } var v1disease = v1.getDiseases(0); @@ -190,7 +206,7 @@ private static Function toVariationDescriptor() { .build(); } // cannot ever happen, but if it does... 
- case ALLELE_NOT_SET -> throw new PhenotoolsRuntimeException("Did not recognize variant type"); + case ALLELE_NOT_SET -> throw new PhenopacketToolsRuntimeException("Did not recognize variant type"); }; }; } diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java new file mode 100644 index 00000000..144d0c37 --- /dev/null +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java @@ -0,0 +1,6 @@ +/** + * The package provides a {@link org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter} to convert + * from {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion#V1} + * to {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion#V2}. + */ +package org.phenopackets.phenopackettools.converter.converters; \ No newline at end of file diff --git a/phenopacket-tools-core/pom.xml b/phenopacket-tools-core/pom.xml new file mode 100644 index 00000000..46f0eab8 --- /dev/null +++ b/phenopacket-tools-core/pom.xml @@ -0,0 +1,14 @@ + + + 4.0.0 + + phenopacket-tools + org.phenopackets.phenopackettools + 0.4.7 + + + phenopacket-tools-core + + \ No newline at end of file diff --git a/phenopacket-tools-core/src/main/java/module-info.java b/phenopacket-tools-core/src/main/java/module-info.java new file mode 100644 index 00000000..ddf4c946 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/module-info.java @@ -0,0 +1,6 @@ +/** + * The module defines core concepts shared by (almost) all modules of phenopacket-tools. 
+ */ +module org.phenopackets.phenopackettools.core { + exports org.phenopackets.phenopackettools.core; +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketElement.java similarity index 95% rename from phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java rename to phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketElement.java index 852c8ddc..27d5cf62 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketElement.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.util.format; +package org.phenopackets.phenopackettools.core; import java.util.Arrays; import java.util.stream.Collectors; diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketFormat.java similarity index 96% rename from phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java rename to phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketFormat.java index d9020991..3db508a4 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketFormat.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.util.format; +package org.phenopackets.phenopackettools.core; import java.util.Arrays; import java.util.stream.Collectors; diff --git 
a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java new file mode 100644 index 00000000..6fa23845 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java @@ -0,0 +1,18 @@ +package org.phenopackets.phenopackettools.core; + +/** + * An enum with currently supported Phenopacket schema versions. + */ +public enum PhenopacketSchemaVersion { + + /** + * The version 1 of the GA4GH Phenopacket schema released in 2019 to elicit community response. + * The {@code V1} has been deprecated in favor of {@link #V2}. + */ + V1, + /** + * The version 2 of the GA4GH Phenopacket schema. This is the current version. + */ + V2 + +} diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java new file mode 100644 index 00000000..8f1f664a --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java @@ -0,0 +1,28 @@ +package org.phenopackets.phenopackettools.core; + +/** + * Base checked exception thrown by phenopacket-tools. 
+ */ +public class PhenopacketToolsException extends Exception { + + public PhenopacketToolsException() { + super(); + } + + public PhenopacketToolsException(String message) { + super(message); + } + + public PhenopacketToolsException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketToolsException(Throwable cause) { + super(cause); + } + + protected PhenopacketToolsException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java new file mode 100644 index 00000000..b67798c5 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java @@ -0,0 +1,28 @@ +package org.phenopackets.phenopackettools.core; + +/** + * Base unchecked exception thrown by phenopacket-tools. 
+ */ +public class PhenopacketToolsRuntimeException extends RuntimeException { + + public PhenopacketToolsRuntimeException() { + super(); + } + + public PhenopacketToolsRuntimeException(String message) { + super(message); + } + + public PhenopacketToolsRuntimeException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketToolsRuntimeException(Throwable cause) { + super(cause); + } + + protected PhenopacketToolsRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java new file mode 100644 index 00000000..0aa8cba5 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java @@ -0,0 +1,14 @@ +/** + * A package with constants and types used across the entire application, including the base exception classes. + *
<p>
+ * The package contains the base checked exception {@link org.phenopackets.phenopackettools.core.PhenopacketToolsException} + * and unchecked exception {@link org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException}. + *
<p>
+ * Several useful enumerations complete the circle: + *
<ul>
+ *     <li>{@link org.phenopackets.phenopackettools.core.PhenopacketElement}</li>
+ *     <li>{@link org.phenopackets.phenopackettools.core.PhenopacketFormat}</li>
+ *     <li>{@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion}</li>
+ * </ul>
+ */ +package org.phenopackets.phenopackettools.core; \ No newline at end of file diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml new file mode 100644 index 00000000..3768f2eb --- /dev/null +++ b/phenopacket-tools-io/pom.xml @@ -0,0 +1,49 @@ + + + 4.0.0 + + phenopacket-tools + org.phenopackets.phenopackettools + 0.4.7 + + + phenopacket-tools-io + + + + org.phenopackets.phenopackettools + phenopacket-tools-util + ${project.parent.version} + + + org.phenopackets + phenopacket-schema + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + + + com.google.protobuf + protobuf-java + + + com.google.protobuf + protobuf-java-util + + + + org.phenopackets.phenopackettools + phenopacket-tools-test + ${project.parent.version} + test + + + + \ No newline at end of file diff --git a/phenopacket-tools-io/src/main/java/module-info.java b/phenopacket-tools-io/src/main/java/module-info.java new file mode 100644 index 00000000..d5534857 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/module-info.java @@ -0,0 +1,15 @@ +/** + * A module for reading and writing top-level elements of Phenopacket Schema. 
+ */ +module org.phenopackets.phenopackettools.io { + requires org.phenopackets.phenopackettools.util; + + requires org.phenopackets.schema; + requires com.google.protobuf; + requires com.google.protobuf.util; + requires com.fasterxml.jackson.databind; + requires com.fasterxml.jackson.dataformat.yaml; + requires org.slf4j; + + exports org.phenopackets.phenopackettools.io; +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java new file mode 100644 index 00000000..3e6825d6 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java @@ -0,0 +1,30 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; + +class JsonPrinter implements PhenopacketPrinter { + + private static final JsonFormat.Printer PRINTER = JsonFormat.printer(); + + private static final JsonPrinter INSTANCE = new JsonPrinter(); + + static JsonPrinter getInstance() { + return INSTANCE; + } + + private JsonPrinter() { + } + + @Override + public void print(Message message, OutputStream os) throws IOException { + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + PRINTER.appendTo(message, writer); + writer.flush(); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java new file mode 100644 index 00000000..9ebc7a0b --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java @@ -0,0 +1,47 @@ +package org.phenopackets.phenopackettools.io; + +import 
com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.google.protobuf.Message; +import com.google.protobuf.MessageOrBuilder; +import com.google.protobuf.util.JsonFormat; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * A naive implementation of YAML printer that first prints the {@link MessageOrBuilder} into a JSON string, + * then decodes the string into {@link JsonNode} and prints as YAML document. + *
<p>
+ * This is, of course, not efficient. However, it works OK as a prototype printer. + */ +class NaiveYamlPrinter implements PhenopacketPrinter { + + private static final JsonFormat.Printer PB_PRINTER = JsonFormat.printer(); + + private static final NaiveYamlPrinter INSTANCE = new NaiveYamlPrinter(); + + static NaiveYamlPrinter getInstance() { + return INSTANCE; + } + + private final ObjectMapper jsonMapper; + private final ObjectMapper yamlMapper; + + private NaiveYamlPrinter() { + jsonMapper = new ObjectMapper(); + yamlMapper = YAMLMapper.builder() + .disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER) + .build(); + } + + @Override + public void print(Message message, OutputStream os) throws IOException { + String jsonString = PB_PRINTER.print(message); + JsonNode jsonNode = jsonMapper.readTree(jsonString); + yamlMapper.writeValue(os, jsonNode); + } + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java new file mode 100644 index 00000000..98a4452e --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java @@ -0,0 +1,79 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.util.format.FormatSniffer; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.util.format.SniffException; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +public interface PhenopacketParser { + + Message parse(PhenopacketFormat format, PhenopacketElement element, InputStream is) throws IOException; + + default Message parse(PhenopacketFormat format, PhenopacketElement element, Path 
path) throws IOException { + try (InputStream is = openInputStream(path)) { + return parse(format, element, is); + } + } + + /* ******************************************* CONVENIENCE METHODS ******************************************* */ + + // We need to detect the element. + + default Message parse(PhenopacketFormat format, InputStream is) throws IOException { + PhenopacketElement element = sniffElement(is); + return parse(format, element, is); + } + + default Message parse(PhenopacketFormat format, Path path) throws IOException { + try (InputStream is = openInputStream(path)) { + return parse(format, is); + } + } + + // We need to detect the format. + + default Message parse(PhenopacketElement element, InputStream is) throws IOException, SniffException { + PhenopacketFormat format = sniffFormat(is); + return parse(format, element, is); + } + + default Message parse(PhenopacketElement element, Path path) throws IOException, SniffException { + try (InputStream is = openInputStream(path)) { + return parse(element, is); + } + } + + // We need to detect both the format and the element. 
+ + default Message parse(InputStream is) throws IOException, SniffException { + PhenopacketFormat format = sniffFormat(is); + return parse(format, is); + } + + default Message parse(Path path) throws IOException, SniffException { + try (InputStream is = openInputStream(path)) { + return parse(is); + } + } + + /* ******************************************* UTILITY METHODS ******************************************* */ + + private static PhenopacketElement sniffElement(InputStream is) { + return PhenopacketElement.PHENOPACKET; // TODO - implement + } + + private static PhenopacketFormat sniffFormat(InputStream is) throws SniffException, IOException { + return FormatSniffer.sniff(is); + } + + private static BufferedInputStream openInputStream(Path path) throws IOException { + return new BufferedInputStream(Files.newInputStream(path)); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java new file mode 100644 index 00000000..7c29ba7c --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +public interface PhenopacketParserFactory { + + static PhenopacketParserFactory getInstance() { + return PhenopacketParserFactoryImpl.INSTANCE; + } + + /** + * Get a {@link PhenopacketParser} to parse phenopacket with given {@link PhenopacketSchemaVersion}. 
+ * + * @throws PhenopacketParserFactoryException if a {@link PhenopacketParser} for the given {@code version} + * is not available + */ + PhenopacketParser forFormat(PhenopacketSchemaVersion version) throws PhenopacketParserFactoryException; + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java new file mode 100644 index 00000000..d030dd23 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java @@ -0,0 +1,26 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; + +public class PhenopacketParserFactoryException extends PhenopacketToolsRuntimeException { + + public PhenopacketParserFactoryException() { + super(); + } + + public PhenopacketParserFactoryException(String message) { + super(message); + } + + public PhenopacketParserFactoryException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketParserFactoryException(Throwable cause) { + super(cause); + } + + protected PhenopacketParserFactoryException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java new file mode 100644 index 00000000..e24654a1 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import 
org.phenopackets.phenopackettools.io.v1.V1PhenopacketParser; +import org.phenopackets.phenopackettools.io.v2.V2PhenopacketParser; + +class PhenopacketParserFactoryImpl implements PhenopacketParserFactory { + + static final PhenopacketParserFactoryImpl INSTANCE = new PhenopacketParserFactoryImpl(); + + @Override + public PhenopacketParser forFormat(PhenopacketSchemaVersion version) throws PhenopacketParserFactoryException { + return switch (version) { + case V1 -> V1PhenopacketParser.INSTANCE; + case V2 -> V2PhenopacketParser.INSTANCE; + }; + } + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java new file mode 100644 index 00000000..878f44fc --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java @@ -0,0 +1,23 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * The implementors can serialize a top-level element of Phenopacket schema into provided {@link OutputStream}. 
+ */ +public interface PhenopacketPrinter { + + void print(Message message, OutputStream os) throws IOException; + + default void print(Message message, Path output) throws IOException { + try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(output))) { + print(message, os); + } + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java new file mode 100644 index 00000000..05a68c01 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +/** + * The implementors provide {@link PhenopacketPrinter}s for serializing top-level phenopacket elements + * into {@link PhenopacketFormat} using {@link PhenopacketSchemaVersion}. 
+ */ +public interface PhenopacketPrinterFactory { + + static PhenopacketPrinterFactory getInstance() { + return PhenopacketPrinterFactoryImpl.INSTANCE; + } + + PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, + PhenopacketFormat format) throws PhenopacketPrinterFactoryException; + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java new file mode 100644 index 00000000..225983cf --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java @@ -0,0 +1,26 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; + +public class PhenopacketPrinterFactoryException extends PhenopacketToolsRuntimeException { + + public PhenopacketPrinterFactoryException() { + super(); + } + + public PhenopacketPrinterFactoryException(String message) { + super(message); + } + + public PhenopacketPrinterFactoryException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketPrinterFactoryException(Throwable cause) { + super(cause); + } + + protected PhenopacketPrinterFactoryException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java new file mode 100644 index 00000000..55736f6f --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java @@ -0,0 +1,20 @@ +package org.phenopackets.phenopackettools.io; + +import 
com.google.protobuf.Message; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +class PhenopacketPrinterFactoryImpl implements PhenopacketPrinterFactory { + + static final PhenopacketPrinterFactoryImpl INSTANCE = new PhenopacketPrinterFactoryImpl(); + + @Override + public PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, PhenopacketFormat format) throws PhenopacketPrinterFactoryException { + return switch (format) { + case PROTOBUF -> Message::writeTo; + case JSON -> JsonPrinter.getInstance(); + case YAML -> NaiveYamlPrinter.getInstance(); + }; + } + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java new file mode 100644 index 00000000..4383228d --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java @@ -0,0 +1,55 @@ +package org.phenopackets.phenopackettools.io.base; + +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +public abstract class BasePhenopacketParser implements PhenopacketParser { + + private static final Logger LOGGER = LoggerFactory.getLogger(BasePhenopacketParser.class); + + @Override + public Message parse(PhenopacketFormat format, PhenopacketElement element, InputStream is) throws IOException { + return switch (format) { + case PROTOBUF -> { + LOGGER.debug("Reading protobuf message"); + yield 
readProtobufMessage(element, is); + } + case JSON -> { + LOGGER.debug("Reading JSON message"); + yield readJsonMessage(element, is); + } + case YAML -> { + LOGGER.debug("Reading YAML message"); + yield readYamlMessage(element, is); + } + }; + } + + protected abstract Message readProtobufMessage(PhenopacketElement element, InputStream is) throws IOException; + + private Message readJsonMessage(PhenopacketElement element, InputStream is) throws IOException { + // Not closing the BufferedReader as the InputStream should be closed. + BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + Message.Builder builder = prepareBuilder(element); + JsonFormat.parser().merge(reader, builder); + return builder.build(); + } + + protected abstract Message.Builder prepareBuilder(PhenopacketElement element); + + private Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException { + Message.Builder builder = prepareBuilder(element); + NaiveYamlParser.INSTANCE.deserializeYamlMessage(is, builder); + return builder.build(); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java new file mode 100644 index 00000000..c84a0028 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java @@ -0,0 +1,33 @@ +package org.phenopackets.phenopackettools.io.base; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A naive and inefficient implementation of YAML -> {@link Message} parsing that first maps YAML into JSON String + * and then decodes the JSON into {@link Message}. 
+ */
+class NaiveYamlParser {
+
+    private static final JsonFormat.Parser JSON_PARSER = JsonFormat.parser(); // protobuf JSON parser reused by every call
+
+    static final NaiveYamlParser INSTANCE = new NaiveYamlParser(); // created once here; the constructor is private
+    private final ObjectMapper yamlMapper; // reads YAML into a Jackson tree
+    private final ObjectMapper jsonMapper; // writes the Jackson tree out as a JSON string
+    private NaiveYamlParser() {
+        yamlMapper = new YAMLMapper();
+        jsonMapper = new ObjectMapper();
+    }
+
+    void deserializeYamlMessage(InputStream is, Message.Builder builder) throws IOException { // merges the YAML content into `builder`
+        JsonNode node = yamlMapper.readTree(is); // step 1: YAML -> tree
+        String jsonString = jsonMapper.writeValueAsString(node); // step 2: tree -> JSON text
+        JSON_PARSER.merge(jsonString, builder); // step 3: JSON text -> protobuf builder
+    }
+}
diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java
new file mode 100644
index 00000000..9d18e4ae
--- /dev/null
+++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java
@@ -0,0 +1,12 @@
+/**
+ * The {@code org.phenopackets.phenopackettools.io} package offers functionality for reading and writing
+ * top-level elements of Phenopacket Schema. The elements can be (de)serialized in any of the supported
+ * {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s.
+ * <p>
+ * The {@link org.phenopackets.phenopackettools.io.PhenopacketParserFactory}
+ * provides {@link org.phenopackets.phenopackettools.io.PhenopacketParser} for reading the schema elements.
+ * <p>
+ * Use {@link org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory} to get
+ * {@link org.phenopackets.phenopackettools.io.PhenopacketPrinter} for writing a top-level schema element.
+ */
+package org.phenopackets.phenopackettools.io;
\ No newline at end of file
diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java
new file mode 100644
index 00000000..3d4ab64b
--- /dev/null
+++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java
@@ -0,0 +1,39 @@
+package org.phenopackets.phenopackettools.io.v1;
+
+import com.google.protobuf.Message;
+import org.phenopackets.phenopackettools.io.base.BasePhenopacketParser;
+import org.phenopackets.phenopackettools.core.PhenopacketElement;
+import org.phenopackets.schema.v1.Cohort;
+import org.phenopackets.schema.v1.Family;
+import org.phenopackets.schema.v1.Phenopacket;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A {@link BasePhenopacketParser} bound to the {@code org.phenopackets.schema.v1} message classes
+ * ({@link Phenopacket}, {@link Family}, {@link Cohort}). A shared instance is available as {@link #INSTANCE}.
+ */
+public class V1PhenopacketParser extends BasePhenopacketParser {
+
+    public static final V1PhenopacketParser INSTANCE = new V1PhenopacketParser();
+
+    @Override
+    protected Message readProtobufMessage(PhenopacketElement element, InputStream is) throws IOException {
+        return switch (element) { // pick the generated v1 class whose parseFrom reads the stream
+            case PHENOPACKET -> Phenopacket.parseFrom(is);
+            case FAMILY -> Family.parseFrom(is);
+            case COHORT -> Cohort.parseFrom(is);
+        };
+    }
+
+    @Override
+    protected Message.Builder prepareBuilder(PhenopacketElement element) {
+        return switch (element) { // a fresh, empty v1 builder per call
+            case PHENOPACKET -> Phenopacket.newBuilder();
+            case FAMILY -> Family.newBuilder();
+            case COHORT -> Cohort.newBuilder();
+        };
+    }
+
+}
diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java
new
file mode 100644
index 00000000..1ace731d
--- /dev/null
+++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java
@@ -0,0 +1,38 @@
+package org.phenopackets.phenopackettools.io.v2;
+
+import com.google.protobuf.Message;
+import org.phenopackets.phenopackettools.io.base.BasePhenopacketParser;
+import org.phenopackets.phenopackettools.core.PhenopacketElement;
+import org.phenopackets.schema.v2.Cohort;
+import org.phenopackets.schema.v2.Family;
+import org.phenopackets.schema.v2.Phenopacket;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A {@link BasePhenopacketParser} bound to the {@code org.phenopackets.schema.v2} message classes
+ * ({@link Phenopacket}, {@link Family}, {@link Cohort}). A shared instance is available as {@link #INSTANCE}.
+ */
+public class V2PhenopacketParser extends BasePhenopacketParser {
+
+    public static final V2PhenopacketParser INSTANCE = new V2PhenopacketParser();
+
+    @Override
+    protected Message readProtobufMessage(PhenopacketElement element, InputStream is) throws IOException {
+        return switch (element) { // pick the generated v2 class whose parseFrom reads the stream
+            case PHENOPACKET -> Phenopacket.parseFrom(is);
+            case FAMILY -> Family.parseFrom(is);
+            case COHORT -> Cohort.parseFrom(is);
+        };
+    }
+
+    @Override
+    protected Message.Builder prepareBuilder(PhenopacketElement element) {
+        return switch (element) { // a fresh, empty v2 builder per call
+            case PHENOPACKET -> Phenopacket.newBuilder();
+            case FAMILY -> Family.newBuilder();
+            case COHORT -> Cohort.newBuilder();
+        };
+    }
+}
diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java
new file mode 100644
index 00000000..55dfc1d0
--- /dev/null
+++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java
@@ -0,0 +1,35 @@
+package org.phenopackets.phenopackettools.io;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.phenopackets.phenopackettools.test.TestData;
+import org.phenopackets.schema.v1.Cohort;
+import org.phenopackets.schema.v1.Family;
+import org.phenopackets.schema.v1.Phenopacket;
+
+import java.nio.file.Path;
+
+@Disabled // skipped by JUnit: the methods below only write YAML files and assert nothing
+public class NaiveYamlPrinterTest {
+
+    private final NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance();
+
+    @Test
+    public void printPhenopacket() throws Exception {
+        Phenopacket pp = TestData.V1.comprehensivePhenopacket();
+        printer.print(pp, Path.of("phenopacket.v1.yaml")); // relative path, resolved against the working directory
+    }
+
+    @Test
+    public void printFamily() throws Exception {
+        Family pp = TestData.V1.comprehensiveFamily();
+        printer.print(pp, Path.of("family.v1.yaml")); // relative path, resolved against the working directory
+    }
+
+    @Test
+    public void printCohort() throws Exception {
+        Cohort pp = TestData.V1.comprehensiveCohort();
+        printer.print(pp, Path.of("cohort.v1.yaml")); // relative path, resolved against the working directory
+    }
+
+}
\ No newline at end of file
diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java
new file mode 100644
index 00000000..f7415fa1
--- /dev/null
+++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java
@@ -0,0 +1,33 @@
+package org.phenopackets.phenopackettools.io;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion;
+
+import static org.hamcrest.MatcherAssert.*;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Checks that {@link PhenopacketParserFactoryImpl} hands out a non-null parser for each listed schema version.
+ */
+public class PhenopacketParserFactoryImplTest {
+
+    private PhenopacketParserFactoryImpl parserFactory;
+
+    @BeforeEach
+    public void setUp() {
+        parserFactory = PhenopacketParserFactoryImpl.INSTANCE;
+    }
+
+    @ParameterizedTest
+    @CsvSource({ // each row is converted by JUnit into the PhenopacketSchemaVersion argument
+            "V1",
+            "V2"
+    })
+    public void weHaveAParserForAllSchemaVersions(PhenopacketSchemaVersion version) {
+        PhenopacketParser parser = parserFactory.forFormat(version);
+        assertThat(parser, is(notNullValue()));
+    }
+
+}
\ No newline at end of file
diff --git
a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java new file mode 100644 index 00000000..f123327d --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java @@ -0,0 +1,9 @@ +package org.phenopackets.phenopackettools.io; + +import java.nio.file.Path; + +public class TestBase { + + public static final Path BASE_DIR = Path.of("src/test/resources/org/phenopackets/phenopackettools/io"); + +} diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java new file mode 100644 index 00000000..86fb9ed6 --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java @@ -0,0 +1,58 @@ +package org.phenopackets.phenopackettools.io.v1; + +import com.google.protobuf.Message; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.TestBase; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.schema.v1.Cohort; +import org.phenopackets.schema.v1.Family; +import org.phenopackets.schema.v1.Phenopacket; + +import java.nio.file.Path; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class V1PhenopacketParserTest { + + private static final Path BASE = TestBase.BASE_DIR.resolve("v1"); + + private PhenopacketParser parser; + + @BeforeEach + public void setUp() { + parser = V1PhenopacketParser.INSTANCE; + } + + @ParameterizedTest + @CsvSource({ + "PROTOBUF, 
PHENOPACKET, phenopacket.pb", + "PROTOBUF, FAMILY, family.pb", + "PROTOBUF, COHORT, cohort.pb", + " JSON, PHENOPACKET, phenopacket.json", + " JSON, FAMILY, family.json", + " JSON, COHORT, cohort.json", + " YAML, PHENOPACKET, phenopacket.yaml", + " YAML, FAMILY, family.yaml", + " YAML, COHORT, cohort.yaml", + }) + public void weGetExpectedClassForGivenFormatAndElement(PhenopacketFormat format, + PhenopacketElement element, + String fileName) throws Exception { + Message message = parser.parse(format, element, BASE.resolve(fileName)); + + assertThat(message, is(instanceOf(getClassForPhenopacketElement(element)))); + } + + private static Class getClassForPhenopacketElement(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.class; + case FAMILY -> Family.class; + case COHORT -> Cohort.class; + }; + } +} diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java new file mode 100644 index 00000000..41ed246d --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java @@ -0,0 +1,58 @@ +package org.phenopackets.phenopackettools.io.v2; + +import com.google.protobuf.Message; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.TestBase; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.schema.v2.Cohort; +import org.phenopackets.schema.v2.Family; +import org.phenopackets.schema.v2.Phenopacket; + +import java.nio.file.Path; + +import static org.hamcrest.MatcherAssert.*; +import static 
org.hamcrest.Matchers.*; + +public class V2PhenopacketParserTest { + + private static final Path BASE = TestBase.BASE_DIR.resolve("v2"); + + private PhenopacketParser parser; + + @BeforeEach + public void setUp() { + parser = V2PhenopacketParser.INSTANCE; + } + + @ParameterizedTest + @CsvSource({ + "PROTOBUF, PHENOPACKET, phenopacket.pb", + "PROTOBUF, FAMILY, family.pb", + "PROTOBUF, COHORT, cohort.pb", + " JSON, PHENOPACKET, phenopacket.json", + " JSON, FAMILY, family.json", + " JSON, COHORT, cohort.json", + " YAML, PHENOPACKET, phenopacket.yaml", + " YAML, FAMILY, family.yaml", + " YAML, COHORT, cohort.yaml", + }) + public void weGetExpectedClassForGivenFormatAndElement(PhenopacketFormat format, + PhenopacketElement element, + String fileName) throws Exception { + Message message = parser.parse(format, element, BASE.resolve(fileName)); + + assertThat(message, is(instanceOf(getClassForPhenopacketElement(element)))); + } + + private static Class getClassForPhenopacketElement(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.class; + case FAMILY -> Family.class; + case COHORT -> Cohort.class; + }; + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md new file mode 100644 index 00000000..85c3c57c --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md @@ -0,0 +1,8 @@ +# README + +The files in this folder correspond to comprehensive, albeit medically invalid, phenopacket elements: +- phenopacket +- family, or +- cohort. + +The content corresponds to the output of `TestData.V1.comprehensive*()` as of Oct 27th, 2022. 
diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json new file mode 100644 index 00000000..17ab00cb --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json @@ -0,0 +1,251 @@ +{ + "id": "comprehensive-cohort-id", + "description": "A description of the example cohort.", + "members": [{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": 
"P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } + }, { + "subject": { + "id": "MOTHER", + "sex": "FEMALE" + } + }, { + "subject": { + "id": "FATHER", + "sex": "MALE" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/FAM000001", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb new file mode 100644 index 00000000..5c57f5ee --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb @@ -0,0 +1,80 @@ + +comprehensive-cohort-id$A description of the example cohort. +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandd" +P14Y08B +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset" + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0" +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F* + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml new file mode 100644 index 00000000..5f9ac9c2 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml @@ -0,0 +1,196 @@ +id: "comprehensive-cohort-id" +description: "A description of the example cohort." +members: +- id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + ageAtCollection: + age: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + classOfOnset: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + negated: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + ageOfOnset: + age: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+ - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + classOfOnset: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + ageOfIndividualAtCollection: + age: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + genes: + - id: "HGNC1:3688" + symbol: "FGFR1" + variants: + - hgvsAllele: + hgvs: "NM_001848.2:c.877G>A" + zygosity: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + classOfOnset: + id: "HP:0003577" + label: "Congenital onset" + htsFiles: + - uri: "file://data/genomes/P000001C" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- subject: + id: "MOTHER" + sex: "FEMALE" +- subject: + id: "FATHER" + sex: "MALE" +htsFiles: +- uri: "file://data/genomes/FAM000001" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json new file mode 100644 index 00000000..b7ad2d73 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json @@ -0,0 +1,268 @@ +{ + "id": "comprehensive-family-id", + "proband": { + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, 
+ "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + 
"version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } + }, + "relatives": [{ + "subject": { + "id": "MOTHER", + "sex": "FEMALE" + } + }, { + "subject": { + "id": "FATHER", + "sex": "MALE" + } + }], + "pedigree": { + "persons": [{ + "individualId": "14 year-old boy", + "paternalId": "FATHER", + "maternalId": "MOTHER", + "sex": "MALE", + "affectedStatus": "AFFECTED" + }, { + "individualId": "MOTHER", + "sex": "FEMALE", + "affectedStatus": "UNAFFECTED" + }, { + "individualId": "FATHER", + "sex": "MALE", + "affectedStatus": "UNAFFECTED" + }] + }, + "htsFiles": [{ + "uri": "file://data/genomes/FAM000001", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": 
"http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb new file mode 100644 index 00000000..fb131057 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb @@ -0,0 +1,83 @@ + +comprehensive-family-id +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandd" +P14Y08B +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset" + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0"C +%14 year-old boyFATHER"MOTHER(0 + MOTHER(0 + FATHER(0* +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F2 + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml new file mode 100644 index 00000000..02f6a92c --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml @@ -0,0 +1,209 @@ +id: "comprehensive-family-id" +proband: + id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + ageAtCollection: + age: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + classOfOnset: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + negated: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + ageOfOnset: + age: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+ - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + classOfOnset: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + ageOfIndividualAtCollection: + age: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + genes: + - id: "HGNC1:3688" + symbol: "FGFR1" + variants: + - hgvsAllele: + hgvs: "NM_001848.2:c.877G>A" + zygosity: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + classOfOnset: + id: "HP:0003577" + label: "Congenital onset" + htsFiles: + - uri: "file://data/genomes/P000001C" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +relatives: +- subject: + id: "MOTHER" + sex: "FEMALE" +- subject: + id: "FATHER" + sex: "MALE" +pedigree: + persons: + - individualId: "14 year-old boy" + paternalId: "FATHER" + maternalId: "MOTHER" + sex: "MALE" + affectedStatus: "AFFECTED" + - individualId: "MOTHER" + sex: "FEMALE" + affectedStatus: "UNAFFECTED" + - individualId: "FATHER" + sex: "MALE" + affectedStatus: "UNAFFECTED" +htsFiles: +- uri: "file://data/genomes/FAM000001" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json new file mode 100644 index 00000000..e6848a9c --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json @@ -0,0 +1,189 @@ +{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": 
"PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + 
"hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb new file mode 100644 index 00000000..8fe66cc2 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb @@ -0,0 +1,60 @@ + +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandd" +P14Y08B +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset" + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml new file mode 100644 index 00000000..bafcc88b --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml @@ -0,0 +1,146 @@ +id: "comprehensive-phenopacket-id" +subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + ageAtCollection: + age: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" +phenotypicFeatures: +- type: + id: "HP:0001558" + label: "Decreased fetal movement" + classOfOnset: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: 
"PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + negated: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + ageOfOnset: + age: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + classOfOnset: + id: "HP:0011463" + label: "Childhood onset" +biosamples: +- id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + ageOfIndividualAtCollection: + age: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" +genes: +- id: "HGNC1:3688" + symbol: "FGFR1" +variants: +- hgvsAllele: + hgvs: "NM_001848.2:c.877G>A" + zygosity: + id: "GENO:0000135" + label: "heterozygous" +diseases: +- term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + classOfOnset: + id: "HP:0003577" + label: "Congenital onset" +htsFiles: +- uri: "file://data/genomes/P000001C" + description: "Whole genome sequencing VCF output" + htsFormat: 
"VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json new file mode 100644 index 00000000..e59170ac --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json @@ -0,0 +1,294 @@ +{ + "id": "comprehensive-cohort-id", + "description": "A description of the example cohort.", + "members": [{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + 
"label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } + }, { + "subject": { + "id": "MOTHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "FEMALE", + "taxonomy": { + } + } + }, { + "subject": { + "id": "FATHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "MALE", + "taxonomy": { + } + } + }], + "files": [{ + "uri": "file://data/genomes/FAM000001", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.pb new file mode 100644 index 00000000..0679cfa0 Binary files /dev/null and b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.pb differ diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml new file mode 100644 index 00000000..55e7969d --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml @@ -0,0 +1,222 @@ +id: "comprehensive-cohort-id" +description: "A description of the example cohort." +members: +- id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + timeAtLastEncounter: + age: + iso8601duration: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + onset: + ontologyClass: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + excluded: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+ - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + onset: + age: + iso8601duration: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + onset: + ontologyClass: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + timeOfCollection: + age: + iso8601duration: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + materialSample: + id: "EFO:0009655" + label: "abnormal sample" + interpretations: + - id: "comprehensive-phenopacket-id" + progressStatus: "SOLVED" + diagnosis: + disease: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + genomicInterpretations: + - subjectOrBiosampleId: "14 year-old boy" + interpretationStatus: "CAUSATIVE" + variantInterpretation: + variationDescriptor: + expressions: + - syntax: "hgvs" + value: "NM_001848.2:c.877G>A" + allelicState: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + onset: + ontologyClass: + id: "HP:0003577" + label: "Congenital onset" + files: + - uri: "file://data/genomes/P000001C" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + 
description: "Whole genome sequencing VCF output" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- subject: + id: "MOTHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "FEMALE" + taxonomy: {} +- subject: + id: "FATHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "MALE" + taxonomy: {} +files: +- uri: "file://data/genomes/FAM000001" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.json new file mode 100644 index 00000000..b29f296b --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.json @@ -0,0 +1,311 @@ +{ + "id": "comprehensive-family-id", + "proband": { + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author 
statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF 
output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } + }, + "relatives": [{ + "subject": { + "id": "MOTHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "FEMALE", + "taxonomy": { + } + } + }, { + "subject": { + "id": "FATHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "MALE", + "taxonomy": { + } + } + }], + "pedigree": { + "persons": [{ + "individualId": "14 year-old boy", + "paternalId": "FATHER", + "maternalId": "MOTHER", + "sex": "MALE", + "affectedStatus": "AFFECTED" + }, { + "individualId": "MOTHER", + "sex": "FEMALE", + "affectedStatus": "UNAFFECTED" + }, { + "individualId": "FATHER", + "sex": "MALE", + "affectedStatus": "UNAFFECTED" + }] + }, + "files": [{ + "uri": "file://data/genomes/FAM000001", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": 
"PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.pb new file mode 100644 index 00000000..cb79a8f1 Binary files /dev/null and b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.pb differ diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml new file mode 100644 index 00000000..02e2b78e --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml @@ -0,0 +1,235 @@ +id: "comprehensive-family-id" +proband: + id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + timeAtLastEncounter: + age: + iso8601duration: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + onset: + ontologyClass: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+ - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + excluded: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + onset: + age: + iso8601duration: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + onset: + ontologyClass: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + timeOfCollection: + age: + iso8601duration: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + materialSample: + id: "EFO:0009655" + label: "abnormal sample" + interpretations: + - id: "comprehensive-phenopacket-id" + progressStatus: "SOLVED" + diagnosis: + disease: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + genomicInterpretations: + - subjectOrBiosampleId: "14 year-old boy" + interpretationStatus: "CAUSATIVE" + variantInterpretation: + variationDescriptor: + expressions: + - syntax: "hgvs" + value: "NM_001848.2:c.877G>A" + allelicState: + id: "GENO:0000135" + label: 
"heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + onset: + ontologyClass: + id: "HP:0003577" + label: "Congenital onset" + files: + - uri: "file://data/genomes/P000001C" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+relatives: +- subject: + id: "MOTHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "FEMALE" + taxonomy: {} +- subject: + id: "FATHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "MALE" + taxonomy: {} +pedigree: + persons: + - individualId: "14 year-old boy" + paternalId: "FATHER" + maternalId: "MOTHER" + sex: "MALE" + affectedStatus: "AFFECTED" + - individualId: "MOTHER" + sex: "FEMALE" + affectedStatus: "UNAFFECTED" + - individualId: "FATHER" + sex: "MALE" + affectedStatus: "UNAFFECTED" +files: +- uri: "file://data/genomes/FAM000001" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json new file mode 100644 index 00000000..89d29db4 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json @@ -0,0 +1,220 @@ +{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + 
"label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb new file mode 100644 index 00000000..3acd16a2 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb @@ -0,0 +1,71 @@ + +comprehensive-phenopacket-id^ +14 year-old boyboypatientprobandd" + +P14Y08J +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movement2 + +HP:0011461 Fetal onsetB +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyB +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 + +P14YB +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.P + +HP:0001270 Motor delay" + +HP:0012825Mild2 + +HP:0011463Childhood onset* + biosample-id14 year-old boy" Muscle biopsy of 14 year-old boy*! 
+UBERON:0003403skin of forearmJ +NCBITaxon:9606 homo sapiensR + +P14YZ + NCIT:C38757Negative Findingb + +NCIT:C3677Benign Neoplasmj& + NCIT:C28076Disease Grade Qualifier + NCIT:C68748HER2/Neu Positive + EFO:0009655abnormal sample2 +comprehensive-phenopacket-idw + + OMIM:101600PFEIFFER SYNDROMES +14 year-old boy"><2 +hgvsNM_001848.2:c.877G>Ar + GENO:0000135 heterozygous:D + + OMIM:101600PFEIFFER SYNDROME  + +HP:0003577Congenital onsetR +file://data/genomes/P000001C +14 year-old boyP000001C +genomeAssembly +GRCh38.p13 + +fileFormatvcf1 + description"Whole genome sequencing VCF outputZ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_22.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml new file mode 100644 index 00000000..2a5986de --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml @@ -0,0 +1,165 @@ +id: "comprehensive-phenopacket-id" +subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + timeAtLastEncounter: + age: + iso8601duration: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" +phenotypicFeatures: +- type: + id: "HP:0001558" + label: "Decreased fetal movement" + onset: + ontologyClass: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + excluded: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + onset: + age: + iso8601duration: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+- type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + onset: + ontologyClass: + id: "HP:0011463" + label: "Childhood onset" +biosamples: +- id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + timeOfCollection: + age: + iso8601duration: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + materialSample: + id: "EFO:0009655" + label: "abnormal sample" +interpretations: +- id: "comprehensive-phenopacket-id" + progressStatus: "SOLVED" + diagnosis: + disease: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + genomicInterpretations: + - subjectOrBiosampleId: "14 year-old boy" + interpretationStatus: "CAUSATIVE" + variantInterpretation: + variationDescriptor: + expressions: + - syntax: "hgvs" + value: "NM_001848.2:c.877G>A" + allelicState: + id: "GENO:0000135" + label: "heterozygous" +diseases: +- term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + onset: + ontologyClass: + id: "HP:0003577" + label: "Congenital onset" +files: +- uri: "file://data/genomes/P000001C" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-test/pom.xml b/phenopacket-tools-test/pom.xml index 36a9d7bf..d0f5ce0e 100644 --- a/phenopacket-tools-test/pom.xml +++ b/phenopacket-tools-test/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.6 + 0.4.7 4.0.0 diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index 575e8e8f..7bb365de 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -5,10 +5,18 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.6 + 0.4.7 4.0.0 phenopacket-tools-util + + + org.phenopackets.phenopackettools + phenopacket-tools-core + ${project.parent.version} + + + \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/module-info.java b/phenopacket-tools-util/src/main/java/module-info.java index 9d20effb..6c8793a5 100644 --- a/phenopacket-tools-util/src/main/java/module-info.java +++ b/phenopacket-tools-util/src/main/java/module-info.java @@ -1,3 +1,9 @@ +/** + * A module with utility functions. 
+ */ module org.phenopackets.phenopackettools.util { + requires transitive org.phenopackets.phenopackettools.core; + requires org.slf4j; + exports org.phenopackets.phenopackettools.util.format; } \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java new file mode 100644 index 00000000..b90545f0 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java @@ -0,0 +1,25 @@ +package org.phenopackets.phenopackettools.util.format; + +public class ElementSniffException extends SniffException { + + public ElementSniffException() { + super(); + } + + public ElementSniffException(String message) { + super(message); + } + + public ElementSniffException(String message, Throwable cause) { + super(message, cause); + } + + public ElementSniffException(Throwable cause) { + super(cause); + } + + protected ElementSniffException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java new file mode 100644 index 00000000..84c33dcb --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java @@ -0,0 +1,88 @@ +package org.phenopackets.phenopackettools.util.format; + +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +
+import java.io.IOException; +import java.io.InputStream; + +/** + * Make an educated guess regarding which top-level element of Phenopacket schema is represented in the provided + * {@code byte[]} or {@link InputStream}. + */ +public class ElementSniffer { + + // Remove SLF4J from module-info if we omit logging. + private static final Logger LOGGER = LoggerFactory.getLogger(ElementSniffer.class); + + /** + * The number of bytes used for element sniffing. + */ + static final int BUFFER_SIZE = 32; + + private ElementSniffer() { + } + + /** + * Make an educated guess of {@link PhenopacketElement} present in given {@code input}. + * + * @param input an {@link InputStream} that supports {@link InputStream#mark(int)}. + * @param schemaVersion the {@link PhenopacketSchemaVersion} of the {@code input}. + * @param format the {@code input} format. + * @return the sniffed {@link PhenopacketElement}. + * @throws IOException in case an error occurs while reading the {@code input}. + * @throws SniffException if there are not enough bytes available in the {@code input} or if the {@code input} does not + * support {@link InputStream#mark(int)}. + */ + public static PhenopacketElement sniff(InputStream input, + PhenopacketSchemaVersion schemaVersion, + PhenopacketFormat format) throws IOException, SniffException { + return sniff(Util.getFirstBytesAndReset(input, BUFFER_SIZE), schemaVersion, format); + } + + /** + * Make an educated guess of {@link PhenopacketElement} based on given {@code payload}. + * + * @param payload buffer with at least the first {@link #BUFFER_SIZE} bytes of the input. + * @param schemaVersion the {@link PhenopacketSchemaVersion} of the {@code payload}. + * @param format the {@code payload} format. + * @return the sniffed {@link PhenopacketElement}. + * @throws ElementSniffException if {@code payload} contains fewer than {@link #BUFFER_SIZE} bytes.
+ */ + public static PhenopacketElement sniff(byte[] payload, + PhenopacketSchemaVersion schemaVersion, + PhenopacketFormat format) throws ElementSniffException { + if (payload.length < BUFFER_SIZE) + throw new ElementSniffException("Need at least %d bytes to sniff but got %d".formatted(BUFFER_SIZE, payload.length)); + + return switch (format) { + case PROTOBUF -> sniffProtobuf(payload, schemaVersion); + case JSON -> sniffJson(payload, schemaVersion); + case YAML -> sniffYaml(payload, schemaVersion); + }; + } + + private static PhenopacketElement sniffProtobuf(byte[] payload, PhenopacketSchemaVersion schemaVersion) { + // TODO - implement + LOGGER.debug("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + return PhenopacketElement.PHENOPACKET; + } + + private static PhenopacketElement sniffJson(byte[] payload, PhenopacketSchemaVersion schemaVersion) { + // TODO - implement + // TODO - reconsider the sniffing workflow. In case of loosely defined formats like JSON and YAML, + // the fields can be in any order and we may not get enough information. + // Is it OK to throw upon sniffing failure or an Optional is enough? 
+ LOGGER.debug("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + return PhenopacketElement.PHENOPACKET; + } + + private static PhenopacketElement sniffYaml(byte[] payload, PhenopacketSchemaVersion schemaVersion) { + // TODO - implement + LOGGER.debug("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + return PhenopacketElement.PHENOPACKET; + } +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java index 3cc3a382..c33b276d 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java @@ -3,7 +3,7 @@ /** * An exception thrown when sniffing of the top-level element of Phenopacket schema cannot be performed. 
*/ -public class FormatSniffException extends Exception { +public class FormatSniffException extends ElementSniffException { public FormatSniffException() { super(); diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java index 0d4984d6..9d6b7376 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java @@ -1,5 +1,7 @@ package org.phenopackets.phenopackettools.util.format; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; + import java.io.IOException; import java.io.InputStream; @@ -48,21 +50,7 @@ public static PhenopacketFormat sniff(byte[] payload) throws FormatSniffExceptio - * @throws FormatSniffException if there are not enough bytes available in the {@code input} of if the {@code input} does not + * @throws SniffException if there are not enough bytes available in the {@code input} or if the {@code input} does not * support {@link InputStream#mark(int)}. */ - public static PhenopacketFormat sniff(InputStream input) throws IOException, FormatSniffException { - if (input.markSupported()) { - byte[] buffer = new byte[BUFFER_SIZE]; - input.mark(BUFFER_SIZE); - int read = input.read(buffer); - if (read < BUFFER_SIZE) { - // We explode because there are not enough bytes available for format sniffing. - String message = read < 0 - ?
"The stream must not be at the end" - : "Need at least %d bytes to sniff the format but only %d was available".formatted(BUFFER_SIZE, read); - throw new FormatSniffException(message); - } - input.reset(); - return sniff(buffer); - } else - throw new FormatSniffException("The provided InputStream does not support `mark()`"); + public static PhenopacketFormat sniff(InputStream input) throws IOException, SniffException { + return sniff(Util.getFirstBytesAndReset(input, BUFFER_SIZE)); } } diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java new file mode 100644 index 00000000..fae81f92 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java @@ -0,0 +1,30 @@ +package org.phenopackets.phenopackettools.util.format; + +import org.phenopackets.phenopackettools.core.PhenopacketToolsException; + +/** + * A checked exception thrown in case of encountering some content sniffing issues. 
+ */
+public class SniffException extends PhenopacketToolsException {
+
+    public SniffException() {
+        super();
+    }
+
+    public SniffException(String message) {
+        super(message);
+    }
+
+    public SniffException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    public SniffException(Throwable cause) {
+        super(cause);
+    }
+
+    protected SniffException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+
+}
diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java
index 603769b1..9deb1fcd 100644
--- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java
+++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java
@@ -31,4 +31,34 @@ static boolean looksLikeYaml(byte[] payload) {
         }
     }
 
+    /**
+     * Read the first {@code nBytes} bytes of the {@code input} and rewind the stream to the marked position.
+     *
+     * @param input an {@link InputStream} that supports {@link InputStream#mark(int)}.
+     * @param nBytes the number of bytes to read.
+     * @return a buffer with the first {@code nBytes} bytes of the {@code input}.
+     * @throws SniffException if the {@code input} does not support {@link InputStream#mark(int)} or if fewer
+     * than {@code nBytes} bytes are available.
+     * @throws IOException in case an error occurs while reading the {@code input}.
+     */
+    static byte[] getFirstBytesAndReset(InputStream input, int nBytes) throws SniffException, IOException {
+        if (!input.markSupported())
+            throw new SniffException("The provided InputStream does not support `mark()`");
+
+        byte[] buffer = new byte[nBytes];
+        input.mark(nBytes);
+        // `read(byte[])` may legally return fewer bytes than requested even if more bytes follow,
+        // while `readNBytes` keeps reading until the buffer is full or the stream ends.
+        int read = input.readNBytes(buffer, 0, nBytes);
+        // Rewind before checking the count so that the stream is left untouched even if sniffing fails.
+        input.reset();
+        if (read < nBytes) {
+            // We explode because there are not enough bytes available for format sniffing.
+            String message = read == 0
+                    ? "The stream must not be at the end"
+                    : "Need at least %d bytes to sniff the format but only %d was available".formatted(nBytes, read);
+            throw new SniffException(message);
+        }
+        return buffer;
+    }
 }
diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java
index fecd687b..9e9da40d 100644
--- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java
+++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java
@@ -1,4 +1,5 @@
 /**
- * Defines the supported phenopacket formats and utility methods for working with the formats.
+ * Defines utility methods for working with {@link org.phenopackets.phenopackettools.core.PhenopacketElement}s
+ * and {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s.
*/ package org.phenopackets.phenopackettools.util.format; \ No newline at end of file diff --git a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java new file mode 100644 index 00000000..22a095cd --- /dev/null +++ b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java @@ -0,0 +1,11 @@ +package org.phenopackets.phenopackettools.util.format; + +import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class ElementSnifferTest { + + // TODO - implement + +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java index 5a1200fb..3321e094 100644 --- a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java +++ b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java @@ -2,6 +2,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; import java.io.BufferedInputStream; import java.io.IOException; diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index 4e848013..97b8b750 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7 phenopacket-tools-validator-core @@ -16,6 +16,11 @@ Validator utilities for phenopackets + + org.phenopackets.phenopackettools + phenopacket-tools-core + 
${project.parent.version} + org.phenopackets phenopacket-schema diff --git a/phenopacket-tools-validator-core/src/main/java/module-info.java b/phenopacket-tools-validator-core/src/main/java/module-info.java index 1abaea74..0c5824ee 100644 --- a/phenopacket-tools-validator-core/src/main/java/module-info.java +++ b/phenopacket-tools-validator-core/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * Defines the base APIs for phenopacket validation. + */ module org.phenopackets.phenopackettools.validator.core { exports org.phenopackets.phenopackettools.validator.core; @@ -6,6 +9,7 @@ exports org.phenopackets.phenopackettools.validator.core.phenotype; exports org.phenopackets.phenopackettools.validator.core.writer; + requires org.phenopackets.phenopackettools.core; requires org.monarchinitiative.phenol.core; requires org.phenopackets.schema; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java index d8f13cc4..c5d76cb8 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java @@ -1,9 +1,19 @@ package org.phenopackets.phenopackettools.validator.core; +import org.phenopackets.phenopackettools.core.PhenopacketToolsException; + /** - * An {@link Exception} that is thrown in case the provided data has incorrect format. + * A {@link PhenopacketToolsException} that is thrown by {@link org.phenopackets.phenopackettools.validator.core.convert.PhenopacketConverter} + * in case the provided data has incorrect format. + *

+ * This can happen if e.g. the {@code payload} to + * {@link org.phenopackets.phenopackettools.validator.core.convert.PhenopacketConverter#toJson(byte[])} + * is not valid JSON. + *

+ * {@code ConversionException} implements {@link ValidationResult} so that it can be reported + * by a {@link PhenopacketValidator}. */ -public class ConversionException extends Exception implements ValidationResult { +public class ConversionException extends PhenopacketToolsException implements ValidationResult { private static final String VALIDATION_CATEGORY = "input"; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java deleted file mode 100644 index 0d93f1e4..00000000 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.phenopackets.phenopackettools.validator.core; - -/** - * {@link ValidationResult} returned when encountering a format error. - * @param message message to present the user. - */ -record InputError(String message) implements ValidationResult { - - private static final String VALIDATION_CATEGORY = "input"; - - - @Override - public ValidatorInfo validatorInfo() { - return ValidatorInfo.inputValidator(); - } - - @Override - public ValidationLevel level() { - return ValidationLevel.ERROR; - } - - @Override - public String category() { - return VALIDATION_CATEGORY; - } - -} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java index dda03f96..6d38d083 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java @@ -5,21 +5,21 @@ import java.util.List; /** - * 
{@link PhenopacketValidator} validates a top-level component of Phenopacket schema. - *

- * The top-level component must be one of the following types: - *

    - *
  • {@link org.phenopackets.schema.v2.Phenopacket}
  • - *
  • {@link org.phenopackets.schema.v2.Family}
  • - *
  • {@link org.phenopackets.schema.v2.Cohort}
  • - *
+ * {@link PhenopacketValidator} represents a single step of the validation workflow. + * The validator checks a top-level component of Phenopacket Schema. * - * @param type of the top-level component. + * @param type of the top-level element of the Phenopacket Schema. */ public interface PhenopacketValidator { + /** + * @return description of the validator and the validation logic. + */ ValidatorInfo validatorInfo(); + /** + * Validate the {@code component} and summarize the results into a {@link List} of {@link ValidationResult}s. + */ List validate(T component); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java index 87fc622b..6958be59 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java @@ -1,5 +1,8 @@ package org.phenopackets.phenopackettools.validator.core; +/** + * {@code ValidationLevel} represents a severity level for {@link ValidationResult}. 
+ */ public enum ValidationLevel { /** diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java index d4e77eb3..f8193cd6 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java @@ -1,19 +1,31 @@ package org.phenopackets.phenopackettools.validator.core; +/** + * {@code ValidationResult} contains results of a single validation step performed by a {@link PhenopacketValidator}. + */ public interface ValidationResult { + /** + * Create a {@link ValidationLevel#WARNING} result from given data. + */ static ValidationResult warning(ValidatorInfo validatorInfo, String category, String message) { return of(validatorInfo, ValidationLevel.WARNING, category, message); } + /** + * Create a {@link ValidationLevel#ERROR} result from given data. + */ static ValidationResult error(ValidatorInfo validatorInfo, String category, String message) { return of(validatorInfo, ValidationLevel.ERROR, category, message); } + /** + * Create a {@code ValidationResult} from given data. 
+ */ static ValidationResult of(ValidatorInfo validatorInfo, ValidationLevel level, String category, diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java index debd46ac..da4aa633 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java @@ -4,9 +4,12 @@ import java.util.List; /** - * {@link ValidationResults} contain validation results for one Phenopacket schema top-level element - * (phenopacket, family, or cohort). - * The results contain info regarding which validators were run and the issues found during the validation. + * {@code ValidationResults} contain validation results for one Phenopacket schema top-level element + * ({@link org.phenopackets.schema.v2.Phenopacket}, {@link org.phenopackets.schema.v2.Family}, + * or {@link org.phenopackets.schema.v2.Cohort}). + *

+ * The results contain info regarding which validators were run ({@link #validators()}) and the issues found during + * the validation ({@link #validationResults()}). */ public interface ValidationResults { @@ -42,6 +45,9 @@ default boolean isValid() { return validationResults().isEmpty(); } + /** + * A builder for creating {@link ValidationResults}. + */ class Builder { private final List validators = new ArrayList<>(); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java index 4591051a..e90b9baf 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java @@ -5,21 +5,96 @@ import org.phenopackets.schema.v2.Phenopacket; /** - * {@link ValidationWorkflowDispatcher} exposes endpoints for validating top-level elements of Phenopacket schema + * {@link ValidationWorkflowDispatcher} exposes endpoints for validating top-level elements of Phenopacket Schema * and dispatches the data into the appropriate {@link ValidationWorkflowRunner}. */ public interface ValidationWorkflowDispatcher { + static ValidationWorkflowDispatcher of(ValidationWorkflowRunner phenopacketValidationRunner, + ValidationWorkflowRunner familyValidationRunner, + ValidationWorkflowRunner cohortValidationRunner) { + return new ValidationWorkflowDispatcherImpl(phenopacketValidationRunner, familyValidationRunner, cohortValidationRunner); + } + + /** + * Validate a phenopacket starting from a pile of bytes. + * + * @param bytes that can represent a phenopacket in either + * of {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return validation results. 
+ */ ValidationResults validatePhenopacket(byte[] bytes); + + /** + * Validate a phenopacket starting from a string. + * + * @param string that can represent a phenopacket either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML} format. + * @return validation results. + */ ValidationResults validatePhenopacket(String string); + + /** + * Validate a phenopacket starting from a protobuf object. + * + * @param phenopacket to be validated. + * @return validation results. + */ ValidationResults validatePhenopacket(Phenopacket phenopacket); + /** + * Validate a family starting from a pile of bytes. + * + * @param bytes that can represent a family in either + * of {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return validation results. + */ ValidationResults validateFamily(byte[] bytes); + + /** + * Validate a family starting from a string. + * + * @param string that can represent a family either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML} format. + * @return validation results. + */ ValidationResults validateFamily(String string); + + /** + * Validate a family starting from a protobuf object. + * + * @param family to be validated. + * @return validation results. + */ ValidationResults validateFamily(Family family); + /** + * Validate a cohort starting from a pile of bytes. + * + * @param bytes that can represent a cohort in either + * of {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return validation results. + */ ValidationResults validateCohort(byte[] bytes); + + /** + * Validate a cohort starting from a string. 
+ * + * @param string that can represent a cohort either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML} format. + * @return validation results. + */ ValidationResults validateCohort(String string); + + /** + * Validate a cohort starting from a protobuf object. + * + * @param cohort to be validated. + * @return validation results. + */ ValidationResults validateCohort(Cohort cohort); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java index 184399c8..c15a29fc 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java @@ -10,24 +10,23 @@ * A {@link ValidationWorkflowDispatcher} implementation that uses 3 {@link ValidationWorkflowRunner} to validate * top-level elements of the Phenopacket schema. 
*/ -public class ValidationWorkflowDispatcherImpl implements ValidationWorkflowDispatcher { +class ValidationWorkflowDispatcherImpl implements ValidationWorkflowDispatcher { private final ValidationWorkflowRunner phenopacketValidationRunner; private final ValidationWorkflowRunner familyValidationRunner; private final ValidationWorkflowRunner cohortValidationRunner; - - public ValidationWorkflowDispatcherImpl(ValidationWorkflowRunner phenopacketValidationRunner, - ValidationWorkflowRunner familyValidationRunner, - ValidationWorkflowRunner cohortValidationRunner) { + ValidationWorkflowDispatcherImpl(ValidationWorkflowRunner phenopacketValidationRunner, + ValidationWorkflowRunner familyValidationRunner, + ValidationWorkflowRunner cohortValidationRunner) { this.phenopacketValidationRunner = Objects.requireNonNull(phenopacketValidationRunner); this.familyValidationRunner = Objects.requireNonNull(familyValidationRunner); this.cohortValidationRunner = Objects.requireNonNull(cohortValidationRunner); } @Override - public ValidationResults validatePhenopacket(Phenopacket phenopacket) { - return phenopacketValidationRunner.validate(phenopacket); + public ValidationResults validatePhenopacket(byte[] bytes) { + return phenopacketValidationRunner.validate(bytes); } @Override @@ -36,13 +35,13 @@ public ValidationResults validatePhenopacket(String string) { } @Override - public ValidationResults validatePhenopacket(byte[] bytes) { - return phenopacketValidationRunner.validate(bytes); + public ValidationResults validatePhenopacket(Phenopacket phenopacket) { + return phenopacketValidationRunner.validate(phenopacket); } @Override - public ValidationResults validateFamily(Family family) { - return familyValidationRunner.validate(family); + public ValidationResults validateFamily(byte[] bytes) { + return familyValidationRunner.validate(bytes); } @Override @@ -51,13 +50,13 @@ public ValidationResults validateFamily(String string) { } @Override - public ValidationResults 
validateFamily(byte[] bytes) { - return familyValidationRunner.validate(bytes); + public ValidationResults validateFamily(Family family) { + return familyValidationRunner.validate(family); } @Override - public ValidationResults validateCohort(Cohort cohort) { - return cohortValidationRunner.validate(cohort); + public ValidationResults validateCohort(byte[] bytes) { + return cohortValidationRunner.validate(bytes); } @Override @@ -66,8 +65,8 @@ public ValidationResults validateCohort(String string) { } @Override - public ValidationResults validateCohort(byte[] bytes) { - return cohortValidationRunner.validate(bytes); + public ValidationResults validateCohort(Cohort cohort) { + return cohortValidationRunner.validate(cohort); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java index 82a7448d..9671cbc7 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java @@ -8,16 +8,26 @@ import java.util.List; /** - * {@link ValidationWorkflowRunner} validates selected top-level element of the Phenopacket schema. + * {@link ValidationWorkflowRunner} validates selected top-level element of the Phenopacket Schema. *

* The validation is performed on 3 input types: {@link #validate(MessageOrBuilder)} validates an existing top-level - * element, {@link #validate(String)} validates input formatted in JSON format, - * and {@link #validate(byte[])} validates bytes that can be either in JSON or Protobuf binary exchange format. + * element, {@link #validate(String)} validates input formatted either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML}, + * and {@link #validate(byte[])} validates a pile of bytes that can be in either + * of the {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. *

* Validator provides a list with {@link ValidatorInfo} that describes validations * done by the {@link ValidationWorkflowRunner}. + *

+ * The validation is generally done in 2 phases, syntax and semantic phases. + * The syntax phase checks if the building blocks meet the requirements independently + * (e.g. all required fields are defined for a {@link org.phenopackets.schema.v2.core.Resource}). + * The semantic validation checks for presence of errors in the context of the entire top-level element + * (e.g. a phenopacket contains an HPO term but an HPO {@link org.phenopackets.schema.v2.core.Resource} is missing + * in {@link org.phenopackets.schema.v2.core.MetaData}). * - * @param type of the top-level element of the Phenopacket schema. + * @param type of the top-level element of the Phenopacket Schema. */ public interface ValidationWorkflowRunner { @@ -27,10 +37,30 @@ public interface ValidationWorkflowRunner { */ List validators(); + /** + * Validate a top-level element starting from a pile of bytes. + * + * @param payload top-level element in one of the {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return the validation results. + */ ValidationResults validate(byte[] payload); - ValidationResults validate(String json); + /** + * Validate a top-level element starting from a string. + * + * @param value top-level element in either {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML}. + * @return the validation results. + */ + // TODO - include YAML validation. + ValidationResults validate(String value); + /** + * Validate a top-level element starting from a protobuf item. + * + * @param item the top-level element as protobuf item. + * @return the validation results. 
+ */ ValidationResults validate(T item); default ValidationResults validate(InputStream is) throws IOException { diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java new file mode 100644 index 00000000..31d6263f --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java @@ -0,0 +1,72 @@ +package org.phenopackets.phenopackettools.validator.core; + +import com.google.protobuf.MessageOrBuilder; + +import java.util.ArrayList; +import java.util.List; + +/** + * The base builder for constructing {@link ValidationWorkflowRunner}. The builder keeps track of + * the syntax and semantic validators. + * + * @param type of the top-level element of the Phenopacket Schema. + */ +public abstract class ValidationWorkflowRunnerBuilder { + + protected final List> syntaxValidators = new ArrayList<>(); + protected final List> semanticValidators = new ArrayList<>(); + + /** + * Add a syntax validator. + * + * @param syntaxValidator the syntax validator + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addSyntaxValidator(PhenopacketValidator syntaxValidator) { + this.syntaxValidators.add(syntaxValidator); + return this; + } + + /** + * Add syntax validators in bulk. + * + * @param validators the syntax validators + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addAllSyntaxValidators(List> validators) { + // A slightly more efficient implementation comparing to the default method on the interface. + this.syntaxValidators.addAll(validators); + return this; + } + + /** + * Add a semantic validator. 
+ * + * @param semanticValidator the semantic validator + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addSemanticValidator(PhenopacketValidator semanticValidator) { + this.semanticValidators.add(semanticValidator); + return this; + } + + /** + * Add semantic validators in bulk. + * + * @param validators the semantic validators + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addAllSemanticValidators(List> validators) { + // A slightly more efficient implementation comparing to the default method on the interface. + this.semanticValidators.addAll(validators); + return this; + } + + /** + * Finish building of the {@link ValidationWorkflowRunner}. + * + * @return the runner + */ + public abstract ValidationWorkflowRunner build(); + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java index f0d199d1..3714b4ae 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java @@ -1,22 +1,12 @@ package org.phenopackets.phenopackettools.validator.core; /** - * Information regarding validator. + * A description of a {@link PhenopacketValidator}. */ public interface ValidatorInfo { - static ValidatorInfo genericJsonSchema() { - return ValidatorInfoDefault.GENERIC; - } - - /** - * This class implements additional validation of a phenopacket that is intended to be used - * for HPO rare disease phenotyping. By assumption, the phenopacket will have been first - * checked against the {@link ValidatorInfo#genericJsonSchema()} specification. This class performs validation with the - * file {@code hpo-rare-disease-schema.json}. 
- */ - static ValidatorInfo rareDiseaseValidation() { - return ValidatorInfoDefault.RARE_DISEASE_VALIDATOR; + static ValidatorInfo baseSyntaxValidation() { + return ValidatorInfoDefault.BASE; } /** diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java index 5ec46e0c..ce2d44e9 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java @@ -3,9 +3,8 @@ record ValidatorInfoDefault(String validatorId, String validatorName, String description) implements ValidatorInfo { - // TODO - add descriptions - static final ValidatorInfoDefault GENERIC = new ValidatorInfoDefault("GENERIC", "Validation of a generic Phenopacket", ""); - static final ValidatorInfoDefault RARE_DISEASE_VALIDATOR = new ValidatorInfoDefault("RARE_DISEASE_VALIDATOR", "Validation of rare disease Phenopacket constraints", ""); - static final ValidatorInfoDefault INPUT_VALIDATOR = new ValidatorInfoDefault("Input", "Input of phenopacket data", "Validation of data format"); + + static final ValidatorInfoDefault BASE = new ValidatorInfoDefault("BaseValidator", "Base syntax validator", "The base syntax validation of a phenopacket, family, or cohort"); + static final ValidatorInfoDefault INPUT_VALIDATOR = new ValidatorInfoDefault("InputValidator", "Data format validator", "The validator for checking data format issues (e.g. 
presence of a required field in JSON document)"); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java index 8b5ffc80..5713bb52 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java @@ -4,18 +4,19 @@ import com.google.protobuf.MessageOrBuilder; import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter; +import org.phenopackets.phenopackettools.validator.core.except.PhenopacketValidatorRuntimeException; abstract class BaseConverter implements PhenopacketFormatConverter { - protected final JsonFormat.Parser parser = JsonFormat.parser(); - protected final JsonFormat.Printer printer = JsonFormat.printer(); + protected static final JsonFormat.Parser parser = JsonFormat.parser(); + protected static final JsonFormat.Printer printer = JsonFormat.printer(); @Override public String toJson(T item) { try { return printer.print(item); } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(e); + throw new PhenopacketValidatorRuntimeException(e); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java new file mode 100644 index 00000000..ec95fd57 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java @@ -0,0 +1,5 @@ +/** + * A module-private package with {@link 
org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter} + * implementations. + */ +package org.phenopackets.phenopackettools.validator.core.convert; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java index c55bc234..a4ea709a 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java @@ -1,6 +1,8 @@ package org.phenopackets.phenopackettools.validator.core.except; -public class PhenopacketValidatorException extends Exception { +import org.phenopackets.phenopackettools.core.PhenopacketToolsException; + +public class PhenopacketValidatorException extends PhenopacketToolsException { public PhenopacketValidatorException() { super(); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java index 663d3e15..30f728d0 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java @@ -1,6 +1,8 @@ package org.phenopackets.phenopackettools.validator.core.except; -public class PhenopacketValidatorRuntimeException extends RuntimeException { +import 
org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; + +public class PhenopacketValidatorRuntimeException extends PhenopacketToolsRuntimeException { public PhenopacketValidatorRuntimeException() { super(); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java new file mode 100644 index 00000000..bbd53a90 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with exceptions that can be thrown by the validation code. + */ +package org.phenopackets.phenopackettools.validator.core.except; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java index 4aee2f6d..8a053cec 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java @@ -18,8 +18,8 @@ abstract class BaseMetaDataValidator implements Phen private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "MetaDataValidator", - "MetaDataValidator for Phenopacket, Family, and Cohort", - "Validate that the MetaData section includes information about all ontologies used"); + "MetaData validator", + "Validate that the MetaData section describes all used ontologies"); @Override public ValidatorInfo validatorInfo() { diff --git 
a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java new file mode 100644 index 00000000..c82ea30b --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java @@ -0,0 +1,41 @@ +/** + * The package provides APIs and default implementations of phenopacket validation. + *

+ *

Actors

+ * This section describes the actors of the validation workflow (the classes for representing behavior + * for "doing stuff"), starting from the basic elements. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverters} is a static factory class + * for providing {@link org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter}s to convert + * the top-level elements of Phenopacket Schema between the supported + * {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator} represents a single step + * of the validation workflow. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} applies + * the {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s of the validation workflow in + * the correct order, ensuring the base validation is always run first. + * The {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} validates + * a top-level element. + *

+ * The {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowDispatcher} exposes methods + * for validating all top-level elements of the Phenopacket Schema. + * + *

Value objects

+ * This section describes the value objects (stateful objects with no complex behavior), starting from the most complex ones. + *

+ * The {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} + * and {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowDispatcher} return + * {@link org.phenopackets.phenopackettools.validator.core.ValidationResults}, a container with results + * of the validation workflow. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidationResult} contains results of + * a single validation step. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidatorInfo} describes + * the {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel} + */ +package org.phenopackets.phenopackettools.validator.core; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/BaseHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/BaseHpoPhenotypeValidator.java deleted file mode 100644 index dad25e54..00000000 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/BaseHpoPhenotypeValidator.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; - -import com.google.protobuf.MessageOrBuilder; -import org.monarchinitiative.phenol.base.PhenolRuntimeException; -import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.phenopackets.phenopackettools.validator.core.*; -import org.phenopackets.schema.v2.core.PhenotypicFeature; - -import java.util.Objects; -import java.util.stream.Stream; - -abstract class BaseHpoPhenotypeValidator implements PhenopacketValidator { - - private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( - "HpoPhenotypeValidator", - "HPO phenotypic feature validator", - "Validate that HPO terms are well formatted, present, and non-obsolete based on the provided HPO"); - private static final String INVALID_TERM_ID = "Invalid TermId"; - private static final String OBSOLETED_TERM_ID = "Obsoleted TermId"; - - private final Ontology hpo; - private final String hpoVersion; - - public BaseHpoPhenotypeValidator(Ontology hpo) { - this.hpo = Objects.requireNonNull(hpo); - this.hpoVersion = this.hpo.getMetaInfo().getOrDefault("data-version", "HPO"); - } - - @Override - public ValidatorInfo validatorInfo() { - return VALIDATOR_INFO; - } - - protected Stream 
checkPhenotypeFeature(String individualId, PhenotypicFeature feature) { - TermId termId; - try { - termId = TermId.of(feature.getType().getId()); - } catch (PhenolRuntimeException e) { - // Should not really happen if JsonSchema validators are run upstream, but let's stay safe. - String msg = "The %s found in '%s' is not a valid value".formatted(feature.getType().getId(), individualId); - return Stream.of( - ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) - ); - } - - // Check if the HPO contains the term. - if (!hpo.containsTerm(termId)) { - String msg = "%s in '%s' not found in %s".formatted(termId.getValue(), individualId, hpoVersion); - return Stream.of( - ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) - ); - } - - // Check if the `termId` is a primary ID. // If not, this is a warning. - TermId primaryId = hpo.getPrimaryTermId(termId); - if (!primaryId.equals(termId)) { - String msg = "Using obsoleted id (%s) instead of current primary id (%s) in '%s'" - .formatted(termId.getValue(), primaryId.getValue(), individualId); - return Stream.of( - ValidationResult.warning(VALIDATOR_INFO, OBSOLETED_TERM_ID, msg) - ); - } - - return Stream.empty(); - } - -} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java new file mode 100644 index 00000000..22554677 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java @@ -0,0 +1,51 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +/** + * A class with constants that correspond to the upper-level HPO organ-system phenotypic abnormalities. + *

+ * The constants can be used together with the + * {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.OrganSystem} validators, + * which enforce that a phenopacket contains at least one term from a set of organ systems (observed or excluded). + *

+ * Note that users can also use any HPO term in this way -- the validator will enforce that the phenopacket + * has an HPO term that descends from it, but the most common use cases are these organ-level terms. + * + *

+ * Ontology hpo = ...; // get the ontology
+ * var requiredOrganSystems = Set.of(BLOOD, CARDIOVASCULAR, SKELETAL);
+ * var validator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, requiredOrganSystems);
+ * 
+ */ +public class HpoOrganSystems { + + public static final TermId ABNORMAL_CELLULAR = TermId.of("HP:0025354"); + public static final TermId BLOOD = TermId.of("HP:0001871"); + public static final TermId CONNECTIVE_TISSUE = TermId.of("HP:0003549"); + public static final TermId HEAD_AND_NECK = TermId.of("HP:0000152"); + public static final TermId LIMBS = TermId.of("HP:0040064"); + public static final TermId METABOLISM = TermId.of("HP:0001939"); + public static final TermId PRENATAL = TermId.of("HP:0001197"); + public static final TermId BREAST = TermId.of("HP:0000769"); + public static final TermId CARDIOVASCULAR = TermId.of("HP:0001626"); + public static final TermId DIGESTIVE = TermId.of("HP:0025031"); + public static final TermId EAR = TermId.of("HP:0000598"); + public static final TermId ENDOCRINE = TermId.of("HP:0000818"); + public static final TermId EYE = TermId.of("HP:0000478"); + public static final TermId GENITOURINARY = TermId.of("HP:0000119"); + public static final TermId IMMUNOLOGY = TermId.of("HP:0002715"); + public static final TermId INTEGUMENT = TermId.of("HP:0001574"); + public static final TermId MUSCLE = TermId.of("HP:0003011"); + public static final TermId NERVOUS_SYSTEM = TermId.of("HP:0000707"); + public static final TermId RESPIRATORY = TermId.of("HP:0002086"); + public static final TermId SKELETAL = TermId.of("HP:0000924"); + public static final TermId THORACIC_CAVITY = TermId.of("HP:0045027"); + public static final TermId VOICE = TermId.of("HP:0001608"); + public static final TermId CONSTITUTIONAL = TermId.of("HP:0025142"); + public static final TermId GROWTH = TermId.of("HP:0001507"); + public static final TermId NEOPLASM = TermId.of("HP:0002664"); + + private HpoOrganSystems() { + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java index 1dbd6f54..07c56347 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java @@ -1,9 +1,21 @@ package org.phenopackets.phenopackettools.validator.core.phenotype; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.CohortHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.FamilyHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.PhenopacketHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.orgsys.CohortHpoOrganSystemValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.orgsys.FamilyHpoOrganSystemValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.orgsys.PhenopacketHpoOrganSystemValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.primary.CohortHpoPhenotypeValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.primary.FamilyHpoPhenotypeValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.primary.PhenopacketHpoPhenotypeValidator; import org.phenopackets.schema.v2.*; +import java.util.Collection; + /** * Static factory class for getting {@link PhenopacketValidator}s for top-level Phenopacket schema components. 
*/ @@ -17,27 +29,179 @@ private HpoPhenotypeValidators() { * Get {@link PhenopacketValidator} to validate {@link Phenopacket} using provided {@link Ontology}. * * @param hpo HPO ontology + * @deprecated use {@link Primary#phenopacketHpoPhenotypeValidator(Ontology)} instead */ + // TODO - remove prior v1 + @Deprecated(forRemoval = true) public static PhenopacketValidator phenopacketHpoPhenotypeValidator(Ontology hpo) { - return new PhenopacketHpoPhenotypeValidator(hpo); + return Primary.phenopacketHpoPhenotypeValidator(hpo); } /** - * Get {@link PhenopacketValidator} to validate {@link Family} using provided {@link Ontology}. + * Get {@link PhenopacketValidator} for validate {@link Family} using provided {@link Ontology}. * * @param hpo HPO ontology + * @deprecated use {@link Primary#familyHpoPhenotypeValidator(Ontology)} instead */ + // TODO - remove prior v1 + @Deprecated(forRemoval = true) public static PhenopacketValidator familyHpoPhenotypeValidator(Ontology hpo) { - return new FamilyHpoPhenotypeValidator(hpo); + return Primary.familyHpoPhenotypeValidator(hpo); } /** - * Get {@link PhenopacketValidator} to validate {@link Cohort} using provided {@link Ontology}. + * Get {@link PhenopacketValidator} for performing primary validation {@link Cohort} using provided {@link Ontology}, + * as described in {@link org.phenopackets.phenopackettools.validator.core.phenotype.primary.AbstractHpoPhenotypeValidator}. 
* * @param hpo HPO ontology + * @deprecated use {@link Primary#cohortHpoPhenotypeValidator(Ontology)} instead */ + // TODO - remove prior v1 + @Deprecated(forRemoval = true) public static PhenopacketValidator cohortHpoPhenotypeValidator(Ontology hpo) { - return new CohortHpoPhenotypeValidator(hpo); + return Primary.cohortHpoPhenotypeValidator(hpo); + } + + /** + * A static factory class for providing {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s + * that check if HPO terms of the Phenopacket schema elements are present in + * a given {@link org.monarchinitiative.phenol.ontology.data.Ontology} and if the terms are non-obsolete. + */ + public static class Primary { + /** + * Get {@link PhenopacketValidator} to validate {@link Phenopacket} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator phenopacketHpoPhenotypeValidator(Ontology hpo) { + return new PhenopacketHpoPhenotypeValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} for validate {@link Family} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator familyHpoPhenotypeValidator(Ontology hpo) { + return new FamilyHpoPhenotypeValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} for performing primary validation {@link Cohort} using provided {@link Ontology}, + * as described in {@link org.phenopackets.phenopackettools.validator.core.phenotype.primary.AbstractHpoPhenotypeValidator}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator cohortHpoPhenotypeValidator(Ontology hpo) { + return new CohortHpoPhenotypeValidator(hpo); + } + } + + /** + * A static factory class for providing validators for pointing out violations of the annotation propagation rule. + *

+ * The validator checks observed and excluded phenotype terms. The observed terms are checked for the presence of + * an observed or an excluded ancestor, and the presence of such an ancestor is pointed out as an error. + * For instance, Abnormality of finger or "NOT" Abnormality of finger must not be present + * in a patient annotated by Arachnodactyly. The most specific term (Arachnodactyly) must be used. + *

+ * For the excluded terms, the validator checks for presence of an excluded children. Here, the least specific term + * must be used. For instance, "NOT" Arachnodactyly must not be present in a patient annotated + * with "NOT" Abnormality of finger. Only the "NOT" Abnormality of finger must be used. + */ + public static class Ancestry { + + private Ancestry() { + } + + /** + * Get {@link PhenopacketValidator} to validate ancestry {@link Phenopacket} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator phenopacketHpoAncestryValidator(Ontology hpo) { + return new PhenopacketHpoAncestryValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} to validate ancestry {@link Family} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator familyHpoAncestryValidator(Ontology hpo) { + return new FamilyHpoAncestryValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} to validate ancestry {@link Cohort} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator cohortHpoAncestryValidator(Ontology hpo) { + return new CohortHpoAncestryValidator(hpo); + } + } + + /** + * A static factory class for providing validators for checking annotation of organ systems. + *

+ * The validators check if each phenopacket or family/cohort member has an annotation + * for an organ system represented by a top-level HPO term + * (e.g. Abnormality of limbs). + * The annotation comprises either one or more observed descendants + * (e.g. Arachnodactyly), + * or an excluded top-level HPO term + * (NOT Abnormality of limbs). + *

+ */ + public static class OrganSystem { + private OrganSystem() { + } + + /** + * Get {@link PhenopacketValidator} to validate annotation of organ systems in a {@link Phenopacket} + * using provided {@link Ontology} and a collection of organ system {@link TermId}s. + *

+ * NOTE: the organ system {@link TermId} that is absent from the {@link Ontology} is disregarded + * and not used for validation. + * + * @param hpo HPO ontology + * @param organSystemTermIds a collection of HPO {@link TermId}s corresponding to organ systems. + */ + public static PhenopacketValidator phenopacketHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + return new PhenopacketHpoOrganSystemValidator(hpo, organSystemTermIds); + } + + /** + * Get {@link PhenopacketValidator} to validate annotation of organ systems in a {@link Family} + * using provided {@link Ontology} and a collection of organ system {@link TermId}s. + *

+ * NOTE: the organ system {@link TermId} that is absent from the {@link Ontology} is disregarded + * and not used for validation. + * + * @param hpo HPO ontology + * @param organSystemTermIds a collection of HPO {@link TermId}s corresponding to organ systems. + */ + public static PhenopacketValidator familyHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + return new FamilyHpoOrganSystemValidator(hpo, organSystemTermIds); + } + + /** + * Get {@link PhenopacketValidator} to validate annotation of organ systems in a {@link Cohort} + * using provided {@link Ontology} and a collection of organ system {@link TermId}s. + *

+ * NOTE: the organ system {@link TermId} that is absent from the {@link Ontology} is disregarded + * and not used for validation. + * + * @param hpo HPO ontology + * @param organSystemTermIds a collection of HPO {@link TermId}s corresponding to organ systems. + */ + public static PhenopacketValidator cohortHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + return new CohortHpoOrganSystemValidator(hpo, organSystemTermIds); + } } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java new file mode 100644 index 00000000..4da56932 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java @@ -0,0 +1,133 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.Term; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.validator.core.ValidationResult; +import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.PhenotypicFeaturesByExclusionStatus; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.Util; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; 
+import java.util.stream.Stream; + +/** + * A class for pointing out violations of the annotation propagation rule. + *

+ * The validator checks observed and excluded phenotype terms. Each observed term is checked for the presence of + * an observed or an excluded ancestor, and the presence of such an ancestor is reported as an error. + * For instance, Abnormality of finger or "NOT" Abnormality of finger must not be present + * in a patient annotated with Arachnodactyly. The most specific term (Arachnodactyly) must be used. + *

+ * For the excluded terms, the validator checks for presence of an excluded children. Here, the least specific term + * must be used. For instance, "NOT" Arachnodactyly must not be present in a patient annotated + * with "NOT" Abnormality of finger. Only the "NOT" Abnormality of finger must be used. + */ +public abstract class AbstractHpoAncestryValidator extends BaseHpoValidator { + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractHpoAncestryValidator.class); + + private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( + "HpoAncestryValidator", + "HPO ancestry phenotypic feature validator", + "Validate that phenopacket does not contain an HPO term and its ancestor based on the provided HPO"); + private static final String APR_VIOLATION = "Violation of the annotation propagation rule"; + private static final String UNKNOWN = "UNKNOWN_NAME"; + + AbstractHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + public ValidatorInfo validatorInfo() { + return VALIDATOR_INFO; + } + + @Override + public List validate(T component) { + return extractPhenopackets(component) + .flatMap(pp -> validatePhenopacketPhenotypicFeatures(pp.getId(), pp.getPhenotypicFeaturesList())) + .toList(); + } + + protected abstract Stream extractPhenopackets(T message); + + private Stream validatePhenopacketPhenotypicFeatures(String id, List phenotypicFeatures) { + PhenotypicFeaturesByExclusionStatus featuresByExclusion = Util.partitionByExclusionStatus(phenotypicFeatures); + + Stream.Builder results = Stream.builder(); + + // Check that the component does not contain both observed term and its ancestor. 
+ + for (TermId observed : featuresByExclusion.observedPhenotypicFeatures()) { + if (isObsoleteTermId(observed)) { + LOGGER.debug("Ignoring unknown/obsolete term ID {}", observed.getValue()); + continue; + } + + for (TermId ancestor : OntologyAlgorithm.getAncestorTerms(hpo, observed, false)) { + if (featuresByExclusion.observedPhenotypicFeatures().contains(ancestor)) + results.add(constructResultForAnObservedTerm(id, observed, ancestor, false)); + if (featuresByExclusion.excludedPhenotypicFeatures().contains(ancestor)) + results.add(constructResultForAnObservedTerm(id, observed, ancestor, true)); + } + } + + // Check that the component does not have negated descendant + for (TermId excluded : featuresByExclusion.excludedPhenotypicFeatures()) { + if (isObsoleteTermId(excluded)) { + LOGGER.debug("Ignoring unknown/obsolete term ID {}", excluded.getValue()); + continue; + } + + for (TermId child : OntologyAlgorithm.getDescendents(hpo, excluded)) { + if (child.equals(excluded)) + // skip the parent term + continue; + if (featuresByExclusion.excludedPhenotypicFeatures().contains(child)) + results.add(constructResultForAnExcludedTerm(id, excluded, child)); + } + } + + return results.build(); + } + + private boolean isObsoleteTermId(TermId termId) { + return hpo.getObsoleteTermIds().contains(termId); + } + + private ValidationResult constructResultForAnObservedTerm(String id, TermId observedId, TermId ancestorId, boolean ancestorIsExcluded) { + Term observedTerm = hpo.getTermMap().get(observedId); + String observedTermName = observedTerm == null ? UNKNOWN : observedTerm.getName(); + Term ancestorTerm = hpo.getTermMap().get(ancestorId); + String ancestorTermName = ancestorTerm == null ? 
UNKNOWN : ancestorTerm.getName(); + String message; + if (ancestorIsExcluded) + message = "Phenotypic features of %s must not contain both an observed term (%s, %s) and an excluded ancestor (%s, %s)".formatted( + id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); + else + message = "Phenotypic features of %s must not contain both an observed term (%s, %s) and an observed ancestor (%s, %s)".formatted( + id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); + + return ValidationResult.error(VALIDATOR_INFO, APR_VIOLATION, message); + } + + private ValidationResult constructResultForAnExcludedTerm(String id, TermId excluded, TermId child) { + Term excludedTerm = hpo.getTermMap().get(excluded); + String excludedTermName = excludedTerm == null ? UNKNOWN : excludedTerm.getName(); + Term childTerm = hpo.getTermMap().get(child); + String childTermName = childTerm == null ? UNKNOWN : childTerm.getName(); + String message = "Phenotypic features of %s must not contain both an excluded term (%s, %s) and an excluded child (%s, %s)".formatted( + id, excludedTermName, excluded.getValue(), childTermName, child.getValue()); + + return ValidationResult.error(VALIDATOR_INFO, APR_VIOLATION, message); + } + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java new file mode 100644 index 00000000..c68a3517 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import 
org.phenopackets.schema.v2.CohortOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.stream.Stream; + +public class CohortHpoAncestryValidator extends AbstractHpoAncestryValidator { + + public CohortHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + protected Stream extractPhenopackets(CohortOrBuilder message) { + return message.getMembersList().stream(); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java new file mode 100644 index 00000000..66ba7e88 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java @@ -0,0 +1,26 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.stream.Stream; + +public class FamilyHpoAncestryValidator extends AbstractHpoAncestryValidator { + + public FamilyHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + protected Stream extractPhenopackets(FamilyOrBuilder message) { + Stream.Builder builder = Stream.builder(); + builder.accept(message.getProband()); + + for (Phenopacket relative : message.getRelativesList()) + builder.add(relative); + + return builder.build(); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java new file mode 100644 index 00000000..b23ca49a --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.stream.Stream; + +public class PhenopacketHpoAncestryValidator extends AbstractHpoAncestryValidator { + + public PhenopacketHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + protected Stream extractPhenopackets(PhenopacketOrBuilder message) { + return Stream.of(message); + } + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java new file mode 100644 index 00000000..234f7f14 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java @@ -0,0 +1,6 @@ +/** + * The package contains validators that point out violations of the annotation propagation rule. 
+ * + * @see org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.Ancestry + */ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java new file mode 100644 index 00000000..6e824235 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java @@ -0,0 +1,39 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.base; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.Objects; + +public abstract class BaseHpoValidator implements PhenopacketValidator { + + protected final Ontology hpo; + protected final String hpoVersion; + + protected BaseHpoValidator(Ontology hpo) { + this.hpo = Objects.requireNonNull(hpo); + // TODO - can be replaced by this.hpo.version() in the most recent phenol versions. + this.hpoVersion = this.hpo.getMetaInfo().getOrDefault("data-version", "HPO"); + } + + protected static String summarizePhenopacketAndIndividualId(PhenopacketOrBuilder phenopacket) { + // Build a string like / but only if one/other are present. 
+ StringBuilder builder = new StringBuilder(); + String phenopacketId = phenopacket.getId(); + String individualId = phenopacket.getSubject().getId(); + if (!phenopacketId.isBlank() || !individualId.isBlank()) { + builder.append(" in "); + if (!phenopacketId.isBlank()) + builder.append(phenopacketId); + + if (!individualId.isBlank()) { + if (!phenopacketId.isBlank()) + builder.append("/"); + builder.append(individualId); + } + } + return builder.toString(); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java new file mode 100644 index 00000000..eb9cb4de --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java @@ -0,0 +1,5 @@ +/** + * Shared bits of all {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s + * that use HPO {@link org.monarchinitiative.phenol.ontology.data.Ontology} in validation. 
+ */ +package org.phenopackets.phenopackettools.validator.core.phenotype.base; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java new file mode 100644 index 00000000..2c1fff7c --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java @@ -0,0 +1,109 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.Term; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.validator.core.ValidationResult; +import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.PhenotypicFeaturesByExclusionStatus; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.Util; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Stream; + +/** + * The base class for an organ system validator to check if each phenopacket or family/cohort member have annotation + * for an organ system represented by a top-level HPO term + * (e.g. Abnormality of limbs). 
+ * The annotation comprises either one or more observed descendants + * (e.g. Arachnodactyly), + * or excluded top-level HPO term + * (NOT Abnormality of limbs). + */ +public abstract class AbstractOrganSystemValidator extends BaseHpoValidator { + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractOrganSystemValidator.class); + + private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( + "HpoOrganSystemValidator", + "HPO organ system validator", + "Validate annotation of selected organ systems"); + + private static final String MISSING_ORGAN_SYSTEM_CATEGORY = "Missing organ system annotation"; + + protected final List organSystemTermIds; + + protected AbstractOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + super(hpo); + this.organSystemTermIds = Objects.requireNonNull(organSystemTermIds).stream() + .distinct() + .filter(organSystemTermIdIsInOntology(hpo)) + .sorted() + .toList(); + } + + private static Predicate organSystemTermIdIsInOntology(Ontology hpo) { + return organSystemTermId -> { + if (hpo.containsTerm(organSystemTermId)) { + return true; + } else { + LOGGER.warn("{} is not present in the ontology", organSystemTermId.getValue()); + return false; + } + }; + } + + @Override + public ValidatorInfo validatorInfo() { + return VALIDATOR_INFO; + } + + @Override + public List validate(T component) { + return getPhenopackets(component) + .flatMap(p -> checkPhenotypicFeatures(p, p.getPhenotypicFeaturesList())) + .toList(); + } + + protected abstract Stream getPhenopackets(T component); + + private Stream checkPhenotypicFeatures(PhenopacketOrBuilder phenopacket, List features) { + PhenotypicFeaturesByExclusionStatus featuresByExclusion = Util.partitionByExclusionStatus(features); + + Stream.Builder results = Stream.builder(); + // Check we have at least one phenotypeFeature (pf) that is a descendant of given organSystemId + // and report otherwise. 
+ organSystemLoop: + for (TermId organSystemId : organSystemTermIds) { + // Check if the organ system abnormality has been specifically excluded. + if (featuresByExclusion.excludedPhenotypicFeatures().contains(organSystemId)) + continue; // Yes, it was. Let's check the next organ system + + // Check if we have at least one observed annotation for the organ system. + for (TermId pf : featuresByExclusion.observedPhenotypicFeatures()) { + if (OntologyAlgorithm.existsPath(hpo, pf, organSystemId)) { + continue organSystemLoop; // It only takes one termId to annotate an organ system. + } + } + + // The organSystemId is neither annotated nor excluded. We report a validation error. + Term organSystem = hpo.getTermMap().get(organSystemId); + ValidationResult result = ValidationResult.error(VALIDATOR_INFO, + MISSING_ORGAN_SYSTEM_CATEGORY, + "Missing annotation for %s [%s]%s" + .formatted(organSystem.getName(), organSystem.id().getValue(), summarizePhenopacketAndIndividualId(phenopacket))); + results.add(result); + } + + return results.build(); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java new file mode 100644 index 00000000..96bea28b --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java @@ -0,0 +1,23 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.CohortOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.Collection; +import java.util.stream.Stream; + +public class 
CohortHpoOrganSystemValidator extends AbstractOrganSystemValidator { + + public CohortHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { + super(hpo, organSystemTermIds); + } + + @Override + protected Stream getPhenopackets(CohortOrBuilder component) { + return component.getMembersOrBuilderList().stream(); + } + + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java new file mode 100644 index 00000000..7edf20df --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java @@ -0,0 +1,25 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.Collection; +import java.util.stream.Stream; + +public class FamilyHpoOrganSystemValidator extends AbstractOrganSystemValidator { + + public FamilyHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { + super(hpo, organSystemTermIds); + } + + @Override + protected Stream getPhenopackets(FamilyOrBuilder component) { + return Stream.concat( + Stream.of(component.getProband()), + component.getRelativesList().stream() + ); + } + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java new file mode 100644 
index 00000000..d4db9e81 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java @@ -0,0 +1,21 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.Collection; +import java.util.stream.Stream; + +public class PhenopacketHpoOrganSystemValidator extends AbstractOrganSystemValidator { + + public PhenopacketHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTerms) { + super(hpo, organSystemTerms); + } + + @Override + protected Stream getPhenopackets(PhenopacketOrBuilder component) { + return Stream.of(component); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java new file mode 100644 index 00000000..7c983548 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java @@ -0,0 +1,14 @@ +/** + * Package with off-the-shelf validators that work with Human Phenotype Ontology (HPO). + *

+ * The validators are exposed via a static factory class; there is a method for getting a validator for each top-level + * Phenopacket Schema component. + *

+ * The package includes a utility class with HPO {@link org.monarchinitiative.phenol.ontology.data.TermId}s + * that correspond to organ systems + * (e.g. {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoOrganSystems#EYE} for + * Abnormality of the eye) that can be used + * in combination with + * {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.OrganSystem} validators. + */ +package org.phenopackets.phenopackettools.validator.core.phenotype; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java new file mode 100644 index 00000000..d75cfc7f --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java @@ -0,0 +1,69 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.validator.core.*; +import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.stream.Stream; + +public abstract class AbstractHpoPhenotypeValidator extends BaseHpoValidator { + + private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( + "HpoPhenotypeValidator", + "HPO phenotypic feature validator", + "Validate that HPO terms are well formatted, present, and non-obsolete based on the 
provided HPO"); + private static final String INVALID_TERM_ID = "Invalid TermId"; + private static final String OBSOLETED_TERM_ID = "Obsoleted TermId"; + + public AbstractHpoPhenotypeValidator(Ontology hpo) { + super(hpo); + } + + @Override + public ValidatorInfo validatorInfo() { + return VALIDATOR_INFO; + } + + protected Stream checkPhenotypeFeature(PhenopacketOrBuilder phenopacket, PhenotypicFeature feature) { + TermId termId; + try { + termId = TermId.of(feature.getType().getId()); + } catch (PhenolRuntimeException e) { + String idSummary = summarizePhenopacketAndIndividualId(phenopacket); + // Should not really happen if JsonSchema validators are run upstream, but let's stay safe. + String msg = "The %s found%s is not a valid term ID".formatted(feature.getType().getId(), idSummary); + return Stream.of( + ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) + ); + } + if (termId.getPrefix().equals("HP")) { + // Check if the HPO contains the term. + if (!hpo.containsTerm(termId)) { + String idSummary = summarizePhenopacketAndIndividualId(phenopacket); + String msg = "%s%s not found in %s".formatted(termId.getValue(), idSummary, hpoVersion); + return Stream.of( + ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) + ); + } + + // Check if the `termId` is a primary ID. // If not, this is a warning. 
+ TermId primaryId = hpo.getPrimaryTermId(termId); + if (!primaryId.equals(termId)) { + String idSummary = summarizePhenopacketAndIndividualId(phenopacket); + String msg = "Using obsolete id (%s) instead of current primary id (%s)%s".formatted( + termId.getValue(), primaryId.getValue(), idSummary); + return Stream.of( + ValidationResult.warning(VALIDATOR_INFO, OBSOLETED_TERM_ID, msg) + ); + } + } + + return Stream.empty(); + } + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/CohortHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java similarity index 73% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/CohortHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java index f72bbc70..0642f21e 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/CohortHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java @@ -1,18 +1,17 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.CohortOrBuilder; import org.phenopackets.schema.v2.Phenopacket; -import org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.ArrayList; import java.util.List; -class CohortHpoPhenotypeValidator extends 
BaseHpoPhenotypeValidator { +public class CohortHpoPhenotypeValidator extends AbstractHpoPhenotypeValidator { - CohortHpoPhenotypeValidator(Ontology hpo) { + public CohortHpoPhenotypeValidator(Ontology hpo) { super(hpo); } @@ -21,9 +20,8 @@ public List validate(CohortOrBuilder component) { List results = new ArrayList<>(); for (Phenopacket member : component.getMembersList()) { - Individual subject = member.getSubject(); for (PhenotypicFeature feature : member.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(member, feature) .forEach(results::add); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/FamilyHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java similarity index 72% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/FamilyHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java index 1512f25b..65beef4e 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/FamilyHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java @@ -1,18 +1,17 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.Phenopacket; -import 
org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.ArrayList; import java.util.List; -class FamilyHpoPhenotypeValidator extends BaseHpoPhenotypeValidator { +public class FamilyHpoPhenotypeValidator extends AbstractHpoPhenotypeValidator { - FamilyHpoPhenotypeValidator(Ontology hpo) { + public FamilyHpoPhenotypeValidator(Ontology hpo) { super(hpo); } @@ -23,18 +22,16 @@ public List validate(FamilyOrBuilder component) { // First check the proband. { Phenopacket proband = component.getProband(); - Individual subject = proband.getSubject(); for (PhenotypicFeature feature : proband.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(proband, feature) .forEach(results::add); } } // Then the relatives. for (Phenopacket relative : component.getRelativesList()) { - Individual subject = relative.getSubject(); for (PhenotypicFeature feature : relative.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(relative, feature) .forEach(results::add); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/PhenopacketHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java similarity index 69% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/PhenopacketHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java index c30b2b45..7b43a10e 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/PhenopacketHpoPhenotypeValidator.java +++ 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java @@ -1,17 +1,16 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.ArrayList; import java.util.List; -class PhenopacketHpoPhenotypeValidator extends BaseHpoPhenotypeValidator { +public class PhenopacketHpoPhenotypeValidator extends AbstractHpoPhenotypeValidator { - PhenopacketHpoPhenotypeValidator(Ontology hpo) { + public PhenopacketHpoPhenotypeValidator(Ontology hpo) { super(hpo); } @@ -19,9 +18,8 @@ class PhenopacketHpoPhenotypeValidator extends BaseHpoPhenotypeValidator validate(PhenopacketOrBuilder component) { List results = new ArrayList<>(); - Individual subject = component.getSubject(); for (PhenotypicFeature feature : component.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(component, feature) .forEach(results::add); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java new file mode 100644 index 00000000..c233fd38 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java @@ -0,0 +1,7 @@ +/** + * The package of {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s that perform + * primary validation of 
HPO terms. + * + * @see org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.Primary + */ +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java new file mode 100644 index 00000000..9b4eb0d4 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java @@ -0,0 +1,20 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.util; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.Optional; + +record MaybeExcludedTermId(TermId termId, boolean excluded) { + + static Optional fromPhenotypicFeature(PhenotypicFeature phenotypicFeature) { + TermId termId; + try { + termId = TermId.of(phenotypicFeature.getType().getId()); + } catch (PhenolRuntimeException e) { + return Optional.empty(); + } + return Optional.of(new MaybeExcludedTermId(termId, phenotypicFeature.getExcluded())); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java new file mode 100644 index 00000000..a5abb10e --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java @@ -0,0 +1,9 @@ +package 
org.phenopackets.phenopackettools.validator.core.phenotype.util; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.util.Set; + +public record PhenotypicFeaturesByExclusionStatus(Set observedPhenotypicFeatures, + Set excludedPhenotypicFeatures) { +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java new file mode 100644 index 00000000..ffcf668b --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java @@ -0,0 +1,42 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.util; + +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class Util { + + private static final Logger LOGGER = LoggerFactory.getLogger(Util.class); + + private Util() { + // static utility class + } + + public static PhenotypicFeaturesByExclusionStatus partitionByExclusionStatus(Collection phenotypicFeatures) { + Map> byExclusion = phenotypicFeatures.stream() + .map(toMaybeObservedTermId()) + .flatMap(Optional::stream) + // Use `partitioningBy` instead of `groupingBy` to ensure the map contains keys + // for both `true` and `false`. Then extract `TermId` and collect in a `Set`. 
+ .collect(Collectors.partitioningBy(MaybeExcludedTermId::excluded, + Collectors.mapping(MaybeExcludedTermId::termId, Collectors.toSet()))); + return new PhenotypicFeaturesByExclusionStatus(byExclusion.get(false), byExclusion.get(true)); + } + + private static Function> toMaybeObservedTermId() { + return pf -> MaybeExcludedTermId.fromPhenotypicFeature(pf) + .or(() -> { + // Let's log the malformed term. + LOGGER.warn("Skipping validation of malformed term ID {}", pf.getType().getId()); + return Optional.empty(); + }); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java new file mode 100644 index 00000000..07399df3 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with API for serialization of {@link org.phenopackets.phenopackettools.validator.core.ValidationResults}. 
+ */ +package org.phenopackets.phenopackettools.validator.core.writer; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java new file mode 100644 index 00000000..aa5467d0 --- /dev/null +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java @@ -0,0 +1,177 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.TestData; +import org.phenopackets.phenopackettools.validator.core.ValidationLevel; +import org.phenopackets.phenopackettools.validator.core.ValidationResult; +import org.phenopackets.schema.v2.*; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.List; + +import static org.phenopackets.phenopackettools.validator.core.phenotype.Utils.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +public class AncestryHpoValidatorTest { + + private static final Ontology HPO = TestData.HPO; + + @Nested + public class PhenopacketTest { + + private PhenopacketValidator validator; + + @BeforeEach + public void setUp() { + validator = HpoPhenotypeValidators.Ancestry.phenopacketHpoAncestryValidator(HPO); + } + + @Test + public void testValidInput() { + // Has some Abnormality of finger but no Arachnodactyly. 
+ Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, is(empty())); + } + + @Test + public void testFailsIfTermAndAncestorIsObserved() { + // Has some Abnormality of finger and Arachnodactyly. Only Arachnodactyly should be present. + Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.validatorInfo(), equalTo(validator.validatorInfo())); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); + assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an observed term (Arachnodactyly, HP:0001166) and an observed ancestor (Abnormality of finger, HP:0001167)")); + } + + @Test + public void testFailsIfTermAndAncestorIsExcluded() { + // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. 
+ Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); + assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an excluded term (Abnormality of finger, HP:0001167) and an excluded child (Arachnodactyly, HP:0001166)")); + } + + @Test + public void testFailsIfTermIsPresentAndAncestorIsExcluded() { + // Has Arachnodactyly while its ancestor Abnormality of finger is excluded. The two annotations contradict each other. + Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); + assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an observed term (Arachnodactyly, HP:0001166) and an excluded ancestor (Abnormality of finger, HP:0001167)")); + } + } + + /** + * White-box testing - we know that the {@link PhenotypicFeature} is an attribute of a {@link Phenopacket}, so we + * test the validation logic extensively in {@link PhenopacketTest}. The {@link FamilyTest} test suite ensures + * there are no errors in valid input. 
+ */ + @Nested + public class FamilyTest { + + private PhenopacketValidator validator; + + @BeforeEach + public void setUp() { + validator = HpoPhenotypeValidators.Ancestry.familyHpoAncestryValidator(HPO); + } + + @Test + public void testValidInput() { + Family family = Family.newBuilder() + .setProband(createPhenopacket("example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .addRelatives(createPhenopacket("dad-phenopacket", "example-dad", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .addRelatives(createPhenopacket("mom-phenopacket", "example-mom", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .build(); + + List results = validator.validate(family); + + assertThat(results, is(empty())); + } + } + + /** + * White-box testing (same as in {@link FamilyTest}) - we know that the {@link PhenotypicFeature} + * is an attribute of a {@link Phenopacket}, so we test the validation logic extensively + * in {@link PhenopacketTest}. The {@link CohortTest} test suite ensures there are no errors in valid input. 
+ */ + @Nested + public class CohortTest { + + private PhenopacketValidator validator; + + @BeforeEach + public void setUp() { + validator = HpoPhenotypeValidators.Ancestry.cohortHpoAncestryValidator(HPO); + } + + @Test + public void testValidInput() { + Cohort cohort = Cohort.newBuilder() + .addMembers(createPhenopacket("joe-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .addMembers(createPhenopacket("jim-phenopacket", "example-jim", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .addMembers(createPhenopacket("jane-phenopacket", "example-jane", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .build(); + + List results = validator.validate(cohort); + + assertThat(results, is(empty())); + } + } + +} diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java new file mode 100644 index 00000000..845fdc51 --- /dev/null +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java @@ -0,0 +1,159 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import 
org.phenopackets.phenopackettools.validator.core.*; +import org.phenopackets.schema.v2.*; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.List; +import java.util.Set; + +import static org.phenopackets.phenopackettools.validator.core.phenotype.Utils.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +public class OrganSystemValidatorTest { + + private static final Ontology HPO = TestData.HPO; + private static final Set ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM = Set.of(TermId.of("HP:0040064")); + // Not a real organ system, but for the sake of testing... + private static final Set SLENDER_FINGER_ORGAN_SYSTEM = Set.of(TermId.of("HP:0001238")); + + @Nested + public class PhenopacketTest { + + private PhenopacketValidator abnormalityOfLimbValidator; + private PhenopacketValidator slenderFingerValidator; + + @BeforeEach + public void setUp() { + abnormalityOfLimbValidator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(HPO, ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM); + slenderFingerValidator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(HPO, SLENDER_FINGER_ORGAN_SYSTEM); + } + + @Test + public void noValidationErrorsIfOrganSystemIsAnnotated() { + // Has Arachnodactyly. + Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) + ).build(); + + List results = abnormalityOfLimbValidator.validate(pp); + + assertThat(results, is(empty())); + } + + @Test + public void noValidationErrorsIfOrganSystemAbnormalityIsExcluded() { + // Abnormality of limbs is explicitly excluded. 
+ Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0040064", "Abnormality of limbs", true) + ).build(); + + List results = abnormalityOfLimbValidator.validate(pp); + + assertThat(results, is(empty())); + } + + @ParameterizedTest + @CsvSource({ + "true", + "false" + }) + public void annotationAbsenceLeadsToAnError(boolean excluded) { + // Long fingers and Slender finger are siblings, hence no annotation here. + Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0100807", "Long fingers", excluded) + ).build(); + + List results = slenderFingerValidator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.validatorInfo(), equalTo(slenderFingerValidator.validatorInfo())); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Missing organ system annotation")); + assertThat(result.message(), equalTo("Missing annotation for Slender finger [HP:0001238] in example-phenopacket/example-subject")); + } + } + + /** + * White-box testing - we know that the {@link PhenotypicFeature} is an attribute of a {@link Phenopacket}, so we + * test the validation logic extensively in {@link OrganSystemValidatorTest.PhenopacketTest}. + * The {@link OrganSystemValidatorTest.FamilyTest} test suite ensures there are no errors in valid input. 
+ */ + @Nested + public class FamilyTest { + + private PhenopacketValidator abnormalityOfLimbValidator; + + @BeforeEach + public void setUp() { + abnormalityOfLimbValidator = HpoPhenotypeValidators.OrganSystem.familyHpoOrganSystemValidator(HPO, ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM); + } + + @Test + public void testValidInput() { + Family family = Family.newBuilder() + .setProband(createPhenopacket("example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false)) + .build()) + .addRelatives(createPhenopacket("dad-phenopacket", "example-dad", + createPhenotypicFeature("HP:0001238", "Slender finger", false)) + .build()) + .addRelatives(createPhenopacket("mom-phenopacket", "other-mom", + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .build(); + + List results = abnormalityOfLimbValidator.validate(family); + + assertThat(results, is(empty())); + } + } + + /** + * White-box testing (same as in {@link OrganSystemValidatorTest.FamilyTest}) - we know that the {@link PhenotypicFeature} + * is an attribute of a {@link Phenopacket}, so we test the validation logic extensively + * in {@link OrganSystemValidatorTest.PhenopacketTest}. + * The {@link OrganSystemValidatorTest.CohortTest} test suite ensures there are no errors in valid input. 
+ */ + @Nested + public class CohortTest { + + private PhenopacketValidator abnormalityOfLimbValidator; + + @BeforeEach + public void setUp() { + abnormalityOfLimbValidator = HpoPhenotypeValidators.OrganSystem.cohortHpoOrganSystemValidator(HPO, ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM); + } + + @Test + public void testValidInput() { + Cohort cohort = Cohort.newBuilder() + .addMembers(createPhenopacket("joe-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false)) + .build()) + .addMembers(createPhenopacket("jim-phenopacket", "example-jim", + createPhenotypicFeature("HP:0001238", "Slender finger", false)) + .build()) + .addMembers(createPhenopacket("jane-phenopacket", "example-jane", + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .build(); + + List results = abnormalityOfLimbValidator.validate(cohort); + + assertThat(results, is(empty())); + } + } +} diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java similarity index 93% rename from phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidatorTest.java rename to phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java index 822167ea..d85048d4 100644 --- a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidatorTest.java +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java @@ -5,30 +5,29 @@ import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import 
org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; import org.phenopackets.phenopackettools.validator.core.TestData; import org.phenopackets.phenopackettools.validator.core.ValidationLevel; import org.phenopackets.phenopackettools.validator.core.ValidationResult; -import org.phenopackets.schema.v2.Cohort; -import org.phenopackets.schema.v2.Family; -import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.*; import java.util.List; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; -public class HpoPhenotypeValidatorTest { +public class PrimaryHpoPhenotypeValidatorTest { private static final Ontology HPO = TestData.HPO; @Nested public class PhenopacketTest { - private PhenopacketHpoPhenotypeValidator validator; + private PhenopacketValidator validator; @BeforeEach public void setUp() { - validator = new PhenopacketHpoPhenotypeValidator(HPO); + validator = HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(HPO); } @Test @@ -86,7 +85,7 @@ public void testMissingTermId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("HP:0001182 in 'proband A' not found in http://purl.obolibrary.org/obo/hp/releases/2021-06-08/hp.json")); + assertThat(result.message(), equalTo("HP:0001182 in proband A not found in http://purl.obolibrary.org/obo/hp/releases/2021-06-08/hp.json")); } @Test @@ -117,7 +116,7 @@ public void testObsoleteTermId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.WARNING)); assertThat(result.category(), equalTo("Obsoleted TermId")); - assertThat(result.message(), equalTo("Using obsoleted id (HP:0001505) instead of current primary id (HP:0001166) in 'proband A'")); + 
assertThat(result.message(), equalTo("Using obsolete id (HP:0001505) instead of current primary id (HP:0001166) in proband A")); } @Test @@ -143,7 +142,7 @@ public void testMistypedTermId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0100807 found in 'proband A' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0100807 found in proband A is not a valid term ID")); } } @@ -152,11 +151,11 @@ public void testMistypedTermId() throws Exception { */ @Nested public class FamilyTest { - private FamilyHpoPhenotypeValidator validator; + private PhenopacketValidator validator; @BeforeEach public void setUp() { - validator = new FamilyHpoPhenotypeValidator(HPO); + validator = HpoPhenotypeValidators.Primary.familyHpoPhenotypeValidator(HPO); } @Test @@ -248,7 +247,7 @@ public void testInvalidIdInProband() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0001238 found in 'Flynn' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0001238 found in Flynn is not a valid term ID")); } @Test @@ -296,7 +295,7 @@ public void testInvalidIdInRelative() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0001238 found in 'Walt' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0001238 found in Walt is not a valid term ID")); } } @@ -306,11 +305,11 @@ public void testInvalidIdInRelative() throws Exception { @Nested public class CohortTest { - private CohortHpoPhenotypeValidator validator; + private 
PhenopacketValidator validator; @BeforeEach public void setUp() { - validator = new CohortHpoPhenotypeValidator(HPO); + validator = HpoPhenotypeValidators.Primary.cohortHpoPhenotypeValidator(HPO); } @Test @@ -384,7 +383,7 @@ public void testInvalidId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0001238 found in 'Thing 1' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0001238 found in Thing 1 is not a valid term ID")); } } diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java new file mode 100644 index 00000000..7b93bc68 --- /dev/null +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java @@ -0,0 +1,33 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.Individual; +import org.phenopackets.schema.v2.core.OntologyClass; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.Arrays; + +public class Utils { + + static Phenopacket.Builder createPhenopacket(String phenopacketId, + String subjectId, + PhenotypicFeature... 
features) { + return Phenopacket.newBuilder() + .setId(phenopacketId) + .setSubject(Individual.newBuilder() + .setId(subjectId) + .build()) + .addAllPhenotypicFeatures(Arrays.asList(features)); + } + + static PhenotypicFeature createPhenotypicFeature(String id, String label, boolean excluded) { + return PhenotypicFeature.newBuilder() + .setType(OntologyClass.newBuilder() + .setId(id) + .setLabel(label) + .build()) + .setExcluded(excluded) + .build(); + } + +} diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index 84f949d4..10ef5f3d 100644 --- a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7 phenopacket-tools-validator-jsonschema diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java index 092a770f..df165e96 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java @@ -1,3 +1,13 @@ +/** + * Defines a {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} with base + * validation backed by a JSON schema. + *

+ * The module provides {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} + * an implementation of {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} + * backed by a JSON schema validator. + * + * @see org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner + */ module org.phenopackets.phenopackettools.validator.jsonschema { requires org.phenopackets.phenopackettools.util; requires transitive org.phenopackets.phenopackettools.validator.core; diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/ValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java similarity index 85% rename from phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/ValidationWorkflowRunnerBuilder.java rename to phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java index 5d75fc92..d21ec40c 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/ValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java @@ -18,14 +18,14 @@ import java.util.List; /** - * A utility class that provides {@link JsonSchemaValidationWorkflowRunner.Builder} implementations for top-level + * A utility class that provides {@link JsonSchemaValidationWorkflowRunnerBuilder} implementations for top-level * elements of Phenopacket schema. *

* The class exists because we do not want to expose {@link JsonSchemaValidator} to the outside world. */ -abstract class ValidationWorkflowRunnerBuilder extends JsonSchemaValidationWorkflowRunner.Builder { +abstract class BaseValidationWorkflowRunnerBuilder extends JsonSchemaValidationWorkflowRunnerBuilder { - private static final Logger LOGGER = LoggerFactory.getLogger(ValidationWorkflowRunnerBuilder.class); + private static final Logger LOGGER = LoggerFactory.getLogger(BaseValidationWorkflowRunnerBuilder.class); @Override public JsonSchemaValidationWorkflowRunner build() { @@ -33,6 +33,7 @@ public JsonSchemaValidationWorkflowRunner build() { return new JsonSchemaValidationWorkflowRunner<>(getFormatConverter(), getBaseRequirementsValidator(), requirementValidators, + syntaxValidators, semanticValidators); } @@ -56,7 +57,7 @@ private List readRequirementValidators(List schemaUrls return requirementValidators; } - static class PhenopacketWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + static class PhenopacketWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @Override protected PhenopacketFormatConverter getFormatConverter() { @@ -69,7 +70,7 @@ protected JsonSchemaValidator getBaseRequirementsValidator() { } } - static class FamilyWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + static class FamilyWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @Override protected PhenopacketFormatConverter getFormatConverter() { return PhenopacketFormatConverters.familyConverter(); @@ -82,7 +83,7 @@ protected JsonSchemaValidator getBaseRequirementsValidator() { } - static class CohortWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + static class CohortWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @Override protected PhenopacketFormatConverter getFormatConverter() { return PhenopacketFormatConverters.cohortConverter(); diff --git 
a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index 28918f30..3bfd5bdf 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -5,7 +5,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.protobuf.MessageOrBuilder; import org.phenopackets.phenopackettools.util.format.FormatSniffer; -import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; import org.phenopackets.phenopackettools.util.format.FormatSniffException; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.jsonschema.impl.JsonSchemaValidator; @@ -13,9 +13,6 @@ import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -25,17 +22,18 @@ * Validates if given top-level element satisfies the following criteria: *

    *
  • data format requirements - for instance if the element is a valid JSON document if JSON input is provided
  • - *
  • basic Phenopacket schema requirements - the requirements described by the reference documentation. + *
  • basic Phenopacket schema syntax requirements - the requirements described by the reference documentation. * Absence of a required field is an {@link ValidationLevel#ERROR} and absence of a recommended field is - * a {@link ValidationLevel#WARNING}.
  • - *
  • custom requirements - requirements provided in a JSON schema document(s) provided by the user.
  • - *
  • semantic requirements - requirements checked by {@link PhenopacketValidator}s provided by the user.
  • + * a {@link ValidationLevel#WARNING}, + *
  • custom syntax requirements - requirements provided in JSON schema document(s) supplied by the user,
  • + *
  • syntax requirements - requirements checked by the provided ad hoc {@link PhenopacketValidator}s,
  • + *
  • semantic requirements - requirements checked by the provided {@link PhenopacketValidator}s.
  • *
*

- * The validation is performed in steps as outlined by the list above. Note that the data format validation must + * The validation is performed in the order outlined above. Note that the data format validation must * pass in order for the latter steps to run. *

- * Use one of {@link Builder}s provided via static constructors (e.g. {@link #phenopacketBuilder()}) to build + * Use one of {@link JsonSchemaValidationWorkflowRunnerBuilder}s provided via static constructors (e.g. {@link #phenopacketBuilder()}) to build * the validation workflow. * * @param must be one of the three top-level elements of the Phenopacket schema: @@ -48,40 +46,43 @@ public class JsonSchemaValidationWorkflowRunner impl private final PhenopacketFormatConverter converter; private final JsonSchemaValidator baseValidator; private final Collection requirementValidators; + private final Collection> syntaxValidators; private final Collection> semanticValidators; private final List validatorInfos; /** - * @return a {@link Builder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating + * @return a {@link JsonSchemaValidationWorkflowRunnerBuilder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating * {@link PhenopacketOrBuilder}. */ - public static Builder phenopacketBuilder() { - return new ValidationWorkflowRunnerBuilder.PhenopacketWorkflowRunnerBuilder(); + public static JsonSchemaValidationWorkflowRunnerBuilder phenopacketBuilder() { + return new BaseValidationWorkflowRunnerBuilder.PhenopacketWorkflowRunnerBuilder(); } /** - * @return a {@link Builder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating + * @return a {@link JsonSchemaValidationWorkflowRunnerBuilder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating * {@link FamilyOrBuilder}. */ - public static Builder familyBuilder() { - return new ValidationWorkflowRunnerBuilder.FamilyWorkflowRunnerBuilder(); + public static JsonSchemaValidationWorkflowRunnerBuilder familyBuilder() { + return new BaseValidationWorkflowRunnerBuilder.FamilyWorkflowRunnerBuilder(); } /** - * @return a {@link Builder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating - * {@link CohortOrBuilder}. 
+ * @return a {@link JsonSchemaValidationWorkflowRunnerBuilder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating + * {@link CohortOrBuilder} */ - public static Builder cohortBuilder() { - return new ValidationWorkflowRunnerBuilder.CohortWorkflowRunnerBuilder(); + public static JsonSchemaValidationWorkflowRunnerBuilder cohortBuilder() { + return new BaseValidationWorkflowRunnerBuilder.CohortWorkflowRunnerBuilder(); } JsonSchemaValidationWorkflowRunner(PhenopacketFormatConverter converter, JsonSchemaValidator baseValidator, Collection requirementValidators, + Collection> syntaxValidators, Collection> semanticValidators) { this.converter = Objects.requireNonNull(converter); this.baseValidator = Objects.requireNonNull(baseValidator); this.requirementValidators = Objects.requireNonNull(requirementValidators); + this.syntaxValidators = Objects.requireNonNull(syntaxValidators); this.semanticValidators = Objects.requireNonNull(semanticValidators); this.validatorInfos = summarizeValidatorInfos(baseValidator, requirementValidators, semanticValidators); } @@ -133,6 +134,12 @@ public ValidationResults validate(String json) { return wrapUpValidation(e, builder); } + try { + validateSyntax(json, builder); + } catch (ConversionException e) { + return wrapUpValidation(e, builder); + } + try { validateSemantic(json, builder); } catch (ConversionException e) { @@ -155,6 +162,8 @@ public ValidationResults validate(T item) { return wrapUpValidation(e, builder); } + validateSyntax(item, builder); + // No conversion necessary, hence no need to guard against the `ConversionException`. validateSemantic(item, builder); @@ -176,7 +185,7 @@ private String parseToString(byte[] payload) throws ConversionException { /** * Validate requirements using {@link #baseValidator} and all {@link #requirementValidators}. * - * @throws ConversionException if {@code json} cannot be mapped into {@link JsonNode}. 
+ * @throws ConversionException if {@code json} cannot be mapped into {@link JsonNode} */ private void validateRequirements(String json, ValidationResults.Builder builder) throws ConversionException { JsonNode jsonNode; @@ -194,10 +203,22 @@ private void validateRequirements(String json, ValidationResults.Builder builder } } + private void validateSyntax(String item, ValidationResults.Builder builder) throws ConversionException { + T component = converter.toItem(item); + + validateSyntax(component, builder); + } + + private void validateSyntax(T component, ValidationResults.Builder builder) { + for (PhenopacketValidator validator : syntaxValidators) { + builder.addResults(validator.validatorInfo(), validator.validate(component)); + } + } + /** * Validate semantic requirements using {@link #semanticValidators}. * - * @throws ConversionException if {@code item} cannot be mapped into {@link T}. + * @throws ConversionException if {@code item} cannot be mapped into {@link T} */ private void validateSemantic(String item, ValidationResults.Builder builder) throws ConversionException { T component = converter.toItem(item); @@ -219,56 +240,4 @@ private static ValidationResults wrapUpValidation(ConversionException e, Validat .build(); } - /** - * A builder for {@link JsonSchemaValidationWorkflowRunner}. - *

- * Build the {@link JsonSchemaValidationWorkflowRunner} by providing JSON schema documents - * either as {@link Path} or {@link URL}s, and {@link PhenopacketValidator}s for performing semantic validation. - * - * @param one of top-level elements of the Phenopacket schema. - */ - public static abstract class Builder { - - protected final List jsonSchemaUrls = new ArrayList<>(); - protected final List> semanticValidators = new ArrayList<>(); - - protected Builder() { - // private no-op - } - - public Builder addJsonSchema(Path path) throws MalformedURLException { - return addJsonSchema(path.toUri().toURL()); - } - - public Builder addJsonSchema(URL url) { - jsonSchemaUrls.add(url); - return this; - } - - public Builder addAllJsonSchemaPaths(List paths) throws MalformedURLException { - for (Path path : paths) { - jsonSchemaUrls.add(path.toUri().toURL()); - } - return this; - } - - public Builder addAllJsonSchemaUrls(List urls) { - jsonSchemaUrls.addAll(urls); - return this; - } - - public Builder addSemanticValidator(PhenopacketValidator semanticValidator) { - this.semanticValidators.add(semanticValidator); - return this; - } - - public Builder addAllSemanticValidators(List> semanticValidators) { - this.semanticValidators.addAll(semanticValidators); - return this; - } - - public abstract JsonSchemaValidationWorkflowRunner build(); - - } - } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java new file mode 100644 index 00000000..a66a059a --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java @@ -0,0 +1,86 @@ +package org.phenopackets.phenopackettools.validator.jsonschema; + +import 
com.google.protobuf.MessageOrBuilder; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunnerBuilder; + +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +/** + * A builder for {@link JsonSchemaValidationWorkflowRunner}. + *

+ * Build the {@link JsonSchemaValidationWorkflowRunner} by providing JSON schema documents + * either as {@link Path} or {@link URL}s, and {@link PhenopacketValidator}s for performing semantic validation. + * + * @param one of top-level elements of the Phenopacket schema. + */ +public abstract class JsonSchemaValidationWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + + protected final List jsonSchemaUrls = new ArrayList<>(); + + protected JsonSchemaValidationWorkflowRunnerBuilder() { + // private no-op + } + + /** + * Register a JSON schema present at a given {@code path} to be used as a syntax validator. The {@code path} + * will be interpreted as a {@link URL}. + * + * @param path path to the JSON schema document + * @return the builder + * @throws MalformedURLException if the {@code path} cannot be converted to a well-formatted {@link URL} + */ + public JsonSchemaValidationWorkflowRunnerBuilder addJsonSchema(Path path) throws MalformedURLException { + return addJsonSchema(path.toUri().toURL()); + } + + /** + * Register a JSON schema present at a given {@code url} to be used as a syntax validator. + * + * @param url url to the JSON schema document + * @return the builder + */ + public JsonSchemaValidationWorkflowRunnerBuilder addJsonSchema(URL url) { + jsonSchemaUrls.add(url); + return this; + } + + /** + * Add JSON schemas in bulk. + * + * @param paths an iterable of paths pointing to JSON schema documents + * @return the builder + * @see JsonSchemaValidationWorkflowRunnerBuilder#addJsonSchema(Path) + */ + public JsonSchemaValidationWorkflowRunnerBuilder addAllJsonSchemaPaths(Iterable paths) throws MalformedURLException { + for (Path path : paths) { + jsonSchemaUrls.add(path.toUri().toURL()); + } + return this; + } + + /** + * Add JSON schemas in bulk. 
+ * + * @param urls an iterable of urls pointing to JSON schema documents + * @return the builder + * @see JsonSchemaValidationWorkflowRunnerBuilder#addJsonSchema(URL) + */ + public JsonSchemaValidationWorkflowRunnerBuilder addAllJsonSchemaUrls(List urls) { + jsonSchemaUrls.addAll(urls); + return this; + } + + /** + * Finish building the {@link JsonSchemaValidationWorkflowRunner}. + * + * @return the runner + */ + @Override + public abstract JsonSchemaValidationWorkflowRunner build(); + +} diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java index b883b534..9811498c 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java @@ -2,12 +2,18 @@ import com.fasterxml.jackson.databind.JsonNode; import com.networknt.schema.JsonSchema; +import com.networknt.schema.ValidationMessage; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; import java.util.Objects; -import java.util.stream.Collectors; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; /** * {@link JsonSchemaValidator} applies a single {@link JsonSchema} @@ -16,6 +22,9 @@ */ public class JsonSchemaValidator { + private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class); + private static final Pattern VALIDATION_MSG_PT = 
Pattern.compile("^\\$\\.(?[\\w\\[\\].]+):(?.*)$"); + private final JsonSchema jsonSchema; private final ValidatorInfo validatorInfo; @@ -30,8 +39,21 @@ public ValidatorInfo validatorInfo() { public List validate(JsonNode node) { return jsonSchema.validate(node).stream() - .map(res -> ValidationResult.error(validatorInfo, res.getType(), res.getMessage())) - .collect(Collectors.toList()); + .flatMap(validationMessageIntoValidationResult()) + .toList(); + } + + private Function> validationMessageIntoValidationResult() { + return res -> { + Matcher matcher = VALIDATION_MSG_PT.matcher(res.getMessage()); + if (matcher.matches()) { + String msg = "'%s'%s".formatted(matcher.group("location"), matcher.group("msg")); + return Stream.of(ValidationResult.error(validatorInfo, res.getType(), msg)); + } else { + LOGGER.warn("Non-matching validation message: {}", res.getMessage()); + return Stream.empty(); + } + }; } } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java new file mode 100644 index 00000000..6e82c1a2 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java @@ -0,0 +1,14 @@ +/** + * The package provides {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} + * and {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerBuilder} + * for validating phenopackets. + *

+ * The {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerBuilder} builds + * {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} for validating + * {@link org.phenopackets.schema.v2.Phenopacket}, {@link org.phenopackets.schema.v2.Family} + * or {@link org.phenopackets.schema.v2.Cohort}. + *

+ * The {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} applies the + * validation steps on phenopacket elements. + */ +package org.phenopackets.phenopackettools.validator.jsonschema; \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java index e75f7f1e..30d3d520 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java @@ -113,7 +113,7 @@ private static JsonNode readJsonSchemaNode(InputStream is) throws IOException { private static JsonSchemaNodeAndInfo phenopacketJsonSchemaAndInfo() { try (InputStream is = JsonSchemaValidatorConfigurer.class.getResourceAsStream(PHENOPACKET_SCHEMA_PATH)) { JsonNode schemaNode = readJsonSchemaNode(is); - return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.genericJsonSchema()); + return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.baseSyntaxValidation()); } catch (IOException e) { throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); } @@ -125,7 +125,7 @@ private static JsonSchemaNodeAndInfo phenopacketJsonSchemaAndInfo() { private static JsonSchemaNodeAndInfo familyJsonSchemaAndInfo() { try (InputStream is = JsonSchemaValidatorConfigurer.class.getResourceAsStream(FAMILY_SCHEMA_PATH)) { JsonNode schemaNode = readJsonSchemaNode(is); - return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.genericJsonSchema()); + return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.baseSyntaxValidation()); 
} catch (IOException e) { throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); } @@ -137,7 +137,7 @@ private static JsonSchemaNodeAndInfo familyJsonSchemaAndInfo() { private static JsonSchemaNodeAndInfo cohortJsonSchemaAndInfo() { try (InputStream is = JsonSchemaValidatorConfigurer.class.getResourceAsStream(COHORT_SCHEMA_PATH)) { JsonNode schemaNode = readJsonSchemaNode(is); - return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.genericJsonSchema()); + return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.baseSyntaxValidation()); } catch (IOException e) { throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); } @@ -151,11 +151,11 @@ private static JsonSchemaNodeAndInfo readSchemaAndInfo(InputStream is) throws IO } private static ValidatorInfo decodeValidatorInfo(JsonNode schemaNode) { - String schema = getNodeAsTextOrDefaultText(schemaNode, "$schema", "UNKNOWN_SCHEMA"); + String id = getNodeAsTextOrDefaultText(schemaNode, "$id", "UNKNOWN_SCHEMA"); String title = getNodeAsTextOrDefaultText(schemaNode, "title", "UNKNOWN_TITLE"); String description = getNodeAsTextOrDefaultText(schemaNode, "description", "UNKNOWN VALIDATOR"); - return ValidatorInfo.of(schema, title, description); + return ValidatorInfo.of(id, title, description); } private static String getNodeAsTextOrDefaultText(JsonNode schemaNode, String fieldName, String defaultValue) { diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md new file mode 100644 index 00000000..c18f9455 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md @@ -0,0 +1,25 @@ +# README + +This folder contains JSON schemas for validating 
top-level Phenopacket Schema elements and the `Variation` element +embedded in the Phenopacket Schema. + +## VRSATILE notes + +The datatype of the `VcfRecord.pos` field in `vrsatile.proto` is: +``` +uint64 pos = 3; +``` + +Since Protobuf's `JSONFormat` serializes `uint64` fields into a JSON `string` instead of a JSON `number`, +the JSON schema element for validation of the `VcfRecord.pos` field is: + +``` +"type": "string", +"pattern": "^[1-9][0-9]*$" +``` + +instead of a more straightforward: + +``` +"type": "integer" +``` diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json new file mode 100644 index 00000000..8e7dcd02 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json @@ -0,0 +1,470 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://www.ga4gh.org/phenopackets/vrs-facade", + "title": "VRS Variation facade", + "description": "An adapter for the VRS Variation representation that is embedded into Phenopacket schema. 
Note that the adapter does not map 1:1 to VRS Variation.", + "type": "object", + "properties": { + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "copyNumber": { + "$ref": "#/definitions/CopyNumber" + }, + "text": { + "$ref": "#/definitions/Text" + }, + "variationSet": { + "$ref": "#/definitions/VariationSet" + } + }, + "oneOf": [ + { "required": [ "allele" ]}, + { "required": [ "haplotype" ]}, + { "required": [ "copyNumber" ]}, + { "required": [ "text" ]}, + { "required": [ "variationSet"]} + ], + "additionalProperties": false, + + + "definitions": { + "Allele": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "curie": { + "type": "string" + }, + "chromosomeLocation": { + "$ref": "#/definitions/ChromosomeLocation" + }, + "sequenceLocation": { + "$ref": "#/definitions/SequenceLocation" + }, + + "sequenceState": { + "$ref": "#/definitions/SequenceState" + }, + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "repeatedSequenceExpression": { + "$ref": "#/definitions/RepeatedSequenceExpression" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["curie"] }, + { "required": ["chromosomeLocation"] }, + { "required": ["sequenceLocation"] } + ] + }, { + "oneOf": [ + { "required": ["sequenceState"] }, + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] }, + { "required": ["repeatedSequenceExpression"] } + ] + } + ], + "additionalProperties": false + }, + + "Haplotype": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "members": { + "type": "array", + "items": { + "$ref": "#/definitions/HaplotypeMember" + } + } + }, + "additionalProperties": false + }, + + "HaplotypeMember": { + "type": "object", + "description": "A utility object for representing Haplotype.Member of vrs.proto", + 
"properties": { + "allele": { + "$ref": "#/definitions/Allele" + }, + "curie": { + "type": "string" + } + }, + "oneOf": [ + { "required": ["allele"] }, + { "required": ["curie"] } + ], + "additionalProperties": false + }, + + "CopyNumber": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "gene": { + "$ref": "#/definitions/Gene" + }, + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "repeatedSequenceExpression": { + "$ref": "#/definitions/RepeatedSequenceExpression" + }, + "curie": { + "type": "string" + }, + + "number": { + "$ref": "#/definitions/Number" + }, + "indefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "definiteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["allele"] }, + { "required": ["haplotype"] }, + { "required": ["gene"] }, + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] }, + { "required": ["repeatedSequenceExpression"] }, + { "required": ["curie"] } + ] + }, { + "oneOf": [ + { "required": ["number"] }, + { "required": ["indefiniteRange"] }, + { "required": ["definiteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "Text": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "definition": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "VariationSet": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "members": { + "type": "array", + "items": { + "$ref": "#/definitions/VariationSetMember" + } + } + }, + "additionalProperties": false + }, + + "VariationSetMember": { + "type": "object", + "description": "A utility object for representing VariationSet.Member of vrs.proto", + 
"properties": { + "curie": { + "type": "string" + }, + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "copyNumber": { + "$ref": "#/definitions/CopyNumber" + }, + "text": { + "$ref": "#/definitions/Text" + }, + "variationSet": { + "$ref": "#/definitions/VariationSet" + } + }, + "oneOf": [ + { "required": ["curie"] }, + { "required": ["allele"] }, + { "required": ["haplotype"] }, + { "required": ["copyNumber"] }, + { "required": ["text"] }, + { "required": ["variationSet"] } + ], + "additionalProperties": false + }, + + "ChromosomeLocation": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "speciesId": { + "type": "string" + }, + "chr": { + "type": "string" + }, + "interval": { + "$ref": "#/definitions/CytobandInterval" + } + }, + "additionalProperties": false + }, + + "SequenceLocation": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "sequenceId": { + "type": "string" + }, + "sequenceInterval": { + "$ref": "#/definitions/SequenceInterval" + }, + "simpleInterval": { + "$ref": "#/definitions/SimpleInterval" + } + }, + "oneOf": [ + { "required": ["sequenceInterval"] }, + { "required": ["simpleInterval"] } + ], + "additionalProperties": false + }, + + "SequenceInterval": { + "type": "object", + "properties": { + "startNumber": { + "$ref": "#/definitions/Number" + }, + "startIndefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "startDefiniteRange": { + "$ref": "#/definitions/DefiniteRange" + }, + "endNumber": { + "$ref": "#/definitions/Number" + }, + "endIndefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "endDefiniteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["startNumber"] }, + { "required": ["startIndefiniteRange"] }, + { "required": ["startDefiniteRange"] } + ] + }, { + "oneOf": [ + { "required": ["endNumber"] }, + { "required": ["endIndefiniteRange"] }, 
+ { "required": ["endDefiniteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "SimpleInterval": { + "type": "object", + "properties": { + "start": { + "$ref": "#/definitions/UnsignedInt64" + }, + "end": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "CytobandInterval": { + "type": "object", + "properties": { + "start": { + "type": "string" + }, + "end": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "Gene": { + "type": "object", + "properties": { + "geneId": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "Number": { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "IndefiniteRange": { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/UnsignedInt64" + }, + "comparator": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "DefiniteRange": { + "type": "object", + "properties": { + "min": { + "$ref": "#/definitions/UnsignedInt64" + }, + "max": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "SequenceState": { + "type": "object", + "properties": { + "sequence": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "LiteralSequenceExpression": { + "type": "object", + "properties": { + "sequence": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "DerivedSequenceExpression": { + "type": "object", + "properties": { + "location": { + "$ref": "#/definitions/SequenceLocation" + }, + "reverseComplement": { + "type": "boolean" + } + }, + "additionalProperties": false + }, + + "RepeatedSequenceExpression": { + "type": "object", + "properties": { + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "number": { + "$ref": 
"#/definitions/Number" + }, + "indefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "definiteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] } + ] + }, { + "oneOf": [ + { "required": ["number"] }, + { "required": ["indefiniteRange"] }, + { "required": ["definiteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "UnsignedInt64": { + "type": "string", + "pattern": "^[0-9]+$", + "description": "A utility to represent Protobuf `uint64` data type" + } + } +} \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json deleted file mode 100644 index 555e2a55..00000000 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json +++ /dev/null @@ -1,980 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2019-09/schema#", - "title": "GA4GH-VRS-Definitions", - "type": "object", - "definitions": { - "Variation": { - "description": "The root class of all Variation types", - "oneOf": [ - { - "$ref": "#/definitions/MolecularVariation" - }, - { - "$ref": "#/definitions/SystemicVariation" - }, - { - "$ref": "#/definitions/UtilityVariation" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "MolecularVariation": { - "description": "A variation on a contiguous molecule.", - "oneOf": [ - { - "$ref": "#/definitions/Allele" - }, - { - "$ref": "#/definitions/Haplotype" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "UtilityVariation": { - "description": "Utility variation classes that cannot be constrained to a specific biological class of variation.", - "oneOf": [ - { - "$ref": "#/definitions/Text" 
- }, - { - "$ref": "#/definitions/VariationSet" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "SystemicVariation": { - "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.", - "oneOf": [ - { - "$ref": "#/definitions/Abundance" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Allele": { - "description": "The sequence state at a Location.", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Allele" - ], - "default": "Allele" - }, - "location": { - "oneOf": [ - { - "$ref": "#/definitions/CURIE" - }, - { - "$ref": "#/definitions/Location" - } - ] - }, - "state": { - "oneOf": [ - { - "$ref": "#/definitions/SequenceState" - }, - { - "$ref": "#/definitions/SequenceExpression" - } - ] - } - }, - "required": [ - "type", - "location", - "state" - ] - }, - "Haplotype": { - "description": "A set of zero or more Alleles", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Haplotype" - ], - "default": "Haplotype" - }, - "members": { - "type": "array", - "minItems": 1, - "uniqueItems": true, - "items": { - "oneOf": [ - { - "$ref": "#/definitions/Allele" - }, - { - "$ref": "#/definitions/CURIE" - } - ] - } - } - }, - "required": [ - "type", - "members" - ] - }, - "Text": { - "description": "A textual description of variation, typically not parseable but understood by humans.", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Text" - ], - "default": "Text" - }, - "definition": { - "type": "string", - "description": "An textual representation of variation intended to capture variation descriptions that cannot be parsed, but still 
treated as variation." - } - }, - "required": [ - "type", - "definition" - ] - }, - "VariationSet": { - "description": "A set of Variation objects.\nMembers may be specified inline or by reference (with CURIEs)", - "type": "object", - "additionalProperties": false, - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "VariationSet" - ], - "default": "VariationSet" - }, - "members": { - "type": "array", - "uniqueItems": true, - "items": { - "oneOf": [ - { - "$ref": "#/definitions/CURIE" - }, - { - "$ref": "#/definitions/Variation" - } - ] - } - } - }, - "required": [ - "type", - "members" - ] - }, - "Abundance": { - "description": "The quantity of a feature, variation, molecule or part thereof in a system.", - "oneOf": [ - { - "$ref": "#/definitions/CopyNumber" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "CopyNumber": { - "additionalProperties": false, - "type": "object", - "description": "The count of copies of a Feature, Location, or Molecular Variation subject within a genome.", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "CopyNumber" - ], - "default": "CopyNumber" - }, - "subject": { - "oneOf": [ - { - "$ref": "#/definitions/MolecularVariation" - }, - { - "$ref": "#/definitions/Feature" - }, - { - "$ref": "#/definitions/SequenceExpression" - }, - { - "$ref": "#/definitions/CURIE" - } - ] - }, - "copies": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "copies": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": 
{ - "properties": { - "copies": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "copies": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "subject", - "copies" - ] - }, - "Location": { - "description": "A Location represents a span on a specific sequence.", - "oneOf": [ - { - "$ref": "#/definitions/ChromosomeLocation" - }, - { - "$ref": "#/definitions/SequenceLocation" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "ChromosomeLocation": { - "additionalProperties": false, - "description": "A region of a chromosomed specified by species and name using cytogenetic naming conventions", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ChromosomeLocation" - ], - "default": "ChromosomeLocation" - }, - "_id": { - "$ref": "#/definitions/CURIE" - }, - "species_id": { - "$ref": "#/definitions/CURIE", - "default": "taxonomy:9606" - }, - "chr": { - "type": "string" - }, - "interval": { - "$ref": "#/definitions/CytobandInterval" - } - }, - "required": [ - "type", - "species_id", - "chr", - "interval" - ] - }, - "SequenceLocation": { - "additionalProperties": false, - "description": "A specified subsequence within another sequence that is used as a reference sequence.", - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "SequenceLocation" - ], - "default": "SequenceLocation" - }, - "sequence_id": { - "$ref": "#/definitions/CURIE" - }, - "interval": { - "oneOf": [ - { - "$ref": "#/definitions/SequenceInterval" - }, - { - "$ref": "#/definitions/SimpleInterval" - } - ] - } - }, - "required": [ - "type", - "sequence_id", - "interval" - ] - }, - 
"SequenceInterval": { - "description": "A SequenceInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates.\nSequenceInterval is intended to be compatible with that in Sequence Ontology ([SO:0000001](http://www.sequenceontology.org/browser/current_svn/term/SO:0000001)), with the exception that the GA4GH VRS SequenceInterval may be zero-width. The SO definition is for an \"extent greater than zero\".", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "SequenceInterval" - ], - "default": "SequenceInterval" - }, - "start": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - }, - "end": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "end": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - 
"properties": { - "end": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "end": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "start", - "end" - ] - }, - "CytobandInterval": { - "description": "A contiguous region specified by chromosomal bands features.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "CytobandInterval" - ], - "default": "CytobandInterval" - }, - "start": { - "$ref": "#/definitions/HumanCytoband" - }, - "end": { - "$ref": "#/definitions/HumanCytoband" - } - }, - "example": { - "type": "CytobandInterval", - "start": "q22.2", - "end": "q22.3" - }, - "required": [ - "type", - "start", - "end" - ] - }, - "SequenceExpression": { - "description": "One of a set of sequence representation syntaxes.", - "oneOf": [ - { - "$ref": "#/definitions/LiteralSequenceExpression" - }, - { - "$ref": "#/definitions/DerivedSequenceExpression" - }, - { - "$ref": "#/definitions/RepeatedSequenceExpression" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "LiteralSequenceExpression": { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "LiteralSequenceExpression" - ], - "default": "LiteralSequenceExpression" - }, - "sequence": { - "$ref": "#/definitions/Sequence" - } - }, - "required": [ - "type", - "sequence" - ] - }, - "DerivedSequenceExpression": { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "DerivedSequenceExpression" - ], - "default": 
"DerivedSequenceExpression" - }, - "location": { - "$ref": "#/definitions/SequenceLocation" - }, - "reverse_complement": { - "type": "boolean" - } - }, - "required": [ - "type", - "location", - "reverse_complement" - ] - }, - "RepeatedSequenceExpression": { - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "RepeatedSequenceExpression" - ], - "default": "RepeatedSequenceExpression" - }, - "seq_expr": { - "oneOf": [ - { - "$ref": "#/definitions/LiteralSequenceExpression" - }, - { - "$ref": "#/definitions/DerivedSequenceExpression" - } - ] - }, - "count": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "seq_expr", - "count" - ] - }, - "Feature": { - "description": "A named entity that can be mapped to a Location. 
Genes, protein domains, exons, and chromosomes are some examples of common biological entities that may be Features.", - "oneOf": [ - { - "$ref": "#/definitions/Gene" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Gene": { - "description": "A reference to an external gene system, used as a location for variation. Currently, the `ncbigene` namespace is required. See https://registry.identifiers.org/registry/ncbigene.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "Gene" - ], - "default": "Gene" - }, - "gene_id": { - "$ref": "#/definitions/CURIE" - } - }, - "required": [ - "type", - "gene_id" - ] - }, - "Number": { - "description": "A simple number value as a VRS class.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "Number" - ], - "default": "Number" - }, - "value": { - "type": "number" - } - }, - "required": [ - "type", - "value" - ] - }, - "IndefiniteRange": { - "description": "An indefinite range represented as a number and associated comparator. 
The bound operator is interpreted as follows: '>=' are all values greater than and including the value, '<=' are all numbers less than and including the value.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "IndefiniteRange" - ], - "default": "IndefiniteRange" - }, - "value": { - "type": "number" - }, - "comparator": { - "type": "string", - "enum": [ - "<=", - ">=" - ] - } - }, - "required": [ - "type", - "value", - "comparator" - ] - }, - "DefiniteRange": { - "description": "A bounded, inclusive range of numbers.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "DefiniteRange" - ], - "default": "DefiniteRange" - }, - "min": { - "type": "number" - }, - "max": { - "type": "number" - } - }, - "required": [ - "type", - "min", - "max" - ] - }, - "Sequence": { - "additionalProperties": false, - "description": "A character string of residues that represents a biological sequence using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes are permitted in Sequences.", - "type": "string", - "pattern": "^[A-Z*\\-]*$" - }, - "CURIE": { - "additionalProperties": false, - "description": "A string that refers to an object uniquely. The lifetime and scope of an id is defined by the sender.\nVRS does not impose any contraints on strings used as ids in messages. However, to maximize sharability of data, VRS RECOMMENDS that implementations use [W3C Compact URI (CURIE)](https://www.w3.org/TR/curie/) syntax.\nString CURIEs are represented as `prefix`:`reference` (W3C terminology), but often referred to as `namespace`:`accession` or `namespace`:`local id` colloquially.\nVRS also RECOMMENDS that `prefix` be defined in identifiers.org.\nThe `reference` component is an unconstrained string.\nA CURIE is a URI. 
URIs may *locate* objects (i.e., specify where to retrieve them) or *name* objects conceptually. VRS uses CURIEs primarily as a naming mechanism.\nImplementations MAY provide CURIE resolution mechanisms for prefixes to make these objects locatable.\nUsing internal ids in public messages is strongly discouraged.", - "type": "string", - "pattern": "^\\w[^:]*:.+$", - "example": "ensembl:ENSG00000139618" - }, - "HumanCytoband": { - "additionalProperties": false, - "description": "A interval on a stained metaphase chromosome specified by cytobands. CytobandIntervals include the regions described by the start and end cytobands.", - "type": "string", - "pattern": "^cen|[pq](ter|([1-9][0-9]*(\\.[1-9][0-9]*)?))$", - "example": "q22.3" - }, - "SequenceState": { - "deprecated": true, - "description": "DEPRECATED: An assertion of the state of a sequence, typically at a Sequence Location within an Allele.\nThis class is deprecated. Use LiteralSequenceExpression instead.", - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "SequenceState" - ], - "default": "SequenceState" - }, - "sequence": { - "$ref": "#/definitions/Sequence" - } - }, - "example": { - "type": "SequenceState", - "sequence": "C" - }, - "required": [ - "type", - "sequence" - ] - }, - "SimpleInterval": { - "deprecated": true, - "description": "DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates.\nThis class is deprecated. 
Use SequenceInterval instead.", - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "SimpleInterval" - ], - "default": "SimpleInterval" - }, - "start": { - "type": "integer" - }, - "end": { - "type": "integer" - } - }, - "example": { - "type": "SimpleInterval", - "start": 11, - "end": 22 - }, - "required": [ - "type", - "start", - "end" - ] - } - } -} \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json index 217883d1..840000a4 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json @@ -1,8 +1,8 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema#", - "$id": "https://www.ga4gh.org/phenopackets", - "title": "Phenopacket", - "description": "Schema for Global Alliance for Genomics and Health (GA4GH) Phenopacket", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://www.ga4gh.org/phenopackets/vrsatile", + "title": "VRS Added Tools for Interoperable Loquacious Exchange", + "description": "VRSATILE: A set of proposed extensions for GA4GH's Variation Representation Specification (VRS) to enable interoperable exchange of common descriptive data alongside variation concepts", "type": "object", "definitions": { "extension": { @@ -58,7 +58,8 @@ }, "pos" : { "description": "position on the chromosome (VCF convention)", - "type": "integer" + "type": "string", + "pattern": "^[1-9][0-9]*$" }, "id" : { "description": "identifier as used in VCF line", @@ -97,17 +98,26 @@ "type": "string" }, "variation": { - "$ref": 
"classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json#/definitions/Variation" + "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json", + "description": "An adapter for the VRS Variation representation that is embedded into Phenopacket schema. Note that the adapter does not map 1:1 to VRS Variation." }, "label": { - "type": "string" + "type": "string", + "description": "A primary label for the variation" + }, + "description": { + "type": "string", + "description": "A free-text description of the variation" + }, + "geneContext": { + "$ref": "#/definitions/geneDescriptor", + "description": "A specific gene context that applies to this variant" }, "expressions": { "type": "array", "items": { "$ref": "#/definitions/expression" - }, - "minItems": 0 + } }, "vcfRecord": { "$ref": "#/definitions/vcfRecord" @@ -116,22 +126,19 @@ "type": "array", "items": { "type": "string" - }, - "minItems": 0 + } }, "alternateLabels": { "type": "array", "items": { "type": "string" - }, - "minItems": 0 + } }, "extensions": { "type": "array", "items": { "$ref": "#/definitions/extension" - }, - "minItems": 0 + } }, "moleculeContext": { "enum": [ @@ -142,16 +149,21 @@ ] }, "structuralType" : { - "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/v2/base.json#/definitions/ontologyClass" + "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/v2/base.json#/definitions/ontologyClass", + "description": "The structural variant type associated with this variant, such as a substitution, deletion, or fusion. We RECOMMEND using a descendent term of SO:0001537." 
}, - "vrs_ref_allele_seq": { + "vrsRefAlleleSeq": { "type": "string", - "description": "A Sequence corresponding to a “ref allele”, describing the sequence expected at a SequenceLocation reference" + "description": "A Sequence corresponding to a \"ref allele\", describing the sequence expected at a SequenceLocation reference" }, "allelicState" : { "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/v2/base.json#/definitions/ontologyClass" } - } + }, + "required": [ + "id", "moleculeContext" + ], + "additionalProperties": false }, "geneDescriptor" : { "type": "object", @@ -166,30 +178,34 @@ }, "description": { "type": "string", - "description": "Free-text description" + "description": "Free-text description of the gene" }, "alternateIds" : { "type": "array", + "description": "Alternative identifier(s) of the gene", "items": { "type": "string" - }, - "minItems": 0 + } }, - "alternateSymbols" : { + "xrefs" : { "type": "array", + "description": "Related concept IDs (e.g. 
gene ortholog IDs) may be placed in xrefs", "items": { "type": "string" - }, - "minItems": 0 + } }, - "xrefs" : { + "alternateSymbols" : { "type": "array", + "description": "Alternative symbol(s) of the gene", "items": { "type": "string" - }, - "minItems": 0 + } } - } + }, + "required": [ + "valueId", "symbol" + ], + "additionalProperties": false } } } \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index cf885756..6ade9cf6 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import java.nio.file.Path; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -53,8 +54,8 @@ public void setUp() { */ @ParameterizedTest @CsvSource({ - "/id, DELETE, '$.id: is missing but it is required'", - "/metaData, DELETE, '$.metaData: is missing but it is required'", + "/id, DELETE, 'id' is missing but it is required", + "/metaData, DELETE, 'metaData' is missing but it is required", }) public void checkTopLevelPhenopacketConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -65,7 +66,7 @@ public void checkTopLevelPhenopacketConstraints(String path, String action, Stri */ @ParameterizedTest @CsvSource({ - "/subject/id, DELETE, '$.subject.id: is missing but it is required'" + "/subject/id, DELETE, 
'subject.id' is missing but it is required" }) public void checkSubjectConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -77,7 +78,7 @@ public void checkSubjectConstraints(String path, String action, String expected) */ @ParameterizedTest @CsvSource({ - "/subject/vitalStatus/status, DELETE, '$.subject.vitalStatus.status: is missing but it is required'" + "/subject/vitalStatus/status, DELETE, 'subject.vitalStatus.status' is missing but it is required" }) public void checkVitalStatusConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -88,8 +89,8 @@ public void checkVitalStatusConstraints(String path, String action, String expec */ @ParameterizedTest @CsvSource({ - "/phenotypicFeatures[0]/type, DELETE, '$.phenotypicFeatures[0].type: is missing but it is required'", - "/phenotypicFeatures[1]/type, DELETE, '$.phenotypicFeatures[1].type: is missing but it is required'" + "/phenotypicFeatures[0]/type, DELETE, 'phenotypicFeatures[0].type' is missing but it is required", + "/phenotypicFeatures[1]/type, DELETE, 'phenotypicFeatures[1].type' is missing but it is required" }) public void checkPhenotypicFeatureConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -103,17 +104,17 @@ public void checkPhenotypicFeatureConstraints(String path, String action, String @CsvSource({ // TODO - this returns an error for each oneOf field // "/phenotypicFeatures[0]/onset/gestationalAge, DELETE, '$.phenotypicFeatures[0].onset.gestationalAge.weeks: is missing but it is required'", - "/phenotypicFeatures[0]/onset/gestationalAge/weeks, DELETE, '$.phenotypicFeatures[0].onset.gestationalAge.weeks: is missing but it is required'", - "/phenotypicFeatures[0]/onset/gestationalAge/weeks, SET[-1], '$.phenotypicFeatures[0].onset.gestationalAge.weeks: must 
have a minimum value of 0'", - "/phenotypicFeatures[0]/onset/gestationalAge/days, SET[-1], '$.phenotypicFeatures[0].onset.gestationalAge.days: must have a minimum value of 0'", - "/phenotypicFeatures[1]/onset/age/iso8601duration, DELETE, '$.phenotypicFeatures[1].onset.age.iso8601duration: is missing but it is required'", + "/phenotypicFeatures[0]/onset/gestationalAge/weeks, DELETE, 'phenotypicFeatures[0].onset.gestationalAge.weeks' is missing but it is required", + "/phenotypicFeatures[0]/onset/gestationalAge/weeks, SET[-1], 'phenotypicFeatures[0].onset.gestationalAge.weeks' must have a minimum value of 0", + "/phenotypicFeatures[0]/onset/gestationalAge/days, SET[-1], 'phenotypicFeatures[0].onset.gestationalAge.days' must have a minimum value of 0", + "/phenotypicFeatures[1]/onset/age/iso8601duration, DELETE, 'phenotypicFeatures[1].onset.age.iso8601duration' is missing but it is required", // TODO - add test for ensuring that the duration is in an ISO8601 pattern - "/phenotypicFeatures[2]/onset/ageRange/start, DELETE, '$.phenotypicFeatures[2].onset.ageRange.start: is missing but it is required'", - "/phenotypicFeatures[2]/onset/ageRange/end, DELETE, '$.phenotypicFeatures[2].onset.ageRange.end: is missing but it is required'", + "/phenotypicFeatures[2]/onset/ageRange/start, DELETE, 'phenotypicFeatures[2].onset.ageRange.start' is missing but it is required", + "/phenotypicFeatures[2]/onset/ageRange/end, DELETE, 'phenotypicFeatures[2].onset.ageRange.end' is missing but it is required", // TODO - require end being at or after start // We do not tamper with the ontology class and timestamp as we test their validity elsewhere. 
- "/phenotypicFeatures[5]/onset/interval/start, DELETE, '$.phenotypicFeatures[5].onset.interval.start: is missing but it is required'", - "/phenotypicFeatures[5]/onset/interval/end, DELETE, '$.phenotypicFeatures[5].onset.interval.end: is missing but it is required'", + "/phenotypicFeatures[5]/onset/interval/start, DELETE, 'phenotypicFeatures[5].onset.interval.start' is missing but it is required", + "/phenotypicFeatures[5]/onset/interval/end, DELETE, 'phenotypicFeatures[5].onset.interval.end' is missing but it is required", }) public void checkTimeElementConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -124,7 +125,7 @@ public void checkTimeElementConstraints(String path, String action, String expec */ @ParameterizedTest @CsvSource({ - "/phenotypicFeatures[0]/evidence[0]/evidenceCode, DELETE, '$.phenotypicFeatures[0].evidence[0].evidenceCode: is missing but it is required'", + "/phenotypicFeatures[0]/evidence[0]/evidenceCode, DELETE, 'phenotypicFeatures[0].evidence[0].evidenceCode' is missing but it is required", }) public void checkEvidenceConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -136,9 +137,9 @@ public void checkEvidenceConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/measurements[0]/assay, DELETE, '$.measurements[0].assay: is missing but it is required'", - "/measurements[0]/value, DELETE, '$.measurements[0].value: is missing but it is required|$.measurements[0].complexValue: is missing but it is required'", - "/measurements[1]/complexValue, DELETE, '$.measurements[1].value: is missing but it is required|$.measurements[1].complexValue: is missing but it is required'", + "/measurements[0]/assay, DELETE, 'measurements[0].assay' is missing but it is required", + "/measurements[0]/value, DELETE, 'measurements[0].value' is missing but it is 
required|'measurements[0].complexValue' is missing but it is required", + "/measurements[1]/complexValue, DELETE, 'measurements[1].value' is missing but it is required|'measurements[1].complexValue' is missing but it is required", }) public void checkMeasurementConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -149,7 +150,7 @@ public void checkMeasurementConstraints(String path, String action, String expec */ @ParameterizedTest @CsvSource({ - "/biosamples[0]/id, DELETE, '$.biosamples[0].id: is missing but it is required'", + "/biosamples[0]/id, DELETE, 'biosamples[0].id' is missing but it is required", }) public void checkBiosampleConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -161,8 +162,8 @@ public void checkBiosampleConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/id, DELETE, '$.interpretations[0].id: is missing but it is required'", - "/interpretations[0]/progressStatus, DELETE, '$.interpretations[0].progressStatus: is missing but it is required'", + "/interpretations[0]/id, DELETE, 'interpretations[0].id' is missing but it is required", + "/interpretations[0]/progressStatus, DELETE, 'interpretations[0].progressStatus' is missing but it is required", }) public void checkInterpretationConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -173,7 +174,7 @@ public void checkInterpretationConstraints(String path, String action, String ex */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/disease, DELETE, '$.interpretations[0].diagnosis.disease: is missing but it is required'", + "/interpretations[0]/diagnosis/disease, DELETE, 'interpretations[0].diagnosis.disease' is missing but it is required", }) public void 
checkDiagnosisConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -185,12 +186,12 @@ public void checkDiagnosisConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/genomicInterpretations[0]/subjectOrBiosampleId, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].subjectOrBiosampleId: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/interpretationStatus, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].interpretationStatus: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/subjectOrBiosampleId, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].subjectOrBiosampleId' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/interpretationStatus, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].interpretationStatus' is missing but it is required", // TODO - as of now this leads to 2 errors instead of just one // "/interpretations[0]/diagnosis/genomicInterpretations[0]/interpretationStatus, SET[gibberish], '$.interpretations[0].diagnosis.genomicInterpretations[0].interpretationStatus: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].gene: is missing but it is required|$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[1].gene: is missing but it is required|$.interpretations[0].diagnosis.genomicInterpretations[1].variantInterpretation: is missing but it is required'", + 
"/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].gene' is missing but it is required|'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene' is missing but it is required|'interpretations[0].diagnosis.genomicInterpretations[1].variantInterpretation' is missing but it is required", }) public void checkGenomicInterpretationConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -202,8 +203,8 @@ public void checkGenomicInterpretationConstraints(String path, String action, St */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/valueId, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[1].gene.valueId: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/symbol, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[1].gene.symbol: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/valueId, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene.valueId' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/symbol, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene.symbol' is missing but it is required", }) public void checkGeneDescriptorConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -215,29 +216,49 @@ public void checkGeneDescriptorConstraints(String path, String action, String ex */ @ParameterizedTest @CsvSource({ - 
"/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/acmgPathogenicityClassification, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.acmgPathogenicityClassification: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/therapeuticActionability, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.therapeuticActionability: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/acmgPathogenicityClassification, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.acmgPathogenicityClassification' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/therapeuticActionability, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.therapeuticActionability' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor' is missing but it is required", }) public void checkVariantInterpretationConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); } -// TODO - implement tests -// @ParameterizedTest -// @CsvSource({ -// "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is 
required'", -// }) -// public void checkVariationDescriptorConstraints(String path, String action, String expected) { -// testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); -// } + @ParameterizedTest + @CsvSource({ + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/id, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.id' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/moleculeContext, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.moleculeContext' is missing but it is required", + }) + public void checkVariationDescriptorConstraints(String path, String action, String expected) { + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expected); + } + + /** + * As of Nov 9, 2022, the {@link org.ga4gh.vrs.v1.Variation} validator does not check presence + * of required fields. The validator can only check presence of {@code oneof} fields. + *

+ * Note that the {@code path} is split into a prefix and sub-path to increase legibility of the + * test parameters. + */ + @ParameterizedTest + @CsvSource({ + "/variation/copyNumber, DELETE, 'REPLACE.allele' is missing but it is required|'REPLACE.haplotype' is missing but it is required|'REPLACE.copyNumber' is missing but it is required|'REPLACE.text' is missing but it is required|'REPLACE.variationSet' is missing but it is required", + }) + public void removingAOneOfFieldFromVariationProducesValidationError(String subPath, String action, String subExpected) { + String pathPrefix = "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor"; + String path = pathPrefix.concat(subPath); + + String validationMessagePrefix = "interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.variation"; + String expectedValidationMessage = subExpected.replaceAll("REPLACE", validationMessagePrefix); + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expectedValidationMessage); + } /** * Absence of `term` leads to an {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel#ERROR}. 
*/ @ParameterizedTest @CsvSource({ - "/diseases[0]/term, DELETE, '$.diseases[0].term: is missing but it is required'", + "/diseases[0]/term, DELETE, 'diseases[0].term' is missing but it is required", }) public void checkDiseaseConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -248,10 +269,10 @@ public void checkDiseaseConstraints(String path, String action, String expected) */ @ParameterizedTest @CsvSource({ - "/medicalActions[0]/procedure, DELETE, '$.medicalActions[0].procedure: is missing but it is required|$.medicalActions[0].treatment: is missing but it is required|$.medicalActions[0].radiationTherapy: is missing but it is required|$.medicalActions[0].therapeuticRegimen: is missing but it is required'", - "/medicalActions[1]/treatment, DELETE, '$.medicalActions[1].procedure: is missing but it is required|$.medicalActions[1].treatment: is missing but it is required|$.medicalActions[1].radiationTherapy: is missing but it is required|$.medicalActions[1].therapeuticRegimen: is missing but it is required'", - "/medicalActions[2]/radiationTherapy, DELETE, '$.medicalActions[2].procedure: is missing but it is required|$.medicalActions[2].treatment: is missing but it is required|$.medicalActions[2].radiationTherapy: is missing but it is required|$.medicalActions[2].therapeuticRegimen: is missing but it is required'", - "/medicalActions[3]/therapeuticRegimen, DELETE, '$.medicalActions[3].procedure: is missing but it is required|$.medicalActions[3].treatment: is missing but it is required|$.medicalActions[3].radiationTherapy: is missing but it is required|$.medicalActions[3].therapeuticRegimen: is missing but it is required'", + "/medicalActions[0]/procedure, DELETE, 'medicalActions[0].procedure' is missing but it is required|'medicalActions[0].treatment' is missing but it is required|'medicalActions[0].radiationTherapy' is missing but it is 
required|'medicalActions[0].therapeuticRegimen' is missing but it is required", + "/medicalActions[1]/treatment, DELETE, 'medicalActions[1].procedure' is missing but it is required|'medicalActions[1].treatment' is missing but it is required|'medicalActions[1].radiationTherapy' is missing but it is required|'medicalActions[1].therapeuticRegimen' is missing but it is required", + "/medicalActions[2]/radiationTherapy, DELETE, 'medicalActions[2].procedure' is missing but it is required|'medicalActions[2].treatment' is missing but it is required|'medicalActions[2].radiationTherapy' is missing but it is required|'medicalActions[2].therapeuticRegimen' is missing but it is required", + "/medicalActions[3]/therapeuticRegimen, DELETE, 'medicalActions[3].procedure' is missing but it is required|'medicalActions[3].treatment' is missing but it is required|'medicalActions[3].radiationTherapy' is missing but it is required|'medicalActions[3].therapeuticRegimen' is missing but it is required", }) public void checkMedicalActionConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -262,7 +283,7 @@ public void checkMedicalActionConstraints(String path, String action, String exp */ @ParameterizedTest @CsvSource({ - "/medicalActions[0]/procedure/code, DELETE, '$.medicalActions[0].procedure.code: is missing but it is required'" + "/medicalActions[0]/procedure/code, DELETE, 'medicalActions[0].procedure.code' is missing but it is required" }) public void checkProcedureConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -273,7 +294,7 @@ public void checkProcedureConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/medicalActions[1]/treatment/agent, DELETE, '$.medicalActions[1].treatment.agent: is missing but it is required'" + "/medicalActions[1]/treatment/agent, DELETE, 
'medicalActions[1].treatment.agent' is missing but it is required" }) public void checkTreatmentConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -285,10 +306,10 @@ public void checkTreatmentConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/medicalActions[2]/radiationTherapy/modality, DELETE, '$.medicalActions[2].radiationTherapy.modality: is missing but it is required'", - "/medicalActions[2]/radiationTherapy/bodySite, DELETE, '$.medicalActions[2].radiationTherapy.bodySite: is missing but it is required'", - "/medicalActions[2]/radiationTherapy/dosage, DELETE, '$.medicalActions[2].radiationTherapy.dosage: is missing but it is required'", - "/medicalActions[2]/radiationTherapy/fractions, DELETE, '$.medicalActions[2].radiationTherapy.fractions: is missing but it is required'" + "/medicalActions[2]/radiationTherapy/modality, DELETE, 'medicalActions[2].radiationTherapy.modality' is missing but it is required", + "/medicalActions[2]/radiationTherapy/bodySite, DELETE, 'medicalActions[2].radiationTherapy.bodySite' is missing but it is required", + "/medicalActions[2]/radiationTherapy/dosage, DELETE, 'medicalActions[2].radiationTherapy.dosage' is missing but it is required", + "/medicalActions[2]/radiationTherapy/fractions, DELETE, 'medicalActions[2].radiationTherapy.fractions' is missing but it is required" }) public void checkRadiationTherapyConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -300,9 +321,9 @@ public void checkRadiationTherapyConstraints(String path, String action, String */ @ParameterizedTest @CsvSource({ - "/medicalActions[3]/therapeuticRegimen/externalReference, DELETE, '$.medicalActions[3].therapeuticRegimen.ontologyClass: is missing but it is required|$.medicalActions[3].therapeuticRegimen.externalReference: is missing but it is 
required'", - "/medicalActions[4]/therapeuticRegimen/ontologyClass, DELETE, '$.medicalActions[4].therapeuticRegimen.ontologyClass: is missing but it is required|$.medicalActions[4].therapeuticRegimen.externalReference: is missing but it is required'", - "/medicalActions[3]/therapeuticRegimen/regimenStatus, DELETE, '$.medicalActions[3].therapeuticRegimen.regimenStatus: is missing but it is required'" + "/medicalActions[3]/therapeuticRegimen/externalReference, DELETE, 'medicalActions[3].therapeuticRegimen.ontologyClass' is missing but it is required|'medicalActions[3].therapeuticRegimen.externalReference' is missing but it is required", + "/medicalActions[4]/therapeuticRegimen/ontologyClass, DELETE, 'medicalActions[4].therapeuticRegimen.ontologyClass' is missing but it is required|'medicalActions[4].therapeuticRegimen.externalReference' is missing but it is required", + "/medicalActions[3]/therapeuticRegimen/regimenStatus, DELETE, 'medicalActions[3].therapeuticRegimen.regimenStatus' is missing but it is required" }) public void checkTherapeuticRegimenConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -314,7 +335,7 @@ public void checkTherapeuticRegimenConstraints(String path, String action, Strin */ @ParameterizedTest @CsvSource({ - "/files[0]/uri, DELETE, '$.files[0].uri: is missing but it is required'", + "/files[0]/uri, DELETE, 'files[0].uri' is missing but it is required", }) public void checkFileConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -326,10 +347,10 @@ public void checkFileConstraints(String path, String action, String expected) { */ @ParameterizedTest @CsvSource({ - "/metaData/created, DELETE, '$.metaData.created: is missing but it is required'", - "/metaData/createdBy, DELETE, '$.metaData.createdBy: is missing but it is required'", - "/metaData/resources[*], DELETE, 
'$.metaData.resources: there must be a minimum of 1 items in the array'", - "/metaData/phenopacketSchemaVersion, DELETE, '$.metaData.phenopacketSchemaVersion: is missing but it is required'", + "/metaData/created, DELETE, 'metaData.created' is missing but it is required", + "/metaData/createdBy, DELETE, 'metaData.createdBy' is missing but it is required", + "/metaData/resources[*], DELETE, 'metaData.resources' there must be a minimum of 1 items in the array", + "/metaData/phenopacketSchemaVersion, DELETE, 'metaData.phenopacketSchemaVersion' is missing but it is required", }) public void checkMetaDataConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -341,12 +362,12 @@ public void checkMetaDataConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/metaData/resources[0]/id, DELETE, '$.metaData.resources[0].id: is missing but it is required'", - "/metaData/resources[0]/name, DELETE, '$.metaData.resources[0].name: is missing but it is required'", - "/metaData/resources[0]/namespacePrefix, DELETE, '$.metaData.resources[0].namespacePrefix: is missing but it is required'", - "/metaData/resources[0]/url, DELETE, '$.metaData.resources[0].url: is missing but it is required'", - "/metaData/resources[0]/version, DELETE, '$.metaData.resources[0].version: is missing but it is required'", - "/metaData/resources[0]/iriPrefix, DELETE, '$.metaData.resources[0].iriPrefix: is missing but it is required'", + "/metaData/resources[0]/id, DELETE, 'metaData.resources[0].id' is missing but it is required", + "/metaData/resources[0]/name, DELETE, 'metaData.resources[0].name' is missing but it is required", + "/metaData/resources[0]/namespacePrefix, DELETE, 'metaData.resources[0].namespacePrefix' is missing but it is required", + "/metaData/resources[0]/url, DELETE, 'metaData.resources[0].url' is missing but it is required", + "/metaData/resources[0]/version, DELETE, 
'metaData.resources[0].version' is missing but it is required", + "/metaData/resources[0]/iriPrefix, DELETE, 'metaData.resources[0].iriPrefix' is missing but it is required", }) public void checkResourceConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -358,7 +379,7 @@ public void checkResourceConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/metaData/updates[0]/timestamp, DELETE, '$.metaData.updates[0].timestamp: is missing but it is required'", + "/metaData/updates[0]/timestamp, DELETE, 'metaData.updates[0].timestamp' is missing but it is required", }) public void checkUpdateConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -372,7 +393,15 @@ public class RecommendedFieldsTest { } private static JsonNode readBethlemPhenopacketNode() { - try (InputStream is = Files.newInputStream(TestData.BETHLEM_MYOPATHY_PHENOPACKET_JSON)){ + return readJsonTree(TestData.BETHLEM_MYOPATHY_PHENOPACKET_JSON); + } + + private static JsonNode readRetinoblastomaPhenopacketNode() { + return readJsonTree(TestData.RETINOBLASTOMA_PHENOPACKET_JSON); + } + + private static JsonNode readJsonTree(Path jsonPath) { + try (InputStream is = Files.newInputStream(jsonPath)){ return MAPPER.readTree(is); } catch (IOException e) { throw new RuntimeException(e); @@ -406,11 +435,11 @@ public void validFamilyYieldsNoErrors() { @ParameterizedTest @CsvSource({ - "/id, DELETE, '$.id: is missing but it is required'", - "/proband, DELETE, '$.proband: is missing but it is required'", - "/consanguinousParents, DELETE, '$.consanguinousParents: is missing but it is required'", - "/pedigree, DELETE, '$.pedigree: is missing but it is required'", - "/metaData, DELETE, '$.metaData: is missing but it is required'", + "/id, DELETE, 'id' is missing but it is required", + "/proband, DELETE, 'proband' is 
missing but it is required", + "/consanguinousParents, DELETE, 'consanguinousParents' is missing but it is required", + "/pedigree, DELETE, 'pedigree' is missing but it is required", + "/metaData, DELETE, 'metaData' is missing but it is required", }) public void absenceOfTopLevelFamilyElementsYieldsErrors(String path, String action, String expected) { testErrors(runner, readExampleFamilyNode(), path, action, expected); @@ -418,8 +447,8 @@ public void absenceOfTopLevelFamilyElementsYieldsErrors(String path, String acti @ParameterizedTest @CsvSource({ - "/pedigree/persons, DELETE, '$.pedigree.persons: is missing but it is required'", - "/pedigree/persons[*], DELETE, '$.pedigree.persons: there must be a minimum of 1 items in the array'", + "/pedigree/persons, DELETE, 'pedigree.persons' is missing but it is required", + "/pedigree/persons[*], DELETE, 'pedigree.persons' there must be a minimum of 1 items in the array", }) public void emptyPedigreeYieldsError(String path, String action, String expected) { testErrors(runner, readExampleFamilyNode(), path, action, expected); @@ -462,9 +491,9 @@ public class RequiredFieldsTest { */ @ParameterizedTest @CsvSource({ - "/id, DELETE, '$.id: is missing but it is required'", - "/members[*], DELETE, '$.members: there must be a minimum of 1 items in the array'", - "/metaData, DELETE, '$.metaData: is missing but it is required'", + "/id, DELETE, 'id' is missing but it is required", + "/members[*], DELETE, 'members' there must be a minimum of 1 items in the array", + "/metaData, DELETE, 'metaData' is missing but it is required", }) public void checkCohortConstraints(String path, String action, String expected) { testErrors(runner, readExampleCohortNode(), path, action, expected); diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java 
index 94356a47..615e29f2 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java @@ -18,6 +18,12 @@ public class TestData { */ public static final Path BETHLEM_MYOPATHY_PHENOPACKET_JSON = TEST_BASE_DIR.resolve("bethlem-myopathy.json"); + /** + * A path to an example phenopacket representing a case of retinoblastoma. The phenopacket is useful since + * it contains a VRS-like Variation object. + */ + public static final Path RETINOBLASTOMA_PHENOPACKET_JSON = TEST_BASE_DIR.resolve("retinoblastoma.json"); + /** * A path to an example family that, despite being medically invalid/nonsense, is complete from the testing diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java index 7be1357d..99b342fc 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java @@ -87,7 +87,7 @@ public void testLacksId() throws Exception { assertEquals(1, errors.size()); ValidationResult error = errors.get(0); // Assertions.assertEquals(JsonError.REQUIRED, error.category()); - assertEquals("$.id: is missing but it is required", error.message()); + assertEquals("'id' is missing but it is required", error.message()); } private static JsonNode mapPhenopacketToJsonNode(PhenopacketOrBuilder phenopacket) throws Exception { diff --git 
a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java index 7b7b0bab..f0194326 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java @@ -56,7 +56,7 @@ public void testValidationOfSimpleValidPhenopacket() throws Exception { assertEquals(1, errors.size()); ValidationResult error = errors.get(0); Assertions.assertEquals("required", error.category()); - assertEquals("$.id: is missing but it is required", error.message()); + assertEquals("'id' is missing but it is required", error.message()); } /** @@ -74,14 +74,14 @@ public void testValidationOfSimpleInValidPhenopacket() throws Exception { ValidationResult error = errors.get(0); // JsonError.CATEGORY is "JSON" assertEquals("required", error.category()); - assertEquals("$.id: is missing but it is required", error.message()); + assertEquals("'id' is missing but it is required", error.message()); error = errors.get(1); assertEquals("required", error.category()); - assertEquals("$.metaData: is missing but it is required", error.message()); + assertEquals("'metaData' is missing but it is required", error.message()); error = errors.get(2); assertEquals("additionalProperties", error.category()); - assertEquals("$.disney: is not defined in the schema and the schema does not allow additional properties", error.message()); + assertEquals("'disney' is not defined in the schema and the schema does not allow additional properties", error.message()); } @Test @@ -130,7 +130,7 @@ public void invalidEnum() throws JsonProcessingException { assertEquals(1, 
errors.size()); ValidationResult error = errors.get(0); assertEquals("enum", error.category()); - assertEquals("$.subject.sex: does not have a value in the enumeration [UNKNOWN_SEX, FEMALE, MALE, OTHER_SEX]", error.message()); + assertEquals("'subject.sex' does not have a value in the enumeration [UNKNOWN_SEX, FEMALE, MALE, OTHER_SEX]", error.message()); assertEquals(ValidationLevel.ERROR, error.level()); } diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json index 1cff08fe..6be501a1 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json @@ -378,6 +378,7 @@ "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { "id": "variant id", + "moleculeContext": "transcript", "expressions": [ { "syntax": "hgvs", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json index a5517087..c55001a7 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json @@ -378,6 +378,7 @@ "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { "id": "variant id", + "moleculeContext": "transcript", "expressions": [ { "syntax": "hgvs", diff --git 
a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json index 54cf1aa8..6c30d25f 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json @@ -377,6 +377,7 @@ "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { "id": "variant id", + "moleculeContext": "transcript", "expressions": [ { "syntax": "hgvs", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json new file mode 100644 index 00000000..ca1fabdb --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json @@ -0,0 +1,463 @@ +{ + "id": "arbitrary.id", + "subject": { + "id": "proband A", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P6M" + } + }, + "sex": "FEMALE", + "karyotypicSex": "XX" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0030084", + "label": "Clinodactyly" + }, + "modifiers": [{ + "id": "HP:0012834", + "label": "Right" + }], + "onset": { + "age": { + "iso8601duration": "P3M" + } + } + }, { + "type": { + "id": "HP:0000555", + "label": "Leukocoria" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P4M" + } + } + }, { + "type": { + "id": "HP:0000486", + "label": "Strabismus" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + 
"age": { + "iso8601duration": "P5M15D" + } + } + }, { + "type": { + "id": "HP:0000541", + "label": "Retinal detachment" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:79893-4", + "label": "Left eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 25.0, + "referenceRange": { + "unit": { + "id": "56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }, { + "assay": { + "id": "LOINC:79892-6", + "label": "Right eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 15.0, + "referenceRange": { + "unit": { + "id": "56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "biosamples": [{ + "id": "biosample.1", + "sampledTissue": { + "id": "UBERON:0000970", + "label": "eye" + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C35941", + "label": "Flexner-Wintersteiner Rosette Formation" + } + }, { + "type": { + "id": "NCIT:C132485", + "label": "Apoptosis and Necrosis" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:33728-7", + "label": "Size.maximum dimension in Tumor" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm", + "label": "millimeter" + }, + "value": 15.0 + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P8M2W" + } + } + }], + "tumorProgression": { + "id": "NCIT:C8509", + "label": "Primary Neoplasm" + }, + "pathologicalTnmFinding": [{ + "id": "NCIT:C140720", + "label": "Retinoblastoma pT3 TNM Finding v8" + }, { + "id": "NCIT:C140711", + "label": "Retinoblastoma pN0 TNM Finding v8" + 
}], + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "files": [{ + "uri": "file://data/fileSomaticWgs.vcf.gz", + "individualToFileIdentifiers": { + "biosample.1": "specimen.1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }] + }], + "interpretations": [{ + "id": "interpretation.id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "proband A", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "example-cnv", + "moleculeContext": "genomic", + "variation": { + "copyNumber": { + "derivedSequenceExpression": { + "location": { + "sequenceId": "refseq:NC_000013.14", + "sequenceInterval": { + "startNumber": { + "value": "25981249" + }, + "endNumber": { + "value": "61706822" + } + } + } + }, + "number": { + "value": "1" + } + } + }, + "extensions": [{ + "name": "mosaicism", + "value": "40.0%" + }] + } + } + }, { + "subjectOrBiosampleId": "biosample.1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "rs121913300", + "variation": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.11", + "sequenceInterval": { + "startNumber": { + "value": "48367511" + }, + "endNumber": { + "value": "48367512" + } + } + }, + "literalSequenceExpression": { + "sequence": "T" + } + } + }, + "label": "RB1 c.958C\u003eT (p.Arg320Ter)", + "geneContext": { + "valueId": "HGNC:9884", + "symbol": "RB1" + }, + "expressions": [{ + "syntax": "hgvs.c", + 
"value": "NM_000321.2:c.958C\u003eT" + }, { + "syntax": "transcript_reference", + "value": "NM_000321.2" + }], + "vcfRecord": { + "genomeAssembly": "GRCh38", + "chrom": "NC_000013.11", + "pos": "48367512", + "ref": "C", + "alt": "T" + }, + "extensions": [{ + "name": "allele-frequency", + "value": "25.0%" + }], + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "onset": { + "age": { + "iso8601duration": "P4M" + } + }, + "diseaseStage": [{ + "id": "LOINC:LA24739-7", + "label": "Group E" + }], + "clinicalTnmFinding": [{ + "id": "NCIT:C140678", + "label": "Retinoblastoma cM0 TNM Finding v8" + }], + "primarySite": { + "id": "UBERON:0004548", + "label": "left eye" + } + }], + "medicalActions": [{ + "treatment": { + "agent": { + "id": "DrugCentral:1678", + "label": "melphalan" + }, + "routeOfAdministration": { + "id": "NCIT:C38222", + "label": "Intraarterial Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UCUM:mg.kg-1", + "label": "milligram per kilogram" + }, + "value": 0.4 + }, + "scheduleFrequency": { + "id": "NCIT:C64576", + "label": "Once" + }, + "interval": { + "start": "2020-09-02T00:00:00Z", + "end": "2020-09-02T00:00:00Z" + } + }] + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + }, + "adverseEvents": [{ + "id": "HP:0025637", + "label": "Vasospasm" + }], + "treatmentTerminationReason": { + "id": "NCIT:C41331", + "label": "Adverse Event" + } + }, { + "therapeuticRegimen": { + "ontologyClass": { + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" + }, + "startTime": { + "age": { + "iso8601duration": "P7M" + } + }, + "endTime": { + "age": { + "iso8601duration": "P8M" + } + }, + "regimenStatus": "COMPLETED" + }, + "treatmentTarget": { + "id": 
"NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }, { + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }], + "files": [{ + "uri": "file://data/germlineWgs.vcf.gz", + "individualToFileIdentifiers": { + "proband A": "sample1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "3.34.0", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2021-07-27", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2021-06-10", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 000821e0..38051a77 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7 pom @@ -15,12 +15,14 @@ + 
phenopacket-tools-core phenopacket-tools-test phenopacket-tools-util phenopacket-tools-builder phenopacket-tools-validator-core phenopacket-tools-validator-jsonschema phenopacket-tools-converter + phenopacket-tools-io phenopacket-tools-cli @@ -84,9 +86,12 @@ UTF-8 UTF-8 17 - 3.21.1 + 3.21.8 2.0.2 - 2.13.3 + 2.13.4.2 + + 2.13.4 + 1.33 2.0.0-RC3 5.7.1 @@ -149,18 +154,18 @@ com.fasterxml.jackson.core - jackson-databind - ${jackson.version} + jackson-core + ${jackson.core.version} com.fasterxml.jackson.core - jackson-core + jackson-databind ${jackson.version} - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + ${jackson.core.version} com.google.guava @@ -187,6 +192,11 @@ phenol-io ${phenol.version} + + org.yaml + snakeyaml + ${snakeyaml.version} + org.apache.commons commons-csv