diff --git a/csv/definitions/example/example_flattened.tsv b/csv/definitions/example/example_flattened.tsv index 28de2fd..92b0176 100644 --- a/csv/definitions/example/example_flattened.tsv +++ b/csv/definitions/example/example_flattened.tsv @@ -1,83 +1,16 @@ - schema required dataType description comments exact_mappings validation -submitter_participant_id treatment True string Unique identifier of the participant within the study, assigned by the data provider. "CQDG:submitter_participant_id -mCODE.STU3:Patient.Identifier -FHIR:Patient.Identifier -Phenopacket:individual.id -BeaconV2:individual.id" pattern:^[A-Za-z0-9\-\._]{1,64} +field schema required dataType description comments exact_mappings validation +submitter_participant_id comorbidity True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_comorbidity_diagnosis comorbidity False integer Indicate the age (in days) of comorbidity diagnosis. comorbidity_code comorbidity True string Use ICD-10 code or Mondo code to indicate the comorbidity diagnosed. ['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'] pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$ -comorbidity_term comorbidity False string Provide the standardized and human readable term derived from the coding system associated with the comorbidity_code "mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term" -comorbidity_treatment_status comorbidity False string Indicate whether the comorbid condition is currently being treated or not. "Enum: -Treated and resolved -Under treatment -Unknown -Untreated" -comorbidity_status comorbidity False string Indicate the current state or activity of the comorbid condition. "Enum: -Active -In Remission -Resolved -Unknown" -study_id participant True string Unique identifier of the study. CQDG:study_id -gender demographic True string Refers to an individual's personal and social identity as a man, woman or non-binary person (a person who is not exclusively a man or a woman). The provided values are based on the categories defined by Statistics Canada ['EDI indicator'] "mCODE.STU3:Patient.gender -FHIR:Patient.gender -Phenopacket:individual.gender -BeaconV2:individual.gender" "Enum: -Man -Missing -Non-binary person -Not Applicable -Not Collected -Not Provided -Restricted Access -Woman" -sex_at_birth demographic True string Refers to sex assigned at birth. Sex at birth is typically assigned based on a person's reproductive system and other physical characteristics. The provided values are based on the categoried defined by Statistics Canada ['EDI indicator'] "All4One:Biological_sex -CQDG:Sex -mCODE.STU3:Patient.extension.birthsex -FHIR:Patient.extension.birthsex -Phenopacket:individual.sex -BeaconV2:individual.sex" "Enum: -Female -Male -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -ethnicity demographic True string Refers to the ethnic or cultural origins of a person’s ancestors. The provided values are based on the list of ethnic or cultural origins 2021 defined by Statistics Canada ['EDI indicator'] "mCODE.STU3:Patient.extension.ethnicity -FHIR:Patient.extension.ethnicity" "Enum: -African -Asian -Caribbean -European -Latin, Central and South American -Missing -North American -Not Applicable -Not Collected -Not Provided -Oceanian -Other ethnic and cultural -Restricted Access" -race demographic True string A social construct used to judge and categorize people based on perceived differences in physical appearance in ways that create and maintain power differentials within social hierarchies. There is no scientifically supported biological basis for discrete racial groups. The provided values are based on race-based data standard defined by CIHI guidance ['EDI indicator'] "mCODE.STU3:Patient.extension.race -FHIR:Patient.extension.race" "Enum: -Another race category -Black -Do not know -East Asian -Indigenous (First Nations, Inuk/Inuit, Metis) -Latin American -Middle Eastern -Missing -Not Applicable -Not Collected -Not Provided -Prefer not to answer -Restricted Access -South Asian -Southeast Asian -White" +comorbidity_term comorbidity False string Provide the standardized and human readable term derived from the coding system associated with the comorbidity_code mCODE.STU3:condition.code;FHIR:condition.code;Phenopacket:disease.term +comorbidity_treatment_status comorbidity False string Indicate whether the comorbid condition is currently being treated or not. Enum:Treated and resolved,Under treatment,Unknown,Untreated +comorbidity_status comorbidity False string Indicate the current state or activity of the comorbid condition. Enum:Active,In Remission,Resolved,Unknown +study_id demographic True string Unique identifier of the study. CQDG:study_id +submitter_participant_id demographic True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +gender demographic True string Refers to an individual's personal and social identity as a man, woman or non-binary person (a person who is not exclusively a man or a woman). The provided values are based on the categories defined by Statistics Canada ['EDI indicator'] mCODE.STU3:Patient.gender;FHIR:Patient.gender;Phenopacket:individual.gender;BeaconV2:individual.gender Enum:Man,Missing,Non-binary person,Not Applicable,Not Collected,Not Provided,Restricted Access,Woman +sex_at_birth demographic True string Refers to sex assigned at birth. Sex at birth is typically assigned based on a person's reproductive system and other physical characteristics. The provided values are based on the categoried defined by Statistics Canada ['EDI indicator'] All4One:Biological_sex;CQDG:Sex;mCODE.STU3:Patient.extension.birthsex;FHIR:Patient.extension.birthsex;Phenopacket:individual.sex;BeaconV2:individual.sex Enum:Female,Male,Missing,Not Applicable,Not Collected,Not Provided,Restricted Access +ethnicity demographic True string Refers to the ethnic or cultural origins of a person’s ancestors. The provided values are based on the list of ethnic or cultural origins 2021 defined by Statistics Canada ['EDI indicator'] mCODE.STU3:Patient.extension.ethnicity;FHIR:Patient.extension.ethnicity Enum:African,Asian,Caribbean,European,Latin, Central and South American,Missing,North American,Not Applicable,Not Collected,Not Provided,Oceanian,Other ethnic and cultural,Restricted Access +race demographic True string A social construct used to judge and categorize people based on perceived differences in physical appearance in ways that create and maintain power differentials within social hierarchies. There is no scientifically supported biological basis for discrete racial groups. The provided values are based on race-based data standard defined by CIHI guidance ['EDI indicator'] mCODE.STU3:Patient.extension.race;FHIR:Patient.extension.race Enum:Another race category,Black,Do not know,East Asian,Indigenous (First Nations, Inuk/Inuit, Metis),Latin American,Middle Eastern,Missing,Not Applicable,Not Collected,Not Provided,Prefer not to answer,Restricted Access,South Asian,Southeast Asian,White country_of_birth demographic False string The country where an individual was born, typically defined by current geopolitical borders. ancestry demographic False string The genetic or genomic heritage of an individual, typically tracing the origins of one's ancestors. height demographic False decimal The measurement of an individual from head to toe recorded in centimeters (cm). @@ -86,224 +19,73 @@ highest_education_level_achieved demographic False string The highest level of f employment demographic False string The current work status of an individual. type_of_residence demographic False string The kind of dwelling in which an individual lives. number_of_other_people_in_household demographic False integer The total number of individuals living in the same household as the participant, excluding the participant. -pregnancy demographic False string Indicates whether an individual is currently pregnant. "Enum: -No -Unknown -Yes" -submitter_diagnosis_id diagnosis True string Unique identifier of the primary diagnosis event, assigned by the data provider. "mCODE.STU1:Condition.identifier -mCODE.STU2:Condition.identifier -FHIR:condition.code" pattern:^[A-Za-z0-9\-\._]{1,64}$ -age_at_diagnosis diagnosis False integer Age of participant (in days) at time of diagnosis of the condition. "mCODE.STU3:condition.onset -FHIR:condition.onset -Phenopacket:disease.onset" -disease_code diagnosis True string Use ICD-10 code or Mondo code to represent the disease diagnosed. ['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'] "mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term" pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$ -disease_term diagnosis False string Provide the standardized and human readable term derived from the coding system associated with the disease_code "mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term" -disease_category diagnosis True string A broader classification that groups diseases into categories. "Enum: -Autoimmune disorder -Cancer -Infectious disease" -covid19_severity diagnosis False string A measure of the severity of a COVID-19 infection, which can range from mild to severe or critical. "Enum: -Ambulatory (limitation of activities) -Ambulatory (no limitation of activities) -Ambulatory (with or without limitation of activities) -Dead - death -Hospitalized - mild disease (hospitalized, no oxygen therapy) -Hospitalized - mild disease (hospitalized, oxygen by mask or nasal prongs) -Hospitalized - severe disease (intubation and mechanical ventilation) -Hospitalized - severe disease (non-invasive ventilation or high-flow oxygen) -Hospitalized - severe disease (ventilation and additional organ support, i.e. pressors, RRT, ECMO) -Uninfected (no clinical or virological evidence of infection)" -covid19_vaccine_doses diagnosis False string The number of COVID-19 vaccine doses the patient has received. "Enum: -No -Unknown -Yes, four doses -Yes, one dose -Yes, three doses -Yes, two doses" -alcohol_consumption exposure False string Indicate current alcohol consumtion based on recommended daily limit. ['Refer to the Canadian Centre on Substance Abuse, for men, no more than 15 drinks a week, with no more than 3 drinks a day most days. No more than 4 drinks on any single occasion. For women, no more than 10 drinks a week, with no more than 2 drinks a day most days. No more than 3 drinks on any single occasion.'] "Enum: -Alcohol consumption unknown -Alcohol intake exceeds recommended daily limit -Alcohol intake within recommended daily limit -No alcohol consumption" -smoking_status exposure False string Indicate smoking status and history of a participant at the time of the data collection. "Enum: -Current reformed smoker for <= 15 years -Current reformed smoker for > 15 years -Current reformed smoker, duration not specified -Current smoker -Lifelong non-smoker (<100 cigarettes smoked in lifetime) -Not applicable -Smoking history not documented" -physical_activity exposure False string Indicate how many times per week the participant exercises for at least 30 minutes. "Enum: -1-3 times a month -Every day -Less than once a month -Most days but not every day -Never -Not applicable -Once or twice a week -Unknown" +pregnancy demographic False string Indicates whether an individual is currently pregnant. Enum:No,Unknown,Yes +submitter_participant_id diagnosis True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +submitter_diagnosis_id diagnosis True string Unique identifier of the primary diagnosis event, assigned by the data provider. mCODE.STU1:Condition.identifier;mCODE.STU2:Condition.identifier;FHIR:condition.code pattern:^[A-Za-z0-9\-\._]{1,64}$ +age_at_diagnosis diagnosis False integer Age of participant (in days) at time of diagnosis of the condition. mCODE.STU3:condition.onset;FHIR:condition.onset;Phenopacket:disease.onset +disease_code diagnosis True string Use ICD-10 code or Mondo code to represent the disease diagnosed. ['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'] mCODE.STU3:condition.code;FHIR:condition.code;Phenopacket:disease.term pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$ +disease_term diagnosis False string Provide the standardized and human readable term derived from the coding system associated with the disease_code mCODE.STU3:condition.code;FHIR:condition.code;Phenopacket:disease.term +disease_category diagnosis True string A broader classification that groups diseases into categories. Enum:Autoimmune disorder,Cancer,Infectious disease +covid19_severity diagnosis False string A measure of the severity of a COVID-19 infection, which can range from mild to severe or critical. Enum:Ambulatory (limitation of activities),Ambulatory (no limitation of activities),Ambulatory (with or without limitation of activities),Dead - death,Hospitalized - mild disease (hospitalized, no oxygen therapy),Hospitalized - mild disease (hospitalized, oxygen by mask or nasal prongs),Hospitalized - severe disease (intubation and mechanical ventilation),Hospitalized - severe disease (non-invasive ventilation or high-flow oxygen),Hospitalized - severe disease (ventilation and additional organ support, i.e. pressors, RRT, ECMO),Uninfected (no clinical or virological evidence of infection) +covid19_vaccine_doses diagnosis False string The number of COVID-19 vaccine doses the patient has received. Enum:No,Unknown,Yes, four doses,Yes, one dose,Yes, three doses,Yes, two doses +submitter_participant_id exposure True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +alcohol_consumption exposure False string Indicate current alcohol consumtion based on recommended daily limit. ['Refer to the Canadian Centre on Substance Abuse, for men, no more than 15 drinks a week, with no more than 3 drinks a day most days. No more than 4 drinks on any single occasion. For women, no more than 10 drinks a week, with no more than 2 drinks a day most days. No more than 3 drinks on any single occasion.'] Enum:Alcohol consumption unknown,Alcohol intake exceeds recommended daily limit,Alcohol intake within recommended daily limit,No alcohol consumption +smoking_status exposure False string Indicate smoking status and history of a participant at the time of the data collection. Enum:Current reformed smoker for <= 15 years,Current reformed smoker for > 15 years,Current reformed smoker, duration not specified,Current smoker,Lifelong non-smoker (<100 cigarettes smoked in lifetime),Not applicable,Smoking history not documented +physical_activity exposure False string Indicate how many times per week the participant exercises for at least 30 minutes. Enum:1-3 times a month,Every day,Less than once a month,Most days but not every day,Never,Not applicable,Once or twice a week,Unknown +submitter_participant_id follow_up True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_followup follow_up False integer Participant's age (in days) at time of the follow up event -disease_status_at_followup follow_up True string Indicate the participant's disease status at time of follow-up "Enum: -Complete remission -Distant progression -Loco-regional progression -No evidence of disease -Partial remission -Progression NOS -Relapse or recurrence -Stable" +disease_status_at_followup follow_up True string Indicate the participant's disease status at time of follow-up Enum:Complete remission,Distant progression,Loco-regional progression,No evidence of disease,Partial remission,Progression NOS,Relapse or recurrence,Stable +submitter_participant_id measurement True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_measurement measurement False integer Age (in days) of the participant at the time the lab test or measurement was conducted. measurement_code measurement True string Use standartized LOINC (Logical Observation Identifiers Names and Codes) code to represent quantitative, ordinal, or categorical measurements. ['Provide code in Compact URI (CURIE) pattern. LOINC URL: https://loinc.org/{code}'] pattern:^LOINC:[0-9]{1,5}-[0-9]$ measurement_term measurement False string Provide the standardized and human readable term derived from the coding system associated with the measurement_code measurement_result_numeric measurement False decimal The numeric result of the lab test or measurement. measurement_unit measurement False string Indicate the unit of measurement for the result using LOINC code. measurement_result_categorical measurement False string The categorical result of the lab test or measurement. -submitter_treatment_id treatment True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ +submitter_treatment_id medication True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ drug_code medication True string Provide the standardized code from RxNorm, KEGG, PubChem or NCIt to represent the drug. ['Provide code in Compact URI (CURIE) pattern. RxNorm URL: https://rxnav.nlm.nih.gov/REST/rxcui/{code} KEGG URL: https://www.kegg.jp/entry/{code} PubChem URL: https://pubchem.ncbi.nlm.nih.gov/compound/{code} NCIt URL: https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C{code}'] pattern:^RxNorm:\d{1,8}$|^KEGG:D\\d{5}$|^PubChem:\\d{1,9}$|^NCIt:C\\d{1,7}$ drug_term medication False string Provide the standardized and human readable term derived from the coding system associated with the drug_code -drug_dose_units medication False string Indicate units used to record drug dose. "Enum: -IU/kg -IU/m2 -Not available -g/m2 -mg -mg/kg -mg/m2 -ug/m2" +drug_dose_units medication False string Indicate units used to record drug dose. Enum:IU/kg,IU/m2,Not available,g/m2,mg,mg/kg,mg/m2,ug/m2 prescribed_cumulative_drug_dose medication False decimal Indicate the total prescribed cumulative drug dose in the same units specified in drug_dose_units. actual_cumulative_drug_dose medication False decimal Indicate the total actual cumulative drug dose in the same units specified in drug_dose_units. +study_id participant True string Unique identifier of the study. CQDG:study_id +submitter_participant_id participant True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_enrollment participant False integer Age (in days) of participant at time of enrollment into the study -vital_status participant False string Participant's last known state of living or deceased. "CQDG:vital_status -Phenopacket:individual.vital_status -BeaconV2:individual.vital_status" "Enum: -Alive -Deceased -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -cause_of_death participant False string Indicate the cause of a participant's death. "Enum: -Died of cancer -Died of other reasons -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" +vital_status participant False string Participant's last known state of living or deceased. CQDG:vital_status;Phenopacket:individual.vital_status;BeaconV2:individual.vital_status Enum:Alive,Deceased,Missing,Not Applicable,Not Collected,Not Provided,Restricted Access +cause_of_death participant False string Indicate the cause of a participant's death. Enum:Died of cancer,Died of other reasons,Missing,Not Applicable,Not Collected,Not Provided,Restricted Access age_at_death participant False integer Age of participant (in days) at time of death +submitter_participant_id phenotype True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_phenotype phenotype False integer Participant's age (in days) when phenotype was observed phenotype_code phenotype True string Use standardized HPO (Human Phenotype Ontology) codes to represent the phenotype. ['Provide code in Compact URI (CURIE) pattern. HPO URL: https://hpo.jax.org/app/browse/term/HP:'] pattern:^HP:[0-9]{7}$ phenotype_term phenotype False string Provide the standardized and human readable term derived from the coding system associated with the phenotype_code. -phenotype_observed phenotype True string Indicate whether the phenotype was observed in the participant. "Enum: -No -Unknown -Yes" +phenotype_observed phenotype True string Indicate whether the phenotype was observed in the participant. Enum:No,Unknown,Yes phenotype_duration phenotype False integer Indicate the length of time (in days) over which the phenotype was observed in the participant. -phenotype_severity phenotype False string The degree or severity of the observed phenotype. ['Permissible values from https://hpo.jax.org/browse/term/HP:0012824'] "Enum: -Borderline -Mild -Moderate -Profound -Severe" +phenotype_severity phenotype False string The degree or severity of the observed phenotype. ['Permissible values from https://hpo.jax.org/browse/term/HP:0012824'] Enum:Borderline,Mild,Moderate,Profound,Severe +submitter_treatment_id procedure True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ procedure_code procedure True string Use code from NCIt, SNOMED-CT, UMLS or CCI to represent the procedure performed. ['Provide code in Compact URI (CURIE) pattern. NCIt URL: https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C SNOMED-CT URL: http://snomed.info/id/ UMLS URL: https://uts.nlm.nih.gov/uts/umls/concept/'] pattern:^NCIt:C\d{1,7}$|^SNOMEDCT:\d{6,9}$|^UMLS:C\d{7}$|^CCI:\d\.[A-Z]{2}\.\d{2}\.[A-Z]{2}$ procedure_term procedure False string Provide the standardized and human readable term derived from the coding system associated with the procedure_code -procedure_body_site_code procedure False string Indicate the ICD-O-3 topography code to describe the site of the surgery if applicable. Please use C80.9 if the site is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344'] "mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite" pattern:^[C][0-9]{2}(.[0-9]{1})?$ +procedure_body_site_code procedure False string Indicate the ICD-O-3 topography code to describe the site of the surgery if applicable. Please use C80.9 if the site is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344'] mCODE.STU3:Specimen.collection.bodySite;FHIR:Specimen.collection.bodySite pattern:^[C][0-9]{2}(.[0-9]{1})?$ procedure_body_site_term procedure False string Provide the standardized and human readable term derived from the coding system associated with the procedure_body_site_code -submitter_specimen_id specimen True string Unique identifier of the specimen within the study, assigned by the data provider. "CQDG:submitter_biospecimen_id -mCODE.STU3:Specimen.Identifier -FHIR:Specimen.Identifier -Phenopacket:biosample.id -BeaconV2:biosample.id" pattern:^[A-Za-z0-9\-\._]{1,64}$ -specimen_tissue_source_code specimen True string Indicate the tissue source of the specimen from which a biopsy or other tissue specimen was obtained. Use codes from NCIt (NCI Thesaurus) or SNOMED-CT (SNOMED Clinical Terms) in Compact URI (CURIE) pattern. "CQDG:biospecimen_tissue_source -mCODE.STU3:Specimen.type -FHIR:Specimen.type -Phenopacket:biosample.sampled_tissue -BeaconV2:biosample.sampled_tissue" pattern:^NCIT:C[0-9]+$|^SNOMEDCT:[0-9]+$ +submitter_participant_id specimen True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +submitter_specimen_id specimen True string Unique identifier of the specimen within the study, assigned by the data provider. CQDG:submitter_biospecimen_id;mCODE.STU3:Specimen.Identifier;FHIR:Specimen.Identifier;Phenopacket:biosample.id;BeaconV2:biosample.id pattern:^[A-Za-z0-9\-\._]{1,64}$ +specimen_tissue_source_code specimen True string Indicate the tissue source of the specimen from which a biopsy or other tissue specimen was obtained. Use codes from NCIt (NCI Thesaurus) or SNOMED-CT (SNOMED Clinical Terms) in Compact URI (CURIE) pattern. CQDG:biospecimen_tissue_source;mCODE.STU3:Specimen.type;FHIR:Specimen.type;Phenopacket:biosample.sampled_tissue;BeaconV2:biosample.sampled_tissue pattern:^NCIT:C[0-9]+$|^SNOMEDCT:[0-9]+$ specimen_tissue_source_term specimen False string Indicate the human readable label for the specimen tissue source code. -specimen_storage specimen False string Indicate the method of long term storage for specimen that were not extracted freshly or immediately cultured. "Phenopacket:biosample.sample_storage -BeaconV2:biosample.sample_storage" "Enum: -Cut slide -Frozen in -70 freezer -Frozen in liquid nitrogen -Frozen in vapour phase -Not Applicable -Other -Paraffin block -RNA later frozen -Unknown" -specimen_processing specimen False string Indicate the technique used to process specimen. "Phenopacket:biosample.sample_processing -BeaconV2:biosample.sample_processing" "Enum: -Cryopreservation - other -Cryopreservation in dry ice (dead tissue) -Cryopreservation in liquid nitrogen (dead tissue) -Cryopreservation of live cells in liquid nitrogen -Formalin fixed & paraffin embedded -Formalin fixed - buffered -Formalin fixed - unbuffered -Fresh -Other -Unknown" +specimen_storage specimen False string Indicate the method of long term storage for specimen that were not extracted freshly or immediately cultured. Phenopacket:biosample.sample_storage;BeaconV2:biosample.sample_storage Enum:Cut slide,Frozen in -70 freezer,Frozen in liquid nitrogen,Frozen in vapour phase,Not Applicable,Other,Paraffin block,RNA later frozen,Unknown +specimen_processing specimen False string Indicate the technique used to process specimen. Phenopacket:biosample.sample_processing;BeaconV2:biosample.sample_processing Enum:Cryopreservation - other,Cryopreservation in dry ice (dead tissue),Cryopreservation in liquid nitrogen (dead tissue),Cryopreservation of live cells in liquid nitrogen,Formalin fixed & paraffin embedded,Formalin fixed - buffered,Formalin fixed - unbuffered,Fresh,Other,Unknown age_at_specimen_collection specimen False integer Indicate participant's age( in days) when specimen was collected. -specimen_anatomic_location_code specimen False string Indicate the ICD-O-3 topography code for the anatomic location of a specimen when it was collected. Please use C80.9 if the primary site of a specimen is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344.'] "mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite" pattern:^[C][0-9]{2}(.[0-9]{1})?$ +specimen_anatomic_location_code specimen False string Indicate the ICD-O-3 topography code for the anatomic location of a specimen when it was collected. Please use C80.9 if the primary site of a specimen is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344.'] mCODE.STU3:Specimen.collection.bodySite;FHIR:Specimen.collection.bodySite pattern:^[C][0-9]{2}(.[0-9]{1})?$ specimen_anatomic_location_label specimen False string Provide the human readable label for the specimen anatomic location code. -specimen_laterality specimen False string For cancer in a paired organ, indicate the side on which the specimen was obtained. ['Reference caDSR CDE ID 2007875 https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html#/search?publicId=2007875&version=2.0'] "mCODE.STU3:Specimen.collection.bodySite.extension.lateralityQualifier -FHIR:Specimen.collection.bodySite.extension.lateralityQualifier" "Enum: -Left -Not applicable -Right -Unknown" -treatment_type treatment True string The category or method of treatment administered "Enum: -Medication -Pharmacotherapy -Procedure -Radiation therapy -Other" +specimen_laterality specimen False string For cancer in a paired organ, indicate the side on which the specimen was obtained. ['Reference caDSR CDE ID 2007875 https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html#/search?publicId=2007875&version=2.0'] mCODE.STU3:Specimen.collection.bodySite.extension.lateralityQualifier;FHIR:Specimen.collection.bodySite.extension.lateralityQualifier Enum:Left,Not applicable,Right,Unknown +submitter_participant_id treatment True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +submitter_treatment_id treatment True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ +treatment_type treatment True string The category or method of treatment administered Enum:Medication,Pharmacotherapy,Procedure,Radiation therapy,Other age_at_treatment treatment False integer Age (in days) of the participant at the time the treatment was administered. treatment_duration treatment False integer The length of time (in days) over which the treatment was administered. -treatment_intent treatment False string The purpose of the treatment or the desired effect or outcome resulting from the treatment. "Enum: -Curative -Diagnostic -Forensic -Guidance -Palliative -Preventative -Screening -Supportive" -treatment_response treatment False string The outcome of the treatment, indicating how the patient responded to the intervention "Enum: -Clinical remission -Disease Progression -Improvement of symptoms -No improvement of symptoms -No sign of disease -Not applicable -Partial Response -Stable Disease -Treatment cessation due to toxicity -Worsening of symptoms" -treatment_status treatment False string Indicate the donor's status of the prescribed treatment. "Enum: -Not available -Other -Patient choice (stopped or interrupted treatment) -Physician decision (stopped or interrupted treatment) -Treatment completed as prescribed -Treatment incomplete because patient died -Treatment incomplete due to technical or organizational problems -Treatment ongoing -Treatment stopped due to acute toxicity -Treatment stopped due to lack of efficacy (disease progression)" +treatment_intent treatment False string The purpose of the treatment or the desired effect or outcome resulting from the treatment. Enum:Curative,Diagnostic,Forensic,Guidance,Palliative,Preventative,Screening,Supportive +treatment_response treatment False string The outcome of the treatment, indicating how the patient responded to the intervention Enum:Clinical remission,Disease Progression,Improvement of symptoms,No improvement of symptoms,No sign of disease,Not applicable,Partial Response,Stable Disease,Treatment cessation due to toxicity,Worsening of symptoms +treatment_status treatment False string Indicate the donor's status of the prescribed treatment. Enum:Not available,Other,Patient choice (stopped or interrupted treatment),Physician decision (stopped or interrupted treatment),Treatment completed as prescribed,Treatment incomplete because patient died,Treatment incomplete due to technical or organizational problems,Treatment ongoing,Treatment stopped due to acute toxicity,Treatment stopped due to lack of efficacy (disease progression) image_hosted_url imaging False string A URL for where the image is hosted -image_hosted_format imaging False string Accepted data types for images "Enum: -PNG -SVG -PDF -JPEG" +image_hosted_format imaging False string Accepted data types for images Enum:PNG,SVG,PDF,JPEG image_processing_pipeline imaging False string "Post processing pipeline dependent on image_hosted_format where: - image_hosted_format is PNG, choices are * PipelineA @@ -317,13 +99,7 @@ image_processing_pipeline imaging False string "Post processing pipeline depende * PipelineA * PipelineD * PipelineF -" "Enum: -pipelineA -pipelineB -pipelineC -pipelineD -pipelineE -pipelineF" +" Enum:pipelineA,pipelineB,pipelineC,pipelineD,pipelineE,pipelineF image_processing_personel imaging False string "String value for the person who processed the image. Must be provided.If not 'image_processing_null_reason' must be provided" image_processing_null_reason imaging False string "Tracks reason for why image processing personnel was not provided. @@ -332,7 +108,4 @@ Possible values: * Unknown * Revoked * Not Provided -Otherwise leave empty" "Enum: -Unknown -Not Provided -Revoked" +Otherwise leave empty" Enum:Unknown,Not Provided,Revoked diff --git a/csv/definitions/pcgl/pcgl_flattened.tsv b/csv/definitions/pcgl/pcgl_flattened.tsv index a223530..7dd97f5 100644 --- a/csv/definitions/pcgl/pcgl_flattened.tsv +++ b/csv/definitions/pcgl/pcgl_flattened.tsv @@ -1,281 +1,81 @@ - schema required dataType description comments exact_mappings validation -submitter_participant_id treatment True string Unique identifier of the participant within the study, assigned by the data provider. "CQDG:submitter_participant_id -mCODE.STU3:Patient.Identifier -FHIR:Patient.Identifier -Phenopacket:individual.id -BeaconV2:individual.id" pattern:^[A-Za-z0-9\-\._]{1,64} +field schema required dataType description comments exact_mappings validation +submitter_participant_id phenotype True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_phenotype phenotype False integer Participant's age (in days) when phenotype was observed phenotype_code phenotype True string Use standardized HPO (Human Phenotype Ontology) codes to represent the phenotype. ['Provide code in Compact URI (CURIE) pattern. HPO URL: https://hpo.jax.org/app/browse/term/HP:'] pattern:^HP:[0-9]{7}$ phenotype_term phenotype False string Provide the standardized and human readable term derived from the coding system associated with the phenotype_code. -phenotype_observed phenotype True string Indicate whether the phenotype was observed in the participant. "Enum: -No -Unknown -Yes" +phenotype_observed phenotype True string Indicate whether the phenotype was observed in the participant. Enum:No,Unknown,Yes phenotype_duration phenotype False integer Indicate the length of time (in days) over which the phenotype was observed in the participant. -phenotype_severity phenotype False string The degree or severity of the observed phenotype. ['Permissible values from https://hpo.jax.org/browse/term/HP:0012824'] "Enum: -Borderline -Mild -Moderate -Profound -Severe" -study_id participant True string Unique identifier of the study. CQDG:study_id -gender demographic True string Refers to an individual's personal and social identity as a man, woman or non-binary person (a person who is not exclusively a man or a woman). The provided values are based on the categories defined by Statistics Canada ['EDI indicator'] "mCODE.STU3:Patient.gender -FHIR:Patient.gender -Phenopacket:individual.gender -BeaconV2:individual.gender" "Enum: -Man -Missing -Non-binary person -Not Applicable -Not Collected -Not Provided -Restricted Access -Woman" -sex_at_birth demographic True string Refers to sex assigned at birth. Sex at birth is typically assigned based on a person's reproductive system and other physical characteristics. The provided values are based on the categoried defined by Statistics Canada ['EDI indicator'] "All4One:Biological_sex -CQDG:Sex -mCODE.STU3:Patient.extension.birthsex -FHIR:Patient.extension.birthsex -Phenopacket:individual.sex -BeaconV2:individual.sex" "Enum: -Female -Male -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -ethnicity demographic True string Refers to the ethnic or cultural origins of a person’s ancestors. The provided values are based on the list of ethnic or cultural origins 2021 defined by Statistics Canada ['EDI indicator'] "mCODE.STU3:Patient.extension.ethnicity -FHIR:Patient.extension.ethnicity" "Enum: -African -Asian -Caribbean -European -Latin, Central and South American -Missing -North American -Not Applicable -Not Collected -Not Provided -Oceanian -Other ethnic and cultural -Restricted Access" -race demographic True string A social construct used to judge and categorize people based on perceived differences in physical appearance in ways that create and maintain power differentials within social hierarchies. There is no scientifically supported biological basis for discrete racial groups. The provided values are based on race-based data standard defined by CIHI guidance ['EDI indicator'] "mCODE.STU3:Patient.extension.race -FHIR:Patient.extension.race" "Enum: -Another race category -Black -Do not know -East Asian -Indigenous (First Nations, Inuk/Inuit, Metis) -Latin American -Middle Eastern -Missing -Not Applicable -Not Collected -Not Provided -Prefer not to answer -Restricted Access -South Asian -Southeast Asian -White" +phenotype_severity phenotype False string The degree or severity of the observed phenotype. ['Permissible values from https://hpo.jax.org/browse/term/HP:0012824'] Enum:Borderline,Mild,Moderate,Profound,Severe +study_id demographic True string Unique identifier of the study. CQDG:study_id +submitter_participant_id demographic True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +gender demographic True string Refers to an individual's personal and social identity as a man, woman or non-binary person (a person who is not exclusively a man or a woman). The provided values are based on the categories defined by Statistics Canada ['EDI indicator'] mCODE.STU3:Patient.gender;FHIR:Patient.gender;Phenopacket:individual.gender;BeaconV2:individual.gender Enum:Man,Missing,Non-binary person,Not Applicable,Not Collected,Not Provided,Restricted Access,Woman +sex_at_birth demographic True string Refers to sex assigned at birth. Sex at birth is typically assigned based on a person's reproductive system and other physical characteristics. The provided values are based on the categoried defined by Statistics Canada ['EDI indicator'] All4One:Biological_sex;CQDG:Sex;mCODE.STU3:Patient.extension.birthsex;FHIR:Patient.extension.birthsex;Phenopacket:individual.sex;BeaconV2:individual.sex Enum:Female,Male,Missing,Not Applicable,Not Collected,Not Provided,Restricted Access +ethnicity demographic True string Refers to the ethnic or cultural origins of a person’s ancestors. The provided values are based on the list of ethnic or cultural origins 2021 defined by Statistics Canada ['EDI indicator'] mCODE.STU3:Patient.extension.ethnicity;FHIR:Patient.extension.ethnicity Enum:African,Asian,Caribbean,European,Latin, Central and South American,Missing,North American,Not Applicable,Not Collected,Not Provided,Oceanian,Other ethnic and cultural,Restricted Access +race demographic True string A social construct used to judge and categorize people based on perceived differences in physical appearance in ways that create and maintain power differentials within social hierarchies. There is no scientifically supported biological basis for discrete racial groups. The provided values are based on race-based data standard defined by CIHI guidance ['EDI indicator'] mCODE.STU3:Patient.extension.race;FHIR:Patient.extension.race Enum:Another race category,Black,Do not know,East Asian,Indigenous (First Nations, Inuk/Inuit, Metis),Latin American,Middle Eastern,Missing,Not Applicable,Not Collected,Not Provided,Prefer not to answer,Restricted Access,South Asian,Southeast Asian,White +submitter_participant_id comorbidity True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_comorbidity_diagnosis comorbidity False integer Indicate the age (in days) of comorbidity diagnosis. comorbidity_code comorbidity True string Use ICD-10 code or Mondo code to indicate the comorbidity diagnosed. ['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'] pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$ -comorbidity_term comorbidity False string Provide the standardized and human readable term derived from the coding system associated with the comorbidity_code "mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term" -comorbidity_treatment_status comorbidity False string Indicate whether the comorbid condition is currently being treated or not. "Enum: -Treated and resolved -Under treatment -Unknown -Untreated" -comorbidity_status comorbidity False string Indicate the current state or activity of the comorbid condition. "Enum: -Active -In Remission -Resolved -Unknown" -submitter_diagnosis_id diagnosis True string Unique identifier of the primary diagnosis event, assigned by the data provider. "mCODE.STU1:Condition.identifier -mCODE.STU2:Condition.identifier -FHIR:condition.code" pattern:^[A-Za-z0-9\-\._]{1,64}$ -age_at_diagnosis diagnosis False integer Age of participant (in days) at time of diagnosis of the condition. "mCODE.STU3:condition.onset -FHIR:condition.onset -Phenopacket:disease.onset" -disease_code diagnosis True string Use ICD-10 code or Mondo code to represent the disease diagnosed. ['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'] "mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term" pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$ -disease_term diagnosis False string Provide the standardized and human readable term derived from the coding system associated with the disease_code "mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term" -disease_category diagnosis True string A broader classification that groups diseases into categories. "Enum: -Autoimmune disorder -Cancer -Infectious disease" -alcohol_consumption exposure False string Indicate current alcohol consumtion based on recommended daily limit. ['Refer to the Canadian Centre on Substance Abuse, for men, no more than 15 drinks a week, with no more than 3 drinks a day most days. No more than 4 drinks on any single occasion. For women, no more than 10 drinks a week, with no more than 2 drinks a day most days. No more than 3 drinks on any single occasion.'] "Enum: -Alcohol consumption unknown -Alcohol intake exceeds recommended daily limit -Alcohol intake within recommended daily limit -No alcohol consumption" -smoking_status exposure False string Indicate smoking status and history of a participant at the time of the data collection. "Enum: -Current reformed smoker for <= 15 years -Current reformed smoker for > 15 years -Current reformed smoker, duration not specified -Current smoker -Lifelong non-smoker (<100 cigarettes smoked in lifetime) -Not applicable -Smoking history not documented" -physical_activity exposure False string Indicate how many times per week the participant exercises for at least 30 minutes. "Enum: -1-3 times a month -Every day -Less than once a month -Most days but not every day -Never -Not applicable -Once or twice a week -Unknown" +comorbidity_term comorbidity False string Provide the standardized and human readable term derived from the coding system associated with the comorbidity_code mCODE.STU3:condition.code;FHIR:condition.code;Phenopacket:disease.term +comorbidity_treatment_status comorbidity False string Indicate whether the comorbid condition is currently being treated or not. Enum:Treated and resolved,Under treatment,Unknown,Untreated +comorbidity_status comorbidity False string Indicate the current state or activity of the comorbid condition. Enum:Active,In Remission,Resolved,Unknown +submitter_participant_id diagnosis True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +submitter_diagnosis_id diagnosis True string Unique identifier of the primary diagnosis event, assigned by the data provider. mCODE.STU1:Condition.identifier;mCODE.STU2:Condition.identifier;FHIR:condition.code pattern:^[A-Za-z0-9\-\._]{1,64}$ +age_at_diagnosis diagnosis False integer Age of participant (in days) at time of diagnosis of the condition. mCODE.STU3:condition.onset;FHIR:condition.onset;Phenopacket:disease.onset +disease_code diagnosis True string Use ICD-10 code or Mondo code to represent the disease diagnosed. ['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'] mCODE.STU3:condition.code;FHIR:condition.code;Phenopacket:disease.term pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$ +disease_term diagnosis False string Provide the standardized and human readable term derived from the coding system associated with the disease_code mCODE.STU3:condition.code;FHIR:condition.code;Phenopacket:disease.term +disease_category diagnosis True string A broader classification that groups diseases into categories. Enum:Autoimmune disorder,Cancer,Infectious disease +submitter_participant_id exposure True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +alcohol_consumption exposure False string Indicate current alcohol consumtion based on recommended daily limit. ['Refer to the Canadian Centre on Substance Abuse, for men, no more than 15 drinks a week, with no more than 3 drinks a day most days. No more than 4 drinks on any single occasion. For women, no more than 10 drinks a week, with no more than 2 drinks a day most days. No more than 3 drinks on any single occasion.'] Enum:Alcohol consumption unknown,Alcohol intake exceeds recommended daily limit,Alcohol intake within recommended daily limit,No alcohol consumption +smoking_status exposure False string Indicate smoking status and history of a participant at the time of the data collection. Enum:Current reformed smoker for <= 15 years,Current reformed smoker for > 15 years,Current reformed smoker, duration not specified,Current smoker,Lifelong non-smoker (<100 cigarettes smoked in lifetime),Not applicable,Smoking history not documented +physical_activity exposure False string Indicate how many times per week the participant exercises for at least 30 minutes. Enum:1-3 times a month,Every day,Less than once a month,Most days but not every day,Never,Not applicable,Once or twice a week,Unknown +submitter_participant_id follow_up True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_followup follow_up False integer Participant's age (in days) at time of the follow up event -disease_status_at_followup follow_up True string Indicate the participant's disease status at time of follow-up "Enum: -Complete remission -Distant progression -Loco-regional progression -No evidence of disease -Partial remission -Progression NOS -Relapse or recurrence -Stable" +disease_status_at_followup follow_up True string Indicate the participant's disease status at time of follow-up Enum:Complete remission,Distant progression,Loco-regional progression,No evidence of disease,Partial remission,Progression NOS,Relapse or recurrence,Stable +submitter_participant_id measurement True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_measurement measurement False integer Age (in days) of the participant at the time the lab test or measurement was conducted. measurement_code measurement True string Use standartized LOINC (Logical Observation Identifiers Names and Codes) code to represent quantitative, ordinal, or categorical measurements. ['Provide code in Compact URI (CURIE) pattern. LOINC URL: https://loinc.org/{code}'] pattern:^LOINC:[0-9]{1,5}-[0-9]$ measurement_term measurement False string Provide the standardized and human readable term derived from the coding system associated with the measurement_code measurement_result_numeric measurement False decimal The numeric result of the lab test or measurement. measurement_unit measurement False string Indicate the unit of measurement for the result using LOINC code. measurement_result_categorical measurement False string The categorical result of the lab test or measurement. -submitter_treatment_id treatment True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ +submitter_treatment_id medication True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ drug_code medication True string Provide the standardized code from RxNorm, KEGG, PubChem or NCIt to represent the drug. ['Provide code in Compact URI (CURIE) pattern. RxNorm URL: https://rxnav.nlm.nih.gov/REST/rxcui/{code} KEGG URL: https://www.kegg.jp/entry/{code} PubChem URL: https://pubchem.ncbi.nlm.nih.gov/compound/{code} NCIt URL: https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C{code}'] pattern:^RxNorm:\d{1,8}$|^KEGG:D\\d{5}$|^PubChem:\\d{1,9}$|^NCIt:C\\d{1,7}$ drug_term medication False string Provide the standardized and human readable term derived from the coding system associated with the drug_code -drug_dose_units medication False string Indicate units used to record drug dose. "Enum: -IU/kg -IU/m2 -Not available -g/m2 -mg -mg/kg -mg/m2 -ug/m2" +drug_dose_units medication False string Indicate units used to record drug dose. Enum:IU/kg,IU/m2,Not available,g/m2,mg,mg/kg,mg/m2,ug/m2 prescribed_cumulative_drug_dose medication False decimal Indicate the total prescribed cumulative drug dose in the same units specified in drug_dose_units. actual_cumulative_drug_dose medication False decimal Indicate the total actual cumulative drug dose in the same units specified in drug_dose_units. +study_id participant True string Unique identifier of the study. CQDG:study_id +submitter_participant_id participant True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} age_at_enrollment participant False integer Age (in days) of participant at time of enrollment into the study -vital_status participant False string Participant's last known state of living or deceased. "CQDG:vital_status -Phenopacket:individual.vital_status -BeaconV2:individual.vital_status" "Enum: -Alive -Deceased -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -cause_of_death participant False string Indicate the cause of a participant's death. "Enum: -Died of cancer -Died of other reasons -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" +vital_status participant False string Participant's last known state of living or deceased. CQDG:vital_status;Phenopacket:individual.vital_status;BeaconV2:individual.vital_status Enum:Alive,Deceased,Missing,Not Applicable,Not Collected,Not Provided,Restricted Access +cause_of_death participant False string Indicate the cause of a participant's death. Enum:Died of cancer,Died of other reasons,Missing,Not Applicable,Not Collected,Not Provided,Restricted Access age_at_death participant False integer Age of participant (in days) at time of death +submitter_treatment_id procedure True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ procedure_code procedure True string Use code from NCIt, SNOMED-CT, UMLS or CCI to represent the procedure performed. ['Provide code in Compact URI (CURIE) pattern. NCIt URL: https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C SNOMED-CT URL: http://snomed.info/id/ UMLS URL: https://uts.nlm.nih.gov/uts/umls/concept/'] pattern:^NCIt:C\d{1,7}$|^SNOMEDCT:\d{6,9}$|^UMLS:C\d{7}$|^CCI:\d\.[A-Z]{2}\.\d{2}\.[A-Z]{2}$ procedure_term procedure False string Provide the standardized and human readable term derived from the coding system associated with the procedure_code -procedure_body_site_code procedure False string Indicate the ICD-O-3 topography code to describe the site of the surgery if applicable. Please use C80.9 if the site is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344'] "mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite" pattern:^[C][0-9]{2}(.[0-9]{1})?$ +procedure_body_site_code procedure False string Indicate the ICD-O-3 topography code to describe the site of the surgery if applicable. Please use C80.9 if the site is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344'] mCODE.STU3:Specimen.collection.bodySite;FHIR:Specimen.collection.bodySite pattern:^[C][0-9]{2}(.[0-9]{1})?$ procedure_body_site_term procedure False string Provide the standardized and human readable term derived from the coding system associated with the procedure_body_site_code -radiation_modality radiation True string Indicate NCIt code to denote the modality of radiation therapy. "Enum: -Electron Beam (NCIT:C28039) -High-LET Heavy Ion Therapy (NCIT:C15458) -Internal Radiation Therapy (NCIT:C15195) -Photon Beam Radiation Therapy (NCIT:C104914) -Proton Radiation (NCIT:C40431)." +submitter_treatment_id radiation True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ +radiation_modality radiation True string Indicate NCIt code to denote the modality of radiation therapy. Enum:Electron Beam (NCIT:C28039),High-LET Heavy Ion Therapy (NCIT:C15458),Internal Radiation Therapy (NCIT:C15195),Photon Beam Radiation Therapy (NCIT:C104914),Proton Radiation (NCIT:C40431). radiation_fractions radiation False integer Indicate the total number of fractions delivered as part of treatment. radiation_dosage radiation False integer Indicate the total dose given in units of Gray (Gy). -anatomical_site_irradiated_code radiation False string Indicate the ICD-O-3 topography code to describe the irradiated site of radiation if applicable. Please use C80.9 if the site is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344'] "mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite" pattern:^[C][0-9]{2}(.[0-9]{1})?$ +anatomical_site_irradiated_code radiation False string Indicate the ICD-O-3 topography code to describe the irradiated site of radiation if applicable. Please use C80.9 if the site is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344'] mCODE.STU3:Specimen.collection.bodySite;FHIR:Specimen.collection.bodySite pattern:^[C][0-9]{2}(.[0-9]{1})?$ anatomical_site_irradiated_term radiation False string Provide the standardized and human readable term derived from the coding system associated with the anatomical_site_irradiated_code -submitter_specimen_id specimen True string Unique identifier of the specimen within the study, assigned by the data provider. "CQDG:submitter_biospecimen_id -mCODE.STU3:Specimen.Identifier -FHIR:Specimen.Identifier -Phenopacket:biosample.id -BeaconV2:biosample.id" pattern:^[A-Za-z0-9\-\._]{1,64}$ -specimen_tissue_source_code specimen True string Indicate the tissue source of the specimen from which a biopsy or other tissue specimen was obtained. Use codes from NCIt (NCI Thesaurus) or SNOMED-CT (SNOMED Clinical Terms) in Compact URI (CURIE) pattern. "CQDG:biospecimen_tissue_source -mCODE.STU3:Specimen.type -FHIR:Specimen.type -Phenopacket:biosample.sampled_tissue -BeaconV2:biosample.sampled_tissue" pattern:^NCIT:C[0-9]+$|^SNOMEDCT:[0-9]+$ +submitter_participant_id specimen True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +submitter_specimen_id specimen True string Unique identifier of the specimen within the study, assigned by the data provider. CQDG:submitter_biospecimen_id;mCODE.STU3:Specimen.Identifier;FHIR:Specimen.Identifier;Phenopacket:biosample.id;BeaconV2:biosample.id pattern:^[A-Za-z0-9\-\._]{1,64}$ +specimen_tissue_source_code specimen True string Indicate the tissue source of the specimen from which a biopsy or other tissue specimen was obtained. Use codes from NCIt (NCI Thesaurus) or SNOMED-CT (SNOMED Clinical Terms) in Compact URI (CURIE) pattern. CQDG:biospecimen_tissue_source;mCODE.STU3:Specimen.type;FHIR:Specimen.type;Phenopacket:biosample.sampled_tissue;BeaconV2:biosample.sampled_tissue pattern:^NCIT:C[0-9]+$|^SNOMEDCT:[0-9]+$ specimen_tissue_source_term specimen False string Indicate the human readable label for the specimen tissue source code. -specimen_storage specimen False string Indicate the method of long term storage for specimen that were not extracted freshly or immediately cultured. "Phenopacket:biosample.sample_storage -BeaconV2:biosample.sample_storage" "Enum: -Cut slide -Frozen in -70 freezer -Frozen in liquid nitrogen -Frozen in vapour phase -Not Applicable -Other -Paraffin block -RNA later frozen -Unknown" -specimen_processing specimen False string Indicate the technique used to process specimen. "Phenopacket:biosample.sample_processing -BeaconV2:biosample.sample_processing" "Enum: -Cryopreservation - other -Cryopreservation in dry ice (dead tissue) -Cryopreservation in liquid nitrogen (dead tissue) -Cryopreservation of live cells in liquid nitrogen -Formalin fixed & paraffin embedded -Formalin fixed - buffered -Formalin fixed - unbuffered -Fresh -Other -Unknown" +specimen_storage specimen False string Indicate the method of long term storage for specimen that were not extracted freshly or immediately cultured. Phenopacket:biosample.sample_storage;BeaconV2:biosample.sample_storage Enum:Cut slide,Frozen in -70 freezer,Frozen in liquid nitrogen,Frozen in vapour phase,Not Applicable,Other,Paraffin block,RNA later frozen,Unknown +specimen_processing specimen False string Indicate the technique used to process specimen. Phenopacket:biosample.sample_processing;BeaconV2:biosample.sample_processing Enum:Cryopreservation - other,Cryopreservation in dry ice (dead tissue),Cryopreservation in liquid nitrogen (dead tissue),Cryopreservation of live cells in liquid nitrogen,Formalin fixed & paraffin embedded,Formalin fixed - buffered,Formalin fixed - unbuffered,Fresh,Other,Unknown age_at_specimen_collection specimen False integer Indicate participant's age( in days) when specimen was collected. -specimen_anatomic_location_code specimen False string Indicate the ICD-O-3 topography code for the anatomic location of a specimen when it was collected. Please use C80.9 if the primary site of a specimen is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344.'] "mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite" pattern:^[C][0-9]{2}(.[0-9]{1})?$ +specimen_anatomic_location_code specimen False string Indicate the ICD-O-3 topography code for the anatomic location of a specimen when it was collected. Please use C80.9 if the primary site of a specimen is Unkown. ['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344.'] mCODE.STU3:Specimen.collection.bodySite;FHIR:Specimen.collection.bodySite pattern:^[C][0-9]{2}(.[0-9]{1})?$ specimen_anatomic_location_label specimen False string Provide the human readable label for the specimen anatomic location code. -specimen_laterality specimen False string For cancer in a paired organ, indicate the side on which the specimen was obtained. ['Reference caDSR CDE ID 2007875 https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html#/search?publicId=2007875&version=2.0'] "mCODE.STU3:Specimen.collection.bodySite.extension.lateralityQualifier -FHIR:Specimen.collection.bodySite.extension.lateralityQualifier" "Enum: -Left -Not applicable -Right -Unknown" -treatment_type treatment True string The category or method of treatment administered "Enum: -Medication -Pharmacotherapy -Procedure -Radiation therapy -Other" +specimen_laterality specimen False string For cancer in a paired organ, indicate the side on which the specimen was obtained. ['Reference caDSR CDE ID 2007875 https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html#/search?publicId=2007875&version=2.0'] mCODE.STU3:Specimen.collection.bodySite.extension.lateralityQualifier;FHIR:Specimen.collection.bodySite.extension.lateralityQualifier Enum:Left,Not applicable,Right,Unknown +submitter_participant_id treatment True string Unique identifier of the participant within the study, assigned by the data provider. CQDG:submitter_participant_id;mCODE.STU3:Patient.Identifier;FHIR:Patient.Identifier;Phenopacket:individual.id;BeaconV2:individual.id pattern:^[A-Za-z0-9\-\._]{1,64} +submitter_treatment_id treatment True string Unique identifier of the treatment, assigned by the data provider. pattern:^[A-Za-z0-9\-\._]{1,64}$ +treatment_type treatment True string The category or method of treatment administered Enum:Medication,Pharmacotherapy,Procedure,Radiation therapy,Other age_at_treatment treatment False integer Age (in days) of the participant at the time the treatment was administered. treatment_duration treatment False integer The length of time (in days) over which the treatment was administered. -treatment_intent treatment False string The purpose of the treatment or the desired effect or outcome resulting from the treatment. "Enum: -Curative -Diagnostic -Forensic -Guidance -Palliative -Preventative -Screening -Supportive" -treatment_response treatment False string The outcome of the treatment, indicating how the patient responded to the intervention "Enum: -Clinical remission -Disease Progression -Improvement of symptoms -No improvement of symptoms -No sign of disease -Not applicable -Partial Response -Stable Disease -Treatment cessation due to toxicity -Worsening of symptoms" -treatment_status treatment False string Indicate the donor's status of the prescribed treatment. "Enum: -Not available -Other -Patient choice (stopped or interrupted treatment) -Physician decision (stopped or interrupted treatment) -Treatment completed as prescribed -Treatment incomplete because patient died -Treatment incomplete due to technical or organizational problems -Treatment ongoing -Treatment stopped due to acute toxicity -Treatment stopped due to lack of efficacy (disease progression)" +treatment_intent treatment False string The purpose of the treatment or the desired effect or outcome resulting from the treatment. Enum:Curative,Diagnostic,Forensic,Guidance,Palliative,Preventative,Screening,Supportive +treatment_response treatment False string The outcome of the treatment, indicating how the patient responded to the intervention Enum:Clinical remission,Disease Progression,Improvement of symptoms,No improvement of symptoms,No sign of disease,Not applicable,Partial Response,Stable Disease,Treatment cessation due to toxicity,Worsening of symptoms +treatment_status treatment False string Indicate the donor's status of the prescribed treatment. Enum:Not available,Other,Patient choice (stopped or interrupted treatment),Physician decision (stopped or interrupted treatment),Treatment completed as prescribed,Treatment incomplete because patient died,Treatment incomplete due to technical or organizational problems,Treatment ongoing,Treatment stopped due to acute toxicity,Treatment stopped due to lack of efficacy (disease progression) diff --git a/csv/example/example_flattened.csv b/csv/example/example_flattened.csv deleted file mode 100644 index 74d6bac..0000000 --- a/csv/example/example_flattened.csv +++ /dev/null @@ -1,350 +0,0 @@ -,schema,required,dataType,description,comments,exact_mappings,validation -submitter_participant_id,Treatment,True,string,"Unique identifier of the participant within the study, assigned by the data provider.",,"CQDG:submitter_participant_id -mCODE.STU3:Patient.Identifier -FHIR:Patient.Identifier -Phenopacket:individual.id -BeaconV2:individual.id","pattern:^[A-Za-z0-9\-\._]{1,64}" -age_at_comorbidity_diagnosis,Comorbidity,False,integer,Indicate the age (in days) of comorbidity diagnosis.,,,minimum_value:0 -comorbidity_code,Comorbidity,True,string,Use ICD-10 code or Mondo code to indicate the comorbidity diagnosed.,['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'],,"pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$" -comorbidity_term,Comorbidity,False,string,Provide the standardized and human readable term derived from the coding system associated with the comorbidity_code,,"mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term", -comorbidity_source_text,Comorbidity,False,string,Provide the original textual description of the diagnosed comorbidity as reported in the participant's clinical or medical records if applicable,,"mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term", -comorbidity_treatment_status,Comorbidity,False,string,Indicate whether the comorbid condition is currently being treated or not.,,,"Enum: -Treated and resolved -Under treatment -Unknown -Untreated" -comorbidity_status,Comorbidity,False,string,Indicate the current state or activity of the comorbid condition.,,,"Enum: -Active -In Remission -Resolved -Unknown" -study_id,Participant,True,string,Unique identifier of the study.,,CQDG:study_id, -gender,Demographic,True,string,"Refers to an individual's personal and social identity as a man, woman or non-binary person (a person who is not exclusively a man or a woman). The provided values are based on the categories defined by Statistics Canada",['EDI indicator'],"mCODE.STU3:Patient.gender -FHIR:Patient.gender -Phenopacket:individual.gender -BeaconV2:individual.gender","Enum: -Man -Missing -Non-binary person -Not Applicable -Not Collected -Not Provided -Restricted Access -Woman" -sex_at_birth,Demographic,True,string,Refers to sex assigned at birth. Sex at birth is typically assigned based on a person's reproductive system and other physical characteristics. The provided values are based on the categoried defined by Statistics Canada,['EDI indicator'],"All4One:Biological_sex -CQDG:Sex -mCODE.STU3:Patient.extension.birthsex -FHIR:Patient.extension.birthsex -Phenopacket:individual.sex -BeaconV2:individual.sex","Enum: -Female -Male -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -ethnicity,Demographic,True,string,Refers to the ethnic or cultural origins of a person’s ancestors. The provided values are based on the list of ethnic or cultural origins 2021 defined by Statistics Canada,['EDI indicator'],"mCODE.STU3:Patient.extension.ethnicity -FHIR:Patient.extension.ethnicity","Enum: -African -Asian -Caribbean -European -Latin, Central and South American -Missing -North American -Not Applicable -Not Collected -Not Provided -Oceanian -Other ethnic and cultural -Restricted Access" -race,Demographic,True,string,A social construct used to judge and categorize people based on perceived differences in physical appearance in ways that create and maintain power differentials within social hierarchies. There is no scientifically supported biological basis for discrete racial groups. The provided values are based on race-based data standard defined by CIHI guidance,['EDI indicator'],"mCODE.STU3:Patient.extension.race -FHIR:Patient.extension.race","Enum: -Another race category -Black -Do not know -East Asian -Indigenous (First Nations, Inuk/Inuit, Métis) -Latin American -Middle Eastern -Missing -Not Applicable -Not Collected -Not Provided -Prefer not to answer -Restricted Access -South Asian -Southeast Asian -White" -country_of_birth,Demographic,False,string,"The country where an individual was born, typically defined by current geopolitical borders.",,, -ancestry,Demographic,False,string,"The genetic or genomic heritage of an individual, typically tracing the origins of one's ancestors.",,, -height,Demographic,False,decimal,The measurement of an individual from head to toe recorded in centimeters (cm).,,, -weight,Demographic,False,decimal,The mass or body weight of an individual measured in kilograms (kg).,,, -highest_education_level_achieved,Demographic,False,string,The highest level of formal education an individual has completed.,,, -employment,Demographic,False,string,The current work status of an individual.,,, -type_of_residence,Demographic,False,string,The kind of dwelling in which an individual lives.,,, -number_of_other_people_in_household,Demographic,False,integer,"The total number of individuals living in the same household as the participant, excluding the participant.",,, -pregnancy,Demographic,False,string,Indicates whether an individual is currently pregnant.,,,"Enum: -No -Unknown -Yes" -submitter_diagnosis_id,Diagnosis,True,string,"Unique identifier of the primary diagnosis event, assigned by the data provider.",,"mCODE.STU1:Condition.identifier -mCODE.STU2:Condition.identifier -FHIR:condition.code","pattern:^[A-Za-z0-9\-\._]{1,64}$" -age_at_diagnosis,Diagnosis,False,integer,Age of participant (in days) at time of diagnosis of the condition.,,"mCODE.STU3:condition.onset -FHIR:condition.onset -Phenopacket:disease.onset", -disease_source_text,Diagnosis,False,string,Provide the original textual description of the diagnosed disease as reported in the participant's clinical or medical records if applicable,,"mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term", -disease_code,Diagnosis,True,string,Use ICD-10 code or Mondo code to represent the disease diagnosed.,['Provide code in Compact URI (CURIE) pattern. ICD-10 code: refer to https://icd.who.int/browse10/2019/en MONDO code: refer to https://www.ebi.ac.uk/ols/ontologies/mondo'],"mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term","pattern:^ICD10:[A-TV-Z][0-9][0-9AB].?[0-9A-TV-Z]{0,4}$|^(MONDO:)[0-9]{7}$" -disease_term,Diagnosis,False,string,Provide the standardized and human readable term derived from the coding system associated with the disease_code,,"mCODE.STU3:condition.code -FHIR:condition.code -Phenopacket:disease.term", -disease_category,Diagnosis,True,string,A broader classification that groups diseases into categories.,,,"Enum: -Autoimmune disorder -Cancer -Infectious disease" -covid19_severity,Diagnosis,False,string,"A measure of the severity of a COVID-19 infection, which can range from mild to severe or critical.",,,"Enum: -Ambulatory (limitation of activities) -Ambulatory (no limitation of activities) -Ambulatory (with or without limitation of activities) -Dead - death -Hospitalized - mild disease (hospitalized, no oxygen therapy) -Hospitalized - mild disease (hospitalized, oxygen by mask or nasal prongs) -Hospitalized - severe disease (intubation and mechanical ventilation) -Hospitalized - severe disease (non-invasive ventilation or high-flow oxygen) -Hospitalized - severe disease (ventilation and additional organ support, i.e. pressors, RRT, ECMO) -Uninfected (no clinical or virological evidence of infection)" -covid19_vaccine_doses,Diagnosis,False,string,The number of COVID-19 vaccine doses the patient has received.,,,"Enum: -No -Unknown -Yes, four doses -Yes, one dose -Yes, three doses -Yes, two doses" -alcohol_consumption,Exposure,False,string,Indicate current alcohol consumtion based on recommended daily limit.,"['Refer to the Canadian Centre on Substance Abuse, for men, no more than 15 drinks a week, with no more than 3 drinks a day most days. No more than 4 drinks on any single occasion. For women, no more than 10 drinks a week, with no more than 2 drinks a day most days. No more than 3 drinks on any single occasion.']",,"Enum: -Alcohol consumption unknown -Alcohol intake exceeds recommended daily limit -Alcohol intake within recommended daily limit -No alcohol consumption" -smoking_status,Exposure,False,string,Indicate smoking status and history of a participant at the time of the data collection.,,,"Enum: -Current reformed smoker for <= 15 years -Current reformed smoker for > 15 years -Current reformed smoker, duration not specified -Current smoker -Lifelong non-smoker (<100 cigarettes smoked in lifetime) -Not applicable -Smoking history not documented" -physical_activity,Exposure,False,string,Indicate how many times per week the participant exercises for at least 30 minutes.,,,"Enum: -1-3 times a month -Every day -Less than once a month -Most days but not every day -Never -Not applicable -Once or twice a week -Unknown" -age_at_followup,Follow_up,False,integer,Participant's age (in days) at time of the follow up event,,, -disease_status_at_followup,Follow_up,True,string,Indicate the participant's disease status at time of follow-up,,,"Enum: -Complete remission -Distant progression -Loco-regional progression -No evidence of disease -Partial remission -Progression NOS -Relapse or recurrence -Stable" -age_at_measurement,Measurement,False,integer,Age (in days) of the participant at the time the lab test or measurement was conducted.,,, -measurement_code,Measurement,True,string,"Use standartized LOINC (Logical Observation Identifiers Names and Codes) code to represent quantitative, ordinal, or categorical measurements.",['Provide code in Compact URI (CURIE) pattern. LOINC URL: https://loinc.org/{code}'],,"pattern:^LOINC:[0-9]{1,5}-[0-9]$" -measurement_term,Measurement,False,string,Provide the standardized and human readable term derived from the coding system associated with the measurement_code,,, -measurement_source_text,Measurement,False,string,Provide the original textual description of the measurement as reported in the participant's clinical or medical records if applicable,,, -measurement_result_numeric,Measurement,False,decimal,The numeric result of the lab test or measurement.,,, -measurement_unit,Measurement,False,string,Indicate the unit of measurement for the result using LOINC code.,,, -measurement_result_categorical,Measurement,False,string,The categorical result of the lab test or measurement.,,, -submitter_treatment_id,Treatment,True,string,"Unique identifier of the treatment, assigned by the data provider.",,,"pattern:^[A-Za-z0-9\-\._]{1,64}$" -drug_source_text,Medication,False,string,Provide the original text from medical records describing the drug used in the treatment if applicable.,,, -drug_code,Medication,True,string,"Provide the standardized code from RxNorm, KEGG, PubChem or NCIt to represent the drug.",['Provide code in Compact URI (CURIE) pattern. RxNorm URL: https://rxnav.nlm.nih.gov/REST/rxcui/{code} KEGG URL: https://www.kegg.jp/entry/{code} PubChem URL: https://pubchem.ncbi.nlm.nih.gov/compound/{code} NCIt URL: https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C{code}'],,"pattern:^RxNorm:\d{1,8}$|^KEGG:D\\d{5}$|^PubChem:\\d{1,9}$|^NCIt:C\\d{1,7}$" -drug_term,Medication,False,string,Provide the standardized and human readable term derived from the coding system associated with the drug_code,,, -drug_dose_units,Medication,False,string,Indicate units used to record drug dose.,,,"Enum: -IU/kg -IU/m2 -Not available -g/m2 -mg -mg/kg -mg/m2 -ug/m2" -prescribed_cumulative_drug_dose,Medication,False,decimal,Indicate the total prescribed cumulative drug dose in the same units specified in drug_dose_units.,,, -actual_cumulative_drug_dose,Medication,False,decimal,Indicate the total actual cumulative drug dose in the same units specified in drug_dose_units.,,, -age_at_enrollment,Participant,False,integer,Age (in days) of participant at time of enrollment into the study,,, -vital_status,Participant,False,string,Participant's last known state of living or deceased.,,"CQDG:vital_status -Phenopacket:individual.vital_status -BeaconV2:individual.vital_status","Enum: -Alive -Deceased -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -cause_of_death,Participant,False,string,Indicate the cause of a participant's death.,,,"Enum: -Died of cancer -Died of other reasons -Missing -Not Applicable -Not Collected -Not Provided -Restricted Access" -age_at_death,Participant,False,integer,Age of participant (in days) at time of death,,, -age_at_phenotype,Phenotype,False,integer,Participant's age (in days) when phenotype was observed,,, -phenotype_code,Phenotype,True,string,Use standardized HPO (Human Phenotype Ontology) codes to represent the phenotype.,['Provide code in Compact URI (CURIE) pattern. HPO URL: https://hpo.jax.org/app/browse/term/HP:'],,pattern:^HP:[0-9]{7}$ -phenotype_term,Phenotype,False,string,Provide the standardized and human readable term derived from the coding system associated with the phenotype_code.,,, -phenotype_source_text,Phenotype,False,string,Provide the original textual description of the phenotype as reported in the participant's clinical or medical records if applicable,,, -phenotype_observed,Phenotype,True,string,Indicate whether the phenotype was observed in the participant.,,,"Enum: -No -Unknown -Yes" -phenotype_duration,Phenotype,False,integer,Indicate the length of time (in days) over which the phenotype was observed in the participant.,,, -phenotype_severity,Phenotype,False,string,The degree or severity of the observed phenotype.,['Permissible values from https://hpo.jax.org/browse/term/HP:0012824'],,"Enum: -Borderline -Mild -Moderate -Profound -Severe" -procedure_source_text,Procedure,False,string,"The original texual description of the procedure performed, as recorded in clinical notes or medical records if applicable.",,, -procedure_code,Procedure,True,string,"Use code from NCIt, SNOMED-CT, UMLS or CCI to represent the procedure performed.",['Provide code in Compact URI (CURIE) pattern. NCIt URL: https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C SNOMED-CT URL: http://snomed.info/id/ UMLS URL: https://uts.nlm.nih.gov/uts/umls/concept/'],,"pattern:^NCIt:C\d{1,7}$|^SNOMEDCT:\d{6,9}$|^UMLS:C\d{7}$|^CCI:\d\.[A-Z]{2}\.\d{2}\.[A-Z]{2}$" -procedure_term,Procedure,False,string,Provide the standardized and human readable term derived from the coding system associated with the procedure_code,,, -procedure_body_site_code,Procedure,False,string,Indicate the ICD-O-3 topography code to describe the site of the surgery if applicable. Please use C80.9 if the site is Unkown.,"['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344']","mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite",pattern:^[C][0-9]{2}(.[0-9]{1})?$ -procedure_body_site_term,Procedure,False,string,Provide the standardized and human readable term derived from the coding system associated with the procedure_body_site_code,,, -procedure_body_site_source_text,Procedure,False,string,Provide the original textual description of the body site of the procedure as reported in the participant's clinical or medical records if applicable,,, -submitter_specimen_id,Specimen,True,string,"Unique identifier of the specimen within the study, assigned by the data provider.",,"CQDG:submitter_biospecimen_id -mCODE.STU3:Specimen.Identifier -FHIR:Specimen.Identifier -Phenopacket:biosample.id -BeaconV2:biosample.id","pattern:^[A-Za-z0-9\-\._]{1,64}$" -specimen_tissue_source_code,Specimen,True,string,Indicate the tissue source of the specimen from which a biopsy or other tissue specimen was obtained. Use codes from NCIt (NCI Thesaurus) or SNOMED-CT (SNOMED Clinical Terms) in Compact URI (CURIE) pattern.,,"CQDG:biospecimen_tissue_source -mCODE.STU3:Specimen.type -FHIR:Specimen.type -Phenopacket:biosample.sampled_tissue -BeaconV2:biosample.sampled_tissue",pattern:^NCIT:C[0-9]+$|^SNOMEDCT:[0-9]+$ -specimen_tissue_source_term,Specimen,False,string,Indicate the human readable label for the specimen tissue source code.,,, -specimen_tissue_source_text,Specimen,False,string,Indicate the specimen tissue source as reported in the participant's source data.,,, -specimen_storage,Specimen,False,string,Indicate the method of long term storage for specimen that were not extracted freshly or immediately cultured.,,"Phenopacket:biosample.sample_storage -BeaconV2:biosample.sample_storage","Enum: -Cut slide -Frozen in -70 freezer -Frozen in liquid nitrogen -Frozen in vapour phase -Not Applicable -Other -Paraffin block -RNA later frozen -Unknown" -specimen_processing,Specimen,False,string,Indicate the technique used to process specimen.,,"Phenopacket:biosample.sample_processing -BeaconV2:biosample.sample_processing","Enum: -Cryopreservation - other -Cryopreservation in dry ice (dead tissue) -Cryopreservation in liquid nitrogen (dead tissue) -Cryopreservation of live cells in liquid nitrogen -Formalin fixed & paraffin embedded -Formalin fixed - buffered -Formalin fixed - unbuffered -Fresh -Other -Unknown" -age_at_specimen_collection,Specimen,False,integer,Indicate participant's age( in days) when specimen was collected.,,, -specimen_anatomic_location_code,Specimen,False,string,Indicate the ICD-O-3 topography code for the anatomic location of a specimen when it was collected. Please use C80.9 if the primary site of a specimen is Unkown.,"['Refer to the International Classification of Diseases for Oncology, 3rd Edition (WHO ICD-O-3) manual for guidelines at https://apps.who.int/iris/handle/10665/42344.']","mCODE.STU3:Specimen.collection.bodySite -FHIR:Specimen.collection.bodySite",pattern:^[C][0-9]{2}(.[0-9]{1})?$ -specimen_anatomic_location_label,Specimen,False,string,Provide the human readable label for the specimen anatomic location code.,,, -specimen_anatomic_location_source_text,Specimen,False,string,Provide the specimen anatomic location as reported in the participant's source data if applicable,,, -specimen_laterality,Specimen,False,string,"For cancer in a paired organ, indicate the side on which the specimen was obtained.",['Reference caDSR CDE ID 2007875 https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html#/search?publicId=2007875&version=2.0'],"mCODE.STU3:Specimen.collection.bodySite.extension.lateralityQualifier -FHIR:Specimen.collection.bodySite.extension.lateralityQualifier","Enum: -Left -Not applicable -Right -Unknown" -treatment_type,Treatment,True,string,The category or method of treatment administered,,,"Enum: -Medication -Pharmacotherapy -Procedure -Radiation therapy" -age_at_treatment,Treatment,False,integer,Age (in days) of the participant at the time the treatment was administered.,,, -treatment_duration,Treatment,False,integer,The length of time (in days) over which the treatment was administered.,,, -treatment_intent,Treatment,False,string,The purpose of the treatment or the desired effect or outcome resulting from the treatment.,,,"Enum: -Curative -Diagnostic -Forensic -Guidance -Palliative -Preventative -Screening -Supportive" -treatment_response,Treatment,False,string,"The outcome of the treatment, indicating how the patient responded to the intervention",,,"Enum: -Clinical remission -Disease Progression -Improvement of symptoms -No improvement of symptoms -No sign of disease -Not applicable -Partial Response -Stable Disease -Treatment cessation due to toxicity -Worsening of symptoms" -treatment_status,Treatment,False,string,Indicate the donor's status of the prescribed treatment.,,,"Enum: -Not available -Other -Patient choice (stopped or interrupted treatment) -Physician decision (stopped or interrupted treatment) -Treatment completed as prescribed -Treatment incomplete because patient died -Treatment incomplete due to technical or organizational problems -Treatment ongoing -Treatment stopped due to acute toxicity -Treatment stopped due to lack of efficacy (disease progression)" -image_hosted_url,Imaging,False,string,A URL for where the image is hosted,,, -image_hosted_format,Imaging,False,string,Accepted data types for images,,,"Enum: -PNG -SVG -PDF -JPEG" -image_processing_pipeline,Imaging,False,string,"Post processing pipeline dependent on image_hosted_format where: - - image_hosted_format is PNG, choices are -* PipelineA -* PipelineB -* PipelineC - - image_hosted_format is SVG, choices are -* PipelineA -* PipelineD -* PipelineE - - image_hosted_format is PDF or JPEG, choices are -* PipelineA -* PipelineD -* PipelineF -",,,"Enum: -pipelineA -pipelineB -pipelineC -pipelineD -pipelineE -pipelineF" -image_processing_personel,Imaging,False,string,"String value for the person who processed the image. -Must be provided.If not 'image_processing_null_reason' must be provided",,, -image_processing_null_reason,Imaging,False,string,"Tracks reason for why image processing personnel was not provided. -Only applicable if image_processing_personnel is not provided. -Possible values: -* Unknown -* Revoked -* Not Provided -Otherwise leave empty",,,"Enum: -Unknown -Not Provided -Revoked" diff --git a/csv/template/example/comorbidity_template.tsv b/csv/template/example/comorbidity_template.tsv index 7d78bbe..f13ddf7 100644 --- a/csv/template/example/comorbidity_template.tsv +++ b/csv/template/example/comorbidity_template.tsv @@ -1 +1 @@ - submitter_participant_id age_at_comorbidity_diagnosis comorbidity_code comorbidity_term comorbidity_treatment_status comorbidity_status +submitter_participant_id age_at_comorbidity_diagnosis comorbidity_code comorbidity_term comorbidity_treatment_status comorbidity_status diff --git a/csv/template/example/demographic_template.tsv b/csv/template/example/demographic_template.tsv index 2bc9f5e..3c0d835 100644 --- a/csv/template/example/demographic_template.tsv +++ b/csv/template/example/demographic_template.tsv @@ -1 +1 @@ - study_id submitter_participant_id gender sex_at_birth ethnicity race country_of_birth ancestry height weight highest_education_level_achieved employment type_of_residence number_of_other_people_in_household pregnancy +study_id submitter_participant_id gender sex_at_birth ethnicity race country_of_birth ancestry height weight highest_education_level_achieved employment type_of_residence number_of_other_people_in_household pregnancy diff --git a/csv/template/example/diagnosis_template.tsv b/csv/template/example/diagnosis_template.tsv index b1720b3..741139b 100644 --- a/csv/template/example/diagnosis_template.tsv +++ b/csv/template/example/diagnosis_template.tsv @@ -1 +1 @@ - submitter_participant_id submitter_diagnosis_id age_at_diagnosis disease_code disease_term disease_category covid19_severity covid19_vaccine_doses +submitter_participant_id submitter_diagnosis_id age_at_diagnosis disease_code disease_term disease_category covid19_severity covid19_vaccine_doses diff --git a/csv/template/example/exposure_template.tsv b/csv/template/example/exposure_template.tsv index c37190f..3d9c305 100644 --- a/csv/template/example/exposure_template.tsv +++ b/csv/template/example/exposure_template.tsv @@ -1 +1 @@ - submitter_participant_id alcohol_consumption smoking_status physical_activity +submitter_participant_id alcohol_consumption smoking_status physical_activity diff --git a/csv/template/example/follow_up_template.tsv b/csv/template/example/follow_up_template.tsv index 6f7c9ef..95bebb6 100644 --- a/csv/template/example/follow_up_template.tsv +++ b/csv/template/example/follow_up_template.tsv @@ -1 +1 @@ - submitter_participant_id age_at_followup disease_status_at_followup +submitter_participant_id age_at_followup disease_status_at_followup diff --git a/csv/template/example/imaging_template.tsv b/csv/template/example/imaging_template.tsv index f272e6b..fc080d1 100644 --- a/csv/template/example/imaging_template.tsv +++ b/csv/template/example/imaging_template.tsv @@ -1 +1 @@ - image_hosted_url image_hosted_format image_processing_pipeline image_processing_personel image_processing_null_reason +image_hosted_url image_hosted_format image_processing_pipeline image_processing_personel image_processing_null_reason diff --git a/csv/template/example/measurement_template.tsv b/csv/template/example/measurement_template.tsv index 3a21a73..3cb217d 100644 --- a/csv/template/example/measurement_template.tsv +++ b/csv/template/example/measurement_template.tsv @@ -1 +1 @@ - submitter_participant_id age_at_measurement measurement_code measurement_term measurement_result_numeric measurement_unit measurement_result_categorical +submitter_participant_id age_at_measurement measurement_code measurement_term measurement_result_numeric measurement_unit measurement_result_categorical diff --git a/csv/template/example/medication_template.tsv b/csv/template/example/medication_template.tsv index 35eee3b..65041a5 100644 --- a/csv/template/example/medication_template.tsv +++ b/csv/template/example/medication_template.tsv @@ -1 +1 @@ - submitter_treatment_id drug_code drug_term drug_dose_units prescribed_cumulative_drug_dose actual_cumulative_drug_dose +submitter_treatment_id drug_code drug_term drug_dose_units prescribed_cumulative_drug_dose actual_cumulative_drug_dose diff --git a/csv/template/example/participant_template.tsv b/csv/template/example/participant_template.tsv index d00f56a..f59d6c1 100644 --- a/csv/template/example/participant_template.tsv +++ b/csv/template/example/participant_template.tsv @@ -1 +1 @@ - study_id submitter_participant_id age_at_enrollment vital_status cause_of_death age_at_death +study_id submitter_participant_id age_at_enrollment vital_status cause_of_death age_at_death diff --git a/csv/template/example/phenotype_template.tsv b/csv/template/example/phenotype_template.tsv index 46e03c7..04095d7 100644 --- a/csv/template/example/phenotype_template.tsv +++ b/csv/template/example/phenotype_template.tsv @@ -1 +1 @@ - submitter_participant_id age_at_phenotype phenotype_code phenotype_term phenotype_observed phenotype_duration phenotype_severity +submitter_participant_id age_at_phenotype phenotype_code phenotype_term phenotype_observed phenotype_duration phenotype_severity diff --git a/csv/template/example/procedure_template.tsv b/csv/template/example/procedure_template.tsv index 9340183..2e93b9a 100644 --- a/csv/template/example/procedure_template.tsv +++ b/csv/template/example/procedure_template.tsv @@ -1 +1 @@ - submitter_treatment_id procedure_code procedure_term procedure_body_site_code procedure_body_site_term +submitter_treatment_id procedure_code procedure_term procedure_body_site_code procedure_body_site_term diff --git a/csv/template/example/specimen_template.tsv b/csv/template/example/specimen_template.tsv index b4de82f..7a3cc50 100644 --- a/csv/template/example/specimen_template.tsv +++ b/csv/template/example/specimen_template.tsv @@ -1 +1 @@ - submitter_participant_id submitter_specimen_id specimen_tissue_source_code specimen_tissue_source_term specimen_storage specimen_processing age_at_specimen_collection specimen_anatomic_location_code specimen_anatomic_location_label specimen_laterality +submitter_participant_id submitter_specimen_id specimen_tissue_source_code specimen_tissue_source_term specimen_storage specimen_processing age_at_specimen_collection specimen_anatomic_location_code specimen_anatomic_location_label specimen_laterality diff --git a/csv/template/example/treatment_template.tsv b/csv/template/example/treatment_template.tsv index 7a2a8e1..3b80a54 100644 --- a/csv/template/example/treatment_template.tsv +++ b/csv/template/example/treatment_template.tsv @@ -1 +1 @@ - submitter_participant_id submitter_treatment_id treatment_type age_at_treatment treatment_duration treatment_intent treatment_response treatment_status +submitter_participant_id submitter_treatment_id treatment_type age_at_treatment treatment_duration treatment_intent treatment_response treatment_status diff --git a/scripts/generateFlatDefinitionsTsvFromFullLinkml.py b/scripts/generateFlatDefinitionsTsvFromFullLinkml.py new file mode 100644 index 0000000..9cc9563 --- /dev/null +++ b/scripts/generateFlatDefinitionsTsvFromFullLinkml.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2022, icgc-argo + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + Authors: + Edmund Su +""" + +import json +import glob +import urllib +import requests +import re +import numpy as np +import os +import random +import jsonschema +import string +import time +import random +import hashlib +import shutil +import argparse +import copy +import pandas as pd +from linkml_runtime.loaders import yaml_loader +from linkml_runtime.dumpers import yaml_dumper +from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition + +def main(): + """ + The script aims to translate the FULL linkML model to lectern syntax. + """ + parser = argparse.ArgumentParser(description='The script aims to translate the FULL linkML model to flatten TSV for viewing.') + parser.add_argument('-c', '--custom_linkml', dest="custom_linkml", help="The custom full LinkML schema", required=True,type=str) + parser.add_argument('-o', '--output_directory', dest="output_directory", help="Output directory to save the Lectern JSON schema", default=os.getcwd(),type=str) + + + cli_input= parser.parse_args() + + if not cli_input.custom_linkml.endswith("_full.yaml"): + print("%s does not end with the correct suffix. Please check the correct yaml was provided." % (cli_input.custom_linkml)) + + model=yaml_loader.load(cli_input.custom_linkml, SchemaDefinition) + + definitions=initialize_dataframe() + + populateDataFrame(model,definitions) + + definitions['schema']=[val.lower() for val in definitions['schema'].values.tolist()] + definitions.to_csv("%s/%s" % (cli_input.output_directory,cli_input.custom_linkml.split('/')[-1].replace("_full.yaml","_flattened.tsv")),index=False,sep='\t') + +def initialize_dataframe(): + df=pd.DataFrame() + df['field']=None + df['schema']=None + df['required']=None + df['dataType']=None + df['description']=None + df['comments']=None + df['exact_mappings']=None + return df + +def populateDataFrame(model,definitions): + count=0 + for lm_class in model.classes: + for slot in model.classes[lm_class]['slots']: + definitions.loc[count,"field"]=slot + definitions.loc[count,"schema"]=lm_class + count+=1 + + for ind in definitions.index.values.tolist(): + slot=definitions.loc[ind,"field"] + key="required" + if key in model.slots[slot] and model.slots[slot][key]!=None: + definitions.loc[ind,key]=model.slots[slot][key] + else: + definitions.loc[ind,key]=False + + key="range" + if key in model.slots[slot] and model.slots[slot][key]!=None: + if "Menu" in model.slots[slot][key]: + definitions.loc[ind,"dataType"]="string" + else: + definitions.loc[ind,"dataType"]=model.slots[slot][key] + else: + definitions.loc[ind,"dataType"]=False + + key="description" + if key in model.slots[slot] and model.slots[slot][key]!=None: + definitions.loc[ind,key]=model.slots[slot][key] + else: + definitions.loc[ind,key]=None + + key="comments" + if key in model.slots[slot] and len(model.slots[slot][key])!=0: + definitions.loc[ind,key]=model.slots[slot][key] + else: + definitions.loc[ind,key]=None + + key="exact_mappings" + if key in model.slots[slot] and len(model.slots[slot][key])!=0: + definitions.loc[ind,key]=";".join(model.slots[slot][key]) + else: + definitions.loc[ind,key]=None + + validation_rules=[] + + key="pattern" + if key in model.slots[slot] and model.slots[slot][key]!=None: + #print(key,ind) + validation_rules.append("%s:%s" % (key,model.slots[slot][key])) + #print(validation_rules) + + key="minimum_value" + if key in model.slots[slot] and model.slots[slot][key]!=None: + #print(key,ind) + validation_rules.append("%s:%s" % (key,str(model.slots[slot][key]))) + #print(validation_rules) + + key="maximum_value" + if key in model.slots[slot] and model.slots[slot][key]!=None: + #print(key,ind) + validation_rules.append("%s:%s" % (key,str(model.slots[slot][key]))) + #print(validation_rules) + + key="range" + if "Menu" in model.slots[slot][key]: + #print(key,ind) + enum_list=[] + #validation_rules.append("%s:" % ("Enum")) + for enum_value in model.enums[model.slots[slot][key]]['permissible_values']: + enum_list.append(enum_value) + validation_rules.append("%s:%s" % ("Enum",",".join(enum_list))) + #print(validation_rules) + + if len(validation_rules)>0: + definitions.loc[ind,"validation"]=";".join(validation_rules) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/generateTemplateTsvFromFullLinkml.py b/scripts/generateTemplateTsvFromFullLinkml.py new file mode 100644 index 0000000..51c33af --- /dev/null +++ b/scripts/generateTemplateTsvFromFullLinkml.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2022, icgc-argo + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + Authors: + Edmund Su +""" + +import json +import glob +import urllib +import requests +import re +import numpy as np +import os +import random +import jsonschema +import string +import time +import random +import hashlib +import shutil +import argparse +import copy +import pandas as pd +from linkml_runtime.loaders import yaml_loader +from linkml_runtime.dumpers import yaml_dumper +from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition + +def main(): + """ + The script aims to translate the FULL linkML model to lectern syntax. + """ + parser = argparse.ArgumentParser(description='The script aims to translate the FULL linkML model to flatten TSV for viewing.') + parser.add_argument('-c', '--custom_linkml', dest="custom_linkml", help="The custom full LinkML schema", required=True,type=str) + parser.add_argument('-o', '--output_directory', dest="output_directory", help="Output directory to save the Lectern JSON schema", default=os.getcwd(),type=str) + + + cli_input= parser.parse_args() + + if not cli_input.custom_linkml.endswith("_full.yaml"): + print("%s does not end with the correct suffix. Please check the correct yaml was provided." % (cli_input.custom_linkml)) + + model=yaml_loader.load(cli_input.custom_linkml, SchemaDefinition) + + templates={} + for lm_class in model.classes: + templates[lm_class]=pd.DataFrame() + populateDataFrame(model,templates[lm_class],lm_class) + + for key in templates.keys(): + templates[key].to_csv("%s/%s_template.tsv" % (cli_input.output_directory,key.lower()),sep='\t',index=False) + + + #templates=initialize_dataframe() + + #populateDataFrame(model,definitions) + + #definitions.to_csv("%s/%s" % (cli_input.output_directory,cli_input.custom_linkml.split('/')[-1].replace("_full.yaml","_flattened.tsv")),index=True,sep='\t') + + +def populateDataFrame(model,template,key): + for slot in model.classes[key]['slots']: + template[slot]=None + +if __name__ == "__main__": + main() \ No newline at end of file