Pan-Canadian-Genome-Library · edsu7 · Jan 22, 2025 · Jan 24, 2025
diff --git a/csv/definitions/example/example_flattened.tsv b/csv/definitions/example/example_flattened.tsv
diff --git a/csv/definitions/pcgl/pcgl_flattened.tsv b/csv/definitions/pcgl/pcgl_flattened.tsv
diff --git a/csv/example/example_flattened.csv b/csv/example/example_flattened.csv
diff --git a/csv/template/example/comorbidity_template.tsv b/csv/template/example/comorbidity_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	age_at_comorbidity_diagnosis	comorbidity_code	comorbidity_term	comorbidity_treatment_status	comorbidity_status
+submitter_participant_id	age_at_comorbidity_diagnosis	comorbidity_code	comorbidity_term	comorbidity_treatment_status	comorbidity_status
diff --git a/csv/template/example/demographic_template.tsv b/csv/template/example/demographic_template.tsv
@@ -1 +1 @@
-	study_id	submitter_participant_id	gender	sex_at_birth	ethnicity	race	country_of_birth	ancestry	height	weight	highest_education_level_achieved	employment	type_of_residence	number_of_other_people_in_household	pregnancy
+study_id	submitter_participant_id	gender	sex_at_birth	ethnicity	race	country_of_birth	ancestry	height	weight	highest_education_level_achieved	employment	type_of_residence	number_of_other_people_in_household	pregnancy
diff --git a/csv/template/example/diagnosis_template.tsv b/csv/template/example/diagnosis_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	submitter_diagnosis_id	age_at_diagnosis	disease_code	disease_term	disease_category	covid19_severity	covid19_vaccine_doses
+submitter_participant_id	submitter_diagnosis_id	age_at_diagnosis	disease_code	disease_term	disease_category	covid19_severity	covid19_vaccine_doses
diff --git a/csv/template/example/exposure_template.tsv b/csv/template/example/exposure_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	alcohol_consumption	smoking_status	physical_activity
+submitter_participant_id	alcohol_consumption	smoking_status	physical_activity
diff --git a/csv/template/example/follow_up_template.tsv b/csv/template/example/follow_up_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	age_at_followup	disease_status_at_followup
+submitter_participant_id	age_at_followup	disease_status_at_followup
diff --git a/csv/template/example/imaging_template.tsv b/csv/template/example/imaging_template.tsv
@@ -1 +1 @@
-	image_hosted_url	image_hosted_format	image_processing_pipeline	image_processing_personel	image_processing_null_reason
+image_hosted_url	image_hosted_format	image_processing_pipeline	image_processing_personel	image_processing_null_reason
diff --git a/csv/template/example/measurement_template.tsv b/csv/template/example/measurement_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	age_at_measurement	measurement_code	measurement_term	measurement_result_numeric	measurement_unit	measurement_result_categorical
+submitter_participant_id	age_at_measurement	measurement_code	measurement_term	measurement_result_numeric	measurement_unit	measurement_result_categorical
diff --git a/csv/template/example/medication_template.tsv b/csv/template/example/medication_template.tsv
@@ -1 +1 @@
-	submitter_treatment_id	drug_code	drug_term	drug_dose_units	prescribed_cumulative_drug_dose	actual_cumulative_drug_dose
+submitter_treatment_id	drug_code	drug_term	drug_dose_units	prescribed_cumulative_drug_dose	actual_cumulative_drug_dose
diff --git a/csv/template/example/participant_template.tsv b/csv/template/example/participant_template.tsv
@@ -1 +1 @@
-	study_id	submitter_participant_id	age_at_enrollment	vital_status	cause_of_death	age_at_death
+study_id	submitter_participant_id	age_at_enrollment	vital_status	cause_of_death	age_at_death
diff --git a/csv/template/example/phenotype_template.tsv b/csv/template/example/phenotype_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	age_at_phenotype	phenotype_code	phenotype_term	phenotype_observed	phenotype_duration	phenotype_severity
+submitter_participant_id	age_at_phenotype	phenotype_code	phenotype_term	phenotype_observed	phenotype_duration	phenotype_severity
diff --git a/csv/template/example/procedure_template.tsv b/csv/template/example/procedure_template.tsv
@@ -1 +1 @@
-	submitter_treatment_id	procedure_code	procedure_term	procedure_body_site_code	procedure_body_site_term
+submitter_treatment_id	procedure_code	procedure_term	procedure_body_site_code	procedure_body_site_term
diff --git a/csv/template/example/specimen_template.tsv b/csv/template/example/specimen_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	submitter_specimen_id	specimen_tissue_source_code	specimen_tissue_source_term	specimen_storage	specimen_processing	age_at_specimen_collection	specimen_anatomic_location_code	specimen_anatomic_location_label	specimen_laterality
+submitter_participant_id	submitter_specimen_id	specimen_tissue_source_code	specimen_tissue_source_term	specimen_storage	specimen_processing	age_at_specimen_collection	specimen_anatomic_location_code	specimen_anatomic_location_label	specimen_laterality
diff --git a/csv/template/example/treatment_template.tsv b/csv/template/example/treatment_template.tsv
@@ -1 +1 @@
-	submitter_participant_id	submitter_treatment_id	treatment_type	age_at_treatment	treatment_duration	treatment_intent	treatment_response	treatment_status
+submitter_participant_id	submitter_treatment_id	treatment_type	age_at_treatment	treatment_duration	treatment_intent	treatment_response	treatment_status
diff --git a/scripts/generateFlatDefinitionsTsvFromFullLinkml.py b/scripts/generateFlatDefinitionsTsvFromFullLinkml.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+  Copyright (C) 2022,  icgc-argo
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU Affero General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU Affero General Public License for more details.
+
+  You should have received a copy of the GNU Affero General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+  Authors:
+    Edmund Su
+"""
+
+import json
+import glob
+import urllib
+import requests
+import re
+import numpy as np
+import os
+import random
+import jsonschema
+import string
+import time
+import random
+import hashlib
+import shutil
+import argparse
+import copy
+import pandas as pd
+from linkml_runtime.loaders import yaml_loader
+from linkml_runtime.dumpers import yaml_dumper
+from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition
+
+# Columns of the flattened TSV, in output order.
+COLUMNS=("field","schema","required","dataType","description","comments","exact_mappings","validation")
+FULL_SUFFIX="_full.yaml"
+FLATTENED_SUFFIX="_flattened.tsv"
+
+def main():
+  """
+  Flatten the FULL linkML model into a one-row-per-field TSV for viewing.
+
+  Reads the schema given by -c/--custom_linkml and writes <name>_flattened.tsv
+  into -o/--output_directory (default: the current working directory).
+  """
+  parser = argparse.ArgumentParser(description='The script aims to translate the FULL linkML model to flatten TSV for viewing.')
+  parser.add_argument('-c', '--custom_linkml', dest="custom_linkml", help="The custom full LinkML schema", required=True,type=str)
+  parser.add_argument('-o', '--output_directory', dest="output_directory", help="Output directory to save the flattened TSV", default=os.getcwd(),type=str)
+
+  cli_input= parser.parse_args()
+
+  # Abort on a wrong suffix: the output name is derived by swapping the suffix,
+  # so continuing would write the TSV under the input's own file name.
+  if not cli_input.custom_linkml.endswith(FULL_SUFFIX):
+    parser.error("%s does not end with the correct suffix. Please check the correct yaml was provided." % (cli_input.custom_linkml))
+
+  model=yaml_loader.load(cli_input.custom_linkml, SchemaDefinition)
+
+  definitions=initialize_dataframe()
+
+  populateDataFrame(model,definitions)
+
+  definitions['schema']=[val.lower() for val in definitions['schema'].values.tolist()]
+
+  output_name=os.path.basename(cli_input.custom_linkml)[:-len(FULL_SUFFIX)]+FLATTENED_SUFFIX
+  definitions.to_csv(os.path.join(cli_input.output_directory,output_name),index=False,sep='\t')
+
+def initialize_dataframe():
+  """Return an empty DataFrame with the flattened TSV columns in output order."""
+  df=pd.DataFrame()
+  for column in COLUMNS:
+    df[column]=None
+  return df
+
+def _slot_value(slot_def,key):
+  """Return slot_def[key], or None when the slot does not carry that key."""
+  if key in slot_def:
+    return slot_def[key]
+  return None
+
+def _validation_rules(model,slot_def):
+  """Return the "name:value" validation rules (pattern, bounds, enum) of one slot."""
+  rules=[]
+  for key in ("pattern","minimum_value","maximum_value"):
+    value=_slot_value(slot_def,key)
+    if value is not None:
+      rules.append("%s:%s" % (key,value))
+  slot_range=_slot_value(slot_def,"range")
+  # Guard against a missing range before the substring test on its name.
+  if slot_range is not None and "Menu" in slot_range:
+    rules.append("%s:%s" % ("Enum",",".join(model.enums[slot_range]['permissible_values'])))
+  return rules
+
+def populateDataFrame(model,definitions):
+  """
+  Fill definitions in place with one row per (class, slot) pair of the model.
+
+  model: loaded schema; its classes, slots and enums are read.
+  definitions: DataFrame as returned by initialize_dataframe().
+  """
+  # First pass: create every row with its slot name and owning class.
+  count=0
+  for lm_class in model.classes:
+    for slot in model.classes[lm_class]['slots']:
+      definitions.loc[count,"field"]=slot
+      definitions.loc[count,"schema"]=lm_class
+      count+=1
+
+  # Second pass: look up the remaining columns from each slot definition.
+  for ind in definitions.index.values.tolist():
+    slot_def=model.slots[definitions.loc[ind,"field"]]
+
+    required=_slot_value(slot_def,"required")
+    definitions.loc[ind,"required"]=False if required is None else required
+
+    # Ranges whose name contains "Menu" are treated as enums and reported as strings.
+    slot_range=_slot_value(slot_def,"range")
+    if slot_range is not None and "Menu" in slot_range:
+      definitions.loc[ind,"dataType"]="string"
+    else:
+      definitions.loc[ind,"dataType"]=slot_range
+
+    definitions.loc[ind,"description"]=_slot_value(slot_def,"description")
+
+    # Multi-valued keys are flattened to one ";"-separated cell each.
+    for key in ("comments","exact_mappings"):
+      values=_slot_value(slot_def,key)
+      definitions.loc[ind,key]=";".join(values) if values else None
+
+    validation_rules=_validation_rules(model,slot_def)
+    if len(validation_rules)>0:
+      definitions.loc[ind,"validation"]=";".join(validation_rules)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/generateTemplateTsvFromFullLinkml.py b/scripts/generateTemplateTsvFromFullLinkml.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+  Copyright (C) 2022,  icgc-argo
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU Affero General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU Affero General Public License for more details.
+
+  You should have received a copy of the GNU Affero General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+  Authors:
+    Edmund Su
+"""
+
+import json
+import glob
+import urllib
+import requests
+import re
+import numpy as np
+import os
+import random
+import jsonschema
+import string
+import time
+import random
+import hashlib
+import shutil
+import argparse
+import copy
+import pandas as pd
+from linkml_runtime.loaders import yaml_loader
+from linkml_runtime.dumpers import yaml_dumper
+from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition
+
+def main():
+  """
+  Write one header-only template TSV per class of the FULL linkML model.
+
+  Each file is named <class name, lower-cased>_template.tsv and is saved in
+  -o/--output_directory (default: the current working directory).
+  """
+  parser = argparse.ArgumentParser(description='The script generates an empty template TSV for every class in the FULL linkML model.')
+  parser.add_argument('-c', '--custom_linkml', dest="custom_linkml", help="The custom full LinkML schema", required=True,type=str)
+  parser.add_argument('-o', '--output_directory', dest="output_directory", help="Output directory to save the template TSVs", default=os.getcwd(),type=str)
+
+  cli_input= parser.parse_args()
+
+  # Warning only: output file names come from the class names, not from the input name.
+  if not cli_input.custom_linkml.endswith("_full.yaml"):
+    print("%s does not end with the correct suffix. Please check the correct yaml was provided." % (cli_input.custom_linkml))
+
+  model=yaml_loader.load(cli_input.custom_linkml, SchemaDefinition)
+
+  for lm_class in model.classes:
+    template=pd.DataFrame()
+    populateDataFrame(model,template,lm_class)
+    template.to_csv(os.path.join(cli_input.output_directory,"%s_template.tsv" % (lm_class.lower())),sep='\t',index=False)
+
+def populateDataFrame(model,template,key):
+  """Add one empty column to template for every slot of the class named key."""
+  for slot in model.classes[key]['slots']:
+    template[slot]=None
+
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_participant_id age_at_comorbidity_diagnosis comorbidity_code comorbidity_term comorbidity_treatment_status comorbidity_status
		submitter_participant_id age_at_comorbidity_diagnosis comorbidity_code comorbidity_term comorbidity_treatment_status comorbidity_status
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		study_id submitter_participant_id gender sex_at_birth ethnicity race country_of_birth ancestry height weight highest_education_level_achieved employment type_of_residence number_of_other_people_in_household pregnancy
		study_id submitter_participant_id gender sex_at_birth ethnicity race country_of_birth ancestry height weight highest_education_level_achieved employment type_of_residence number_of_other_people_in_household pregnancy
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_participant_id submitter_diagnosis_id age_at_diagnosis disease_code disease_term disease_category covid19_severity covid19_vaccine_doses
		submitter_participant_id submitter_diagnosis_id age_at_diagnosis disease_code disease_term disease_category covid19_severity covid19_vaccine_doses
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_participant_id alcohol_consumption smoking_status physical_activity
		submitter_participant_id alcohol_consumption smoking_status physical_activity
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_participant_id age_at_followup disease_status_at_followup
		submitter_participant_id age_at_followup disease_status_at_followup
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		image_hosted_url image_hosted_format image_processing_pipeline image_processing_personel image_processing_null_reason
		image_hosted_url image_hosted_format image_processing_pipeline image_processing_personel image_processing_null_reason
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_participant_id age_at_measurement measurement_code measurement_term measurement_result_numeric measurement_unit measurement_result_categorical
		submitter_participant_id age_at_measurement measurement_code measurement_term measurement_result_numeric measurement_unit measurement_result_categorical
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_treatment_id drug_code drug_term drug_dose_units prescribed_cumulative_drug_dose actual_cumulative_drug_dose
		submitter_treatment_id drug_code drug_term drug_dose_units prescribed_cumulative_drug_dose actual_cumulative_drug_dose
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		study_id submitter_participant_id age_at_enrollment vital_status cause_of_death age_at_death
		study_id submitter_participant_id age_at_enrollment vital_status cause_of_death age_at_death
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		submitter_participant_id age_at_phenotype phenotype_code phenotype_term phenotype_observed phenotype_duration phenotype_severity
		submitter_participant_id age_at_phenotype phenotype_code phenotype_term phenotype_observed phenotype_duration phenotype_severity