Skip to content

Commit

Permalink
add mutations and molecular profiles
Browse files Browse the repository at this point in the history
  • Loading branch information
LoesvdBiggelaar committed Jun 5, 2024
1 parent b8c8a17 commit 0d2af3e
Show file tree
Hide file tree
Showing 10 changed files with 6,673 additions and 82 deletions.
847 changes: 847 additions & 0 deletions .$model.drawio.bkp

Large diffs are not rendered by default.

4,383 changes: 4,383 additions & 0 deletions api_docs.json

Large diffs are not rendered by default.

69 changes: 42 additions & 27 deletions cbioportal/adapters/cbioportal_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def get_data_from_api(self, _type):
if _type in [SampleField, samplePatientAssociationField]:
if "samples" in self.api_called: return self.api_called["samples"]
self.api_called["samples"] = []
# studies = self.cbioportal.Studies.getAllStudiesUsingGET().result()
studies = self.check_api_called(StudyField)
print(f"Getting samples of {len(studies)} studies")
for i, study in enumerate(studies):
Expand All @@ -75,12 +74,11 @@ def get_data_from_api(self, _type):
if "sample_lists" in self.api_called: return self.api_called["sample_lists"]
self.api_called["sample_lists"] = self.cbioportal.Sample_Lists.getAllSampleListsUsingGET().result()[0:self.limit]
return self.api_called["sample_lists"]

if _type == GeneField:
if "genes" in self.api_called: return self.api_called["genes"]
self.api_called["genes"] = self.cbioportal.Genes.getAllGenesUsingGET().result()[0:self.limit]
return self.api_called["genes"]
if _type in [GenePanelField, GenePanelGeneAssociationField]:
if _type in [GenePanelField]:
if "gene_panels" in self.api_called: return self.api_called["gene_panels"]
self.api_called["gene_panels"] = self.cbioportal.Gene_Panels.getAllGenePanelsUsingGET().result()[0:self.limit]
return self.api_called["gene_panels"]
Expand All @@ -92,36 +90,42 @@ def get_data_from_api(self, _type):
if "clinical_attributes" in self.api_called: return self.api_called["clinical_attributes"]
self.api_called["clinical_attributes"] = self.cbioportal.Clinical_Attributes.getAllClinicalAttributesUsingGET().result()[0:self.limit]
return self.api_called["clinical_attributes"]
if _type in [PatientSampleStudyEntityField, PatientToPatientSampleStudyEntityField, SampleToPatientSampleStudyEntityField, StudyToPatientSampleStudyEntityField]:
print("not correctly implemeted yet!")
return []

if "patient_sample_study_entities" in self.api_called: return self.api_called["patient_sample_study_entities"]
patients = self.check_api_called(PatientField)
samples = self.check_api_called(SampleField)
studies = self.check_api_called(StudyField)
self.api_called["patient_sample_study_entities"] = []
counter = 0
for _id1 in patients:
for _id2 in samples:
for _id3 in studies:
id = hash(f"{_id1.patientId}_{_id2.sampleId}_{_id3.studyId}")
self.api_called["patient_sample_study_entities"].append({"patientId":_id1.patientId, "sampleId":_id2.sampleId, "studyId":_id3.studyId, "id":id})
counter += 1
if counter == self.limit:
return self.api_called["patient_sample_study_entities"]
return self.api_called["patient_sample_study_entities"]

if _type in [MutationField, mutationToSampleField, mutationToGeneField, mutationToStudyField, mutationToPatientField, mutationToMolecularProfileField]:
if "mutations" in self.api_called: return self.api_called["mutations"]
profiles = self.check_api_called(MolecularProfileField)
# get a list of all molecular profile ids of all the profiles
molecularProfileIds = [profile["molecularProfileId"] for profile in profiles]
print(f"Getting mutations in {len(molecularProfileIds)} molecular profiles")
mutations_per_profile = {}
def get_mutations_in_profile(profile_id):
    """Fetch the mutations of a single molecular profile.

    Issues one POST whose filter contains only ``profile_id`` and returns
    the result sliced to ``[0:self.limit]``. If ``HTTPNotFound`` is raised
    while fetching, an empty list is returned instead.
    """
    try:
        request = self.cbioportal.Mutations.fetchMutationsInMultipleMolecularProfilesUsingPOST(
            mutationMultipleStudyFilter={"molecularProfileIds": [profile_id]}
        )
        fetched = request.result()
        return fetched[0:self.limit]
    except HTTPNotFound:
        return []
for i, profile_id in enumerate(molecularProfileIds):
mutations_per_profile[profile_id] = get_mutations_in_profile(profile_id)
print(f"Profile {i+1}/{len(molecularProfileIds)}: {len(mutations_per_profile[profile_id])} mutations")

self.api_called["mutations"] = [mutation for mutations in mutations_per_profile.values() for mutation in mutations]
print(f"found {len(self.api_called['mutations'])} mutations in {len(molecularProfileIds)} molecular profiles")
return self.api_called["mutations"]
if _type == "test":
print(dir(self.cbioportal))
gene_panels = self.cbioportal.Gene_Panels.getAllGenePanelsUsingGET().result()
print(gene_panels[0])


else:
raise ValueError(f"Node type {_type} not supported.")

def _yield_node_type(self, items, node_type):
for item in items:
_id = item[node_type._ID.value]
if isinstance(node_type._ID.value, str):
_id = item[node_type._ID.value]
elif isinstance(node_type._ID.value, list):
_id = str(hash("_".join([str(item[field]) for field in node_type._ID.value])))
else:
raise ValueError("ID field must be a string or a list of strings.")

_type = node_type._LABEL.value
_props = {"version": self.version}
for field in node_type:
Expand Down Expand Up @@ -151,8 +155,19 @@ def get_nodes(self):

def _yield_edge_type(self, items, edge_type):
for item in items:
_subject = item[edge_type._SUBJECT.value]
_object = item[edge_type._OBJECT.value]
if isinstance(edge_type._SUBJECT.value, str):
_subject = item[edge_type._SUBJECT.value]
elif isinstance(edge_type._SUBJECT.value, list):
_subject = str(hash("_".join([str(item[field]) for field in edge_type._SUBJECT.value])))
else:
raise ValueError("_subject field must be a string or a list of strings.")

if isinstance(edge_type._OBJECT.value, str):
_object = item[edge_type._OBJECT.value]
elif isinstance(edge_type._OBJECT.value, list):
_object = str(hash("_".join([str(item[field]) for field in edge_type._OBJECT.value])))
else:
raise ValueError("_subject field must be a string or a list of strings.")

try:
item[edge_type._ID.value]
Expand Down
53 changes: 30 additions & 23 deletions cbioportal/adapters/edge_field_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,6 @@ class samplePatientAssociationField(Enum):
_OBJECT = "patientId"
_LABEL = "fromPatient"

class GenePanelGeneAssociationField(Enum):
"""
Define possible fields the adapter can provide for genePanel-gene associations.
"""
_SUBJECT = "genePanelId"
_OBJECT = "genes"
_LABEL = "hasGene"

class MolecularProfiletoStudyField(Enum):
"""
Define possible fields the adapter can provide for molecular profile - study associations.
Expand Down Expand Up @@ -75,27 +67,42 @@ class CopyNumberSegmentToSampleField(Enum):
_OBJECT = "sampleId"
_LABEL = "fromSample"

class mutationToSampleField(Enum):
    """
    Define possible fields the adapter can provide for mutation - sample associations.

    The subject side identifies the mutation, the object side names the
    sample field of the same mutation record.
    """
    # Edge source: a list of field names, i.e. a composite key. It lists the
    # same fields as MutationField._ID.
    # NOTE(review): the adapter appears to join and hash list-valued keys the
    # same way for node ids and edge subjects, so this should resolve to the
    # mutation node id -- confirm.
    _SUBJECT = ["molecularProfileId", "sampleId", "patientId", "entrezGeneId", "studyId"]
    # Edge target: the single field holding the sample identifier.
    _OBJECT = "sampleId"
    # Edge label; schema_config.yaml declares a matching `input_label: fromSample`.
    _LABEL = "fromSample"

class PatientToPatientSampleStudyEntityField(Enum):
class mutationToGeneField(Enum):
"""
Define possible fields the adapter can provide for patient - patient sample study entity associations.
Define possible fields the adapter can provide for mutation - gene associations.
"""
_SUBJECT = "patientId"
_OBJECT = "id"
_LABEL = "partOf"
_SUBJECT = ["molecularProfileId", "sampleId", "patientId", "entrezGeneId", "studyId"]
_OBJECT = "entrezGeneId"
_LABEL = "fromGene"

class SampleToPatientSampleStudyEntityField(Enum):
class mutationToStudyField(Enum):
"""
Define possible fields the adapter can provide for sample - patient sample study entity associations.
Define possible fields the adapter can provide for mutation - study associations.
"""
_SUBJECT = "sampleId"
_OBJECT = "id"
_LABEL = "partOf"
_SUBJECT = ["molecularProfileId", "sampleId", "patientId", "entrezGeneId", "studyId"]
_OBJECT = "studyId"
_LABEL = "hasStudy"

class StudyToPatientSampleStudyEntityField(Enum):
class mutationToPatientField(Enum):
"""
Define possible fields the adapter can provide for study - patient sample study entity associations.
Define possible fields the adapter can provide for mutation - patient associations.
"""
_SUBJECT = "studyId"
_OBJECT = "id"
_LABEL = "partOf"
_SUBJECT = ["molecularProfileId", "sampleId", "patientId", "entrezGeneId", "studyId"]
_OBJECT = "patientId"
_LABEL = "fromPatient"

class mutationToMolecularProfileField(Enum):
"""
Define possible fields the adapter can provide for mutation - molecular profile associations.
"""
_SUBJECT = ["molecularProfileId", "sampleId", "patientId", "entrezGeneId", "studyId"]
_OBJECT = "molecularProfileId"
_LABEL = "fromMolecularProfile"
45 changes: 34 additions & 11 deletions cbioportal/adapters/node_field_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class SampleListField(Enum):
NAME = "name"
CATEGORY = "category"
SAMPLE_COUNT = "sampleCount"
SAMPLE_IDS = "sampleIds"

class GeneField(Enum):
_ID = "entrezGeneId"
Expand Down Expand Up @@ -111,14 +110,38 @@ class CopyNumberSegmentField(Enum):
SEGMENT_MEAN = "segmentMean"
START = "start"
STUDY_ID = "studyId"


class PatientSampleStudyEntityField(Enum):
"""
Define possible fields the adapter can provide for patient-sample-study entities.
"""
PATIENT_ID = "patientId"
SAMPLE_ID = "sampleId"
STUDY_ID = "studyId"
_LABEL = "PatientSampleStudyEntity"
_ID = "id"
# EXAMPLE
# Mutation(alleleSpecificCopyNumber=None, aminoAcidChange=None, center='MSKCC', chr='15', driverFilter=None, driverFilterAnnotation=None, driverTiersFilter=None, driverTiersFilterAnnotation=None, endPosition=30700159, entrezGeneId=101059918, gene=None, keyword='GOLGA8R W275 missense', molecularProfileId='acbc_mskcc_2015_mutations', mutationStatus='SOMATIC', mutationType='Missense_Mutation', namespaceColumns=None, ncbiBuild='GRCh37', normalAltCount=None, normalRefCount=None, patientId='AdCC3T', proteinChange='W275R', proteinPosEnd=275, proteinPosStart=275, referenceAllele='A', refseqMrnaId='NM_001282484.1', sampleId='AdCC3T', startPosition=30700159, studyId='acbc_mskcc_2015', tumorAltCount=20, tumorRefCount=80, uniquePatientKey='QWRDQzNUOmFjYmNfbXNrY2NfMjAxNQ', uniqueSampleKey='QWRDQzNUOmFjYmNfbXNrY2NfMjAxNQ', validationStatus='Unknown', variantAllele='G', variantType='SNP')
class MutationField(Enum):
    """
    Define possible fields the adapter can provide for mutations.

    Members without a leading underscore name attributes of a mutation
    record (see the example record in the comment above this class).
    ``_ID`` and ``_LABEL`` are metadata rather than record attributes.
    """
    ALLELE_SPECIFIC_COPY_NUMBER = "alleleSpecificCopyNumber"
    AMINO_ACID_CHANGE = "aminoAcidChange"
    CENTER = "center"
    CHROMOSOME = "chr"
    DRIVER_FILTER = "driverFilter"
    DRIVER_FILTER_ANNOTATION = "driverFilterAnnotation"
    DRIVER_TIERS_FILTER = "driverTiersFilter"
    DRIVER_TIERS_FILTER_ANNOTATION = "driverTiersFilterAnnotation"
    END_POSITION = "endPosition"
    KEYWORD = "keyword"
    MUTATION_STATUS = "mutationStatus"
    MUTATION_TYPE = "mutationType"
    NAMESPACE_COLUMNS = "namespaceColumns"
    NCBI_BUILD = "ncbiBuild"
    NORMAL_ALT_COUNT = "normalAltCount"
    NORMAL_REF_COUNT = "normalRefCount"
    PROTEIN_CHANGE = "proteinChange"
    PROTEIN_POS_END = "proteinPosEnd"
    PROTEIN_POS_START = "proteinPosStart"
    REFERENCE_ALLELE = "referenceAllele"
    REFSEQ_MRNA_ID = "refseqMrnaId"
    START_POSITION = "startPosition"
    TUMOR_ALT_COUNT = "tumorAltCount"
    TUMOR_REF_COUNT = "tumorRefCount"
    UNIQUE_PATIENT_KEY = "uniquePatientKey"
    UNIQUE_SAMPLE_KEY = "uniqueSampleKey"
    VALIDATION_STATUS = "validationStatus"
    VARIANT_ALLELE = "variantAllele"
    VARIANT_TYPE = "variantType"
    # Composite identifier: a list of field names rather than a single one.
    # The five identifying fields are not repeated as property members above.
    # NOTE(review): the key carries no position or allele, so two mutations of
    # the same gene in the same sample and profile would share an id --
    # confirm this is intended.
    # NOTE(review): the adapter derives the id from these values with the
    # built-in hash(); str hashes are salted per interpreter process unless
    # PYTHONHASHSEED is fixed, so ids differ between runs -- confirm this is
    # acceptable.
    _ID = ["molecularProfileId", "sampleId", "patientId", "entrezGeneId", "studyId"]
    # Node label; schema_config.yaml maps `input_label: Mutation` to `mutation`.
    _LABEL = "Mutation"
53 changes: 49 additions & 4 deletions config/schema_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,41 @@ clinical attribute:
patientAttribute: str
priority: str

# Mutation nodes. `input_label` matches MutationField._LABEL ("Mutation").
# NOTE(review): every property is typed `str`, but the example record in
# node_field_classes.py shows integers for positions and read counts
# (e.g. endPosition=30700159, tumorAltCount=20) -- confirm `str` is intended.
# NOTE(review): `c_position` has no counterpart among the MutationField
# members; conversely the center, driver filter/tier and namespace column
# members defined there are not listed here -- confirm.
# NOTE(review): keys here are snake_case while the API field names are
# camelCase; how the adapter names the properties it emits is not visible
# from this entry -- verify they line up.
mutation:
  is_a: molecular entity
  represented_as: node
  input_label: Mutation
  properties:
    allele_specific_copy_number: str
    amino_acid_change: str
    c_position: str
    chromosome: str
    end_position: str
    start_position: str
    keyword: str
    mutation_status: str
    mutation_type: str
    ncbi_build: str
    normal_alt_count: str
    normal_ref_count: str
    protein_change: str
    protein_pos_start: str
    protein_pos_end: str
    reference_allele: str
    variant_allele: str
    refseq_mrna_id: str
    tumor_alt_count: str
    tumor_ref_count: str
    unique_sample_key: str
    unique_patient_key: str
    validation_status: str
    variant_type: str



# ---------------------------------------------------
# associations

is child of:
is_a: [association, disease disease association]
represented_as: edge
Expand Down Expand Up @@ -127,12 +162,22 @@ has clinical attribute:
represented_as: edge
input_label: hasClinicalAttribute

has gene:
part of:
is_a: association
represented_as: edge
input_label: hasGene
input_label: partOf

part of:
from sample:
is_a: association
represented_as: edge
input_label: fromSample

from gene:
is_a: association
represented_as: edge
input_label: fromGene

from molecular profile:
is_a: association
represented_as: edge
input_label: partOf
input_label: fromMolecularProfile
36 changes: 19 additions & 17 deletions create_knowledge_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,31 @@
bc = BioCypher()

node_types = [
PatientSampleStudyEntityField,
MutationField,
MolecularProfileField,
SampleListField,
ClinicalAttributesField,
# SampleListField,
# ClinicalAttributesField,
GeneField,
GenePanelField,
CancerTypeField,
# GenePanelField,
# CancerTypeField,
StudyField,
PatientField,
SampleField
SampleField,
]
edge_types = [
PatientToPatientSampleStudyEntityField,
SampleToPatientSampleStudyEntityField,
StudyToPatientSampleStudyEntityField,
SampleListToStudyField,
StudyToClinicalDataField,
MolecularProfiletoStudyField,
GenePanelGeneAssociationField,
DiseaseDiseaseAssociationField,
StudyDiseaseAssociationField,
studyPatientAssociationField,
samplePatientAssociationField
# SampleListToStudyField,
# StudyToClinicalDataField,
# MolecularProfiletoStudyField,
# DiseaseDiseaseAssociationField,
# StudyDiseaseAssociationField,
# studyPatientAssociationField,
# samplePatientAssociationField,
mutationToMolecularProfileField,
mutationToSampleField,
mutationToGeneField,
mutationToStudyField,
mutationToPatientField

]

# Create a protein adapter instance
Expand Down
4 changes: 4 additions & 0 deletions get_api_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from bravado.client import SwaggerClient

# Build a bravado client from the cBioPortal API description, with request,
# response and swagger-spec validation all switched off.
# NOTE(review): the returned client is not bound to a name, so nothing else
# in this script can use it -- confirm whether this file is only a smoke test.
SwaggerClient.from_url(
    "https://www.cbioportal.org/api/v2/api-docs",
    config={
        "validate_requests": False,
        "validate_responses": False,
        "validate_swagger_spec": False,
    },
)
Loading

0 comments on commit 0d2af3e

Please sign in to comment.