Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/kbdev 1133 incorrect matching #108

Draft
wants to merge 31 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e60a2e8
Merge pull request #106 from bcgsc/master
mathieulemieux Nov 9, 2023
37c3cec
Change args names in compare_positional_variants
mathieulemieux Nov 16, 2023
3be1b88
Remove unwanted ToDo
mathieulemieux Nov 16, 2023
d71f32e
Precision in compare_positional_variants docstring
mathieulemieux Nov 20, 2023
f53f80f
Formatting and comment in match_positional_variant
mathieulemieux Nov 20, 2023
126eec3
Move up type filtering in match_positional_variant
mathieulemieux Nov 20, 2023
69e8854
Refac. type_screening to structural_type_screening
mathieulemieux Nov 20, 2023
c630f8f
Fix type filtering in match_positional_variant
mathieulemieux Nov 20, 2023
5b9813f
Convert variant_types_details filter obj to list
mathieulemieux Nov 20, 2023
8387b6b
Black formating
mathieulemieux Nov 20, 2023
5cabf9f
Black formating
mathieulemieux Nov 22, 2023
978ba11
Fix match_positional_variant for small variants
mathieulemieux Nov 22, 2023
ab845d7
Black formating
mathieulemieux Nov 22, 2023
6771fb4
delinsSpecialHandling in match_positional_variant
mathieulemieux Nov 22, 2023
43ea745
Fix fusion bug in stripDisplayName()
mathieulemieux Nov 27, 2023
90f764c
Add ToDo in TestStripDisplayName
mathieulemieux Nov 27, 2023
1bd6be8
Remove SMALL_MUTATION_VARIANT_ALIASES from const.
mathieulemieux Nov 27, 2023
5750f5f
Add more data to tests
mathieulemieux Nov 27, 2023
642b9f9
Add structural_type_adjustment()
mathieulemieux Nov 27, 2023
4e3073c
update match_positional_variant docstring
mathieulemieux Nov 27, 2023
a927462
Fix match.py imports
mathieulemieux Nov 27, 2023
3b15354
Better comments in compare_positional_variants()
mathieulemieux Nov 27, 2023
46aa35c
Fix structural_type_screening() return type hint
mathieulemieux Nov 27, 2023
29ce9c9
Refactor category_variant_similarTo()
mathieulemieux Nov 27, 2023
973b7d0
Better comments in match_positional_variant()
mathieulemieux Nov 27, 2023
8f84d2a
More comments
mathieulemieux Nov 27, 2023
9683d73
Refactoring delins handling in match_pos_var
mathieulemieux Nov 27, 2023
104c2f0
Refactoring PV matching in match_pos_var
mathieulemieux Nov 27, 2023
f0061cd
Refactor category_variant_extension()
mathieulemieux Nov 27, 2023
3af74c5
Multiple stuff - need to be sorted later
mathieulemieux Jan 24, 2024
a7465b9
deleting script files not to be commited
mathieulemieux Jan 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
507 changes: 507 additions & 0 deletions cancer_genes.txt

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions data_3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
{
"TSC2:c.3365G>C": {
"comment": "KBDEV-1024",
"matches": {
"displayName": [
""
],
"type": [
""
]
},
"does_not_matches":{
"displayName": [
""
],
"type": [
""
]
}
},
"TSC2:c.4700G>A": {
"comment": "",
"matches": {
"displayName": [
"ENST00000219476:c.4700G>A",
"ENST00000219476:r.4810G>A",
"TSC2 mutation",
"TSC2:p.G1567D",
"chr16:g.2086230G>A"
],
"type": [
"missense mutation",
"mutation",
"substitution"
]
},
"does_not_matches":{
"displayName": [
"TSC2 nonsense"
],
"type": [
"nonsense"
]
}
},
"KRAS:p.G12D": {
"comment": "",
"matches": {
"displayName": [
"ENST00000256078:r.225_226delinsAC",
"ENST00000311936.7:c.35G>A",
"KRAS mutation",
"KRAS:c.35G>A",
"KRAS:p.(G12_G13)mut",
"KRAS:p.?12mut",
"KRAS:p.G12",
"KRAS:p.G12D",
"KRAS:p.G12X",
"KRAS:p.G12mut"
],
"type": [
"indel",
"missense",
"missense mutation",
"mutation",
"substitution"
]
},
"does_not_matches":{
"displayName": [
"chr12:g.25245349_25245351delinsGCT",
"cosm516",
"ensp00000452512:p.G12V"
],
"type": [
"nonsense"
]
}
},
"TP53:p.M237I": {
"comment": "GERO-299",
"matches": {
"displayName": [
"ENST00000269305:r.901G>T",
"TP53 missense",
"TP53 mutation",
"TP53:p.M237I",
"TP53:p.M237X",
"chr17:g.7577570C>T"
],
"type": [
"missense",
"missense mutation",
"mutation",
"substitution"
]
},
"does_not_matches":{
"displayName": [
"TP53 nonsense"
],
"type": [
"nonsense"
]
}
}
}

########################
## KBDEV-1038
########################
# "FGFR4:p.N535K",
# "EGFR:p.D942N",
########################
## KBDEV-1052
########################
# "EGFR:c.28246G>A",
# "chr7:g.55198839G>A",
# "EGFR:p.D942N",
# '(PCM1,JAK2):fusion(r.6280,r.1821)', # dummy test for Infers edges
########################
## KBDEV-1054
########################
# "ERBB2:p.R814C",
########################
## KBDEV-1056
########################
"ENST00000340107:c.1212dupC",
"ENST00000340107:c.1212dupACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT",
"ENST00000340107:c.1212_1213insC",
"ENST00000340107:c.1212C>A",
"chr1:g.33344590_33344592del",
"FGFR3:g.5000_5001del",
"FGFR3:g.5000_5100del",
"ENST00000340107:c.9002_9050delinsTTT",
"ENST00000340107:c.9002_9051delinsTTT",
########################
## GERO-299
########################
# "chr17:g.7674252C>T",
# "ENST00000269305:c.711G>A",
# "TP53:p.M237I",
########################
## KBDEV-1024
########################
# "TSC2:c.3365G>C",
# "NM_000548.5:c.3365G>A",
# "NM_000548.5:p.Arg1122His",
# "TSC2: p.R112H",
########################
## KBDEV-1044
########################
# "TSC2:c.4700G>A",
########################
## OTHER Ex.
########################
# "ENST00000219476:c.4700_4701delinsAT",
# "NM_000548.5:c.3365G>A",
# "TSC2:p.R112H",
# "TSC2:p.G1567D",
# "KRAS:p.G12D",
24 changes: 24 additions & 0 deletions displayStatement.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * Print the display sentence of a GraphKB statement.
 *
 * Usage: node displayStatement.js '<statement as a JSON string>'
 *
 * The statement is read from the first command-line argument, echoed back
 * under a banner line, converted to a sentence with
 * `sentenceTemplates.generateStatementSentence`, and the resulting label is
 * written to stdout.
 */
const { schema, sentenceTemplates } = require('@bcgsc-pori/graphkb-schema');

// Preview callback handed to the sentence generator; defers to the schema.
const previewFunction = (obj) => schema.getPreview(obj);

// Example of the statement shape the original author used while developing:
// const statement = {
//     displayName: 'displayName',
//     '@class': 'Statement',
//     '@rid': '22:0',
//     displayNameTemplate: 'Given {conditions} {relevance} applies to {subject} ({evidence})',
//     relevance: { displayName: 'Mood Swings', '@rid': '1' },
//     conditions: [{ displayName: 'Low blood sugar', '@class': 'Disease', '@rid': '2' }],
//     subject: { displayName: 'hungertitis', '@rid': '3', '@class': 'Disease' },
//     evidence: [{ displayName: 'A reputable source', '@rid': '4' }],
// };
// Expected sentence for that example (per the original author's note):
// Given Low blood sugar Mood Swings applies to hungertitis (A reputable source)

const rawStatement = process.argv[2];

// Fail with a usage hint instead of letting JSON.parse(undefined) throw an
// opaque SyntaxError when the argument is missing.
if (rawStatement === undefined) {
    console.error("Usage: node displayStatement.js '<statement JSON>'");
    process.exit(1);
}

console.log('################################');
console.log(rawStatement);
const statement = JSON.parse(rawStatement);

const { content } = sentenceTemplates.generateStatementSentence(previewFunction, statement);

// Declared with `const`: the original assignment created an implicit global,
// which throws a ReferenceError in strict mode / ES modules.
const statementLabel = schema.getPreview(content);
console.log(statementLabel);
56 changes: 56 additions & 0 deletions genes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
AKT1
APC
ATM
AXIN2
BAP1
BLM
BMPR1A
BRCA1
BRCA2
BRIP1
CBL
CDH1
CDK4
CDKN2A
CHEK2
DICER1
EGFR
EPCAM
ETV6
EZH2
FH
FLCN
GATA2
HRAS
KIT
MEN1
MET
MLH1
MSH2
MSH6
MUTYH
NBN
NF1
PALB2
PDGFRA
PMS2
PTCH1
PTEN
PTPN11
RAD51C
RAD51D
RB1
RET
RUNX1
SDHA
SDHB
SDHC
SDHD
SMAD4
SMARCA4
STK11
TP53
TSC1
TSC2
VHL
WT1
59 changes: 31 additions & 28 deletions graphkb/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,44 +175,47 @@ def __getitem__(self, key):
"nonsense": ">",
}

# Constants for match.type_screening() / match.structural_type_screening()
# [KBDEV-1056, KBDEV-1133]
DEFAULT_NON_STRUCTURAL_VARIANT_TYPE = 'mutation'
STRUCTURAL_VARIANT_ALIASES = [
    "rearrangement",
    "structural variant",
]
STRUCTURAL_VARIANT_SIZE_THRESHOLD = 48  # bp
# Variant type names treated as structural. Each name is listed exactly once,
# in alphabetical order, so additions are easy to review and duplicates are
# easy to spot.
STRUCTURAL_VARIANT_TYPES = [
    "amplification",
    "copy gain",
    "copy loss",
    "copy number gain",
    "copy number loss",
    "copy number variant",
    "copy number variation",
    "copy variant",
    "deep deletion",
    "deletion",
    "deletion polymorphism",
    "disruptive fusion",
    "domain duplication",
    "duplication",
    "focal amplification",
    "fusion",
    "gene deletion",
    "in-frame deletion",
    "in-frame fusion",
    "in-frame insertion",
    "indel",
    "insertion",
    "internal duplication",
    "internal tandem duplication",
    "inversion",
    "inverted translocation",
    "itd",
    "kinase domain duplication",
    "low level copy gain",
    "oncogenic fusion",
    "out-of-frame fusion",
    "rearrangement",
    "shallow deletion",
    "structural variant",
    "tandem duplication",
    "translocation",
]
Loading
Loading