Skip to content

Commit

Permalink
Merge pull request #176 from eastgenomics/release-v2.8.0
Browse files Browse the repository at this point in the history
Merge release v2.8.0 to master (#176)

Co-Authored-By: Jethro Rainford <[email protected]>
Co-Authored-By: mattgarner <[email protected]>
  • Loading branch information
mattgarner and jethror1 authored Feb 29, 2024
2 parents 4e56485 + c1dc80c commit 19ae999
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 51 deletions.
4 changes: 2 additions & 2 deletions dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"title": "eggd_generate_variant_workbook",
"summary": "Create Excel workbook from VEP annotated vcf",
"dxapi": "1.0.0",
"version": "2.7.1",
"whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); * v2.5.0 Better parsing of CombinedVariantOutput files as additional files; * v2.6.0 Add variant counts as DNAnexus file details to the .xlsx workbook; *v2.7.0 Handle pre-split and non VEP annotated VCFs, improvements to Dias reporting templates and Excel data validation; * v2.7.1 v.2.7.0 app was accidentally published on DNAnexus before testing; so a new version is created. Everything except version number is the same as v2.7.0",
"version": "2.8.0",
"whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); * v2.5.0 Better parsing of CombinedVariantOutput files as additional files; * v2.6.0 Add variant counts as DNAnexus file details to the .xlsx workbook; *v2.7.0 Handle pre-split and non VEP annotated VCFs, improvements to Dias reporting templates and Excel data validation; * v2.7.1 v.2.7.0 app was accidentally published on DNAnexus before testing; so a new version is created. Everything except version number is the same as v2.7.0; * v2.8.0 Add PID box in summary, swap BS1 and BA1 in interpret table, increase the number of unlocked rows to 500",
"authorizedUsers": [
"org-emee_1"
],
Expand Down
116 changes: 67 additions & 49 deletions resources/home/dnanexus/generate_workbook/utils/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
# number of rows and columns to unlock next to the
# populated table in all sheets when run for the dias pipeline;
# required by the 'lock_sheet' function
ROW_TO_UNLOCK = 200
ROW_TO_UNLOCK = 500
COL_TO_UNLOCK = 200


Expand Down Expand Up @@ -394,7 +394,7 @@ def dias_summary(self) -> None:
self.summary.cell(1, 1).value = "Sample ID:"
self.summary.cell(1, 5).value = "Clinical Indication(s):"
self.summary.cell(2, 5).value = "Panel(s):"
self.summary.cell(28, 1).value = "Total records:"
self.summary.cell(33, 1).value = "Total records:"

# get sample name from vcf, should only be one but handle everything
# list-wise just in case
Expand All @@ -418,7 +418,7 @@ def dias_summary(self) -> None:
json.dump(details_dict, details_json)

# write total rows in each sheet
count = 28
count = 33

# cells to make bold
to_bold = []
Expand Down Expand Up @@ -480,24 +480,29 @@ def dias_summary(self) -> None:


# write center reporting section tables
self.summary.cell(9, 2).value = "Phenotype:"

self.summary.cell(16, 2).value = "Panels"
self.summary.cell(16, 3).value = "Excel file"
self.summary.cell(16, 4).value = "Comments"
self.summary.cell(16, 6).value = "Analysis by"
self.summary.cell(16, 7).value = "Date"
self.summary.cell(16, 8).value = "Checked by"
self.summary.cell(16, 9).value = "Date"

self.summary.cell(21, 2).value = "Sanger sequencing confirmation"
self.summary.cell(22, 2).value = "Gene"
self.summary.cell(22, 3).value = "NM_#"
self.summary.cell(22, 4).value = "Coordinate"
self.summary.cell(22, 5).value = "cDNA"
self.summary.cell(22, 6).value = "Protein change"
self.summary.cell(22, 7).value = "WS#"
self.summary.cell(22, 8).value = "Confirmed (Y/N)"
self.summary.cell(2, 1).value = "Lab no."
self.summary.cell(2, 3).value = "First name"
self.summary.cell(2, 4).value = "Last name"
self.summary.cell(4, 1).value = "Number checked"
self.summary.cell(5, 1).value = "Summary coverage"
self.summary.cell(14, 2).value = "Phenotype:"

self.summary.cell(21, 2).value = "Panels"
self.summary.cell(21, 3).value = "Excel file"
self.summary.cell(21, 4).value = "Comments"
self.summary.cell(21, 6).value = "Analysis by"
self.summary.cell(21, 7).value = "Date"
self.summary.cell(21, 8).value = "Checked by"
self.summary.cell(21, 9).value = "Date"

self.summary.cell(26, 2).value = "Sanger sequencing confirmation"
self.summary.cell(27, 2).value = "Gene"
self.summary.cell(27, 3).value = "NM_#"
self.summary.cell(27, 4).value = "Coordinate"
self.summary.cell(27, 5).value = "cDNA"
self.summary.cell(27, 6).value = "Protein change"
self.summary.cell(27, 7).value = "WS#"
self.summary.cell(27, 8).value = "Confirmed (Y/N)"

# merge some title columns that have longer text
self.summary.merge_cells(
Expand All @@ -507,18 +512,24 @@ def dias_summary(self) -> None:
self.summary.merge_cells(
start_row=2, end_row=2, start_column=6, end_column=20)
self.summary.merge_cells(
start_row=9, end_row=9, start_column=2, end_column=5)
start_row=14, end_row=14, start_column=2, end_column=5)
self.summary.merge_cells(
start_row=21, end_row=21, start_column=2, end_column=8)
start_row=26, end_row=26, start_column=2, end_column=8)
self.summary.merge_cells(
start_row=16, end_row=16, start_column=4, end_column=5)
start_row=21, end_row=21, start_column=4, end_column=5)
self.summary.merge_cells(
start_row=5, end_row=11, start_column=1, end_column=1)

# vertically centre the coverage title within its merged rows
self.summary["A5"].alignment = Alignment(
wrapText=True, vertical="center")

# titles to set to bold
to_bold += [
"A1", "A28", "B1", "B9", "B16", "B21", "B22",
"B28", "B29", "C16", "C22", "D16", "D22",
"E1", "E2", "E22", "F16", "F22", "G16",
"G22", "H16", "H22", "I16"
"A1", "A2", "A4", "A5", "A33", "B1", "B14", "B21", "B26",
"B27", "B33", "B34", "C2", "C21", "C27", "D2", "D21", "D27",
"E1", "E2", "E27", "F21", "F27", "G21", "G27", "H21", "H27",
"I21"
]

for cell in to_bold:
Expand All @@ -538,31 +549,36 @@ def dias_summary(self) -> None:
blueFill = PatternFill(patternType="solid", start_color="0CABA8")

colour_cells = [
"B9", "B16", "B21", "B22", "C16", "C22", "D16", "D22",
"E22", "F16", "F22", "G16", "G22", "H16", "H22", "I16"
"A2", "A4", "A5", "B2", "B14", "B21", "B26", "B27",
"C2", "C21", "C27", "D2", "D21", "D27", "E27", "F21",
"F27", "G21", "G27", "H21", "H27", "I21"
]
for cell in colour_cells:
self.summary[cell].fill = blueFill

# set borders around table areas
row_ranges = [
'B9:E9', 'B10:E10', 'B11:E11', 'B12:E12', 'B13:E13',
'B16:I16', 'B17:I17', 'B18:I18', 'B21:H21', 'B22:H22',
'B23:H23', 'B24:H24', 'B25:H25'
'A2:D2', 'A3:D3', 'A4:D4', 'A5:A5', 'B14:E14', 'B15:E15',
'B16:E16', 'B17:E17', 'B18:E18', 'B21:I21', 'B22:I22',
'B23:I23', 'B26:H26', 'B27:H27', 'B28:H28', 'B29:H29', 'B30:H30'
]
for row in row_ranges:
for cells in self.summary[row]:
for cell in cells:
cell.border = THIN_BORDER
if self.args.lock_sheet:
cell_to_unlock = ["B10", "C10", "D10", "E10", "B11", "C11", "D11",
"E11", "B12", "C12", "D12", "E12", "B13", "C13",
"D13", "E13", "B17", "C17", "D17", "E17", "F17",
"G17", "H17", "I17", "B18", "C18", "D18", "E18",
"F18", "G18", "H18", "I18", "B23", "C23", "D23",
"E23", "F23", "G23", "H23", "B24", "C24", "D24",
"E24", "F24", "G24", "H24", "B25", "C25", "D25",
"E25", "F25", "G25", "H25"
cell_to_unlock = ["A3", "B3", "B4", "B5", "B6", "B7", "B8", "B9",
"B10", "B11", "C3", "C4", "C5", "C6", "C7", "C8",
"C9", "C10", "C11", "D3", "D4", "D5", "D6", "D7",
"D8", "D9", "D10", "D11", "B15", "C15", "D15",
"E15", "B16", "C16", "D16", "E16", "B17", "C17",
"D17", "E17", "B18", "C18", "D18", "E18", "B22",
"C22", "D22", "E22", "F22", "G22", "H22", "I22",
"B23", "C23", "D23", "E23", "F23", "G23", "H23",
"I23", "B28", "C28", "D28", "E28", "F28", "G28",
"H28", "B29", "C29", "D29", "E29", "F29", "G29",
"H29", "B30", "C30", "D30", "E30", "F30", "G30",
"H30"
]
self.lock_sheet(ws=self.summary,
cell_to_unlock=cell_to_unlock,
Expand Down Expand Up @@ -592,7 +608,8 @@ def write_reporting_template(self, report_sheet_num) -> None:
"Associated disease": [4, 2],
"Known inheritance": [5, 2],
"Prevalence": [6, 2],
"Estimated allele frequency": [9, 2],
("Allele frequency is >5% (or gene-specific cut off) in "
"population data e.g. gnomAD, UKB"): [9, 2],
("Null variant in a gene where LOF is known mechanism "
"of disease\nand non-canonical splice variants where "
"RNA analysis confirms\naberrant transcription"): [10, 2],
Expand All @@ -606,7 +623,8 @@ def write_reporting_template(self, report_sheet_num) -> None:
"Prevalence in affected > controls": [14, 2],
("In mutational hot spot and/or critical functional "
"domain, without\nbenign variation"): [15, 2],
"Freq in controls eg gnomAD, low/absent or >5%": [16, 2],
("Freq in controls eg gnomAD, low/absent (PM2) or allele "
"frequency is greater than expected for disorder (BS1)"): [16, 2],
"Detected in trans/in cis with pathogenic variant": [17, 2],
("In frame protein length change/stop-loss variants, "
"non repeat\nvs. repeat region"): [18, 2],
Expand Down Expand Up @@ -645,10 +663,10 @@ def write_reporting_template(self, report_sheet_num) -> None:
"PP2": [(22, 7)],
"PP3": [(23, 7)],
"PP4": [(24, 7)],
"BS1": [(9, 10)],
"BA1": [(9, 10)],
"BS2": [(12, 10)],
"BS3": [(13, 10)],
"BA1": [(16, 10)],
"BS1": [(16, 10)],
"BP2": [(17, 10)],
"BP3": [(18, 10)],
"BS4": [(21, 10)],
Expand Down Expand Up @@ -721,9 +739,9 @@ def write_reporting_template(self, report_sheet_num) -> None:
'E46C0A': ['G11', 'G12', 'G13', 'G14'],
'FFC000': ['G15', 'G16', 'G17', 'G18', 'G19', 'G20'],
'FFFF00': ['G21', 'G22', 'G23', 'G24'],
'00B0F0': ['J9', 'J12', 'J13', 'J21'],
'00B0F0': ['J12', 'J13', 'J16', 'J21'],
'92D050': ['J17', 'J18', 'J22', 'J23', 'J24', 'J25'],
'0070C0': ['J16'],
'0070C0': ['J9'],
'FF0000': ['G10'],
'D9D9D9': ['G9', 'G25', 'H9', 'H25', 'I9', 'I25',
'J10', 'J11', 'J14', 'J15', 'J19', 'J20',
Expand Down Expand Up @@ -1533,7 +1551,7 @@ def drop_down(self) -> None:
report_sheet = wb[f"interpret_{sheet_num}"]
cells_for_strength = ['H10', 'H11', 'H12', 'H13', 'H14', 'H15',
'H16', 'H17', 'H18', 'H19', 'H20', 'H21',
'H22', 'H23', 'H24', 'K9', 'K12', 'K13',
'H22', 'H23', 'H24', 'K12', 'K13', 'K16',
'K17', 'K18', 'K21', 'K22', 'K23', 'K24',
'K25']
strength_options = '"Very Strong, Strong, Moderate, \
Expand All @@ -1551,7 +1569,7 @@ def drop_down(self) -> None:
prompt='Select from the list',
title='Strength',
sheet=report_sheet,
cells=['K16'])
cells=['K9'])

# adding final classification dropdown
report_sheet['B26'] = 'FINAL ACMG CLASSIFICATION'
Expand Down

0 comments on commit 19ae999

Please sign in to comment.