diff --git a/dxapp.json b/dxapp.json index af2317eb..ee6d439d 100644 --- a/dxapp.json +++ b/dxapp.json @@ -3,8 +3,8 @@ "title": "eggd_generate_variant_workbook", "summary": "Create Excel workbook from VEP annotated vcf", "dxapi": "1.0.0", - "version": "2.7.1", - "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); * v2.5.0 Better parsing of CombinedVariantOutput files as additional files; * v2.6.0 Add variant counts as DNAnexus file details to the .xlsx workbook; *v2.7.0 Handle pre-split and non VEP annotated VCFs, improvements to Dias reporting templates and Excel data validation; * v2.7.1 v.2.7.0 app was accidentally published on DNAnexus before testing; so a new version is created. 
Everything except version number is the same as v2.7.0", + "version": "2.8.0", + "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc.); * v2.5.0 Better parsing of CombinedVariantOutput files as additional files; * v2.6.0 Add variant counts as DNAnexus file details to the .xlsx workbook; * v2.7.0 Handle pre-split and non VEP annotated VCFs, improvements to Dias reporting templates and Excel data validation; * v2.7.1 v2.7.0 app was accidentally published on DNAnexus before testing, so a new version is created. 
Everything except version number is the same as v2.7.0; * v2.8.0 Add PID box in summary, swap BS1 and BA1 in interpret table, increase the number of unlocked rows to 500", "authorizedUsers": [ "org-emee_1" ], diff --git a/resources/home/dnanexus/generate_workbook/utils/excel.py b/resources/home/dnanexus/generate_workbook/utils/excel.py index 68b4c7af..11f0ceff 100644 --- a/resources/home/dnanexus/generate_workbook/utils/excel.py +++ b/resources/home/dnanexus/generate_workbook/utils/excel.py @@ -33,7 +33,7 @@ # row and col counts that are to be unlocked next to # populated table in all sheets if it is dias pipeline # required for 'lock_sheet' function -ROW_TO_UNLOCK = 200 +ROW_TO_UNLOCK = 500 COL_TO_UNLOCK = 200 @@ -394,7 +394,7 @@ def dias_summary(self) -> None: self.summary.cell(1, 1).value = "Sample ID:" self.summary.cell(1, 5).value = "Clinical Indication(s):" self.summary.cell(2, 5).value = "Panel(s):" - self.summary.cell(28, 1).value = "Total records:" + self.summary.cell(33, 1).value = "Total records:" # get sample name from vcf, should only be one but handle everything # list-wise just in case @@ -418,7 +418,7 @@ def dias_summary(self) -> None: json.dump(details_dict, details_json) # write total rows in each sheet - count = 28 + count = 33 # cells to make bold to_bold = [] @@ -480,24 +480,29 @@ def dias_summary(self) -> None: # write center reporting section tables - self.summary.cell(9, 2).value = "Phenotype:" - - self.summary.cell(16, 2).value = "Panels" - self.summary.cell(16, 3).value = "Excel file" - self.summary.cell(16, 4).value = "Comments" - self.summary.cell(16, 6).value = "Analysis by" - self.summary.cell(16, 7).value = "Date" - self.summary.cell(16, 8).value = "Checked by" - self.summary.cell(16, 9).value = "Date" - - self.summary.cell(21, 2).value = "Sanger sequencing confirmation" - self.summary.cell(22, 2).value = "Gene" - self.summary.cell(22, 3).value = "NM_#" - self.summary.cell(22, 4).value = "Coordinate" - self.summary.cell(22, 5).value = 
"cDNA" - self.summary.cell(22, 6).value = "Protein change" - self.summary.cell(22, 7).value = "WS#" - self.summary.cell(22, 8).value = "Confirmed (Y/N)" + self.summary.cell(2, 1).value = "Lab no." + self.summary.cell(2, 3).value = "First name" + self.summary.cell(2, 4).value = "Last name" + self.summary.cell(4, 1).value = "Number checked" + self.summary.cell(5, 1).value = "Summary coverage" + self.summary.cell(14, 2).value = "Phenotype:" + + self.summary.cell(21, 2).value = "Panels" + self.summary.cell(21, 3).value = "Excel file" + self.summary.cell(21, 4).value = "Comments" + self.summary.cell(21, 6).value = "Analysis by" + self.summary.cell(21, 7).value = "Date" + self.summary.cell(21, 8).value = "Checked by" + self.summary.cell(21, 9).value = "Date" + + self.summary.cell(26, 2).value = "Sanger sequencing confirmation" + self.summary.cell(27, 2).value = "Gene" + self.summary.cell(27, 3).value = "NM_#" + self.summary.cell(27, 4).value = "Coordinate" + self.summary.cell(27, 5).value = "cDNA" + self.summary.cell(27, 6).value = "Protein change" + self.summary.cell(27, 7).value = "WS#" + self.summary.cell(27, 8).value = "Confirmed (Y/N)" # merge some title columns that have longer text self.summary.merge_cells( @@ -507,18 +512,24 @@ def dias_summary(self) -> None: self.summary.merge_cells( start_row=2, end_row=2, start_column=6, end_column=20) self.summary.merge_cells( - start_row=9, end_row=9, start_column=2, end_column=5) + start_row=14, end_row=14, start_column=2, end_column=5) self.summary.merge_cells( - start_row=21, end_row=21, start_column=2, end_column=8) + start_row=26, end_row=26, start_column=2, end_column=8) self.summary.merge_cells( - start_row=16, end_row=16, start_column=4, end_column=5) + start_row=21, end_row=21, start_column=4, end_column=5) + self.summary.merge_cells( + start_row=5, end_row=11, start_column=1, end_column=1) + + # make the coverage tile centre of merged rows + self.summary["A5"].alignment = Alignment( + wrapText=True, 
vertical="center") # titles to set to bold to_bold += [ - "A1", "A28", "B1", "B9", "B16", "B21", "B22", - "B28", "B29", "C16", "C22", "D16", "D22", - "E1", "E2", "E22", "F16", "F22", "G16", - "G22", "H16", "H22", "I16" + "A1", "A2", "A4", "A5", "A33", "B1", "B14", "B21", "B26", + "B27", "B33", "B34", "C2", "C21", "C27", "D2", "D21", "D27", + "E1", "E2", "E27", "F21", "F27", "G21", "G27", "H21", "H27", + "I21" ] for cell in to_bold: @@ -538,31 +549,36 @@ def dias_summary(self) -> None: blueFill = PatternFill(patternType="solid", start_color="0CABA8") colour_cells = [ - "B9", "B16", "B21", "B22", "C16", "C22", "D16", "D22", - "E22", "F16", "F22", "G16", "G22", "H16", "H22", "I16" + "A2", "A4", "A5", "B2", "B14", "B21", "B26", "B27", + "C2", "C21", "C27", "D2", "D21", "D27", "E27", "F21", + "F27", "G21", "G27", "H21", "H27", "I21" ] for cell in colour_cells: self.summary[cell].fill = blueFill # set borders around table areas row_ranges = [ - 'B9:E9', 'B10:E10', 'B11:E11', 'B12:E12', 'B13:E13', - 'B16:I16', 'B17:I17', 'B18:I18', 'B21:H21', 'B22:H22', - 'B23:H23', 'B24:H24', 'B25:H25' + 'A2:D2', 'A3:D3', 'A4:D4', 'A5:A5', 'B14:E14', 'B15:E15', + 'B16:E16', 'B17:E17', 'B18:E18', 'B21:I21', 'B22:I22', + 'B23:I23', 'B26:H26', 'B27:H27', 'B28:H28', 'B29:H29', 'B30:H30' ] for row in row_ranges: for cells in self.summary[row]: for cell in cells: cell.border = THIN_BORDER if self.args.lock_sheet: - cell_to_unlock = ["B10", "C10", "D10", "E10", "B11", "C11", "D11", - "E11", "B12", "C12", "D12", "E12", "B13", "C13", - "D13", "E13", "B17", "C17", "D17", "E17", "F17", - "G17", "H17", "I17", "B18", "C18", "D18", "E18", - "F18", "G18", "H18", "I18", "B23", "C23", "D23", - "E23", "F23", "G23", "H23", "B24", "C24", "D24", - "E24", "F24", "G24", "H24", "B25", "C25", "D25", - "E25", "F25", "G25", "H25" + cell_to_unlock = ["A3", "B3", "B4", "B5", "B6", "B7", "B8", "B9", + "B10", "B11", "C3", "C4", "C5", "C6", "C7", "C8", + "C9", "C10", "C11", "D3", "D4", "D5", "D6", "D7", + "D8", "D9", 
"D10", "D11", "B15", "C15", "D15", + "E15", "B16", "C16", "D16", "E16", "B17", "C17", + "D17", "E17", "B18", "C18", "D18", "E18", "B22", + "C22", "D22", "E22", "F22", "G22", "H22", "I22", + "B23", "C23", "D23", "E23", "F23", "G23", "H23", + "I23", "B28", "C28", "D28", "E28", "F28", "G28", + "H28", "B29", "C29", "D29", "E29", "F29", "G29", + "H29", "B30", "C30", "D30", "E30", "F30", "G30", + "H30" ] self.lock_sheet(ws=self.summary, cell_to_unlock=cell_to_unlock, @@ -592,7 +608,8 @@ def write_reporting_template(self, report_sheet_num) -> None: "Associated disease": [4, 2], "Known inheritance": [5, 2], "Prevalence": [6, 2], - "Estimated allele frequency": [9, 2], + ("Allele frequency is >5% (or gene-specific cut off) in " + "population data e.g. gnomAD, UKB"): [9, 2], ("Null variant in a gene where LOF is known mechanism " "of disease\nand non-canonical splice variants where " "RNA analysis confirms\naberrant transcription"): [10, 2], @@ -606,7 +623,8 @@ def write_reporting_template(self, report_sheet_num) -> None: "Prevalence in affected > controls": [14, 2], ("In mutational hot spot and/or critical functional " "domain, without\nbenign variation"): [15, 2], - "Freq in controls eg gnomAD, low/absent or >5%": [16, 2], + ("Freq in controls eg gnomAD, low/absent (PM2) or allele " + "frequency is greater than expected for disorder (BS1)"): [16, 2], "Detected in trans/in cis with pathogenic variant": [17, 2], ("In frame protein length change/stop-loss variants, " "non repeat\nvs. 
repeat region"): [18, 2], @@ -645,10 +663,10 @@ def write_reporting_template(self, report_sheet_num) -> None: "PP2": [(22, 7)], "PP3": [(23, 7)], "PP4": [(24, 7)], - "BS1": [(9, 10)], + "BA1": [(9, 10)], "BS2": [(12, 10)], "BS3": [(13, 10)], - "BA1": [(16, 10)], + "BS1": [(16, 10)], "BP2": [(17, 10)], "BP3": [(18, 10)], "BS4": [(21, 10)], @@ -721,9 +739,9 @@ def write_reporting_template(self, report_sheet_num) -> None: 'E46C0A': ['G11', 'G12', 'G13', 'G14'], 'FFC000': ['G15', 'G16', 'G17', 'G18', 'G19', 'G20'], 'FFFF00': ['G21', 'G22', 'G23', 'G24'], - '00B0F0': ['J9', 'J12', 'J13', 'J21'], + '00B0F0': ['J12', 'J13', 'J16', 'J21'], '92D050': ['J17', 'J18', 'J22', 'J23', 'J24', 'J25'], - '0070C0': ['J16'], + '0070C0': ['J9'], 'FF0000': ['G10'], 'D9D9D9': ['G9', 'G25', 'H9', 'H25', 'I9', 'I25', 'J10', 'J11', 'J14', 'J15', 'J19', 'J20', @@ -1533,7 +1551,7 @@ def drop_down(self) -> None: report_sheet = wb[f"interpret_{sheet_num}"] cells_for_strength = ['H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', - 'H22', 'H23', 'H24', 'K9', 'K12', 'K13', + 'H22', 'H23', 'H24', 'K12', 'K13', 'K16', 'K17', 'K18', 'K21', 'K22', 'K23', 'K24', 'K25'] strength_options = '"Very Strong, Strong, Moderate, \ @@ -1551,7 +1569,7 @@ def drop_down(self) -> None: prompt='Select from the list', title='Strength', sheet=report_sheet, - cells=['K16']) + cells=['K9']) # adding final classification dropdown report_sheet['B26'] = 'FINAL ACMG CLASSIFICATION'