From 8e645db1684edc1b3b1196e67168b5afb1c7e879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leon=20Sch=C3=BCtz?= Date: Thu, 23 Jan 2025 09:34:07 +0100 Subject: [PATCH] added option to calculate error rates on duplication groups separately --- calculateMRD.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/calculateMRD.py b/calculateMRD.py index e091f90..18ccddc 100644 --- a/calculateMRD.py +++ b/calculateMRD.py @@ -22,6 +22,7 @@ def parse_args(): parser.add_argument("--blacklist", type=str, help="VCF of variants which should be excluded for the background error rate", default="") parser.add_argument("--remove_off_target", action="store_true", help="Remove tumor off-target variants") parser.add_argument("--keep_indels", action="store_true", help="Do not remove InDels.") + parser.add_argument("--keep_duplication_groups_separate", action="store_true", help="Calculate errors/MRD on the different duplication set separately.") args = parser.parse_args() return args @@ -161,10 +162,11 @@ def main(): combined_tables[dup_level] = dedup_tables[dup_level][["A", "C", "G", "T", "DEL", "INS", "DP", "DP_HQ"]] included_levels = [dup_level] print(dup_level) - for idx in range(dup_levels.index(dup_level) + 1, len(dup_levels)): - combined_tables[dup_level] = combined_tables[dup_level].add(dedup_tables[dup_levels[idx]][["A", "C", "G", "T", "DEL", "INS", "DP", "DP_HQ"]], fill_value=0) - included_levels.append(dup_levels[idx]) - print(" - " + dup_levels[idx]) + if not args.keep_duplication_groups_separate: + for idx in range(dup_levels.index(dup_level) + 1, len(dup_levels)): + combined_tables[dup_level] = combined_tables[dup_level].add(dedup_tables[dup_levels[idx]][["A", "C", "G", "T", "DEL", "INS", "DP", "DP_HQ"]], fill_value=0) + included_levels.append(dup_levels[idx]) + print(" - " + dup_levels[idx]) # get combined ref column ref_column = pd.concat([dedup_tables[dedup]["REF"] for dedup in included_levels])