From a1e2eae73a50bcb4af61f4a102bd788c72f55227 Mon Sep 17 00:00:00 2001 From: Kilian Ilius Date: Mon, 13 Jan 2025 14:46:26 +0100 Subject: [PATCH] NGSDExportSV adds DISEASE_GROUP to each SV --- src/BedpeAnnotateCounts/main.cpp | 1 + src/NGSDExportSV/main.cpp | 11 ++++++++++- .../data_out/NGSDExportSV/sv_deletion.bedpe | 14 +++++++------- .../data_out/NGSDExportSV/sv_duplication.bedpe | 8 ++++---- .../data_out/NGSDExportSV/sv_insertion.bedpe | 14 +++++++------- .../data_out/NGSDExportSV/sv_inversion.bedpe | 14 +++++++------- .../data_out/NGSDExportSV/sv_translocation.bedpe | 14 +++++++------- 7 files changed, 43 insertions(+), 33 deletions(-) diff --git a/src/BedpeAnnotateCounts/main.cpp b/src/BedpeAnnotateCounts/main.cpp index 663a09e19..768c4a319 100644 --- a/src/BedpeAnnotateCounts/main.cpp +++ b/src/BedpeAnnotateCounts/main.cpp @@ -32,6 +32,7 @@ class ConcreteTool addString("processing_system", "Processing system short name of the processed sample", false); changeLog(2022, 2, 11, "Initial commit."); + changeLog(2025, 1, 13, "Added annotation of counts and AF grouped by disease group"); } virtual void main() diff --git a/src/NGSDExportSV/main.cpp b/src/NGSDExportSV/main.cpp index 38ccc0376..cb1c05fe3 100644 --- a/src/NGSDExportSV/main.cpp +++ b/src/NGSDExportSV/main.cpp @@ -34,6 +34,7 @@ class ConcreteTool changeLog(2022, 2, 18, "Implemented tool."); changeLog(2022, 2, 24, "Changed SV break point output format."); changeLog(2024, 2, 7, "Added output of processing specific breakpoint density."); + changeLog(2025, 1, 13, "Added DISEASE_GROUP column to BEDPE files"); } void collapseSvDensity(QString output_folder, QHash> sv_density, const QStringList& chromosomes, const QByteArray& sys="") @@ -124,11 +125,12 @@ class ConcreteTool //create BEDPE columns for output file BedpeFile bedpe_structure; - bedpe_structure.setAnnotationHeaders(QList() << "TYPE" << "PROCESSING_SYSTEM" << "ID" << "FORMAT" << "FORMAT_VALUES"); + bedpe_structure.setAnnotationHeaders(QList() << "TYPE" << "PROCESSING_SYSTEM" << "ID" << "FORMAT" << "FORMAT_VALUES" << "DISEASE_GROUP"); int idx_type = bedpe_structure.annotationIndexByName("TYPE"); int idx_processing_system = bedpe_structure.annotationIndexByName("PROCESSING_SYSTEM"); int idx_sv_id = bedpe_structure.annotationIndexByName("ID"); int idx_format = bedpe_structure.annotationIndexByName("FORMAT"); + int idx_disease_group = bedpe_structure.annotationIndexByName("DISEASE_GROUP"); QList sv_types = QList() << StructuralVariantType::DEL << StructuralVariantType::DUP << StructuralVariantType::INS << StructuralVariantType::INV << StructuralVariantType::BND; @@ -285,12 +287,19 @@ class ConcreteTool } debug_time_get_sys += timer_get_sys.elapsed()/1000.0; + //get disease group + QByteArray disease_group; + disease_group = db.getValue("SELECT s.disease_group FROM `sv_callset` sc " + QByteArray() + + + "INNER JOIN `processed_sample` ps ON sc.processed_sample_id = ps.id " + + "INNER JOIN `sample` s ON ps.sample_id = s.id WHERE sc.id = :0", false, QString::number(cs_id)).toByteArray(); + //write to file timer_write_file.restart(); //update annotation QList sv_annotation = sv.annotations(); sv_annotation[idx_type] = StructuralVariantTypeToString(sv_type).toUtf8(); sv_annotation[idx_processing_system] = processing_system; + sv_annotation[idx_disease_group] = disease_group; if (sv_type == StructuralVariantType::BND) { //special handling: store both directions and add SV id diff --git a/src/tools-TEST/data_out/NGSDExportSV/sv_deletion.bedpe b/src/tools-TEST/data_out/NGSDExportSV/sv_deletion.bedpe index c8755d269..e356dfcde 100644 --- a/src/tools-TEST/data_out/NGSDExportSV/sv_deletion.bedpe +++ b/src/tools-TEST/data_out/NGSDExportSV/sv_deletion.bedpe @@ -1,9 +1,9 @@ ##fileformat=BEDPE ##sample_count=(hpHBOCv5, 4) -#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES -chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./. -chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./. -chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./. -chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./. -chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./. -chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./. +#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP +chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./. Neoplasms +chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs diff --git a/src/tools-TEST/data_out/NGSDExportSV/sv_duplication.bedpe b/src/tools-TEST/data_out/NGSDExportSV/sv_duplication.bedpe index 4e31d7b3d..76536dcb0 100644 --- a/src/tools-TEST/data_out/NGSDExportSV/sv_duplication.bedpe +++ b/src/tools-TEST/data_out/NGSDExportSV/sv_duplication.bedpe @@ -1,6 +1,6 @@ ##fileformat=BEDPE ##sample_count=(hpHBOCv5, 4) -#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES -chr1 100005 100050 chr1 112000 113000 DUP hpHBOCv5 GT ./. -chr1 101000 101020 chr1 112000 113000 DUP hpHBOCv5 GT ./. -chr1 101000 101020 chr1 120000 120000 DUP hpHBOCv5 GT ./. +#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP +chr1 100005 100050 chr1 112000 113000 DUP hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 101000 101020 chr1 112000 113000 DUP hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 101000 101020 chr1 120000 120000 DUP hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs diff --git a/src/tools-TEST/data_out/NGSDExportSV/sv_insertion.bedpe b/src/tools-TEST/data_out/NGSDExportSV/sv_insertion.bedpe index e27cccf3e..f97ecf965 100644 --- a/src/tools-TEST/data_out/NGSDExportSV/sv_insertion.bedpe +++ b/src/tools-TEST/data_out/NGSDExportSV/sv_insertion.bedpe @@ -1,9 +1,9 @@ ##fileformat=BEDPE ##sample_count=(hpHBOCv5, 4) -#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES -chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./. -chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./. -chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./. -chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./. -chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./. -chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./. +#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP +chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./. Diseases of the immune system +chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./. Diseases of the immune system +chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./. Diseases of the immune system diff --git a/src/tools-TEST/data_out/NGSDExportSV/sv_inversion.bedpe b/src/tools-TEST/data_out/NGSDExportSV/sv_inversion.bedpe index c36f321e2..153ae0a8f 100644 --- a/src/tools-TEST/data_out/NGSDExportSV/sv_inversion.bedpe +++ b/src/tools-TEST/data_out/NGSDExportSV/sv_inversion.bedpe @@ -1,9 +1,9 @@ ##fileformat=BEDPE ##sample_count=(hpHBOCv5, 4) -#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES -chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./. -chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./. -chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./. -chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./. -chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./. -chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./. +#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP +chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Neoplasms +chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Neoplasms +chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs +chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./. Neoplasms diff --git a/src/tools-TEST/data_out/NGSDExportSV/sv_translocation.bedpe b/src/tools-TEST/data_out/NGSDExportSV/sv_translocation.bedpe index e2f0ac8d6..b85294ec1 100644 --- a/src/tools-TEST/data_out/NGSDExportSV/sv_translocation.bedpe +++ b/src/tools-TEST/data_out/NGSDExportSV/sv_translocation.bedpe @@ -1,9 +1,9 @@ ##fileformat=BEDPE ##sample_count=(hpHBOCv5, 4) -#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES -chr1 9100005 9100050 chr5 4112000 4113000 BND hpHBOCv5 3 GT ./. -chr5 4112000 4113000 chr1 9100005 9100050 BND hpHBOCv5 3 GT ./. -chr1 9101000 9101020 chr5 4112000 4113000 BND hpHBOCv5 1 GT ./. -chr5 4112000 4113000 chr1 9101000 9101020 BND hpHBOCv5 1 GT ./. -chr1 9101000 9101020 chr5 4120000 4120000 BND hpHBOCv5 2 GT ./. -chr5 4120000 4120000 chr1 9101000 9101020 BND hpHBOCv5 2 GT ./. +#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP +chr1 9100005 9100050 chr5 4112000 4113000 BND hpHBOCv5 3 GT ./. Diseases of the blood or blood-forming organs +chr5 4112000 4113000 chr1 9100005 9100050 BND hpHBOCv5 3 GT ./. Diseases of the blood or blood-forming organs +chr1 9101000 9101020 chr5 4112000 4113000 BND hpHBOCv5 1 GT ./. Diseases of the blood or blood-forming organs +chr5 4112000 4113000 chr1 9101000 9101020 BND hpHBOCv5 1 GT ./. Diseases of the blood or blood-forming organs +chr1 9101000 9101020 chr5 4120000 4120000 BND hpHBOCv5 2 GT ./. Diseases of the blood or blood-forming organs +chr5 4120000 4120000 chr1 9101000 9101020 BND hpHBOCv5 2 GT ./. Diseases of the blood or blood-forming organs