Skip to content

Commit

Permalink
NGSDExportSV adds DISEASE_GROUP to each SV
Browse files: browse the repository at this point in the history
  • Loading branch information
Kilian Ilius committed Jan 13, 2025
1 parent 5831b4d commit a1e2eae
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 33 deletions.
1 change: 1 addition & 0 deletions src/BedpeAnnotateCounts/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class ConcreteTool
addString("processing_system", "Processing system short name of the processed sample", false);

changeLog(2022, 2, 11, "Initial commit.");
changeLog(2025, 1, 13, "Added annotation of counts and AF grouped by disease group");
}

virtual void main()
Expand Down
11 changes: 10 additions & 1 deletion src/NGSDExportSV/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class ConcreteTool
changeLog(2022, 2, 18, "Implemented tool.");
changeLog(2022, 2, 24, "Changed SV break point output format.");
changeLog(2024, 2, 7, "Added output of processing specific breakpoint density.");
changeLog(2025, 1, 13, "Added DISEASE_GROUP column to BEDPE files");
}

void collapseSvDensity(QString output_folder, QHash<Chromosome, QMap<int,int>> sv_density, const QStringList& chromosomes, const QByteArray& sys="")
Expand Down Expand Up @@ -124,11 +125,12 @@ class ConcreteTool

//create BEDPE columns for output file
BedpeFile bedpe_structure;
bedpe_structure.setAnnotationHeaders(QList<QByteArray>() << "TYPE" << "PROCESSING_SYSTEM" << "ID" << "FORMAT" << "FORMAT_VALUES");
bedpe_structure.setAnnotationHeaders(QList<QByteArray>() << "TYPE" << "PROCESSING_SYSTEM" << "ID" << "FORMAT" << "FORMAT_VALUES" << "DISEASE_GROUP");
int idx_type = bedpe_structure.annotationIndexByName("TYPE");
int idx_processing_system = bedpe_structure.annotationIndexByName("PROCESSING_SYSTEM");
int idx_sv_id = bedpe_structure.annotationIndexByName("ID");
int idx_format = bedpe_structure.annotationIndexByName("FORMAT");
int idx_disease_group = bedpe_structure.annotationIndexByName("DISEASE_GROUP");

QList<StructuralVariantType> sv_types = QList<StructuralVariantType>() << StructuralVariantType::DEL << StructuralVariantType::DUP << StructuralVariantType::INS
<< StructuralVariantType::INV << StructuralVariantType::BND;
Expand Down Expand Up @@ -285,12 +287,19 @@ class ConcreteTool
}
debug_time_get_sys += timer_get_sys.elapsed()/1000.0;

//get disease group
QByteArray disease_group;
disease_group = db.getValue("SELECT s.disease_group FROM `sv_callset` sc " + QByteArray() +
+ "INNER JOIN `processed_sample` ps ON sc.processed_sample_id = ps.id "
+ "INNER JOIN `sample` s ON ps.sample_id = s.id WHERE sc.id = :0", false, QString::number(cs_id)).toByteArray();

//write to file
timer_write_file.restart();
//update annotation
QList<QByteArray> sv_annotation = sv.annotations();
sv_annotation[idx_type] = StructuralVariantTypeToString(sv_type).toUtf8();
sv_annotation[idx_processing_system] = processing_system;
sv_annotation[idx_disease_group] = disease_group;
if (sv_type == StructuralVariantType::BND)
{
//special handling: store both directions and add SV id
Expand Down
14 changes: 7 additions & 7 deletions src/tools-TEST/data_out/NGSDExportSV/sv_deletion.bedpe
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
##fileformat=BEDPE
##sample_count=(hpHBOCv5, 4)
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES
chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./.
chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./.
chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./.
chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./.
chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./.
chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./.
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP
chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 5 50 chr1 12000 13000 DEL hpHBOCv5 GT ./. Neoplasms
chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 1000 1020 chr1 12000 13000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 1000 1020 chr1 20000 20000 DEL hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
8 changes: 4 additions & 4 deletions src/tools-TEST/data_out/NGSDExportSV/sv_duplication.bedpe
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
##fileformat=BEDPE
##sample_count=(hpHBOCv5, 4)
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES
chr1 100005 100050 chr1 112000 113000 DUP hpHBOCv5 GT ./.
chr1 101000 101020 chr1 112000 113000 DUP hpHBOCv5 GT ./.
chr1 101000 101020 chr1 120000 120000 DUP hpHBOCv5 GT ./.
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP
chr1 100005 100050 chr1 112000 113000 DUP hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 101000 101020 chr1 112000 113000 DUP hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 101000 101020 chr1 120000 120000 DUP hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
14 changes: 7 additions & 7 deletions src/tools-TEST/data_out/NGSDExportSV/sv_insertion.bedpe
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
##fileformat=BEDPE
##sample_count=(hpHBOCv5, 4)
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES
chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./.
chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./.
chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./.
chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./.
chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./.
chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./.
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP
chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 15482205 15482455 chr1 15482205 15482205 INS hpHBOCv5 GT ./. Diseases of the immune system
chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 16482455 16482515 chr1 16482455 16482455 INS hpHBOCv5 GT ./. Diseases of the immune system
chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 17482432 17482509 chr1 17482432 17482432 INS hpHBOCv5 GT ./. Diseases of the immune system
14 changes: 7 additions & 7 deletions src/tools-TEST/data_out/NGSDExportSV/sv_inversion.bedpe
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
##fileformat=BEDPE
##sample_count=(hpHBOCv5, 4)
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES
chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./.
chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./.
chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./.
chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./.
chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./.
chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./.
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP
chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 9100005 9100050 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Neoplasms
chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 9101000 9101020 chr1 9112000 9113000 INV hpHBOCv5 GT ./. Neoplasms
chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./. Diseases of the blood or blood-forming organs
chr1 9101000 9101020 chr1 9120000 9120000 INV hpHBOCv5 GT ./. Neoplasms
14 changes: 7 additions & 7 deletions src/tools-TEST/data_out/NGSDExportSV/sv_translocation.bedpe
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
##fileformat=BEDPE
##sample_count=(hpHBOCv5, 4)
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES
chr1 9100005 9100050 chr5 4112000 4113000 BND hpHBOCv5 3 GT ./.
chr5 4112000 4113000 chr1 9100005 9100050 BND hpHBOCv5 3 GT ./.
chr1 9101000 9101020 chr5 4112000 4113000 BND hpHBOCv5 1 GT ./.
chr5 4112000 4113000 chr1 9101000 9101020 BND hpHBOCv5 1 GT ./.
chr1 9101000 9101020 chr5 4120000 4120000 BND hpHBOCv5 2 GT ./.
chr5 4120000 4120000 chr1 9101000 9101020 BND hpHBOCv5 2 GT ./.
#CHROM_A START_A END_A CHROM_B START_B END_B TYPE PROCESSING_SYSTEM ID FORMAT FORMAT_VALUES DISEASE_GROUP
chr1 9100005 9100050 chr5 4112000 4113000 BND hpHBOCv5 3 GT ./. Diseases of the blood or blood-forming organs
chr5 4112000 4113000 chr1 9100005 9100050 BND hpHBOCv5 3 GT ./. Diseases of the blood or blood-forming organs
chr1 9101000 9101020 chr5 4112000 4113000 BND hpHBOCv5 1 GT ./. Diseases of the blood or blood-forming organs
chr5 4112000 4113000 chr1 9101000 9101020 BND hpHBOCv5 1 GT ./. Diseases of the blood or blood-forming organs
chr1 9101000 9101020 chr5 4120000 4120000 BND hpHBOCv5 2 GT ./. Diseases of the blood or blood-forming organs
chr5 4120000 4120000 chr1 9101000 9101020 BND hpHBOCv5 2 GT ./. Diseases of the blood or blood-forming organs

0 comments on commit a1e2eae

Please sign in to comment.