Skip to content

Commit

Permalink
Added Count annotation by disease group to BedpeAnnotateCounts/main.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
Kilian Ilius committed Jan 14, 2025
1 parent a1e2eae commit 66b1434
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 1 deletion.
62 changes: 62 additions & 0 deletions src/BedpeAnnotateCounts/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ class ConcreteTool
{
}

// Tally of matching structural variants for one disease group, split by genotype.
// Both counters start at zero so a default-constructed entry (e.g. one created on
// first access to a hash) can be incremented right away.
struct GroupCount
{
	// number of matches whose genotype field is "1/1"
	int hom_count{0};
	// number of matches with any other genotype
	int het_count{0};
};

virtual void setup()
{
setDescription("Annotates a BEDPE file with NGSD count information of zipped BEDPE flat files.");
Expand Down Expand Up @@ -71,6 +77,7 @@ class ConcreteTool
int i_ngsd_hom = bedpe_input_file.annotationIndexByName("NGSD_HOM", false);
int i_ngsd_het = bedpe_input_file.annotationIndexByName("NGSD_HET", false);
int i_ngsd_af = bedpe_input_file.annotationIndexByName("NGSD_AF", false);
int i_disease_group = bedpe_input_file.annotationIndexByName("DISEASE_GROUP", false);

// create text buffer for output file
QByteArrayList output_buffer;
Expand Down Expand Up @@ -99,6 +106,12 @@ class ConcreteTool
additional_columns.append("");
header.append("NGSD_AF");
}
if (i_disease_group < 0)
{
i_disease_group = header.size();
additional_columns.append("");
header.append("DISEASE_GROUP");
}
output_buffer << "#CHROM_A\tSTART_A\tEND_A\tCHROM_B\tSTART_B\tEND_B\t" + header.join("\t") + "\n";

// iterate over all structural variants
Expand Down Expand Up @@ -131,6 +144,7 @@ class ConcreteTool
int ngsd_count_hom = 0;
int ngsd_count_het = 0;
QByteArrayList matches = count_indices[sv.type()].getMatchingLines(sv_region.chr(), sv_region.start(), sv_region.end(), true);
QHash<QByteArray, GroupCount> count_per_group;

// check resulting lines for exact matches
foreach (const QByteArray& match, matches)
Expand All @@ -151,10 +165,21 @@ class ConcreteTool
if (columns[idx_format_ + 1].split(':').at(0).trimmed() == "1/1")
{
ngsd_count_hom++;

//count by disease group
if (!columns[idx_format_ + 2].isEmpty())
{
count_per_group[columns[idx_format_ + 2]].hom_count += 1;
}
}
else
{
ngsd_count_het++;
//count by disease group
if (!columns[idx_format_ + 2].isEmpty())
{
count_per_group[columns[idx_format_ + 2]].het_count += 1;
}
}
}
}
Expand All @@ -172,10 +197,22 @@ class ConcreteTool
if (columns[idx_format_ + 1].split(':').at(0).trimmed() == "1/1")
{
ngsd_count_hom++;

//count by disease group
if (!columns[idx_format_ + 2].isEmpty())
{
count_per_group[columns[idx_format_ + 2]].hom_count += 1;
}
}
else
{
ngsd_count_het++;

//count by disease group
if (!columns[idx_format_ + 2].isEmpty())
{
count_per_group[columns[idx_format_ + 2]].het_count += 1;
}
}
bnd_ids.insert(bnd_id);
}
Expand All @@ -193,10 +230,22 @@ class ConcreteTool
if (columns[idx_format_ + 1].split(':').at(0).trimmed() == "1/1")
{
ngsd_count_hom++;

//count by disease group
if (!columns[idx_format_ + 2].isEmpty())
{
count_per_group[columns[idx_format_ + 2]].hom_count += 1;
}
}
else
{
ngsd_count_het++;

//count by disease group
if (!columns[idx_format_ + 2].isEmpty())
{
count_per_group[columns[idx_format_ + 2]].het_count += 1;
}
}
}
}
Expand All @@ -210,6 +259,19 @@ class ConcreteTool
double ngsd_af = std::min(1.0, (double) (2.0 * ngsd_count_hom + ngsd_count_het) / (double) (sample_count_ * 2.0));
sv_annotations[i_ngsd_af] = QByteArray::number(ngsd_af, 'f', 4);
}

// annotate counts per disease group
foreach (const QByteArray& group, count_per_group.keys())
{
if (count_per_group[group].hom_count > 0 || count_per_group[group].het_count > 0)
{
sv_annotations[i_disease_group] = group
+ "="
+ QByteArray::number(count_per_group[group].hom_count, 0)
+ ","
+ QByteArray::number(count_per_group[group].het_count, 0);
}
}
}

//write annotation back to BedpeLine
Expand Down
9 changes: 8 additions & 1 deletion src/NGSDExportSV/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,13 @@ class ConcreteTool
out << "##sample_count=(" + key + ", " + QString::number(sample_counts.value(key)) + ")\n";
}

//store disease_groups
QStringList disease_groups = db.getEnum("sample", "disease_group");
for(int i = 0; i < disease_groups.size(); i++)
{
out << "##INFO=<ID=GSC" << QByteArray::number(i + 1).rightJustified(2, '0') << ",Number=2,Type=Integer,Description=\"" << "Homozygous/Heterozygous variant counts in NGSD for " << disease_groups[i].toLower() << ".\">\n";
}

//write header
out << "#CHROM_A\tSTART_A\tEND_A\tCHROM_B\tSTART_B\tEND_B\t" + bedpe_structure.annotationHeaders().join('\t') + "\n";

Expand Down Expand Up @@ -299,7 +306,7 @@ class ConcreteTool
QList<QByteArray> sv_annotation = sv.annotations();
sv_annotation[idx_type] = StructuralVariantTypeToString(sv_type).toUtf8();
sv_annotation[idx_processing_system] = processing_system;
sv_annotation[idx_disease_group] = disease_group;
sv_annotation[idx_disease_group] = "GSC" + QByteArray::number(disease_groups.indexOf(disease_group) + 1).rightJustified(2, '0');
if (sv_type == StructuralVariantType::BND)
{
//special handling: store both directions and add SV id
Expand Down

0 comments on commit 66b1434

Please sign in to comment.