Skip to content

Commit

Permalink
NGSDAddVariantsSomatic/NGSDExportAnnotationData: added support for tu…
Browse files Browse the repository at this point in the history
…mor-only variants (#334)
  • Loading branch information
marc-sturm committed Jan 29, 2025
1 parent 8b127be commit f6ec58c
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 95 deletions.
63 changes: 31 additions & 32 deletions src/NGSDAddVariantsSomatic/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@ class ConcreteTool
{
setDescription("Imports variants of a tumor-normal processed sample into the NGSD.");
addString("t_ps", "Tumor processed sample name", false);
addString("n_ps", "Normal processed sample name", false);
//optional
addString("n_ps", "Normal processed sample name", true);
addInfile("var", "Small variant list (i.e. SNVs and small INDELs) in GSvar format (as produced by megSAP).", true, true);
addFlag("var_force", "Force import of detected small variants, even if already imported.");
addInfile("cnv", "CNV list in TSV format (as produced by megSAP).", true, true);
addFlag("cnv_force", "Force import of CNVs, even if already imported.");
addInfile("sv", "SV list in TSV format (as produced by megSAP).", true, true);
addFlag("sv_force", "Force import of SVs, even if already imported.");
addFlag("force", "Force import of variants, even if already imported.");
addOutfile("out", "Output file. If unset, writes to STDOUT.", true);
addFlag("test", "Uses the test database instead of on the production database.");
addFlag("debug", "Enable verbose debug output.");
Expand All @@ -42,26 +40,30 @@ class ConcreteTool
QString filename = getInfile("var");
if(filename=="") return;

QString ps_full_name = t_ps_name + "-" + n_ps_name;
bool is_tumor_only = n_ps_name.isEmpty();
QString analysis_name = t_ps_name + (is_tumor_only ? "" : "-" + n_ps_name);

out << endl;
out << "### importing small variants for " << ps_full_name << " ###" << endl;
out << "### importing small variants for " << analysis_name << " ###" << endl;
out << "filename: " << filename << endl;

QString t_ps_id = db.processedSampleId(t_ps_name);
QString n_ps_id = db.processedSampleId(n_ps_name);

int report_conf_id = db.somaticReportConfigId(t_ps_id, n_ps_id);
QString n_ps_id = is_tumor_only ? "" : db.processedSampleId(n_ps_name);
QString dv_where = "processed_sample_id_tumor=" + t_ps_id + " AND processed_sample_id_normal"+(is_tumor_only ? " IS NULL" : "=" + n_ps_id);

//DO NOT IMPORT Anything if a report config exists and contains small variants
if(report_conf_id != -1)
//do not anything if a report config exists and contains small variants
if (!is_tumor_only)
{
SqlQuery query = db.getQuery();
query.exec("SELECT * FROM somatic_report_configuration_variant WHERE somatic_report_configuration_id=" + QString::number(report_conf_id));
if(query.size()>0)
int report_conf_id = db.somaticReportConfigId(t_ps_id, n_ps_id);
if(report_conf_id != -1)
{
out << "Skipped import of small variants for sample " << ps_full_name << ": a somatic report configuration with small variants exists for this sample!" << endl;
return;
SqlQuery query = db.getQuery();
query.exec("SELECT * FROM somatic_report_configuration_variant WHERE somatic_report_configuration_id=" + QString::number(report_conf_id));
if(query.size()>0)
{
out << "Skipped import of small variants for sample " << analysis_name << ": a somatic report configuration with small variants exists for this sample!" << endl;
return;
}
}
}

Expand All @@ -70,11 +72,11 @@ class ConcreteTool
QTime sub_timer;
QStringList sub_times;

int count_old = db.getValue("SELECT count(*) FROM detected_somatic_variant WHERE processed_sample_id_tumor=" + t_ps_id + " AND processed_sample_id_normal=" + n_ps_id).toInt();
int count_old = db.getValue("SELECT count(*) FROM detected_somatic_variant WHERE "+dv_where).toInt();
out << "Found " << count_old << " variants already imported into NGSD!" << endl;
if(count_old>0 && !var_force)
{
THROW(ArgumentException, "Variants were already imported for '" + ps_full_name + "'. Use the flag '-var_force' to overwrite them.");
THROW(ArgumentException, "Variants were already imported for '" + analysis_name + "'. Use the flag '-force' to overwrite them.");
}

//Remove old variants
Expand All @@ -84,7 +86,7 @@ class ConcreteTool
sub_timer.start();

SqlQuery query = db.getQuery();
query.exec("DELETE FROM detected_somatic_variant WHERE processed_sample_id_tumor=" + t_ps_id +" AND processed_sample_id_normal=" + n_ps_id);
query.exec("DELETE FROM detected_somatic_variant WHERE "+dv_where);
out << "Deleted previous somatic variants." << endl;
sub_times << ("Deleted previous detected somatic variants took: " + Helper::elapsedTime(sub_timer));
}
Expand Down Expand Up @@ -114,7 +116,7 @@ class ConcreteTool
int i_qual = variants.annotationIndexByName("quality");

SqlQuery q_insert = db.getQuery();
q_insert.prepare("INSERT INTO detected_somatic_variant (processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth, quality_snp) VALUES (" + t_ps_id +", "+ n_ps_id +", :0, :1, :2, :3)");
q_insert.prepare("INSERT INTO detected_somatic_variant (processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth, quality_snp) VALUES (" + t_ps_id +", " + (is_tumor_only ? "NULL" : n_ps_id) + ", :0, :1, :2, :3)");

db.transaction();
for(int i=0; i<variants.count(); ++i)
Expand Down Expand Up @@ -388,7 +390,6 @@ class ConcreteTool
{
out << "Import took: " << Helper::elapsedTime(timer) << endl;
}

}

virtual void main()
Expand All @@ -402,22 +403,20 @@ class ConcreteTool
QString n_ps = getString("n_ps");
bool debug = getFlag("debug");
bool no_time = getFlag("no_time");
bool var_force = getFlag("var_force");
bool cnv_force = getFlag("cnv_force");
bool sv_force = getFlag("sv_force");
bool force = getFlag("force");

//prevent tumor samples from being imported into the germline variant tables
//prevent import of sample is not flagged as tumor
SampleData sample_data = db.getSampleData(db.sampleId(t_ps));
if (!sample_data.is_tumor)
{
THROW(ArgumentException, "Cannot import variant data for sample " + t_ps +"-" + n_ps + ": the sample is not a somatic sample according to NGSD!");
}
if (!sample_data.is_tumor) THROW(ArgumentException, "Cannot import variant data for sample " + t_ps +"-" + n_ps + ": the sample is not a somatic sample according to NGSD!");

importSmallVariants(db, stream, t_ps, n_ps, no_time, var_force);
importSmallVariants(db, stream, t_ps, n_ps, no_time, force);

importCNVs(db, stream, t_ps, n_ps, debug, no_time, cnv_force, 15.0);
if (!n_ps.isEmpty()) //tumor-normal pair
{
importCNVs(db, stream, t_ps, n_ps, debug, no_time, force, 15.0);

importSVs(db , stream , t_ps, n_ps, debug, no_time, sv_force);
importSVs(db , stream , t_ps, n_ps, debug, no_time, force);
}
}
private:
int variantQuality(const Variant& variant, int i_qual) const
Expand Down
60 changes: 35 additions & 25 deletions src/NGSDExportAnnotationData/ExportWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ void ExportWorker::run()
emit log(chr_, "Time for for database update (variant counts): " + getTimeString(ngsd_count_update));
}

//export somatic (tumor-normal and tumor-only)
if (!params_.somatic.isEmpty())
{
QString tmp_vcf = params_.tempVcf(chr_, "somatic");
Expand All @@ -274,7 +275,7 @@ void ExportWorker::run()

// define query to get the NGSD counts for each variant
SqlQuery ngsd_count_query = db.getQuery();
ngsd_count_query.prepare("SELECT s.id, dsv.processed_sample_id_tumor, p.name FROM detected_somatic_variant as dsv, processed_sample ps, sample as s, project as p WHERE ps.project_id=p.id AND ps.quality!='bad' AND dsv.processed_sample_id_tumor=ps.id AND ps.sample_id=s.id AND s.tumor='1' AND dsv.variant_id=:0");
ngsd_count_query.prepare("SELECT s.id, dsv.processed_sample_id_tumor, p.name, dsv.processed_sample_id_normal FROM detected_somatic_variant as dsv, processed_sample ps, sample as s, project as p WHERE ps.project_id=p.id AND ps.quality!='bad' AND dsv.processed_sample_id_tumor=ps.id AND ps.sample_id=s.id AND s.tumor='1' AND dsv.variant_id=:0");

//open output stream
QSharedPointer<QFile> vcf_file = Helper::openFileForWriting(tmp_vcf, true);
Expand Down Expand Up @@ -308,31 +309,36 @@ void ExportWorker::run()

//process variants
tmp_timer.start();
int somatic_count_to = 0;
QSet<int> s_ids_to_done;
QMap<QByteArray, int> project_map;
QSet<QByteArray> processed_ps_ids;
QSet<QByteArray> processed_s_ids;
QSet<int> s_ids_done;
while(ngsd_count_query.next())
{
QByteArray current_sample = ngsd_count_query.value(0).toByteArray();
QByteArray current_ps_id = ngsd_count_query.value(1).toByteArray();
QByteArray current_project = ngsd_count_query.value(2).toByteArray();

//skip already seen processed samples
// (there could be several variants because of indel window,
// but we want to process only one)
if (processed_ps_ids.contains(current_ps_id)) continue;
processed_ps_ids.insert(current_ps_id);

//skip already seen samples for general statistics
// (there could be several processings of the same sample because of
// different processing systems or because of experment repeats due to
// quality issues)
if (processed_s_ids.contains(current_sample)) continue;
processed_s_ids.insert(current_sample);

// count
if(!project_map.contains(current_project)) project_map.insert(current_project,0);
++project_map[current_project];
int current_sample = ngsd_count_query.value(0).toInt();

bool is_tumor_normal = !ngsd_count_query.value(3).isNull();
QTextStream(stderr) << variant.toString() << " " << is_tumor_normal << " " << current_sample << endl;
if (is_tumor_normal)
{
//skip already seen samples for general statistics (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues)
if (s_ids_done.contains(current_sample)) continue;
s_ids_done.insert(current_sample);

// count
QByteArray current_project = ngsd_count_query.value(2).toByteArray();
if(!project_map.contains(current_project)) project_map.insert(current_project,0);
++project_map[current_project];
}
else
{
//skip already seen samples for general statistics (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues)
if (s_ids_to_done.contains(current_sample)) continue;
s_ids_to_done.insert(current_sample);

++somatic_count_to;
QTextStream(stderr) << somatic_count_to << endl;
}
}

// calculate somatic count
Expand All @@ -357,9 +363,11 @@ void ExportWorker::run()
{
info_column.append("SOM_P=.");
}

}

if (somatic_count_to > 0)
{
info_column.append("SOM_TO_C=" + QByteArray::number(somatic_count_to));
}

//Add somatic VICC interpretation
if(db.getSomaticViccId(variant) != -1)
Expand Down Expand Up @@ -469,6 +477,8 @@ void ExportWorker::run()
//QTextStream(stdout) << "ExportWorker:error " << chr_ << " message:" << e.message() << endl;
emit error(chr_, e.message());
}


}


Expand Down
5 changes: 3 additions & 2 deletions src/NGSDExportAnnotationData/ThreadCoordinator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,9 @@ void ThreadCoordinator::writeSomaticVcf()
}

// write info column descriptions
vcf_stream << "##INFO=<ID=SOM_C,Number=1,Type=Integer,Description=\"Somatic variant count in the NGSD.\">\n";
vcf_stream << "##INFO=<ID=SOM_P,Number=.,Type=String,Description=\"Project names of project containing this somatic variant in the NGSD.\">\n";
vcf_stream << "##INFO=<ID=SOM_C,Number=1,Type=Integer,Description=\"Somatic variant count (tumor-normal) in the NGSD.\">\n";
vcf_stream << "##INFO=<ID=SOM_TO_C,Number=1,Type=Integer,Description=\"Somatic variant count (tumor-only) in the NGSD.\">\n";
vcf_stream << "##INFO=<ID=SOM_P,Number=.,Type=String,Description=\"Project names of project containing this somatic variant (tumor-normal) in the NGSD.\">\n";
vcf_stream << "##INFO=<ID=SOM_VICC,Number=1,Type=String,Description=\"Somatic variant interpretation according VICC standard in the NGSD.\">\n";
vcf_stream << "##INFO=<ID=SOM_VICC_COMMENT,Number=1,Type=String,Description=\"Somatic VICC interpretation comment in the NGSD.\">\n";
if(params_.vicc_config_details)
Expand Down
41 changes: 35 additions & 6 deletions src/tools-TEST/NGSDAddVariantsSomatic_Test.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ TEST_CLASS(NGSDAddVariantsSomatic_Test)
{
Q_OBJECT
private slots:
void test_addSmallVariants()
void test_small_variants()
{
if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!");

Expand All @@ -28,12 +28,38 @@ private slots:
S_EQUAL(table.row(2).asString(';'), "3;8;7;3;0.1254;639;330");

//force variant import
EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -n_ps DX184263_01 -var_force -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in1.GSvar"));
//should fail because variants already exist an var_force is unset
EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -n_ps DX184263_01 -force -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in1.GSvar"));
//should fail because variants already exist an force is unset
EXECUTE_FAIL("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -n_ps DX184263_01 -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in1.GSvar"));
}

void test_addCNvs()
void test_small_variants_tumor_only()
{
if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!");

NGSD db(true);
db.init();
db.executeQueriesFromFile(TESTDATA("data_in/NGSDAddVariantsSomatic_init.sql"));
EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in3.GSvar"));

S_EQUAL(db.variant("1").toString(), "chr2:178096717-178096717 T>C");
S_EQUAL(db.variant("2").toString(), "chr3:138456487-138456488 AT>-");
S_EQUAL(db.variant("3").toString(), "chr16:56870524-56870524 A>C");

//Check variant entries in detected_somatic_variants
DBTable table = db.createTable("test", "SELECT * FROM detected_somatic_variant");
I_EQUAL(table.rowCount(), 3);
S_EQUAL(table.row(0).asString(';'), "1;8;;1;0.1057;389;229");
S_EQUAL(table.row(1).asString(';'), "2;8;;2;0.1304;26;22");
S_EQUAL(table.row(2).asString(';'), "3;8;;3;0.1254;639;330");

//force variant import
EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -force -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in3.GSvar"));
//should fail because variants already exist an force is unset
EXECUTE_FAIL("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in3.GSvar"));
}

void test_cnvs()
{
if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!");

Expand All @@ -52,8 +78,11 @@ private slots:
//Cnvs already imported
EXECUTE_FAIL("NGSDAddVariantsSomatic", "-test -debug -no_time -t_ps DX184894_01 -n_ps DX184263_01 -cnv " + TESTDATA("data_in/NGSDAddVariantsSomatic_in2.tsv"));
//Cnvs already imported force
EXECUTE("NGSDAddVariantsSomatic", "-test -debug -no_time -cnv_force -t_ps DX184894_01 -n_ps DX184263_01 -cnv " + TESTDATA("data_in/NGSDAddVariantsSomatic_in2.tsv"));
EXECUTE("NGSDAddVariantsSomatic", "-test -debug -no_time -force -t_ps DX184894_01 -n_ps DX184263_01 -cnv " + TESTDATA("data_in/NGSDAddVariantsSomatic_in2.tsv"));
}


void test_svs()
{
//TODO Alexander
}
};
Loading

0 comments on commit f6ec58c

Please sign in to comment.