diff --git a/src/GSvar/BurdenTestWidget.cpp b/src/GSvar/BurdenTestWidget.cpp index 73d74a6f0..6fcdef13b 100644 --- a/src/GSvar/BurdenTestWidget.cpp +++ b/src/GSvar/BurdenTestWidget.cpp @@ -329,11 +329,9 @@ void BurdenTestWidget::validateInputData() { //skip samples which will be removed anyways if(s_ids_to_remove_cases.contains(s_id)) continue; - QSet same_samples = db_.sameSamples(s_id); - qDebug() << "same samples" << same_samples; + QSet same_samples = db_.sameSamples(s_id, SameSampleMode::SAME_PATIENT); //add sample itself same_samples.insert(s_id); - qDebug() << "same samples + self " << same_samples; QSet same_sample_overlap = same_samples & sample_ids_cases.keys().toSet(); if (same_sample_overlap.size() > 1) { @@ -379,7 +377,7 @@ void BurdenTestWidget::validateInputData() { //skip samples which will be removed anyways if(s_ids_to_remove_controls.contains(s_id)) continue; - QSet same_samples = db_.sameSamples(s_id); + QSet same_samples = db_.sameSamples(s_id, SameSampleMode::SAME_PATIENT); //add sample itself same_samples.insert(s_id); QSet same_sample_overlap = same_samples & sample_ids_controls.keys().toSet(); diff --git a/src/NGSDExportAnnotationData/ExportWorker.cpp b/src/NGSDExportAnnotationData/ExportWorker.cpp index 4bb8acb7f..73b0b12e7 100644 --- a/src/NGSDExportAnnotationData/ExportWorker.cpp +++ b/src/NGSDExportAnnotationData/ExportWorker.cpp @@ -143,7 +143,7 @@ void ExportWorker::run() { ++count_het; samples_done_het << info.s_id; - samples_done_het.unite(db.sameSamples(info.s_id)); + samples_done_het.unite(db.sameSamples(info.s_id, SameSampleMode::SAME_PATIENT)); if (info.affected) { @@ -154,7 +154,7 @@ void ExportWorker::run() { ++count_mosaic; samples_done_mosaic << info.s_id; - samples_done_mosaic.unite(db.sameSamples(info.s_id)); + samples_done_mosaic.unite(db.sameSamples(info.s_id, SameSampleMode::SAME_PATIENT)); } } @@ -163,7 +163,7 @@ void ExportWorker::run() { ++count_hom; samples_done_hom << info.s_id; - samples_done_hom.unite(db.sameSamples(info.s_id)); + samples_done_hom.unite(db.sameSamples(info.s_id, SameSampleMode::SAME_PATIENT)); if (info.affected) { diff --git a/src/NGSDSameSample/NGSDSameSample.pro b/src/NGSDSameSample/NGSDSameSample.pro new file mode 100644 index 000000000..23c6126bb --- /dev/null +++ b/src/NGSDSameSample/NGSDSameSample.pro @@ -0,0 +1,16 @@ + +TEMPLATE = app + +QT -= gui +QT += sql +CONFIG += console +CONFIG -= app_bundle + +SOURCES += main.cpp + +include("../app_cli.pri") + + +#include cppNGS library +INCLUDEPATH += $$PWD/../cppNGSD +LIBS += -L$$PWD/../bin -lcppNGSD diff --git a/src/NGSDSameSample/main.cpp b/src/NGSDSameSample/main.cpp new file mode 100644 index 000000000..8058a1cc3 --- /dev/null +++ b/src/NGSDSameSample/main.cpp @@ -0,0 +1,134 @@ +#include "ToolBase.h" +#include "NGSD.h" + +class ConcreteTool + : public ToolBase +{ + Q_OBJECT + +public: + ConcreteTool(int& argc, char *argv[]) + : ToolBase(argc, argv) + { + } + + + virtual void setup() + { + setDescription("Lists all processed samples from the NGSD of the same patient/sample to a given processed sample."); + setExtendedDescription(QStringList() << "Does not contain the provided process sample itself"); + addString("ps", "Processd sample name.", false); + //optional + addOutfile("out", "Output TSV file. If unset, writes to STDOUT.", true); + addString("sample_type", "Type(s) of samples (can be a comma-separated list).", true, ""); + addString("system_type", "Type(s) of processing system (can be a comma-separated list).", true, ""); + addString("system", "Processing system (short) name(s) (can be a comma-separated list).", true, ""); + addEnum("mode", "Type of relation (either only same-sample or same-patient (includes same-sample)", true, QStringList() << "SAME_SAMPLE" << "SAME_PATIENT", "SAME_PATIENT"); + addFlag("test", "Uses the test database instead of on the production database."); + + + changeLog(2023, 11, 15, "initial commit"); + } + + virtual void main() + { + //init + NGSD db(getFlag("test")); + QSharedPointer output = Helper::openFileForWriting(getOutfile("out"), true); + + QString ps_name = getString("ps").trimmed(); + int provided_ps_id = db.processedSampleId(ps_name).toInt(); + int provided_s_id = db.sampleId(ps_name).toInt(); + + //get filter parameter + QSet filter_sample_types = getString("sample_type").split(',').toSet(); + filter_sample_types.remove(""); + QSet filter_system_types = getString("system_type").split(',').toSet(); + filter_system_types.remove(""); + QSet filter_systems = getString("system").split(',').toSet(); + filter_systems.remove(""); + + //validate filter parameters + QStringList valid_sample_types = db.getEnum("sample", "sample_type"); + foreach (const QString& sample_type, filter_sample_types) + { + if (!valid_sample_types.contains(sample_type)) THROW(ArgumentException, "Invalid sample type '" + sample_type + "' provided!\n Valid sample types are: " + valid_sample_types.join(",")); + } + + QStringList valid_system_types = db.getEnum("processing_system", "type"); + foreach (const QString& system_type, filter_system_types) + { + if (!valid_system_types.contains(system_type)) THROW(ArgumentException, "Invalid processing system type '" + system_type + "' provided!\n Valid system types are: " + valid_system_types.join(",")); + } + QStringList valid_system_names = db.getValues("SELECT name_short FROM processing_system"); + foreach (const QString& system_name, filter_systems) + { + if (!valid_system_names.contains(system_name)) THROW(ArgumentException, "Invalid processing system (short) name '" + system_name + "' provided!"); + } + + //get same samples + SameSampleMode mode = (getEnum("mode")=="SAME_PATIENT") ? SameSampleMode::SAME_PATIENT : SameSampleMode::SAME_SAMPLE; + QSet same_samples = db.sameSamples(provided_s_id, mode); + // add provided sample id itself to report different processings + same_samples.insert(provided_s_id); + + //get processed samples + QStringList ps_table; + foreach (int s_id, same_samples) + { + SampleData s_data = db.getSampleData(QString::number(s_id)); + QList ps_ids = db.getValuesInt("SELECT id FROM processed_sample WHERE sample_id=:0", QString::number(s_id)); + foreach (int ps_id, ps_ids) + { + //skip sample itself: + if (ps_id == provided_ps_id) continue; + ProcessedSampleData ps_data = db.getProcessedSampleData(QString::number(ps_id)); + QDate run_start_date = db.getValue("SELECT start_date FROM sequencing_run WHERE name=:0", false, ps_data.run_name).toDate(); + QString sys_name_short = db.getValue("SELECT name_short FROM processing_system WHERE name_manufacturer=:0", false, ps_data.processing_system).toString(); + + //apply filter + if (!filter_sample_types.isEmpty() && !filter_sample_types.contains(s_data.type)) continue; + if (!filter_system_types.isEmpty() && !filter_system_types.contains(ps_data.processing_system_type)) continue; + if (!filter_systems.isEmpty() && !filter_systems.contains(sys_name_short)) continue; + + QStringList line; + line << ps_data.name; + line << s_data.type; + line << sys_name_short; + line << ps_data.processing_system_type; + line << ps_data.processing_system; + line << ps_data.run_name; + line << run_start_date.toString("dd.MM.yyyy"); + + ps_table << line.join("\t"); + } + } + + QStringList header_line; + header_line << "#processed_sample"; + header_line << "sample_type"; + header_line << "processing_system_type"; + header_line << "processing_system_name"; + header_line << "processing_system_name_short"; + header_line << "run_id"; + header_line << "run_date"; + + //sort by processed sample name + std::sort(ps_table.begin(), ps_table.end()); + + //write to output file + output->write(header_line.join("\t").toUtf8() + '\n'); + output->write(ps_table.join("\n").toUtf8()); + output->flush(); + output->close(); + + } +}; + +#include "main.moc" + +int main(int argc, char *argv[]) +{ + ConcreteTool tool(argc, argv); + return tool.execute(); +} diff --git a/src/cppNGSD-TEST/NGSD_Test.h b/src/cppNGSD-TEST/NGSD_Test.h index cc9cf2eeb..a4344cc0a 100644 --- a/src/cppNGSD-TEST/NGSD_Test.h +++ b/src/cppNGSD-TEST/NGSD_Test.h @@ -1411,26 +1411,38 @@ private slots: IS_THROWN(DatabaseException, db.addSampleRelation(SampleRelation{"NA12345", "siblings", "NA12878"}, true)); //sameSample - I_EQUAL(db.sameSamples(99).count(), 0); - I_EQUAL(db.sameSamples(2).count(), 2); - IS_TRUE(db.sameSamples(2).contains(4)); - IS_TRUE(db.sameSamples(2).contains(7)); - I_EQUAL(db.sameSamples(4).count(), 1); - IS_TRUE(db.sameSamples(4).contains(2)); - I_EQUAL(db.sameSamples(7).count(), 1); - IS_TRUE(db.sameSamples(7).contains(2)); + I_EQUAL(db.sameSamples(99, SameSampleMode::SAME_PATIENT).count(), 0); + I_EQUAL(db.sameSamples(2, SameSampleMode::SAME_PATIENT).count(), 3); + I_EQUAL(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).count(), 2); + IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_PATIENT).contains(4)); + IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_PATIENT).contains(7)); + IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_PATIENT).contains(8)); + IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).contains(4)); + IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).contains(8)); + IS_FALSE(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).contains(7)); + I_EQUAL(db.sameSamples(4, SameSampleMode::SAME_PATIENT).count(), 3); + IS_TRUE(db.sameSamples(4, SameSampleMode::SAME_PATIENT).contains(2)); + IS_TRUE(db.sameSamples(4, SameSampleMode::SAME_PATIENT).contains(7)); + IS_TRUE(db.sameSamples(4, SameSampleMode::SAME_PATIENT).contains(8)); + I_EQUAL(db.sameSamples(7, SameSampleMode::SAME_PATIENT).count(), 3); + IS_TRUE(db.sameSamples(7, SameSampleMode::SAME_PATIENT).contains(2)); + IS_TRUE(db.sameSamples(7, SameSampleMode::SAME_PATIENT).contains(4)); + IS_TRUE(db.sameSamples(7, SameSampleMode::SAME_PATIENT).contains(8)); //relatedSamples I_EQUAL(db.relatedSamples(99).count(), 0); I_EQUAL(db.relatedSamples(2).count(), 1); IS_TRUE(db.relatedSamples(2).contains(4)); - I_EQUAL(db.relatedSamples(4).count(), 1); + I_EQUAL(db.relatedSamples(4).count(), 2); IS_TRUE(db.relatedSamples(4).contains(2)); - I_EQUAL(db.relatedSamples(4, "same sample").count(), 1); + IS_TRUE(db.relatedSamples(4).contains(8)); + I_EQUAL(db.relatedSamples(4, "same sample").count(), 2); IS_TRUE(db.relatedSamples(4, "same sample").contains(2)); + IS_TRUE(db.relatedSamples(4, "same sample").contains(8)); I_EQUAL(db.relatedSamples(4, "twins").count(), 0); - I_EQUAL(db.relatedSamples(4, "same sample", "DNA").count(), 1); + I_EQUAL(db.relatedSamples(4, "same sample", "DNA").count(), 2); IS_TRUE(db.relatedSamples(4, "same sample", "DNA").contains(2)); + IS_TRUE(db.relatedSamples(4, "same sample", "DNA").contains(8)); //omimPreferredPhenotype S_EQUAL(db.omimPreferredPhenotype("BRCA1", "Neoplasms"), ""); diff --git a/src/cppNGSD-TEST/data_in/NGSD_in1.sql b/src/cppNGSD-TEST/data_in/NGSD_in1.sql index 1d1d1fd8b..4b6488076 100644 --- a/src/cppNGSD-TEST/data_in/NGSD_in1.sql +++ b/src/cppNGSD-TEST/data_in/NGSD_in1.sql @@ -26,7 +26,8 @@ INSERT INTO `sample` (`id`, `name`, `name_external`, `sample_type`, `species_id` (4, 'NA12123repeat', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s4', 'Neoplasms', 'Affected', 'n/a', NULL, NULL), (5, 'DX184894', 'ex5', 'DNA', 1, 'female', 'good', 1, 1, 1, 'comment_s5', 'Neoplasms', 'Affected', 'buccal mucosa', NULL, NULL), (6, 'DX184263', 'ex6', 'DNA', 1, 'female', 'good', 0, 0, 1, 'comment_s6', 'Neoplasms', 'Affected', 'skin', NULL, NULL), -(7, 'NA12123repeat2', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s4', 'Neoplasms', 'Affected', 'n/a', 'pat2', NULL); +(7, 'NA12123repeat2', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s4', 'Neoplasms', 'Affected', 'n/a', 'pat2', NULL), +(8, 'NA12123repeat3', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s8', 'Neoplasms', 'Affected', 'n/a', 'pat2', NULL); INSERT INTO `processing_system` (`id`, `name_short`, `name_manufacturer`, `adapter1_p5`, `adapter2_p7`, `type`, `shotgun`, `target_file`, `genome_id`) VALUES (1, 'hpHBOCv5', 'HaloPlex HBOC v5', 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC', 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT', 'Panel Haloplex', 0, 'hpHBOCv5.bed', 1), @@ -523,7 +524,8 @@ INSERT INTO `analysis_job_history`(`analysis_job_id`, `time`, `user_id`, `status (1, '2018-02-12T10:34:09', null, 'finished', 'warning: bla bla bla'); INSERT INTO `sample_relations`(`sample1_id`, `relation`, `sample2_id`) VALUES -(2, 'same sample', 4); +(2, 'same sample', 4), +(4, 'same sample', 8); INSERT INTO `sample_disease_info`(`id`, `sample_id`, `disease_info`, `type`, `user_id`) VALUES (1, 3, 'HP:0001251', 'HPO term id', 99), diff --git a/src/cppNGSD/NGSD.cpp b/src/cppNGSD/NGSD.cpp index d7fb0b861..99a60196d 100644 --- a/src/cppNGSD/NGSD.cpp +++ b/src/cppNGSD/NGSD.cpp @@ -787,51 +787,95 @@ QString NGSD::normalSample(const QString& processed_sample_id) return processedSampleName(value.toString()); } -const QSet& NGSD::sameSamples(int sample_id) +const QSet& NGSD::sameSamples(int sample_id, SameSampleMode mode) { static QSet empty_entry; - QHash>& same_samples = getCache().same_samples; + QHash>& same_samples = (mode == SameSampleMode::SAME_PATIENT)? getCache().same_patients : getCache().same_samples; + + //prepare iterative query + SqlQuery query_iterative = getQuery(); + query_iterative.prepare(QString("SELECT sample1_id, sample2_id FROM sample_relations WHERE (relation='same sample'") + ((mode == SameSampleMode::SAME_PATIENT)? " OR relation='same patient')": ")") + + " AND (sample1_id=:0 OR sample2_id=:0)"); //init if empty if (same_samples.isEmpty()) { //sample relation SqlQuery query = getQuery(); - query.exec("SELECT sample1_id, sample2_id FROM sample_relations WHERE relation='same sample' OR relation='same patient'"); + query.exec(QString("SELECT sample1_id FROM sample_relations WHERE relation='same sample'") + ((mode == SameSampleMode::SAME_PATIENT)? " OR relation='same patient'": "")); while (query.next()) { int sample1_id = query.value(0).toInt(); - int sample2_id = query.value(1).toInt(); - same_samples[sample1_id] << sample2_id; - same_samples[sample2_id] << sample1_id; - } - //same patient identifier - query.exec("SELECT id, patient_identifier FROM sample WHERE patient_identifier IS NOT NULL AND patient_identifier!=''"); - QHash> sample_ids_by_patient_id; - while (query.next()) - { - int sample_id = query.value(0).toInt(); - QString patient_identifier = query.value(1).toString().trimmed(); - if (patient_identifier.isEmpty()) continue; + //skip already checked samples + if (same_samples.contains(sample1_id)) continue; - sample_ids_by_patient_id[patient_identifier] << sample_id; + //look-up iteratively and get the same-sample cluster + QSet cluster; + cluster << sample1_id; + int n_ids = 0; + while(n_ids != cluster.size()) + { + //store current set size + n_ids = cluster.size(); + foreach (int id, cluster) + { + query_iterative.bindValue(0, id); + query_iterative.exec(); + while (query_iterative.next()) + { + cluster << query_iterative.value(0).toInt() << query_iterative.value(1).toInt(); + } + } + } + //set same samples for all samples of cluster (exclude key itself) + foreach (int id, cluster) + { + QSet current_cluster = cluster; + current_cluster.remove(id); + same_samples[id] = current_cluster; + } } - foreach(QString patient_id, sample_ids_by_patient_id.keys()) + + if (mode == SameSampleMode::SAME_PATIENT) { - QList& sample_ids = sample_ids_by_patient_id[patient_id]; + //same patient identifier + query.exec("SELECT id, patient_identifier FROM sample WHERE patient_identifier IS NOT NULL AND patient_identifier!=''"); + QHash> sample_ids_by_patient_id; + while (query.next()) + { + int sample_id = query.value(0).toInt(); + QString patient_identifier = query.value(1).toString().trimmed(); + if (patient_identifier.isEmpty()) continue; - for (int i=0; i& sample_ids = sample_ids_by_patient_id[patient_id]; + + //skip all patient ids with only 1 linked sample id + if (sample_ids.size() < 2) continue; + + //else: merge cluster + QSet combined_sample_ids; + foreach (int s_id, sample_ids) { - int sample1_id = sample_ids[i]; - int sample2_id = sample_ids[j]; - same_samples[sample1_id] << sample2_id; - same_samples[sample2_id] << sample1_id; + combined_sample_ids << s_id; + combined_sample_ids += same_samples[s_id]; + } + + //update each sample in the cluster + foreach (int id, combined_sample_ids) + { + QSet current_cluster = combined_sample_ids; + current_cluster.remove(id); + same_samples[id] = current_cluster; } } } + } if (same_samples.contains(sample_id)) @@ -1481,14 +1525,14 @@ GenotypeCounts NGSD::genotypeCounts(const QString& variant_id) ++c_het; samples_done_het << sample_id; - samples_done_het.unite(sameSamples(sample_id)); + samples_done_het.unite(sameSamples(sample_id, SameSampleMode::SAME_PATIENT)); } if (mosaic && !samples_done_mosaic.contains(sample_id)) { ++c_mosaic; samples_done_mosaic << sample_id; - samples_done_mosaic.unite(sameSamples(sample_id)); + samples_done_mosaic.unite(sameSamples(sample_id, SameSampleMode::SAME_PATIENT)); } } @@ -1497,7 +1541,7 @@ GenotypeCounts NGSD::genotypeCounts(const QString& variant_id) ++c_hom; samples_done_hom << sample_id; - samples_done_hom.unite(sameSamples(sample_id)); + samples_done_hom.unite(sameSamples(sample_id, SameSampleMode::SAME_PATIENT)); } } @@ -8954,6 +8998,7 @@ void NGSD::clearCache() cache_instance.table_infos.clear(); cache_instance.same_samples.clear(); + cache_instance.same_patients.clear(); cache_instance.related_samples.clear(); cache_instance.approved_gene_names.clear(); cache_instance.gene2id.clear(); diff --git a/src/cppNGSD/NGSD.h b/src/cppNGSD/NGSD.h index c0609837f..219f071a4 100644 --- a/src/cppNGSD/NGSD.h +++ b/src/cppNGSD/NGSD.h @@ -559,6 +559,13 @@ enum RnaCohortDeterminationStategy RNA_COHORT_CUSTOM //list of processed samples needs to be provided }; +///Same sample relation mode +enum SameSampleMode +{ + SAME_SAMPLE, //only consider samples from the same biological sample + SAME_PATIENT //consider samples from the same sample, patient or same patient id +}; + ///Custom structs for data exchange ///cfDNA disease course table @@ -936,8 +943,9 @@ Q_OBJECT ///Returns the normal processed sample corresponding to a tumor processed sample, or "" if no normal samples is defined. QString normalSample(const QString& processed_sample_id); - ///Returns the corresponding sample id(s) with relation 'same sample' or 'same patient'. Uses the cache to avoid database queries. - const QSet& sameSamples(int sample_id); + ///Returns the corresponding sample id(s) with relation 'same sample' (mode:SAME_SAMPLE) or 'same patient' and 'same patient' (mode: SAME_PATIENT). Uses the cache to avoid database queries. + /// (Does not contain the provided sample itself) + const QSet& sameSamples(int sample_id, SameSampleMode mode); ///Returns related sample id(s). Uses the cache to avoid database queries. const QSet& relatedSamples(int sample_id); ///Return a list of sample ids (not name) which have a (specific) relation of the given sample id. If relation is "", all relations are reported. @@ -1188,6 +1196,7 @@ Q_OBJECT QMap table_infos; QHash> same_samples; + QHash> same_patients; QHash> related_samples; GeneSet approved_gene_names; QHash gene2id; diff --git a/src/tools-TEST/NGSDSameSample.h b/src/tools-TEST/NGSDSameSample.h new file mode 100644 index 000000000..e0dc99e48 --- /dev/null +++ b/src/tools-TEST/NGSDSameSample.h @@ -0,0 +1,82 @@ +#include "TestFramework.h" +#include "Helper.h" +#include "Settings.h" +#include "NGSD.h" + +TEST_CLASS(NGSDSameSample_Test) +{ +Q_OBJECT +private slots: + + void same_sample() + { + if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); + + //init + NGSD db(true); + db.init(); + db.executeQueriesFromFile(TESTDATA("data_in/NGSDSameSample_init.sql")); + + //test + EXECUTE("NGSDSameSample", "-test -mode SAME_SAMPLE -ps NA12880_01 -out out/NGSDSameSample_out1.tsv"); + COMPARE_FILES("out/NGSDSameSample_out1.tsv", TESTDATA("data_out/NGSDSameSample_out1.tsv")); + } + + void same_patient() + { + if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); + + //init + NGSD db(true); + db.init(); + db.executeQueriesFromFile(TESTDATA("data_in/NGSDSameSample_init.sql")); + + //test + EXECUTE("NGSDSameSample", "-test -mode SAME_PATIENT -ps NA12880_01 -out out/NGSDSameSample_out2.tsv"); + COMPARE_FILES("out/NGSDSameSample_out2.tsv", TESTDATA("data_out/NGSDSameSample_out2.tsv")); + } + + void dna_only() + { + if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); + + //init + NGSD db(true); + db.init(); + db.executeQueriesFromFile(TESTDATA("data_in/NGSDSameSample_init.sql")); + + //test + EXECUTE("NGSDSameSample", "-test -sample_type DNA -ps NA12880_01 -out out/NGSDSameSample_out3.tsv"); + COMPARE_FILES("out/NGSDSameSample_out3.tsv", TESTDATA("data_out/NGSDSameSample_out3.tsv")); + } + + void only_wgs() + { + if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); + + //init + NGSD db(true); + db.init(); + db.executeQueriesFromFile(TESTDATA("data_in/NGSDSameSample_init.sql")); + + //test + EXECUTE("NGSDSameSample", "-test -system_type WGS,lrGS -ps NA12880_01 -out out/NGSDSameSample_out4.tsv"); + COMPARE_FILES("out/NGSDSameSample_out4.tsv", TESTDATA("data_out/NGSDSameSample_out4.tsv")); + } + + void only_nanopore() + { + if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); + + //init + NGSD db(true); + db.init(); + db.executeQueriesFromFile(TESTDATA("data_in/NGSDSameSample_init.sql")); + + //test + EXECUTE("NGSDSameSample", "-test -system SQK-114 -ps NA12880_01 -out out/NGSDSameSample_out5.tsv"); + COMPARE_FILES("out/NGSDSameSample_out5.tsv", TESTDATA("data_out/NGSDSameSample_out5.tsv")); + } + + +}; diff --git a/src/tools-TEST/data_in/NGSDSameSample_init.sql b/src/tools-TEST/data_in/NGSDSameSample_init.sql new file mode 100644 index 000000000..7a1374cea --- /dev/null +++ b/src/tools-TEST/data_in/NGSDSameSample_init.sql @@ -0,0 +1,56 @@ + +-- device +INSERT INTO device (id, type, name) VALUES +(1, 'HiSeq2500', 'Morpheus'); + +-- sequencing_run +INSERT INTO sequencing_run (id, name, fcid, device_id, recipe, quality, start_date) VALUES +(1, 'run1', 'ABC', 1, '100+8+8+100', 'good', "2021-01-01"), +(2, 'run2', 'XYZ', 1, '100+8+100', 'good', "2023-01-01"); + +-- user +INSERT INTO user (id, user_id, password, user_role, name, email, created, active) VALUES +(99, 'ahuser', 's2d12kjg234hla0830t6hp9h3tt3t3tsdfg', 'user', 'The user', 'u@s.er', NOW(), '1'); + +-- sender +INSERT INTO sender (id, name) VALUES +(1, 'Klaus-Erhard'); + +-- project +INSERT INTO project (id, name, type, internal_coordinator_id, analysis, archived) VALUES +(1, 'First_project', 'research', 1, 'variants', 0), +(2, 'Second_project', 'diagnostic', 1, 'variants', 1), +(3, 'Third_project', 'diagnostic', 1, 'variants', 0); + +-- processing_system +INSERT INTO processing_system (id, name_manufacturer, shotgun, name_short, genome_id, type) VALUES +(1, 'HaloPlex System', '1', 'hpSYSv1', 1, 'Panel'), +(2, 'SureSelect Human All Exon v5', '1', 'ssHAEv5', 1, 'WES'), +(3, 'TruSeq DNA PCR-free', '1', 'TruSeq', 1, 'WGS'), +(4, 'Nanopore v14', '1', 'SQK-114', 1, 'lrGS'), +(5, 'RNA ps', '1', 'rna', 1, 'RNA'); + +-- sample +INSERT INTO sample (id, name, sample_type, species_id, gender, tumor, ffpe, sender_id, quality, disease_group, disease_status, tissue, received, year_of_birth, patient_identifier) VALUES +(1, 'NA12878', 'DNA', 1, 'female', '0', '0', 1, 'good', 'Neoplasms', 'Affected', 'blood', '2023-07-13', 1977, "pat1"), +(2, 'NA12880', 'DNA', 1, 'female', '1', '0', 1, 'good', 'n/a', 'n/a', 'skin', NULL, NULL, NULL), +(3, 'NA12881', 'DNA', 1, 'female', '1', '0', 1, 'good', 'n/a', 'n/a', 'skin', NULL, NULL, NULL), +(4, 'lrGS12882', 'DNA', 1, 'female', '1', '0', 1, 'good', 'n/a', 'n/a', 'skin', NULL, NULL, "pat1"), +(5, 'RNA12883', 'RNA', 1, 'female', '1', '0', 1, 'good', 'n/a', 'n/a', 'skin', NULL, NULL, NULL); + +-- processed_sample +INSERT INTO processed_sample (id, sample_id, process_id, sequencing_run_id, lane, operator_id, processing_system_id, project_id, quality, normal_id) VALUES +(1, 1, 1, 1, 1, 2, 1, 1, 'bad', NULL), +(2, 1, 2, 2, 1, 2, 2, 2, 'n/a', NULL), +(3, 2, 1, 2, 1, 2, 2, 2, 'n/a', 2), +(4, 2, 2, 2, 1, 2, 2, 3, 'n/a', NULL), +(5, 3, 45, 1, 1, 1, 3, 1, 'good', NULL), +(6, 3, 46, 1, 1, 1, 3, 1, 'good', NULL), +(8, 4, 23, 2, 1, 1, 4, 1, 'good', NULL), +(9, 4, 28, 2, 1, 1, 4, 1, 'good', NULL), +(10, 5, 2, 1, 1, 1, 5, 1, 'good', NULL); + +INSERT INTO `sample_relations`(`sample1_id`, `relation`, `sample2_id`) VALUES +(2, 'same sample', 3), +(3, 'same patient', 4), +(4, 'same sample', 5); \ No newline at end of file diff --git a/src/tools-TEST/data_out/NGSDSameSample_out1.tsv b/src/tools-TEST/data_out/NGSDSameSample_out1.tsv new file mode 100644 index 000000000..4637c928d --- /dev/null +++ b/src/tools-TEST/data_out/NGSDSameSample_out1.tsv @@ -0,0 +1,4 @@ +#processed_sample sample_type processing_system_type processing_system_name processing_system_name_short run_id run_date +NA12880_02 DNA ssHAEv5 WES SureSelect Human All Exon v5 run2 01.01.2023 +NA12881_45 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +NA12881_46 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 \ No newline at end of file diff --git a/src/tools-TEST/data_out/NGSDSameSample_out2.tsv b/src/tools-TEST/data_out/NGSDSameSample_out2.tsv new file mode 100644 index 000000000..8415b4e6d --- /dev/null +++ b/src/tools-TEST/data_out/NGSDSameSample_out2.tsv @@ -0,0 +1,9 @@ +#processed_sample sample_type processing_system_type processing_system_name processing_system_name_short run_id run_date +NA12878_01 DNA hpSYSv1 Panel HaloPlex System run1 01.01.2021 +NA12878_02 DNA ssHAEv5 WES SureSelect Human All Exon v5 run2 01.01.2023 +NA12880_02 DNA ssHAEv5 WES SureSelect Human All Exon v5 run2 01.01.2023 +NA12881_45 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +NA12881_46 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +RNA12883_02 RNA rna RNA RNA ps run1 01.01.2021 +lrGS12882_23 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 +lrGS12882_28 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 \ No newline at end of file diff --git a/src/tools-TEST/data_out/NGSDSameSample_out3.tsv b/src/tools-TEST/data_out/NGSDSameSample_out3.tsv new file mode 100644 index 000000000..8f97bec34 --- /dev/null +++ b/src/tools-TEST/data_out/NGSDSameSample_out3.tsv @@ -0,0 +1,8 @@ +#processed_sample sample_type processing_system_type processing_system_name processing_system_name_short run_id run_date +NA12878_01 DNA hpSYSv1 Panel HaloPlex System run1 01.01.2021 +NA12878_02 DNA ssHAEv5 WES SureSelect Human All Exon v5 run2 01.01.2023 +NA12880_02 DNA ssHAEv5 WES SureSelect Human All Exon v5 run2 01.01.2023 +NA12881_45 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +NA12881_46 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +lrGS12882_23 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 +lrGS12882_28 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 \ No newline at end of file diff --git a/src/tools-TEST/data_out/NGSDSameSample_out4.tsv b/src/tools-TEST/data_out/NGSDSameSample_out4.tsv new file mode 100644 index 000000000..c35553a9a --- /dev/null +++ b/src/tools-TEST/data_out/NGSDSameSample_out4.tsv @@ -0,0 +1,5 @@ +#processed_sample sample_type processing_system_type processing_system_name processing_system_name_short run_id run_date +NA12881_45 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +NA12881_46 DNA TruSeq WGS TruSeq DNA PCR-free run1 01.01.2021 +lrGS12882_23 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 +lrGS12882_28 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 \ No newline at end of file diff --git a/src/tools-TEST/data_out/NGSDSameSample_out5.tsv b/src/tools-TEST/data_out/NGSDSameSample_out5.tsv new file mode 100644 index 000000000..fec174fcf --- /dev/null +++ b/src/tools-TEST/data_out/NGSDSameSample_out5.tsv @@ -0,0 +1,3 @@ +#processed_sample sample_type processing_system_type processing_system_name processing_system_name_short run_id run_date +lrGS12882_23 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 +lrGS12882_28 DNA SQK-114 lrGS Nanopore v14 run2 01.01.2023 \ No newline at end of file diff --git a/src/tools-TEST/tools-TEST.pro b/src/tools-TEST/tools-TEST.pro index 4fcdae57b..625f1f22e 100644 --- a/src/tools-TEST/tools-TEST.pro +++ b/src/tools-TEST/tools-TEST.pro @@ -30,6 +30,7 @@ HEADERS += NGSDAddVariantsSomatic_Test.h \ FastqCheckUMI.h \ BedpeExtractGenotype_Test.h \ BedpeExtractInfoField_Test.h \ + NGSDSameSample.h \ SampleAncestry_Test.h \ SvFilterAnnotations_Test.h \ UpdHunter_Test.h \ diff --git a/src/tools.pro b/src/tools.pro index 8da376ae2..6a20352b4 100644 --- a/src/tools.pro +++ b/src/tools.pro @@ -566,3 +566,7 @@ SamplePath.depends = cppNGS SUBDIRS += BedpeExtractInfoField tools-TEST.depends += BedpeExtractInfoField SamplePath.depends = cppNGS + +SUBDIRS += NGSDSameSample +tools-TEST.depends += NGSDSameSample +SamplePath.depends = cppNGSD