Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/imgag/ngs-bits
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvinDo committed Nov 17, 2023
2 parents 13a351b + 9bc95e2 commit ecd7a94
Show file tree
Hide file tree
Showing 17 changed files with 437 additions and 49 deletions.
6 changes: 2 additions & 4 deletions src/GSvar/BurdenTestWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,9 @@ void BurdenTestWidget::validateInputData()
{
//skip samples which will be removed anyways
if(s_ids_to_remove_cases.contains(s_id)) continue;
QSet<int> same_samples = db_.sameSamples(s_id);
qDebug() << "same samples" << same_samples;
QSet<int> same_samples = db_.sameSamples(s_id, SameSampleMode::SAME_PATIENT);
//add sample itself
same_samples.insert(s_id);
qDebug() << "same samples + self " << same_samples;
QSet<int> same_sample_overlap = same_samples & sample_ids_cases.keys().toSet();
if (same_sample_overlap.size() > 1)
{
Expand Down Expand Up @@ -379,7 +377,7 @@ void BurdenTestWidget::validateInputData()
{
//skip samples which will be removed anyways
if(s_ids_to_remove_controls.contains(s_id)) continue;
QSet<int> same_samples = db_.sameSamples(s_id);
QSet<int> same_samples = db_.sameSamples(s_id, SameSampleMode::SAME_PATIENT);
//add sample itself
same_samples.insert(s_id);
QSet<int> same_sample_overlap = same_samples & sample_ids_controls.keys().toSet();
Expand Down
6 changes: 3 additions & 3 deletions src/NGSDExportAnnotationData/ExportWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void ExportWorker::run()
{
++count_het;
samples_done_het << info.s_id;
samples_done_het.unite(db.sameSamples(info.s_id));
samples_done_het.unite(db.sameSamples(info.s_id, SameSampleMode::SAME_PATIENT));

if (info.affected)
{
Expand All @@ -154,7 +154,7 @@ void ExportWorker::run()
{
++count_mosaic;
samples_done_mosaic << info.s_id;
samples_done_mosaic.unite(db.sameSamples(info.s_id));
samples_done_mosaic.unite(db.sameSamples(info.s_id, SameSampleMode::SAME_PATIENT));
}
}

Expand All @@ -163,7 +163,7 @@ void ExportWorker::run()
{
++count_hom;
samples_done_hom << info.s_id;
samples_done_hom.unite(db.sameSamples(info.s_id));
samples_done_hom.unite(db.sameSamples(info.s_id, SameSampleMode::SAME_PATIENT));

if (info.affected)
{
Expand Down
16 changes: 16 additions & 0 deletions src/NGSDSameSample/NGSDSameSample.pro
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

#qmake project file of the NGSDSameSample command-line tool
TEMPLATE = app

#console application: no Qt GUI module, but the Qt SQL module is needed
QT -= gui
QT += sql
CONFIG += console
#do not create an application bundle
CONFIG -= app_bundle

SOURCES += main.cpp

#shared project settings (presumably common to all command-line tools - see app_cli.pri)
include("../app_cli.pri")


#include cppNGSD library
INCLUDEPATH += $$PWD/../cppNGSD
LIBS += -L$$PWD/../bin -lcppNGSD
134 changes: 134 additions & 0 deletions src/NGSDSameSample/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include "ToolBase.h"
#include "NGSD.h"

///Command-line tool that lists the processed samples of the NGSD that belong to the same sample/patient as a given processed sample.
class ConcreteTool
		: public ToolBase
{
	Q_OBJECT

public:
	ConcreteTool(int& argc, char *argv[])
		: ToolBase(argc, argv)
	{
	}

	///Declares description, command-line parameters and change log of the tool.
	virtual void setup()
	{
		setDescription("Lists all processed samples from the NGSD of the same patient/sample to a given processed sample.");
		setExtendedDescription(QStringList() << "Does not contain the provided processed sample itself");
		addString("ps", "Processed sample name.", false);
		//optional
		addOutfile("out", "Output TSV file. If unset, writes to STDOUT.", true);
		addString("sample_type", "Type(s) of samples (can be a comma-separated list).", true, "");
		addString("system_type", "Type(s) of processing system (can be a comma-separated list).", true, "");
		addString("system", "Processing system (short) name(s) (can be a comma-separated list).", true, "");
		addEnum("mode", "Type of relation (either only same-sample or same-patient (includes same-sample))", true, QStringList() << "SAME_SAMPLE" << "SAME_PATIENT", "SAME_PATIENT");
		addFlag("test", "Uses the test database instead of on the production database.");


		changeLog(2023, 11, 15, "initial commit");
	}

	///Validates the filter parameters, collects the matching processed samples and writes them as TSV.
	///Throws an ArgumentException if a sample type, processing system type or processing system name is not known to the NGSD.
	virtual void main()
	{
		//init
		NGSD db(getFlag("test"));
		QSharedPointer<QFile> output = Helper::openFileForWriting(getOutfile("out"), true);

		QString ps_name = getString("ps").trimmed();
		int provided_ps_id = db.processedSampleId(ps_name).toInt();
		int provided_s_id = db.sampleId(ps_name).toInt();

		//get filter parameter (splitting an empty string yields one empty entry, which is removed again)
		QSet<QString> filter_sample_types = getString("sample_type").split(',').toSet();
		filter_sample_types.remove("");
		QSet<QString> filter_system_types = getString("system_type").split(',').toSet();
		filter_system_types.remove("");
		QSet<QString> filter_systems = getString("system").split(',').toSet();
		filter_systems.remove("");

		//validate filter parameters
		QStringList valid_sample_types = db.getEnum("sample", "sample_type");
		foreach (const QString& sample_type, filter_sample_types)
		{
			if (!valid_sample_types.contains(sample_type)) THROW(ArgumentException, "Invalid sample type '" + sample_type + "' provided!\n Valid sample types are: " + valid_sample_types.join(","));
		}

		QStringList valid_system_types = db.getEnum("processing_system", "type");
		foreach (const QString& system_type, filter_system_types)
		{
			if (!valid_system_types.contains(system_type)) THROW(ArgumentException, "Invalid processing system type '" + system_type + "' provided!\n Valid system types are: " + valid_system_types.join(","));
		}
		QStringList valid_system_names = db.getValues("SELECT name_short FROM processing_system");
		foreach (const QString& system_name, filter_systems)
		{
			if (!valid_system_names.contains(system_name)) THROW(ArgumentException, "Invalid processing system (short) name '" + system_name + "' provided!");
		}

		//get same samples
		SameSampleMode mode = (getEnum("mode")=="SAME_PATIENT") ? SameSampleMode::SAME_PATIENT : SameSampleMode::SAME_SAMPLE;
		QSet<int> same_samples = db.sameSamples(provided_s_id, mode);
		// add provided sample id itself to report different processings
		same_samples.insert(provided_s_id);

		//get processed samples
		QStringList ps_table;
		foreach (int s_id, same_samples)
		{
			SampleData s_data = db.getSampleData(QString::number(s_id));

			//the sample type is the same for all processed samples of a sample > filter before querying them
			if (!filter_sample_types.isEmpty() && !filter_sample_types.contains(s_data.type)) continue;

			QList<int> ps_ids = db.getValuesInt("SELECT id FROM processed_sample WHERE sample_id=:0", QString::number(s_id));
			foreach (int ps_id, ps_ids)
			{
				//skip sample itself:
				if (ps_id == provided_ps_id) continue;

				//apply each filter as soon as the required data is available to avoid unnecessary queries
				ProcessedSampleData ps_data = db.getProcessedSampleData(QString::number(ps_id));
				if (!filter_system_types.isEmpty() && !filter_system_types.contains(ps_data.processing_system_type)) continue;

				QString sys_name_short = db.getValue("SELECT name_short FROM processing_system WHERE name_manufacturer=:0", false, ps_data.processing_system).toString();
				if (!filter_systems.isEmpty() && !filter_systems.contains(sys_name_short)) continue;

				QDate run_start_date = db.getValue("SELECT start_date FROM sequencing_run WHERE name=:0", false, ps_data.run_name).toDate();

				//NOTE: the column order has to match the header below
				QStringList line;
				line << ps_data.name;
				line << s_data.type;
				line << sys_name_short;
				line << ps_data.processing_system_type;
				line << ps_data.processing_system;
				line << ps_data.run_name;
				line << run_start_date.toString("dd.MM.yyyy");

				ps_table << line.join("\t");
			}
		}

		//header (same column order as the data lines above)
		QStringList header_line;
		header_line << "#processed_sample";
		header_line << "sample_type";
		header_line << "processing_system_name_short";
		header_line << "processing_system_type";
		header_line << "processing_system_name";
		header_line << "run_id";
		header_line << "run_date";

		//sort by processed sample name (first column of each line)
		std::sort(ps_table.begin(), ps_table.end());

		//write to output file
		output->write(header_line.join("\t").toUtf8() + '\n');
		output->write(ps_table.join("\n").toUtf8());
		output->flush();
		output->close();

	}
};

#include "main.moc"

int main(int argc, char *argv[])
{
ConcreteTool tool(argc, argv);
return tool.execute();
}
34 changes: 23 additions & 11 deletions src/cppNGSD-TEST/NGSD_Test.h
Original file line number Diff line number Diff line change
Expand Up @@ -1411,26 +1411,38 @@ private slots:
IS_THROWN(DatabaseException, db.addSampleRelation(SampleRelation{"NA12345", "siblings", "NA12878"}, true));

//sameSample
I_EQUAL(db.sameSamples(99).count(), 0);
I_EQUAL(db.sameSamples(2).count(), 2);
IS_TRUE(db.sameSamples(2).contains(4));
IS_TRUE(db.sameSamples(2).contains(7));
I_EQUAL(db.sameSamples(4).count(), 1);
IS_TRUE(db.sameSamples(4).contains(2));
I_EQUAL(db.sameSamples(7).count(), 1);
IS_TRUE(db.sameSamples(7).contains(2));
I_EQUAL(db.sameSamples(99, SameSampleMode::SAME_PATIENT).count(), 0);
I_EQUAL(db.sameSamples(2, SameSampleMode::SAME_PATIENT).count(), 3);
I_EQUAL(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).count(), 2);
IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_PATIENT).contains(4));
IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_PATIENT).contains(7));
IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_PATIENT).contains(8));
IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).contains(4));
IS_TRUE(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).contains(8));
IS_FALSE(db.sameSamples(2, SameSampleMode::SAME_SAMPLE).contains(7));
I_EQUAL(db.sameSamples(4, SameSampleMode::SAME_PATIENT).count(), 3);
IS_TRUE(db.sameSamples(4, SameSampleMode::SAME_PATIENT).contains(2));
IS_TRUE(db.sameSamples(4, SameSampleMode::SAME_PATIENT).contains(7));
IS_TRUE(db.sameSamples(4, SameSampleMode::SAME_PATIENT).contains(8));
I_EQUAL(db.sameSamples(7, SameSampleMode::SAME_PATIENT).count(), 3);
IS_TRUE(db.sameSamples(7, SameSampleMode::SAME_PATIENT).contains(2));
IS_TRUE(db.sameSamples(7, SameSampleMode::SAME_PATIENT).contains(4));
IS_TRUE(db.sameSamples(7, SameSampleMode::SAME_PATIENT).contains(8));

//relatedSamples
I_EQUAL(db.relatedSamples(99).count(), 0);
I_EQUAL(db.relatedSamples(2).count(), 1);
IS_TRUE(db.relatedSamples(2).contains(4));
I_EQUAL(db.relatedSamples(4).count(), 1);
I_EQUAL(db.relatedSamples(4).count(), 2);
IS_TRUE(db.relatedSamples(4).contains(2));
I_EQUAL(db.relatedSamples(4, "same sample").count(), 1);
IS_TRUE(db.relatedSamples(4).contains(8));
I_EQUAL(db.relatedSamples(4, "same sample").count(), 2);
IS_TRUE(db.relatedSamples(4, "same sample").contains(2));
IS_TRUE(db.relatedSamples(4, "same sample").contains(8));
I_EQUAL(db.relatedSamples(4, "twins").count(), 0);
I_EQUAL(db.relatedSamples(4, "same sample", "DNA").count(), 1);
I_EQUAL(db.relatedSamples(4, "same sample", "DNA").count(), 2);
IS_TRUE(db.relatedSamples(4, "same sample", "DNA").contains(2));
IS_TRUE(db.relatedSamples(4, "same sample", "DNA").contains(8));

//omimPreferredPhenotype
S_EQUAL(db.omimPreferredPhenotype("BRCA1", "Neoplasms"), "");
Expand Down
6 changes: 4 additions & 2 deletions src/cppNGSD-TEST/data_in/NGSD_in1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ INSERT INTO `sample` (`id`, `name`, `name_external`, `sample_type`, `species_id`
(4, 'NA12123repeat', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s4', 'Neoplasms', 'Affected', 'n/a', NULL, NULL),
(5, 'DX184894', 'ex5', 'DNA', 1, 'female', 'good', 1, 1, 1, 'comment_s5', 'Neoplasms', 'Affected', 'buccal mucosa', NULL, NULL),
(6, 'DX184263', 'ex6', 'DNA', 1, 'female', 'good', 0, 0, 1, 'comment_s6', 'Neoplasms', 'Affected', 'skin', NULL, NULL),
(7, 'NA12123repeat2', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s4', 'Neoplasms', 'Affected', 'n/a', 'pat2', NULL);
(7, 'NA12123repeat2', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s4', 'Neoplasms', 'Affected', 'n/a', 'pat2', NULL),
(8, 'NA12123repeat3', 'ex4', 'DNA', 1, 'female', 'good', 0 ,0, 1, 'comment_s8', 'Neoplasms', 'Affected', 'n/a', 'pat2', NULL);

INSERT INTO `processing_system` (`id`, `name_short`, `name_manufacturer`, `adapter1_p5`, `adapter2_p7`, `type`, `shotgun`, `target_file`, `genome_id`) VALUES
(1, 'hpHBOCv5', 'HaloPlex HBOC v5', 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC', 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT', 'Panel Haloplex', 0, 'hpHBOCv5.bed', 1),
Expand Down Expand Up @@ -523,7 +524,8 @@ INSERT INTO `analysis_job_history`(`analysis_job_id`, `time`, `user_id`, `status
(1, '2018-02-12T10:34:09', null, 'finished', 'warning: bla bla bla');

INSERT INTO `sample_relations`(`sample1_id`, `relation`, `sample2_id`) VALUES
(2, 'same sample', 4);
(2, 'same sample', 4),
(4, 'same sample', 8);

INSERT INTO `sample_disease_info`(`id`, `sample_id`, `disease_info`, `type`, `user_id`) VALUES
(1, 3, 'HP:0001251', 'HPO term id', 99),
Expand Down
Loading

0 comments on commit ecd7a94

Please sign in to comment.