diff --git a/src/NGSDAddVariantsSomatic/main.cpp b/src/NGSDAddVariantsSomatic/main.cpp index 1ff1d2a67..175469251 100644 --- a/src/NGSDAddVariantsSomatic/main.cpp +++ b/src/NGSDAddVariantsSomatic/main.cpp @@ -22,14 +22,12 @@ class ConcreteTool { setDescription("Imports variants of a tumor-normal processed sample into the NGSD."); addString("t_ps", "Tumor processed sample name", false); - addString("n_ps", "Normal processed sample name", false); //optional + addString("n_ps", "Normal processed sample name", true); addInfile("var", "Small variant list (i.e. SNVs and small INDELs) in GSvar format (as produced by megSAP).", true, true); - addFlag("var_force", "Force import of detected small variants, even if already imported."); addInfile("cnv", "CNV list in TSV format (as produced by megSAP).", true, true); - addFlag("cnv_force", "Force import of CNVs, even if already imported."); addInfile("sv", "SV list in TSV format (as produced by megSAP).", true, true); - addFlag("sv_force", "Force import of SVs, even if already imported."); + addFlag("force", "Force import of variants, even if already imported."); addOutfile("out", "Output file. If unset, writes to STDOUT.", true); addFlag("test", "Uses the test database instead of on the production database."); addFlag("debug", "Enable verbose debug output."); @@ -42,26 +40,30 @@ class ConcreteTool QString filename = getInfile("var"); if(filename=="") return; - QString ps_full_name = t_ps_name + "-" + n_ps_name; + bool is_tumor_only = n_ps_name.isEmpty(); + QString analysis_name = t_ps_name + (is_tumor_only ? "" : "-" + n_ps_name); out << endl; - out << "### importing small variants for " << ps_full_name << " ###" << endl; + out << "### importing small variants for " << analysis_name << " ###" << endl; out << "filename: " << filename << endl; QString t_ps_id = db.processedSampleId(t_ps_name); - QString n_ps_id = db.processedSampleId(n_ps_name); - - int report_conf_id = db.somaticReportConfigId(t_ps_id, n_ps_id); + QString n_ps_id = is_tumor_only ? "" : db.processedSampleId(n_ps_name); + QString dv_where = "processed_sample_id_tumor=" + t_ps_id + " AND processed_sample_id_normal"+(is_tumor_only ? " IS NULL" : "=" + n_ps_id); - //DO NOT IMPORT Anything if a report config exists and contains small variants - if(report_conf_id != -1) + //do not anything if a report config exists and contains small variants + if (!is_tumor_only) { - SqlQuery query = db.getQuery(); - query.exec("SELECT * FROM somatic_report_configuration_variant WHERE somatic_report_configuration_id=" + QString::number(report_conf_id)); - if(query.size()>0) + int report_conf_id = db.somaticReportConfigId(t_ps_id, n_ps_id); + if(report_conf_id != -1) { - out << "Skipped import of small variants for sample " << ps_full_name << ": a somatic report configuration with small variants exists for this sample!" << endl; - return; + SqlQuery query = db.getQuery(); + query.exec("SELECT * FROM somatic_report_configuration_variant WHERE somatic_report_configuration_id=" + QString::number(report_conf_id)); + if(query.size()>0) + { + out << "Skipped import of small variants for sample " << analysis_name << ": a somatic report configuration with small variants exists for this sample!" << endl; + return; + } } } @@ -70,11 +72,11 @@ class ConcreteTool QTime sub_timer; QStringList sub_times; - int count_old = db.getValue("SELECT count(*) FROM detected_somatic_variant WHERE processed_sample_id_tumor=" + t_ps_id + " AND processed_sample_id_normal=" + n_ps_id).toInt(); + int count_old = db.getValue("SELECT count(*) FROM detected_somatic_variant WHERE "+dv_where).toInt(); out << "Found " << count_old << " variants already imported into NGSD!" << endl; if(count_old>0 && !var_force) { - THROW(ArgumentException, "Variants were already imported for '" + ps_full_name + "'. Use the flag '-var_force' to overwrite them."); + THROW(ArgumentException, "Variants were already imported for '" + analysis_name + "'. Use the flag '-force' to overwrite them."); } //Remove old variants @@ -84,7 +86,7 @@ class ConcreteTool sub_timer.start(); SqlQuery query = db.getQuery(); - query.exec("DELETE FROM detected_somatic_variant WHERE processed_sample_id_tumor=" + t_ps_id +" AND processed_sample_id_normal=" + n_ps_id); + query.exec("DELETE FROM detected_somatic_variant WHERE "+dv_where); out << "Deleted previous somatic variants." << endl; sub_times << ("Deleted previous detected somatic variants took: " + Helper::elapsedTime(sub_timer)); } @@ -114,7 +116,7 @@ class ConcreteTool int i_qual = variants.annotationIndexByName("quality"); SqlQuery q_insert = db.getQuery(); - q_insert.prepare("INSERT INTO detected_somatic_variant (processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth, quality_snp) VALUES (" + t_ps_id +", "+ n_ps_id +", :0, :1, :2, :3)"); + q_insert.prepare("INSERT INTO detected_somatic_variant (processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth, quality_snp) VALUES (" + t_ps_id +", " + (is_tumor_only ? "NULL" : n_ps_id) + ", :0, :1, :2, :3)"); db.transaction(); for(int i=0; i vcf_file = Helper::openFileForWriting(tmp_vcf, true); @@ -308,31 +309,36 @@ void ExportWorker::run() //process variants tmp_timer.start(); + int somatic_count_to = 0; + QSet s_ids_to_done; QMap project_map; - QSet processed_ps_ids; - QSet processed_s_ids; + QSet s_ids_done; while(ngsd_count_query.next()) { - QByteArray current_sample = ngsd_count_query.value(0).toByteArray(); - QByteArray current_ps_id = ngsd_count_query.value(1).toByteArray(); - QByteArray current_project = ngsd_count_query.value(2).toByteArray(); - - //skip already seen processed samples - // (there could be several variants because of indel window, - // but we want to process only one) - if (processed_ps_ids.contains(current_ps_id)) continue; - processed_ps_ids.insert(current_ps_id); - - //skip already seen samples for general statistics - // (there could be several processings of the same sample because of - // different processing systems or because of experment repeats due to - // quality issues) - if (processed_s_ids.contains(current_sample)) continue; - processed_s_ids.insert(current_sample); - - // count - if(!project_map.contains(current_project)) project_map.insert(current_project,0); - ++project_map[current_project]; + int current_sample = ngsd_count_query.value(0).toInt(); + + bool is_tumor_normal = !ngsd_count_query.value(3).isNull(); + QTextStream(stderr) << variant.toString() << " " << is_tumor_normal << " " << current_sample << endl; + if (is_tumor_normal) + { + //skip already seen samples for general statistics (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues) + if (s_ids_done.contains(current_sample)) continue; + s_ids_done.insert(current_sample); + + // count + QByteArray current_project = ngsd_count_query.value(2).toByteArray(); + if(!project_map.contains(current_project)) project_map.insert(current_project,0); + ++project_map[current_project]; + } + else + { + //skip already seen samples for general statistics (there could be several processings of the same sample because of different processing systems or because of experment repeats due to quality issues) + if (s_ids_to_done.contains(current_sample)) continue; + s_ids_to_done.insert(current_sample); + + ++somatic_count_to; + QTextStream(stderr) << somatic_count_to << endl; + } } // calculate somatic count @@ -357,9 +363,11 @@ void ExportWorker::run() { info_column.append("SOM_P=."); } - } - + if (somatic_count_to > 0) + { + info_column.append("SOM_TO_C=" + QByteArray::number(somatic_count_to)); + } //Add somatic VICC interpretation if(db.getSomaticViccId(variant) != -1) @@ -469,6 +477,8 @@ void ExportWorker::run() //QTextStream(stdout) << "ExportWorker:error " << chr_ << " message:" << e.message() << endl; emit error(chr_, e.message()); } + + } diff --git a/src/NGSDExportAnnotationData/ThreadCoordinator.cpp b/src/NGSDExportAnnotationData/ThreadCoordinator.cpp index c158e1fa2..16e1b2afc 100644 --- a/src/NGSDExportAnnotationData/ThreadCoordinator.cpp +++ b/src/NGSDExportAnnotationData/ThreadCoordinator.cpp @@ -166,8 +166,9 @@ void ThreadCoordinator::writeSomaticVcf() } // write info column descriptions - vcf_stream << "##INFO=\n"; - vcf_stream << "##INFO=\n"; + vcf_stream << "##INFO=\n"; + vcf_stream << "##INFO=\n"; + vcf_stream << "##INFO=\n"; vcf_stream << "##INFO=\n"; vcf_stream << "##INFO=\n"; if(params_.vicc_config_details) diff --git a/src/tools-TEST/NGSDAddVariantsSomatic_Test.h b/src/tools-TEST/NGSDAddVariantsSomatic_Test.h index 531a000fc..5e75bd19f 100644 --- a/src/tools-TEST/NGSDAddVariantsSomatic_Test.h +++ b/src/tools-TEST/NGSDAddVariantsSomatic_Test.h @@ -7,7 +7,7 @@ TEST_CLASS(NGSDAddVariantsSomatic_Test) { Q_OBJECT private slots: - void test_addSmallVariants() + void test_small_variants() { if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); @@ -28,12 +28,38 @@ private slots: S_EQUAL(table.row(2).asString(';'), "3;8;7;3;0.1254;639;330"); //force variant import - EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -n_ps DX184263_01 -var_force -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in1.GSvar")); - //should fail because variants already exist an var_force is unset + EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -n_ps DX184263_01 -force -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in1.GSvar")); + //should fail because variants already exist an force is unset EXECUTE_FAIL("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -n_ps DX184263_01 -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in1.GSvar")); } - void test_addCNvs() + void test_small_variants_tumor_only() + { + if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); + + NGSD db(true); + db.init(); + db.executeQueriesFromFile(TESTDATA("data_in/NGSDAddVariantsSomatic_init.sql")); + EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in3.GSvar")); + + S_EQUAL(db.variant("1").toString(), "chr2:178096717-178096717 T>C"); + S_EQUAL(db.variant("2").toString(), "chr3:138456487-138456488 AT>-"); + S_EQUAL(db.variant("3").toString(), "chr16:56870524-56870524 A>C"); + + //Check variant entries in detected_somatic_variants + DBTable table = db.createTable("test", "SELECT * FROM detected_somatic_variant"); + I_EQUAL(table.rowCount(), 3); + S_EQUAL(table.row(0).asString(';'), "1;8;;1;0.1057;389;229"); + S_EQUAL(table.row(1).asString(';'), "2;8;;2;0.1304;26;22"); + S_EQUAL(table.row(2).asString(';'), "3;8;;3;0.1254;639;330"); + + //force variant import + EXECUTE("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -force -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in3.GSvar")); + //should fail because variants already exist an force is unset + EXECUTE_FAIL("NGSDAddVariantsSomatic", "-test -no_time -t_ps DX184894_01 -var " + TESTDATA("data_in/NGSDAddVariantsSomatic_in3.GSvar")); + } + + void test_cnvs() { if (!NGSD::isAvailable(true)) SKIP("Test needs access to the NGSD test database!"); @@ -52,8 +78,11 @@ private slots: //Cnvs already imported EXECUTE_FAIL("NGSDAddVariantsSomatic", "-test -debug -no_time -t_ps DX184894_01 -n_ps DX184263_01 -cnv " + TESTDATA("data_in/NGSDAddVariantsSomatic_in2.tsv")); //Cnvs already imported force - EXECUTE("NGSDAddVariantsSomatic", "-test -debug -no_time -cnv_force -t_ps DX184894_01 -n_ps DX184263_01 -cnv " + TESTDATA("data_in/NGSDAddVariantsSomatic_in2.tsv")); + EXECUTE("NGSDAddVariantsSomatic", "-test -debug -no_time -force -t_ps DX184894_01 -n_ps DX184263_01 -cnv " + TESTDATA("data_in/NGSDAddVariantsSomatic_in2.tsv")); } - + void test_svs() + { + //TODO Alexander + } }; diff --git a/src/tools-TEST/data_in/NGSDAddVariantsSomatic_in3.GSvar b/src/tools-TEST/data_in/NGSDAddVariantsSomatic_in3.GSvar new file mode 100644 index 000000000..b45e0d9a6 --- /dev/null +++ b/src/tools-TEST/data_in/NGSDAddVariantsSomatic_in3.GSvar @@ -0,0 +1,65 @@ +##ANALYSISTYPE=SOMATIC_PAIR +##PIPELINE=megSAP 0.1-835-g57035c2 +##SAMPLE= +##SAMPLE= +##DESCRIPTION=filter=Annotations for filtering and ranking variants. +##DESCRIPTION=quality=Quality parameters - variant quality (QUAL), depth (DP), allele frequency (AF), mean mapping quality of alternate allele (MQM). +##DESCRIPTION=gene=Affected gene list (comma-separated). +##DESCRIPTION=variant_type=Variant type. +##DESCRIPTION=coding_and_splicing=Coding and splicing details (Gene, ENST number, type, impact, exon/intron number, HGVS.c, HGVS.p, Pfam domain). +##DESCRIPTION=OMIM=OMIM database annotation. +##DESCRIPTION=ClinVar=ClinVar database annotation. +##DESCRIPTION=HGMD=HGMD database annotation. +##DESCRIPTION=RepeatMasker=RepeatMasker annotation. +##DESCRIPTION=dbSNP=Identifier in dbSNP database. +##DESCRIPTION=1000g=Allele frequency in 1000 genomes project. +##DESCRIPTION=gnomAD=Allele frequency in gnomAD project. +##DESCRIPTION=gnomAD_hom_hemi=Homoyzgous counts and hemizygous counts of gnomAD project (genome data). +##DESCRIPTION=gnomAD_sub=Sub-population allele frequenciens (AFR,AMR,EAS,NFE,SAS) in gnomAD project. +##DESCRIPTION=ESP_sub=Sub-population allele frequency (EA,AA) in NHLBI Exome Sequencing project. +##DESCRIPTION=phyloP=phyloP (100way vertebrate) annotation. Deleterious threshold > 1.6. +##DESCRIPTION=Sift=Sift effect prediction for each transcript: D=damaging, T=tolerated. +##DESCRIPTION=PolyPhen=PolyPhen (humVar) effect prediction for each transcript: D=probably damaging, P=possibly damaging, B=benign. +##DESCRIPTION=fathmm-MKL=fathmm-MKL score (for coding/non-coding regions). Deleterious threshold > 0.5. +##DESCRIPTION=CADD=CADD pathogenicity prediction scores (scaled phred-like). Deleterious threshold > 15-20. +##DESCRIPTION=REVEL=REVEL pathogenicity prediction score. Deleterious threshold > 0.5. +##DESCRIPTION=MaxEntScan=MaxEntScan splicing prediction (difference in percent/reference bases score/alternate bases score). +##DESCRIPTION=GeneSplicer=GeneSplicer splicing prediction (state/type/coordinates/confidence/score). +##DESCRIPTION=dbscSNV=dbscSNV splicing prediction (ADA/RF score). +##DESCRIPTION=COSMIC=COSMIC somatic variant database anntotation. +##DESCRIPTION=tumor_af=Mutant allele frequency in tumor (Sample DX184894_01). +##DESCRIPTION=tumor_dp=Tumor Depth (Sample DX184894_01). +##DESCRIPTION=normal_af=Mutant allele frequency in normal (Sample DX184263_01). +##DESCRIPTION=normal_dp=Normal depth (Sample DX184263_01). +##DESCRIPTION=NGSD_som_c=Somatic variant count in the NGSD. +##DESCRIPTION=NGSD_som_p=Project names of project containing this somatic variant in the NGSD. +##DESCRIPTION=NGSD_hom=Homozygous variant counts in NGSD independent of the processing system. +##DESCRIPTION=NGSD_het=Heterozygous variant counts in NGSD independent of the processing system. +##DESCRIPTION=classification=Classification from the NGSD. +##DESCRIPTION=classification_comment=Classification comment from the NGSD. +##DESCRIPTION=validation=Validation information from the NGSD. Validation results of other samples are listed in brackets! +##DESCRIPTION=comment=Variant comments from the NGSD. +##DESCRIPTION=gene_info=Gene information from NGSD (inheritance mode, ExAC pLI score). +##FILTER=LowDepth=Tumor or normal sample read depth at this locus is below 2 +##FILTER=LowEVS=Somatic Empirical Variant Score (SomaticEVS) is below threshold +##FILTER=all-unknown=Allele unknown +##FILTER=depth-nor=Sequencing depth in normal is too low (< 20) +##FILTER=depth-tum=Sequencing depth in tumor is too low (< 20) +##FILTER=freq-nor=Allele frequency in normal > 0.17 * allele frequency in tumor +##FILTER=freq-tum=Allele frequency in tumor < 0.05 +##FILTER=lt-3-reads=Less than 3 supporting tumor reads +##FILTER=off-target=Variant marked as 'off-target'. +##FILTER=special-chromosome=Special chromosome +##CGI_CANCER_TYPE=CH +##DESCRIPTION=CGI_id=Identifier for CGI statements +##DESCRIPTION=CGI_driver_statement=CancerGenomeInterpreter.org oncogenic classification +##DESCRIPTION=CGI_gene_role=CancerGenomeInterpreter.org gene role. LoF: Loss of Function, Act: Activating +##DESCRIPTION=CGI_transcript=CancerGenomeInterpreter.org CGI Ensembl transcript ID +##DESCRIPTION=CGI_gene=Gene symbol returned by CancerGenomeInterpreter.org +##DESCRIPTION=CGI_consequence=Consequence of the mutation assessed by CancerGenomeInterpreter.org +##DESCRIPTION=ncg_oncogene=1:gene is oncogene according NCG6.0, 0:No oncogene according NCG6.0, na: no information available about gene in NCG6.0. Order is the same as in column gene. +##DESCRIPTION=ncg_tsg=1:gene is TSG according NCG6.0, 0:No TSG according NCG6.0, na: no information available about gene in NCG6.0. Order is the same as in column gene. +#chr start end ref obs tumor_af tumor_dp filter quality gene variant_type coding_and_splicing OMIM ClinVar HGMD RepeatMasker dbSNP 1000g gnomAD gnomAD_hom_hemi gnomAD_sub ESP_sub phyloP Sift PolyPhen fathmm-MKL CADD REVEL MaxEntScan GeneSplicer dbscSNV COSMIC NGSD_som_c NGSD_som_p NGSD_hom NGSD_het classification classification_comment validation comment gene_info CGI_id CGI_driver_statement CGI_gene_role CGI_transcript CGI_gene CGI_consequence ncg_oncogene ncg_tsg SpliceAI PubMed +chr2 178096717 178096717 T C 0.1057 389 QUAL=229 NFE2L2 missense NFE2L2:ENST00000397062:missense_variant:MODERATE:exon5/5:c.614A>G:p.Asp205Gly:,NFE2L2:ENST00000397063:missense_variant:MODERATE:exon5/5:c.566A>G:p.Asp189Gly:,NFE2L2:ENST00000446151:missense_variant:MODERATE:exon5/5:c.545A>G:p.Asp182Gly:,NFE2L2:ENST00000464747:missense_variant:MODERATE:exon8/8:c.566A>G:p.Asp189Gly: 600492 [NFE2L2 Immunodeficiency,developmental delay,and hypohomocysteinemia,617744]; 5.3220 D,D,D,D B,B,B,B 0.96,0.97 3.29 0.06 0 0 0 NFE2L2 (inh=AD pLI=0.61) chr2_178096717_T_C predicted passenger Act ENST00000397062 NFE2L2 Missense 0 0 +chr3 138456487 138456488 AT - 0.1304 26 off-target QUAL=22 PIK3CB intron PIK3CB:ENST00000289153:intron_variant:MODIFIER:exon4/21:c.801+61_801+62del::,PIK3CB:ENST00000477593:intron_variant:MODIFIER:exon5/22:c.801+61_801+62del:: (AT)n rs375733254 0.0032 0, 0.4070 -0.29 1 SomaticAndTreatment 8 6 PIK3CB (inh=n/a pLI=1.00) chr3_138456486_AAT_A not protein-affecting Act ENST00000477593 PIK3CB IntronicDeletion 1 0 0.94 +chr16 56870524 56870524 A C 0.1254 639 QUAL=330 NUP93 synonymous NUP93:ENST00000308159:synonymous_variant:LOW:exon17/22:c.1794A>C:p.Ile598=:PF04097,NUP93:ENST00000542526:synonymous_variant:LOW:exon15/20:c.1425A>C:p.Ile475=:PF04097,NUP93:ENST00000564887:synonymous_variant:LOW:exon15/20:c.1425A>C:p.Ile475=:PF04097,NUP93:ENST00000569842:synonymous_variant:LOW:exon17/23:c.1794A>C:p.Ile598=:PF04097 614351 [NUP93 Nephrotic syndrome,type 12,616892]; 1.5880 0.80,0.91 1.07 0 0 0 NUP93 (inh=AR pLI=0.04) chr16_56870524_A_C not protein-affecting LoF ENST00000308159 NUP93 Synonymous na na 0.13 1234578 \ No newline at end of file diff --git a/src/tools-TEST/data_in/NGSDExportAnnotationData_init2.sql b/src/tools-TEST/data_in/NGSDExportAnnotationData_init2.sql index 70cb10db4..8ba0d9155 100644 --- a/src/tools-TEST/data_in/NGSDExportAnnotationData_init2.sql +++ b/src/tools-TEST/data_in/NGSDExportAnnotationData_init2.sql @@ -1,48 +1,60 @@ -- device -INSERT INTO device (id, type, name) VALUES (1, 'HiSeq2500', 'Morpheus'); +INSERT INTO device (id, type, name) VALUES +(1, 'HiSeq2500', 'Morpheus'); -- sequencing_run -INSERT INTO sequencing_run (id, name, fcid, device_id, recipe, quality) VALUES (1, 'First run', 'ABC', 1, '100+8+8+100', 'good'); -INSERT INTO sequencing_run (id, name, fcid, device_id, recipe, quality) VALUES (2, 'Second run', 'XYZ', 1, '100+8+100', 'good'); +INSERT INTO sequencing_run (id, name, fcid, device_id, recipe, quality) VALUES +(1, 'First run', 'ABC', 1, '100+8+8+100', 'good'), +(2, 'Second run', 'XYZ', 1, '100+8+100', 'good'); -- user -INSERT INTO user (id, user_id, password, user_role, name, email, created, active) VALUES (99, 'ahuser', 's2d12kjg234hla0830t6hp9h3tt3t3tsdfg', 'user', 'The user', 'u@s.er', NOW(), '1'); +INSERT INTO user (id, user_id, password, user_role, name, email, created, active) VALUES +(99, 'ahuser', 's2d12kjg234hla0830t6hp9h3tt3t3tsdfg', 'user', 'The user', 'u@s.er', NOW(), '1'); -- sender -INSERT INTO sender (id, name) VALUES (1, 'sender'); +INSERT INTO sender (id, name) VALUES +(1, 'sender'); -- project -INSERT INTO project (id, name, type, internal_coordinator_id, analysis) VALUES (1, 'First project', 'research', 1, 'variants'); -INSERT INTO project (id, name, type, internal_coordinator_id, analysis) VALUES (2, 'Second project', 'diagnostic', 1, 'variants'); +INSERT INTO project (id, name, type, internal_coordinator_id, analysis) VALUES +(1, 'First project', 'research', 1, 'variants'), +(2, 'Second project', 'diagnostic', 1, 'variants'); -- processing_system -INSERT INTO processing_system (id, name_manufacturer, shotgun, name_short, genome_id) VALUES (1, 'HaloPlex System', '1', 'hpSYSv1', 1); -INSERT INTO processing_system (id, name_manufacturer, shotgun, name_short, genome_id) VALUES (2, 'SureSelect Human All Exon v5', '1', 'ssHAEv5', 1); +INSERT INTO processing_system (id, name_manufacturer, shotgun, name_short, genome_id) VALUES +(1, 'HaloPlex System', '1', 'hpSYSv1', 1), +(2, 'SureSelect Human All Exon v5', '1', 'ssHAEv5', 1); -- sample -INSERT INTO sample (id, name, sample_type, species_id, gender, tumor, ffpe, sender_id, quality) VALUES (1, 'NA12878', 'DNA', 1, 'female', 1, '0', 1, 'good'); -INSERT INTO sample (id, name, sample_type, species_id, gender, tumor, ffpe, sender_id, quality) VALUES (2, 'NA12879', 'DNA', 1, 'male', 1, '0', 1, 'good'); -INSERT INTO sample (id, name, sample_type, species_id, gender, tumor, ffpe, sender_id, quality) VALUES (3, 'NA12880', 'DNA', 1, 'female', '0', '0', 1, 'good'); -INSERT INTO sample (id, name, sample_type, species_id, gender, tumor, ffpe, sender_id, quality) VALUES (4, 'DUMMY', 'DNA', 1, 'male', '0', '0', 1, 'good'); +INSERT INTO sample (id, name, sample_type, species_id, gender, tumor, ffpe, sender_id, quality) VALUES +(1, 'NA12878', 'DNA', 1, 'female', 1, '0', 1, 'good'), +(2, 'NA12879', 'DNA', 1, 'male', 1, '0', 1, 'good'), +(3, 'NA12880', 'DNA', 1, 'female', '0', '0', 1, 'good'), +(4, 'DUMMY', 'DNA', 1, 'male', '0', '0', 1, 'good'); -- processed_sample -INSERT INTO processed_sample (id, sample_id, process_id, sequencing_run_id, lane, operator_id, processing_system_id, project_id) VALUES (1, 1, 1, 1, 1, 2, 1, 1); -INSERT INTO processed_sample (id, sample_id, process_id, sequencing_run_id, lane, operator_id, processing_system_id, project_id) VALUES (2, 2, 2, 2, 1, 2, 2, 2); -INSERT INTO processed_sample (id, sample_id, process_id, sequencing_run_id, lane, operator_id, processing_system_id, project_id) VALUES (3, 3, 3, 2, 1, 2, 2, 2); -INSERT INTO processed_sample (id, sample_id, process_id, sequencing_run_id, lane, operator_id, processing_system_id, project_id) VALUES (4, 4, 1, 2, 1, 2, 2, 2); +INSERT INTO processed_sample (id, sample_id, process_id, sequencing_run_id, lane, operator_id, processing_system_id, project_id) VALUES +(1, 1, 1, 1, 1, 2, 1, 1), +(2, 2, 2, 2, 1, 2, 2, 2), +(3, 3, 3, 2, 1, 2, 2, 2), +(4, 4, 1, 2, 1, 2, 2, 2); --variant -INSERT INTO variant (id, chr, start, end, ref, obs) VALUES (1, 'chr1', 62263112, 62263112, 'A', 'G'); -INSERT INTO variant (id, chr, start, end, ref, obs) VALUES (2, 'chr3', 142558733, 142558733, 'A', 'T'); -INSERT INTO variant (id, chr, start, end, ref, obs) VALUES (3, 'chr2', 47805601, 47805601, '-', 'T'); +INSERT INTO variant (id, chr, start, end, ref, obs) VALUES +(1, 'chr1', 62263112, 62263112, 'A', 'G'), +(2, 'chr3', 142558733, 142558733, 'A', 'T'), +(3, 'chr2', 47805601, 47805601, '-', 'T'); --somatic_vicc_interpretation INSERT INTO `somatic_vicc_interpretation` (`id`, `variant_id`, `null_mutation_in_tsg`, `known_oncogenic_aa`, `strong_cancerhotspot`, `located_in_canerhotspot`, `absent_from_controls`, `protein_length_change`, `other_aa_known_oncogenic`, `weak_cancerhotspot`, `computational_evidence`, `mutation_in_gene_with_etiology`, `very_weak_cancerhotspot`, `very_high_maf`, `benign_functional_studies`, `high_maf`, `benign_computational_evidence`, `synonymous_mutation`, `comment`, `created_by`, `created_date`, `last_edit_by`, `last_edit_date`) VALUES (1, 3, 0, 1, 1, NULL, 1, 0, NULL, 0, 1, NULL, 0, 0, 0, 0, NULL, 0, 'test VICC comment', 99, '2020-12-21 09:59:37', 99, '2020-12-23 09:33:23'); --detected somatic variant -INSERT INTO detected_somatic_variant (id, processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth) VALUES (1, 1, 3, 1, 0.1, 500); -INSERT INTO detected_somatic_variant (id, processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth) VALUES (2, 1, 3, 2, 0.1, 500); -INSERT INTO detected_somatic_variant (id, processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth) VALUES (3, 2, 4, 2, 0.1, 500); -INSERT INTO detected_somatic_variant (id, processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth) VALUES (4, 2, 4, 3, 0.1, 500); +INSERT INTO detected_somatic_variant (id, processed_sample_id_tumor, processed_sample_id_normal, variant_id, variant_frequency, depth) VALUES +(1, 1, 3, 1, 0.1, 500), +(2, 1, 3, 2, 0.1, 500), +(3, 2, 4, 2, 0.1, 500), +(4, 2, 4, 3, 0.1, 500), +(5, 1, NULL, 3, 0.1, 500), +(6, 2, NULL, 3, 0.1, 500); diff --git a/src/tools-TEST/data_out/NGSDExportAnnotationData_out3.vcf b/src/tools-TEST/data_out/NGSDExportAnnotationData_out3.vcf index 4f65bc2e6..c0b58b7dd 100644 --- a/src/tools-TEST/data_out/NGSDExportAnnotationData_out3.vcf +++ b/src/tools-TEST/data_out/NGSDExportAnnotationData_out3.vcf @@ -24,11 +24,12 @@ ##contig= ##contig= ##contig= -##INFO= -##INFO= +##INFO= +##INFO= +##INFO= ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO chr1 62263112 1 A G . . SOM_C=1;SOM_P=First%20project -chr2 47805601 3 A AT . . SOM_C=1;SOM_P=Second%20project;SOM_VICC=ONCOGENIC;SOM_VICC_COMMENT=test%20VICC%20comment +chr2 47805601 3 A AT . . SOM_C=1;SOM_P=Second%20project;SOM_TO_C=2;SOM_VICC=ONCOGENIC;SOM_VICC_COMMENT=test%20VICC%20comment chr3 142558733 2 A T . . SOM_C=2;SOM_P=First%20project,Second%20project diff --git a/src/tools-TEST/data_out/NGSDExportAnnotationData_out4.vcf b/src/tools-TEST/data_out/NGSDExportAnnotationData_out4.vcf index 1fcc84d33..bb6e0fdbc 100644 --- a/src/tools-TEST/data_out/NGSDExportAnnotationData_out4.vcf +++ b/src/tools-TEST/data_out/NGSDExportAnnotationData_out4.vcf @@ -24,8 +24,9 @@ ##contig= ##contig= ##contig= -##INFO= -##INFO= +##INFO= +##INFO= +##INFO= ##INFO= ##INFO= ##INFO= @@ -51,5 +52,5 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO chr1 62263112 1 A G . . SOM_C=1;SOM_P=First%20project -chr2 47805601 3 A AT . . SOM_C=1;SOM_P=Second%20project;SOM_VICC=ONCOGENIC;SOM_VICC_COMMENT=test%20VICC%20comment;SOM_VICC_ABSENT_FROM_CONTROLS=TRUE;SOM_VICC_BENIGN_COMPUTATIONAL_EVIDENCE=NOT_APPLICABLE;SOM_VICC_BENIGN_FUNCTIONAL_STUDIES=FALSE;SOM_VICC_COMMENT=test%20VICC%20comment;SOM_VICC_COMPUTATIONAL_EVIDENCE=TRUE;SOM_VICC_CREATED_AT=2020-12-21%2009:59:37;SOM_VICC_CREATED_BY=ahuser;SOM_VICC_HIGH_MAF=FALSE;SOM_VICC_KNOWN_ONCOGENIC_AA=TRUE;SOM_VICC_LAST_UPDATED_AT=2020-12-23%2009:33:23;SOM_VICC_LAST_UPDATED_BY=ahuser;SOM_VICC_LOCATED_IN_CANERHOTSPOT=NOT_APPLICABLE;SOM_VICC_MUTATION_IN_GENE_WITH_ETIOLOGY=NOT_APPLICABLE;SOM_VICC_NULL_MUTATION_IN_TSG=FALSE;SOM_VICC_ONCOGENIC_FUNCTIONAL_STUDIES=NOT_APPLICABLE;SOM_VICC_OTHER_AA_KNOWN_ONCOGENIC=NOT_APPLICABLE;SOM_VICC_PROTEIN_LENGTH_CHANGE=FALSE;SOM_VICC_STRONG_CANCERHOTSPOT=TRUE;SOM_VICC_SYNONYMOUS_MUTATION=FALSE;SOM_VICC_VERY_HIGH_MAF=FALSE;SOM_VICC_VERY_WEAK_CANCERHOTSPOT=FALSE;SOM_VICC_WEAK_CANCERHOTSPOT=FALSE +chr2 47805601 3 A AT . . SOM_C=1;SOM_P=Second%20project;SOM_TO_C=2;SOM_VICC=ONCOGENIC;SOM_VICC_COMMENT=test%20VICC%20comment;SOM_VICC_ABSENT_FROM_CONTROLS=TRUE;SOM_VICC_BENIGN_COMPUTATIONAL_EVIDENCE=NOT_APPLICABLE;SOM_VICC_BENIGN_FUNCTIONAL_STUDIES=FALSE;SOM_VICC_COMMENT=test%20VICC%20comment;SOM_VICC_COMPUTATIONAL_EVIDENCE=TRUE;SOM_VICC_CREATED_AT=2020-12-21%2009:59:37;SOM_VICC_CREATED_BY=ahuser;SOM_VICC_HIGH_MAF=FALSE;SOM_VICC_KNOWN_ONCOGENIC_AA=TRUE;SOM_VICC_LAST_UPDATED_AT=2020-12-23%2009:33:23;SOM_VICC_LAST_UPDATED_BY=ahuser;SOM_VICC_LOCATED_IN_CANERHOTSPOT=NOT_APPLICABLE;SOM_VICC_MUTATION_IN_GENE_WITH_ETIOLOGY=NOT_APPLICABLE;SOM_VICC_NULL_MUTATION_IN_TSG=FALSE;SOM_VICC_ONCOGENIC_FUNCTIONAL_STUDIES=NOT_APPLICABLE;SOM_VICC_OTHER_AA_KNOWN_ONCOGENIC=NOT_APPLICABLE;SOM_VICC_PROTEIN_LENGTH_CHANGE=FALSE;SOM_VICC_STRONG_CANCERHOTSPOT=TRUE;SOM_VICC_SYNONYMOUS_MUTATION=FALSE;SOM_VICC_VERY_HIGH_MAF=FALSE;SOM_VICC_VERY_WEAK_CANCERHOTSPOT=FALSE;SOM_VICC_WEAK_CANCERHOTSPOT=FALSE chr3 142558733 2 A T . . SOM_C=2;SOM_P=First%20project,Second%20project