From 70258d957c6818d1d38baa8425cb9ab6ca2b5a8f Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:35:38 -0400
Subject: [PATCH 01/16] fix: add hg19 to rules

---
 conf/base.config | 5 +++++
 main.nf          | 8 ++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index c538c5c..50baa58 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -68,6 +68,11 @@ process {
         memory = { check_max( 48.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 72.h   * task.attempt, 'time'    ) }
     }
+    withName:bwamem2 {
+        cpus   = { check_max( 20     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 200.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 72.h   * task.attempt, 'time'    ) }
+    }
     withLabel:error_ignore {
         errorStrategy = 'ignore'
     }
diff --git a/main.nf b/main.nf
index 325842a..1541830 100644
--- a/main.nf
+++ b/main.nf
@@ -55,7 +55,7 @@ workflow {
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse(ALIGN.out.bamwithsample)
-            } else if (params.genome== "hg38"){
+            } else if (params.genome== "hg38" |params.genome== "hg19"){
                 if (!params.vc){
                     CNVhuman_novc(ALIGN.out.bamwithsample)
                 } else {
@@ -84,7 +84,7 @@ workflow {
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse(INPUT_BAM.out.bamwithsample)
-            } else if (params.genome == "hg38"){
+            } else if (params.genome == "hg38"|params.genome== "hg19"){
                 if (!params.vc){
                     CNVhuman_novc(INPUT_BAM.out.bamwithsample)
                 }else {
@@ -108,7 +108,7 @@ workflow {
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample)
-            } else if (params.genome== "hg38"){
+            } else if (params.genome== "hg38"|params.genome== "hg19"){
                 if (!params.vc){
                     VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet)
                     CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input)
@@ -135,7 +135,7 @@ workflow {
         if (params.cnv){
             if (params.genome == "mm10"){
                 CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample)
-            } else if (params.genome== "hg38"){
+            } else if (params.genome== "hg38" | params.genome== "hg19"){
                 if (!params.vc){
                     VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet)
                     CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input)

From c03a313698bfd65261068c6641b902cd8d3736b2 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:36:12 -0400
Subject: [PATCH 02/16] feat: hg19 purple

---
 modules/local/copynumber.nf | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index 3b2855d..1bcfe9c 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -15,13 +15,15 @@ if (params.genome=="mm10"){
 }
 
 if (params.genome=="hg38" | params.genome=="hg19"){
+    HMFGENOMEREF = file(params.genomes[params.genome].HMFGENOME)
     GENOMEVER = params.genomes[params.genome].GENOMEVER
     GCPROFILE = file(params.genomes[params.genome].GCPROFILE)
     GERMLINEHET = file(params.genomes[params.genome].GERMLINEHET)
     DIPLODREG = file(params.genomes[params.genome].DIPLODREG)
     ENSEMBLCACHE = params.genomes[params.genome].ENSEMBLCACHE
     DRIVERS = file(params.genomes[params.genome].DRIVERS)
-    HOTSPOTS = file(params.genomes[params.genome].HOTSPOTS)
+    SOMATICHOTSPOTS = file(params.genomes[params.genome].SOMATICHOTSPOTS)
+    GERMLINEHOTSPOTS = file(params.genomes[params.genome].GERMLINEHOTSPOTS)
 }
 
 //mm10 Paired-Sequenza, FREEC-tumor only
@@ -349,7 +351,7 @@ process amber_tonly {
     -tumor ${tumorname} -tumor_bam ${tumor} \
     -output_dir ${tumorname}_amber \
     -threads $task.cpus \
-    -ref_genome_version V38 \
+    -ref_genome_version $HMFGENOMEREF \
     -loci $GERMLINEHET
 
     """
@@ -372,7 +374,8 @@ process amber_tn {
         val(normalname), path(normal), path(normalbai)
 
     output:
-        tuple val(tumorname), path("${tumorname}_vs_${normalname}_amber")
+        tuple val("${tumorname}_vs_${normalname}"),
+        val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_amber")
       
     script:
 
@@ -383,7 +386,7 @@ process amber_tn {
     -reference ${normalname} -reference_bam ${normal} \
     -output_dir ${tumorname}_vs_${normalname}_amber \
     -threads $task.cpus \
-    -ref_genome_version V38 \
+    -ref_genome_version $HMFGENOMEREF \
     -loci $GERMLINEHET
 
     """
@@ -436,7 +439,8 @@ process cobalt_tn {
         val(normalname), path(normal), path(normalbai)
 
     output:
-        tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt")
+        tuple val("${tumorname}_vs_${normalname}"),
+        val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_cobalt")
 
     script:
 
@@ -464,12 +468,12 @@ process purple {
     label 'process_medium'
 
     input:
-        tuple val(tumorname), val(normalname),
+        tuple val(id), val(tumorname), val(normalname),
         path(amberin), path(cobaltin),
         path(somaticvcf), path(somaticvcfindex)
 
     output:
-        tuple val(tumorname), path("${tumorname}")
+        tuple val(id), path("${id}")
 
     script:
 
@@ -485,16 +489,16 @@ process purple {
     $ENSEMBLCACHE \
     -somatic_vcf ${somaticvcf} \
     -driver_gene_panel $DRIVERS \
-    -somatic_hotspots $HOTSPOTS \
+    -somatic_hotspots $SOMATICHOTSPOTS \
     -threads $task.cpus \
-    -output_dir ${tumorname}
+    -output_dir ${id}
     """
 
     stub:
 
     """
-    mkdir ${tumorname}
-    touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv
+    mkdir ${id}
+    touch ${id}/${id}.purple.cnv.somatic.tsv ${id}/${id}.purple.cnv.gene.tsv ${id}/${id}.driver.catalog.somatic.tsv
     """
 
 }
@@ -505,7 +509,7 @@ process purple_novc {
     label 'process_medium'
 
     input:
-        tuple val(tumorname), val(normalname),
+        tuple val(id), val(tumorname), val(normalname),
         path(cobaltin), path(amberin)
 
     output:
@@ -521,10 +525,10 @@ process purple_novc {
     -cobalt ${cobaltin} \
     -gc_profile $GCPROFILE \
     -ref_genome_version $GENOMEVER \
-    -ref_genome $GENOMEREF \
+    -ref_genome $HMFGENOMEREF \
     $ENSEMBLCACHE \
     -threads $task.cpus \
-    -output_dir ${tumorname}
+    -output_dir ${tvn}
 
     """
 

From 54054fdb51abaa0c2d791f23120b3c39b30e6871 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:36:24 -0400
Subject: [PATCH 03/16] feat: hg19 cnv changes

---
 conf/genomes.config | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/conf/genomes.config b/conf/genomes.config
index 07a05d0..040c1b6 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -31,7 +31,9 @@ params {
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
             //HMFTOOLS
             GENOMEVER = "38"
-            HOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz"
+            HMFGENOME =  "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa"
+            SOMATICHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz"
+            GERMLINEHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.germline.38.vcf.gz"
             PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz"
             HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz"
             ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data"
@@ -45,7 +47,7 @@ params {
         'hg19' {
             genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa"
             genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai"
-            bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
+            bwagenome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
             genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.dict"
             intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed"
             INDELREF = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz" 
@@ -71,16 +73,17 @@ params {
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
             //HMFTOOLS
             GENOMEVER = "37"
-            HOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.38.vcf.gz"
-            PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz"
-            HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz"
-            ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data"
+            HMFGENOME =  "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
+            SOMATICHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/KnownHotspots.somatic.37.vcf.gz"
+            GERMLINEHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/KnownHotspots.germline.37.vcf.gz"
+            PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/ActionableCodingPanel.37.bed.gz"
+            HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed.gz"
+            ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/common/ensembl_data"
              //PURPLE
-            GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz"
-            GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp"
-            DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz'
-            ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/'
-            DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv'
+            GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/AmberGermlineSites.37.tsv.gz"
+            GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/GC_profile.1000bp.37.cnp"
+            DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/DiploidRegions.37.bed.gz'
+            DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/common/DriverGenePanel.37.tsv'
         }
 
         'mm10' {

From b56a8a0853a0f6d3c97fed6217366205f87c44c4 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:36:45 -0400
Subject: [PATCH 04/16] fix: no file param

---
 modules/local/structural_variant.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf
index 1ea049a..3b1038e 100644
--- a/modules/local/structural_variant.nf
+++ b/modules/local/structural_variant.nf
@@ -1,5 +1,5 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
-ANNOTSVGENOME=file(params.genomes[params.genome].annotsvgenome)
+ANNOTSVGENOME=params.genomes[params.genome].annotsvgenome
 BWAGENOME=file(params.genomes[params.genome].bwagenome)
 INDELREF=file(params.genomes[params.genome].INDELREF)
 

From 678ecfaaea6b7d7b1d3c1d31541be5d97ad50ad5 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:36:54 -0400
Subject: [PATCH 05/16] fix: bwa mem

---
 modules/local/trim_align.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf
index 2df6abf..f91168e 100644
--- a/modules/local/trim_align.nf
+++ b/modules/local/trim_align.nf
@@ -42,7 +42,6 @@ process fastp {
 
 process bwamem2 {
     container = "${params.containers.logan}"
-    label 'process_high'
     tag { name }
 
 

From cd9b73bb6b58d21f42fbea2c68ff1cfbe54e9381 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:37:22 -0400
Subject: [PATCH 06/16] fix: add tumor vs normal to id for unique

---
 subworkflows/local/workflows.nf | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index e7d9956..dfce894 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -288,7 +288,7 @@ workflow VC {
         | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}",
         vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex,
         indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)}
-        | combineVariants_strelka |  join(sample_sheet_paired)
+        | combineVariants_strelka | join(sample_sheet_paired)
         | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)}
         annotvep_tn_strelka(strelka_in)
 
@@ -469,21 +469,21 @@ workflow SV {
             svaba_out=svaba_somatic(bamwithsample)
             .map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log ->
                 tuple(tumor,so_sv,"svaba")}
-            annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated")
+            //annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated")
         }
         if ("manta" in svcall_list){
         //Manta
         manta_out=manta_somatic(bamwithsample)
             .map{tumor,gsv,so_sv,unfil_sv,unfil_indel ->
             tuple(tumor,so_sv,"manta")}
-        annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated")
+        //annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated")
         }
-        //Delly-WIP
 
-         if ("manta" in svcall_list & "svaba" in svcall_list){
+        if ("manta" in svcall_list & "svaba" in svcall_list){
             //Survivor
             gunzip(manta_out).concat(svaba_out).groupTuple()
-                | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated")
+                | survivor_sv 
+                | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated")
          }
 }
 
@@ -524,14 +524,14 @@ workflow CNVhuman {
 
     main:  
         cnvcall_list = params.cnvcallers.split(',') as List
-
+        scinput = somaticcall_input|map{t1,n1,cal,vcf,ind -> tuple("${t1}_vs_${n1}",cal,vcf,ind)}
         if ("purple" in cnvcall_list){
             //Purple
             bamwithsample | amber_tn
             bamwithsample | cobalt_tn
-            purplein=amber_tn.out.join(cobalt_tn.out) 
-            purplein.join(somaticcall_input) 
-            | map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,n1,amber,cobalt,vcf,vcfindex)}
+            purplein=amber_tn.out.join(cobalt_tn.out,by:[0,1,2]) 
+            purplein.join(scinput) 
+            | map{id,t1,n1,amber,cobalt,vc,vcf,vcfindex -> tuple(id,t1,n1,amber,cobalt,vcf,vcfindex)}
             | purple
         }
 
@@ -558,10 +558,10 @@ workflow CNVhuman_novc {
 
         if ("purple" in cnvcall_list){
             //Purple
-            bamwithsample | amber_tn
-            bamwithsample | cobalt_tn
-            purplein=amber_tn.out |join(cobalt_tn.out) 
-            purplein | map{t1,amber,cobalt,n1 -> tuple(t1,n1,amber,cobalt)}
+            bamwithsample | amber_tn 
+            bamwithsample | cobalt_tn 
+            purplein=amber_tn.out |join(cobalt_tn.out)  
+            purplein | map{id,t1,n1,amber,t2,n2,cobalt -> tuple(id,t1,n1,amber,cobalt)}
                 | purple_novc
         }
 

From 662cb0fd5581c1b5fa7f03f3999afb3e0e0a2788 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Mon, 17 Jun 2024 21:37:34 -0400
Subject: [PATCH 07/16] fix: add strelka converter

---
 bin/convertStrelka.py  | 92 ++++++++++++++++++++++++------------------
 conf/containers.config |  3 +-
 2 files changed, 55 insertions(+), 40 deletions(-)

diff --git a/bin/convertStrelka.py b/bin/convertStrelka.py
index 8550db1..c6ed2cd 100755
--- a/bin/convertStrelka.py
+++ b/bin/convertStrelka.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
-import os
 import numpy as np
-import vcfpy
+import cyvcf2
 import sys 
+import gzip 
 
 def _tumor_normal_genotypes(ref, alt, info):
     """Retrieve standard 0/0, 0/1, 1/1 style genotypes from INFO field.
@@ -33,7 +33,7 @@ def name_to_gt(val):
             # print(fname, coords, ref, alt, info, val)
             return "0/1"
     def alleles_to_gt(val):
-        gt_indices = {gt.upper(): i for i, gt in enumerate([ref] + [alt])}
+        gt_indices = {gt.upper(): i for i, gt in enumerate([ref] + alt)}
         tumor_gts = [gt_indices[x.upper()] for x in val if x in gt_indices]
         if tumor_gts and val not in known_names:
             if max(tumor_gts) == 0:
@@ -45,13 +45,13 @@ def alleles_to_gt(val):
         else:
             tumor_gt = name_to_gt(val)
         return tumor_gt
-    nt_val = info.get('NT').split("=")[-1]
+    nt_val = [x.split("=")[-1] for x in info if x.startswith("NT=")][0]
     normal_gt = name_to_gt(nt_val)
-    sgt_val = info.get('SGT').split("=")[-1]
+    sgt_val = [x.split("=")[-1] for x in info if x.startswith("SGT=")]
     if not sgt_val:
         tumor_gt = "0/0"
     else:
-        sgt_val = sgt_val.split("->")[-1]
+        sgt_val = sgt_val[0].split("->")[-1]
         tumor_gt = alleles_to_gt(sgt_val)
     return normal_gt, tumor_gt
 
@@ -70,47 +70,61 @@ def _af_annotate_and_filter(in_file,out_file):
     #data = paired.tumor_data if paired else items[0]
     #min_freq = float(utils.get_in(data["config"], ("algorithm", "min_allele_fraction"), 10)) / 100.0
     #logger.debug("Filtering Strelka2 calls with allele fraction threshold of %s" % min_freq)
-    vcf = vcfpy.Reader.from_path(in_file)
-    vcf.header.add_format_line(vcfpy.OrderedDict([
-        ('ID', 'AF'), 
-        ('Description', 'Allele frequency, as calculated in bcbio: AD/DP (germline), <ALT>U/DP (somatic snps), TIR/DPI (somatic indels)'),
-        ('Type','Float'),
-        ('Number', '.')
-    ]))
-    vcf.header.add_format_line(vcfpy.OrderedDict([
-        ('ID', 'GT'), 
-        ('Description', 'Genotype'),
-        ('Type','String'),
-        ('Number', '1')
-    ]))
-    writer = vcfpy.Writer.from_path(out_file, vcf.header)
+    vcf = cyvcf2.VCF(in_file)
+    vcf.add_format_to_header(dict(
+        ID='AF', Number='1',Type='Float',
+        Description='Allele frequency, as calculated in bcbio: AD/DP (germline), <ALT>U/DP (somatic snps), TIR/DPI (somatic indels)'
+        ))
+    writer = cyvcf2.Writer(out_file, vcf)
     for rec in vcf:
         #print(rec)
-        if rec.is_snv():  # snps?
-            alt_counts_n = rec.calls[0].data[rec.ALT[0].value + "U"]  # {ALT}U=tier1_depth,tier2_depth
-            alt_counts_t = rec.calls[1].data[rec.ALT[0].value + "U"]  # {ALT}U=tier1_depth,tier2_depth
+        if rec.is_snp:  # snps?
+            alt_counts = rec.format(rec.ALT[0] +'U')[:,0]  # {ALT}U=tier1_depth,tier2_depth
         else:  # indels
-            alt_counts_n = rec.calls[0].data['TIR']  # TIR=tier1_depth,tier2_depth
-            alt_counts_t = rec.calls[1].data['TIR']
-        DP_n=rec.calls[0].data["DP"]
-        DP_t=rec.calls[1].data["DP"]
-        if DP_n is not None and DP_t is not None:
+            alt_counts = rec.format('TIR')[:,0]  # TIR=tier1_depth,tier2_depth
+        dp = rec.format('DP')[:,0]
+        if dp is not None :
             with np.errstate(divide='ignore', invalid='ignore'):  # ignore division by zero and put AF=.0
                 #alt_n = alt_counts_n[0]/DP_n
                 #alt_t = alt_counts_t[0]/DP_t
-                af_n = np.true_divide(alt_counts_n[0], DP_n)
-                af_t = np.true_divide(alt_counts_t[0], DP_t)
-                rec.add_format('AF',0)
-                rec.calls[0].data["AF"]= [round(af_n,5)]
-                rec.calls[1].data["AF"]= [round(af_t,5)]
-        normal_gt, tumor_gt= _tumor_normal_genotypes(rec.REF,rec.ALT[0].value,rec.INFO)
-        rec.add_format('GT',"1/0")
-        rec.calls[0].data["GT"]=normal_gt
-        rec.calls[1].data["GT"]=tumor_gt
-        writer.write_record(rec)
+                af = np.true_divide(alt_counts, dp)
+                rec.set_format('AF',np.round(af,5))
+                writer.write_record(rec)
+    writer.close()
+
+def is_gzipped(path):
+    return path.endswith(".gz")
+
+def _add_gt(in_file):
+        ##Set genotypes now
+        out_file = in_file.replace(".vcf.gz", "-fixed.vcf")
+        #open_fn = gzip.open if is_gzipped(in_file) else open
+        with gzip.open(in_file,'rt') as in_handle: 
+            with open(out_file,"w") as out_handle:
+                added_gt = False
+                for line in in_handle:
+                    if line.startswith("##FORMAT") and not added_gt:
+                        added_gt = True
+                        out_handle.write('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n')
+                        out_handle.write(line)
+                    elif line.startswith("#CHROM"):
+                        assert added_gt
+                        out_handle.write(line)
+                    elif line.startswith("#"):
+                        out_handle.write(line)
+                    else:
+                        parts = line.rstrip().split("\t")
+                        normal_gt,tumor_gt = _tumor_normal_genotypes(parts[3], parts[4].split(","),
+                                                                        parts[7].split(";"))
+                        parts[8] = "GT:%s" % parts[8]
+                        parts[9] = "%s:%s" % (normal_gt, parts[9])
+                        parts[10] = "%s:%s" % (tumor_gt, parts[10])
+                        out_handle.write("\t".join(parts) + "\n")
+
 
 if __name__ == '__main__':
     filename = sys.argv[1]
     outname = sys.argv[2]
     _af_annotate_and_filter(filename, outname)
-    
+    _add_gt(outname)
+
diff --git a/conf/containers.config b/conf/containers.config
index c5ad361..213986f 100644
--- a/conf/containers.config
+++ b/conf/containers.config
@@ -6,6 +6,7 @@ params {
         vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
         lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1'
         octopus = 'docker://dancooke/octopus:latest'
-        annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:v0.0.1'
+        annotsv = "docker://quay.io/biocontainers/annotsv:3.4.2--py312hdfd78af_0"
+        annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:v0.0.2'
     }
 }

From 9f613212c5e80cd3d63406aca910e114a9b443c5 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 18 Jun 2024 09:40:11 -0400
Subject: [PATCH 08/16] fix: genome version for hmf

---
 conf/genomes.config         | 1 -
 modules/local/copynumber.nf | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/conf/genomes.config b/conf/genomes.config
index 040c1b6..f111b58 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -79,7 +79,6 @@ params {
             PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/ActionableCodingPanel.37.bed.gz"
             HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed.gz"
             ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/common/ensembl_data"
-             //PURPLE
             GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/AmberGermlineSites.37.tsv.gz"
             GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/GC_profile.1000bp.37.cnp"
             DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/DiploidRegions.37.bed.gz'
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index 1bcfe9c..08e74c7 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -351,7 +351,7 @@ process amber_tonly {
     -tumor ${tumorname} -tumor_bam ${tumor} \
     -output_dir ${tumorname}_amber \
     -threads $task.cpus \
-    -ref_genome_version $HMFGENOMEREF \
+    -ref_genome_version $GENOMEVER \
     -loci $GERMLINEHET
 
     """
@@ -386,7 +386,7 @@ process amber_tn {
     -reference ${normalname} -reference_bam ${normal} \
     -output_dir ${tumorname}_vs_${normalname}_amber \
     -threads $task.cpus \
-    -ref_genome_version $HMFGENOMEREF \
+    -ref_genome_version $GENOMEVER \
     -loci $GERMLINEHET
 
     """

From 7e97a8dd6395789900b14c2211d046da9ca073ad Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 2 Jul 2024 09:21:17 -0400
Subject: [PATCH 09/16] fix: purple process order

---
 conf/modules.config         | 2 +-
 modules/local/copynumber.nf | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index d171cef..bb2fcbc 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -43,7 +43,7 @@ process {
         ]
     }
 
-    withName: 'purple' {
+    withName: 'purple|purple_tonly|purple_novc' {
         publishDir = [
             path: { "${params.outdir}/cnv/purple" },
             mode: 'copy'
diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf
index 08e74c7..c1239ec 100644
--- a/modules/local/copynumber.nf
+++ b/modules/local/copynumber.nf
@@ -510,10 +510,10 @@ process purple_novc {
 
     input:
         tuple val(id), val(tumorname), val(normalname),
-        path(cobaltin), path(amberin)
+        path(amberin), path(cobaltin)
 
     output:
-        tuple val(tumorname), path("${tumorname}")
+        tuple val(id), val(tumorname), val(normalname), path("${id}")
 
     script:
 
@@ -528,7 +528,7 @@ process purple_novc {
     -ref_genome $HMFGENOMEREF \
     $ENSEMBLCACHE \
     -threads $task.cpus \
-    -output_dir ${tvn}
+    -output_dir ${id}
 
     """
 
@@ -548,7 +548,7 @@ process purple_tonly {
 
     input:
         tuple val(tumorname), 
-        path(cobaltin), path(amberin),
+        path(amberin), path(cobaltin),
         path(somaticvcf), path(somaticvcfindex)
 
     output:

From d17fc2a16ca2d7a995e73e4b5701da2274858f04 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 2 Jul 2024 09:21:32 -0400
Subject: [PATCH 10/16] feat: add genomes to support output from SF

---
 conf/genomes.config | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/conf/genomes.config b/conf/genomes.config
index f111b58..e6e2e2c 100644
--- a/conf/genomes.config
+++ b/conf/genomes.config
@@ -121,5 +121,48 @@ params {
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM']
 
         }
+        'hg38_sf' {
+            genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome/bwamem2/Homo_sapiens_assembly38.fasta"
+            genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome/bwamem2/Homo_sapiens_assembly38.fasta.fai"
+            bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta"
+            genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
+            wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
+            intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
+            fullinterval = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/genomes/hg38_main.bed" 
+            INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
+            KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
+            KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
+            dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
+            gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' 
+            pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz"    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon}
+            germline_resource = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz"
+            KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
+            snpeff_genome = "GRCh38.86"
+            snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config"
+            snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/"
+            sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz"
+            somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier"
+            vepcache = "/fdb/VEP/102/cache"
+            vepspecies = "homo_sapiens"
+            vepbuild = "GRCh38"
+            annotsvgenome = "GRCh38"
+            octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
+            octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
+            SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
+            chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
+            //HMFTOOLS
+            GENOMEVER = "38"
+            HMFGENOME =  "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa"
+            SOMATICHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz"
+            GERMLINEHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.germline.38.vcf.gz"
+            PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz"
+            HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz"
+            ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data"
+            //PURPLE
+            GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz"
+            GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/GC_profile.1000bp.38.cnp"
+            DIPLODREG = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/DiploidRegions.38.bed.gz"
+            DRIVERS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/DriverGenePanel.38.tsv"
+        }  
     }
 }

From d4a4bf9a4f75f17ed650400c3d7c08b5f95ab10f Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Tue, 2 Jul 2024 09:21:49 -0400
Subject: [PATCH 11/16] fix: output additional octopus annotation

---
 modules/local/variant_calling.nf       | 2 +-
 modules/local/variant_calling_tonly.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index a55b424..5d1a568 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -472,7 +472,7 @@ process octopus_tn {
     """
     octopus -R $GENOMEREF -I ${normal} ${tumor} --normal-sample ${normalname} \
     -C cancer \
-    --annotations AF AC AD DP -t ${bed} \
+    --annotations AF AC AD DP SB -t ${bed} \
     --threads $task.cpus \
     $GERMLINE_FOREST \
     $SOMATIC_FOREST \
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 4e0991e..6a2516a 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -342,7 +342,7 @@ process octopus_tonly {
     script:
     """
     octopus -R $GENOMEREF -C cancer -I ${tumor} \
-    --annotations AF AC AD DP \
+    --annotations AF AC AD DP SB \
     -B 92Gb \
     -t ${bed} \
     --threads ${task.cpus}\

From 609a9bb1dfa17ba25aeff50eaded584af0bcd400 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 5 Jul 2024 10:26:19 -0400
Subject: [PATCH 12/16] feat: add GT for strelka

---
 bin/{convertStrelka.py => strelka_convert.py} | 16 +++++++++-------
 conf/modules.config                           |  4 ++--
 2 files changed, 11 insertions(+), 9 deletions(-)
 rename bin/{convertStrelka.py => strelka_convert.py} (93%)

diff --git a/bin/convertStrelka.py b/bin/strelka_convert.py
similarity index 93%
rename from bin/convertStrelka.py
rename to bin/strelka_convert.py
index c6ed2cd..e0e6a34 100755
--- a/bin/convertStrelka.py
+++ b/bin/strelka_convert.py
@@ -3,6 +3,7 @@
 import cyvcf2
 import sys 
 import gzip 
+import os 
 
 def _tumor_normal_genotypes(ref, alt, info):
     """Retrieve standard 0/0, 0/1, 1/1 style genotypes from INFO field.
@@ -92,15 +93,15 @@ def _af_annotate_and_filter(in_file,out_file):
                 writer.write_record(rec)
     writer.close()
 
-def is_gzipped(path):
-    return path.endswith(".gz")
+#def is_gzipped(path):
+#    return path.endswith(".gz")
 
 def _add_gt(in_file):
         ##Set genotypes now
-        out_file = in_file.replace(".vcf.gz", "-fixed.vcf")
+        out_file = os.path.basename(in_file).replace(".vcf.gz", "-fixed.vcf")
         #open_fn = gzip.open if is_gzipped(in_file) else open
         with gzip.open(in_file,'rt') as in_handle: 
-            with open(out_file,"w") as out_handle:
+            with open(out_file,"wt") as out_handle:
                 added_gt = False
                 for line in in_handle:
                     if line.startswith("##FORMAT") and not added_gt:
@@ -121,10 +122,11 @@ def _add_gt(in_file):
                         parts[10] = "%s:%s" % (tumor_gt, parts[10])
                         out_handle.write("\t".join(parts) + "\n")
 
-
 if __name__ == '__main__':
     filename = sys.argv[1]
     outname = sys.argv[2]
-    _af_annotate_and_filter(filename, outname)
-    _add_gt(outname)
+    _add_gt(filename)
+    newname = os.path.basename(filename).replace(".vcf.gz", "-fixed.vcf")
+    _af_annotate_and_filter(newname,outname)
+    os.remove(newname)
 
diff --git a/conf/modules.config b/conf/modules.config
index bb2fcbc..a1e595f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -224,14 +224,14 @@ process {
         ]
     }
 
-    withName: 'contamination_tumoronly|learnreadorientationmodel_tonly|learnreadorientationmodel|mergemut2stats|mergemut2stats_tonly|contamination_paired|mutect2filter' {
+    withName: 'learnreadorientationmodel|mergemut2stats|contamination_paired|mutect2filter' {
         publishDir = [
             path: { "${params.outdir}/vcfs/mutect2" },
             mode: 'copy'
         ]
     }
 
- withName: 'mutect2filter_tonly' {
+ withName: 'mutect2filter_tonly|contamination_tumoronly|learnreadorientationmodel_tonly|mergemut2stats_tonly' {
         publishDir = [
             path: { "${params.outdir}/vcfs/mutect2_tonly" },
             mode: 'copy'

From 4bf7e9bb7eead80d2f7a0a64de296adfa6f4a10a Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 5 Jul 2024 10:26:33 -0400
Subject: [PATCH 13/16] feat: strelka workflow

---
 subworkflows/local/workflows.nf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index dfce894..d8b98af 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -17,7 +17,7 @@ include {pileup_paired_t; pileup_paired_n;
     strelka_tn; 
     varscan_tn; vardict_tn; lofreq_tn; muse_tn; sage_tn;
     octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; 
-    combineVariants_strelka;
+    combineVariants_strelka; convert_strelka;
     combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly;
     combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly;
     combineVariants as combineVariants_sage; combineVariants as combineVariants_sage_tonly;
@@ -290,6 +290,7 @@ workflow VC {
         indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)}
         | combineVariants_strelka | join(sample_sheet_paired)
         | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)}
+        | convert_strelka
         annotvep_tn_strelka(strelka_in)
 
         vc_all=vc_all|concat(strelka_in)

From 8586a18dcd72dedff3ee55ad2640401c6f9120d7 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Fri, 5 Jul 2024 10:27:14 -0400
Subject: [PATCH 14/16] style: better visibility

---
 modules/local/variant_calling.nf       | 38 ++++++++++++++++++++++----
 modules/local/variant_calling_tonly.nf |  3 +-
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index 5d1a568..eae43f2 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -257,14 +257,12 @@ process mutect2filter {
 
     input:
         tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs),
-        path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
+        path(pileups), path(normal_pileups), path(tumorcontamination), path(normalcontamination)
 
     output:
         tuple val("${tumor}_vs_${normal}"),
-        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"),
-        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"),
-        path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), 
-        path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"),
+        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"),
+        path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"),
         path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
@@ -813,6 +811,36 @@ process combineVariants_strelka {
 }
 
 
+process convert_strelka {
+    //Add GT column to 
+    conda 'bioconda::cyvcf2 bioconda::bcftools numpy'
+    label 'process_medium'
+
+    input:
+        tuple val(tumor), val(normal), val(vc),
+        path(strelkavcf), path(strelkaindex)
+
+    output:
+        tuple val(tumor), val(normal), val("strelka"),
+        path("${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz"), 
+        path("${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi")
+
+
+    script:
+
+    """
+    python /data/nousomedr/wgs/LOGAN/bin/strelka_convert.py ${strelkavcf} ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz
+    bcftools index -t ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz
+    """
+
+    stub:
+
+    """
+    touch ${tumor}.filtered.strelka-fixed.vcf.gz ${tumor}.filtered.strelka-fixed.vcf.gz.tbi
+    """
+
+}
+
 process somaticcombine {
     container "${params.containers.logan}"
     label 'process_medium'
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index 6a2516a..f810a04 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -194,7 +194,7 @@ process mutect2filter_tonly {
     label 'process_medium'
 
     input:
-        tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
+        tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(tumorcontamination)
     output:
         tuple val(sample),
         path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"),
@@ -205,7 +205,6 @@ process mutect2filter_tonly {
     //Include the stats and  concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz
     mut2in = mutvcfs.join(" -I ")
 
-
     """
     gatk SortVcf -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz --CREATE_INDEX
     gatk FilterMutectCalls \

From 7f8e7acb7ee0fc745981862c8804a185e2f8d5e7 Mon Sep 17 00:00:00 2001
From: dnousome <dnousome@gmail.com>
Date: Mon, 8 Jul 2024 16:36:34 -0400
Subject: [PATCH 15/16] feat: docker update with gatk, cyvcf, hmftools changes

---
 docker/logan_base/Dockerfile | 46 +++++++++++++++++-------------------
 docker/logan_base/build.sh   |  8 +++----
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile
index e80ed91..d11804b 100644
--- a/docker/logan_base/Dockerfile
+++ b/docker/logan_base/Dockerfile
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v5
+FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v6
 
 # build time variables
 ARG BUILD_DATE="000000"
@@ -21,7 +21,7 @@ RUN apt-get update \
  && apt-get -y upgrade \
  && DEBIAN_FRONTEND=noninteractive apt-get install -y \
       bc \
-      openjdk-17-jdk   
+      openjdk-17-jdk   #Needed for GATK >4
 
 # Common bioinformatics tools 
 # bwa/0.7.17-4  bowtie/1.2.3  bowtie2/2.3.5.1 
@@ -48,11 +48,11 @@ RUN wget https://github.com/biod/sambamba/releases/download/v0.8.1/sambamba-0.8.
 
 # Install GATK4 (GATK/4.5.0.0)
 # Requires Java17 
-RUN wget https://github.com/broadinstitute/gatk/releases/download/4.5.0.0/gatk-4.5.0.0.zip \
-    && unzip /opt2/gatk-4.5.0.0.zip \
-    && rm /opt2/gatk-4.5.0.0.zip \
-    && /opt2/gatk-4.5.0.0/gatk --list
-ENV PATH="/opt2/gatk-4.5.0.0:$PATH"
+RUN wget https://github.com/broadinstitute/gatk/releases/download/4.6.0.0/gatk-4.6.0.0.zip \
+    && unzip /opt2/gatk-4.6.0.0.zip \
+    && rm /opt2/gatk-4.6.0.0.zip \
+    && /opt2/gatk-4.6.0.0/gatk --list
+ENV PATH="/opt2/gatk-4.6.0.0:$PATH"
 
 
 #Use DISCVRSeq For CombineVariants Replacement
@@ -96,12 +96,15 @@ RUN Rscript -e 'BiocManager::install(c("rtracklayer"))'
 # Install Sequenza-Utils/3.0.0 and Sequenza
 # Requires R, Python, SAMtools, tabix (already satisfied)
 # https://cran.r-project.org/web/packages/sequenza/vignettes/sequenza.html#getting-started
-##Install Old version of IOtools for parallel processing
+# Install Old version of IOtools for parallel processing
 RUN pip3 install --upgrade pip \
 	  && pip3 install sequenza-utils \
       && Rscript -e 'remotes::install_github("ShixiangWang/copynumber"); remotes::install_github("cran/sequenza")' \
       && Rscript -e 'remotes::install_version("iotools",version="0.3-2")'
 
+##Install cyvcf2 for processing vcfs in python
+RUN pip3 install cyvcf2
+
 # Install Control-FREEC/v11.6 and additional dependencies
 # Requires R, samtools, bedtools, sambamba (already satisfied)
 # http://boevalab.inf.ethz.ch/FREEC/tutorial.html#install
@@ -114,10 +117,10 @@ ENV PATH="/opt2/FREEC-11.6/src:$PATH"
 WORKDIR /opt2 
 
 
-# Install Somalier/v0.2.16
+# Install Somalier/v0.2.19
 # download static binary
 RUN mkdir somalier  \
-    && wget -O somalier/somalier https://github.com/brentp/somalier/releases/download/v0.2.16/somalier \
+    && wget -O somalier/somalier https://github.com/brentp/somalier/releases/download/v0.2.19/somalier \
     && chmod a+rx /opt2/somalier/somalier
 ENV PATH="/opt2/somalier:$PATH"
 
@@ -189,31 +192,26 @@ RUN wget -O svaba_1.2.0 https://github.com/walaj/svaba/releases/download/v1.2.0/
     && mkdir svaba \ 
     && mv svaba_1.2.0 svaba/svaba \
     && chmod a+x svaba/svaba
-
 ENV PATH="/opt2/svaba:$PATH"
 
-# MUSE
-RUN wget -O muse_2.0.4.tar.gz https://github.com/wwylab/MuSE/archive/refs/tags/v2.0.4.tar.gz \
-    && tar -xzf muse_2.0.4.tar.gz \ 
-    && cd MuSE-2.0.4 \
+# MUSE-Use Cloned Version due to -ldeflate option needed
+RUN git clone https://github.com/dnousome/MuSE \
+    && cd MuSE \
     && ./install_muse.sh \
-    && mv MuSE /opt2/ \
-    && chmod a+x /opt2/MuSE \
-    && rm -R /opt2/MuSE-2.0.4 \
-    && rm /opt2/muse_2.0.4.tar.gz
-
+    && chmod a+rX /opt2/MuSE/MuSE
 ENV PATH="/opt2/MuSE:$PATH"
+WORKDIR /opt2
 
 # HMFtools for PURPLE/COBALT/AMBER
-RUN wget https://github.com/hartwigmedical/hmftools/releases/download/amber-v4.0/amber_v4.0.jar \
+RUN wget https://github.com/hartwigmedical/hmftools/releases/download/amber-v4.0.1/amber_v4.0.1.jar \
     && wget https://github.com/hartwigmedical/hmftools/releases/download/cobalt-v1.16/cobalt_v1.16.jar \
     && wget https://github.com/hartwigmedical/hmftools/releases/download/purple-v4.0.2/purple_v4.0.2.jar \
-    && wget https://github.com/hartwigmedical/hmftools/releases/download/sage-v3.4.3/sage_v3.4.3.jar \ 
+    && wget https://github.com/hartwigmedical/hmftools/releases/download/sage-v3.4.4/sage_v3.4.4.jar \ 
     && mkdir hmftools \
-    && mv amber_v4.0.jar hmftools/amber.jar \
+    && mv amber_v4.0.1.jar hmftools/amber.jar \
     && mv cobalt_v1.16.jar hmftools/cobalt.jar \
     && mv purple_v4.0.2.jar hmftools/purple.jar \
-    && mv sage_v3.4.3.jar hmftools/sage.jar \
+    && mv sage_v3.4.4.jar hmftools/sage.jar \
     && chmod a+x hmftools/amber.jar
 ENV PATH="/opt2/hmftools:$PATH"
 
diff --git a/docker/logan_base/build.sh b/docker/logan_base/build.sh
index 5515cc0..90b4446 100644
--- a/docker/logan_base/build.sh
+++ b/docker/logan_base/build.sh
@@ -5,13 +5,13 @@
 #docker buildx inspect upbeat_ganguly
 #docker buildx build --platform linux/amd64 -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --push .
 
-docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.5 -f Dockerfile . 
+docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.6 -f Dockerfile . 
 
-docker tag ccbr_logan_base:v0.3.5 dnousome/ccbr_logan_base:v0.3.5
-docker tag ccbr_logan_base:v0.3.5 dnousome/ccbr_logan_base
+docker tag ccbr_logan_base:v0.3.6 dnousome/ccbr_logan_base:v0.3.6
+docker tag ccbr_logan_base:v0.3.6 dnousome/ccbr_logan_base
 
 
-docker push dnousome/ccbr_logan_base:v0.3.5
+docker push dnousome/ccbr_logan_base:v0.3.6
 docker push dnousome/ccbr_logan_base:latest
 
 

From b518795804b44f7ec4d2c0006c39a144ff118dd5 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 10 Jul 2024 10:12:39 -0400
Subject: [PATCH 16/16] fix: strelka add GT/AD columns

---
 conf/containers.config                 |  2 +-
 modules/local/variant_calling.nf       | 48 +++++++++++++++++---------
 modules/local/variant_calling_tonly.nf |  2 +-
 nextflow.config                        |  2 +-
 subworkflows/local/workflows.nf        | 11 +++---
 subworkflows/local/workflows_tonly.nf  | 17 ++++-----
 6 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/conf/containers.config b/conf/containers.config
index 213986f..fa3cd71 100644
--- a/conf/containers.config
+++ b/conf/containers.config
@@ -2,7 +2,7 @@
 params {
     containers {
         base = 'docker://nciccbr/ccbr_ubuntu_base_20.04:v6.1'
-        logan = 'docker://dnousome/ccbr_logan_base:v0.3.5'
+        logan = 'docker://dnousome/ccbr_logan_base:v0.3.6'
         vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
         lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1'
         octopus = 'docker://dancooke/octopus:latest'
diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf
index eae43f2..e258a8d 100644
--- a/modules/local/variant_calling.nf
+++ b/modules/local/variant_calling.nf
@@ -333,7 +333,7 @@ process strelka_tn {
     mv wd/results/variants/somatic.snvs.vcf.gz  ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz
     mv wd/results/variants/somatic.indels.vcf.gz  ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz
 
-    printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname
+    printf %s "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname
 
     bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \
         | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz
@@ -631,18 +631,27 @@ process combineVariants {
 
     script:
     vcfin = inputvcf.join(" -I ")
+    //Create Tumor Normal here
+    samplist=sample.tokenize('_vs_')
+    if(samplist.size>1){
+        samporder = samplist.join(",")
+    }else{
+        samporder = sample
+    }
 
     """
     mkdir ${vc}
     gatk --java-options "-Xmx48g" SortVcf \
-        -O ${sample}.${vc}.marked.vcf.gz \
+        -O ${sample}.${vc}.markedtemp.vcf.gz \
         -SD $GENOMEDICT \
         -I $vcfin
+    
+    bcftools view ${sample}.${vc}.markedtemp.vcf.gz -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz 
     bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
 
-    bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
+    bcftools view ${sample}.${vc}.temp.vcf -f PASS -s $samporder -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
 
     mv ${sample}.${vc}.marked.vcf.gz ${vc}
     mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc}
@@ -681,14 +690,19 @@ process combineVariants_alternative {
 
     script:
     vcfin = vcfs.join(" ")
-
+    samplist=sample.tokenize('_vs_')
+    if(samplist.size>1){
+        samporder = samplist.join(",")
+    }else{
+        samporder = sample
+    }
 
     if (vc.contains("octopus")) {
         """
         mkdir ${vc}
         bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz
         bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf
-        bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -i "INFO/SOMATIC==1" -Oz -o ${sample}.${vc}.marked.vcf.gz
+        bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -i "INFO/SOMATIC==1" -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz
         bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
@@ -705,7 +719,7 @@ process combineVariants_alternative {
         mkdir ${vc}
         bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz
         bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf
-        bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz
+        bcftools sort ${sample}.${vc}.temp.vcf  | bcftools view - -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz
         bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
@@ -717,12 +731,7 @@ process combineVariants_alternative {
         bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
         """
     }
-
-
-   
-   
    
-
     stub:
 
     """
@@ -784,15 +793,19 @@ process combineVariants_strelka {
 
     vcfin = strelkasnvs.join(" ")
     indelsin = strelkaindels.join(" ")
-
-
+    samplist=sample.tokenize('_vs_')
+    if(samplist.size>1){
+        samporder = samplist.join(",")
+    }else{
+        samporder = sample
+    }
     """
     bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a
     bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz
 
-    bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz
+    bcftools sort ${sample}.temp1.strelka.vcf.gz |bcftools view - -s $samporder -Oz -o ${sample}.strelka.vcf.gz
 
     bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz
 
@@ -812,8 +825,8 @@ process combineVariants_strelka {
 
 
 process convert_strelka {
-    //Add GT column to 
-    conda 'bioconda::cyvcf2 bioconda::bcftools numpy'
+    //Add GT/AD column to Strelka Variants
+    container "${params.containers.logan}"
     label 'process_medium'
 
     input:
@@ -912,6 +925,9 @@ process ffpe_1 {
     touch "${tumorsample}_vs_${normal}_combined_ffpolish.vcf.gz"
     """
 }
+
+
+
 /*DISCVR
 process somaticcombine {
     container "${params.containers.logan}"
diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf
index f810a04..7c30eb8 100644
--- a/modules/local/variant_calling_tonly.nf
+++ b/modules/local/variant_calling_tonly.nf
@@ -264,7 +264,7 @@ process varscan_tonly {
     awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \
         | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf
 
-    printf "TUMOR\t!{tumorname}\n" > sampname
+    printf "Sample1\t!{tumorname}\n" > sampname
 
     bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \
         | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz
diff --git a/nextflow.config b/nextflow.config
index 9f5c5c6..9a8269a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,7 +54,7 @@ params {
     bam_input=null
     BAMINPUT=null
 
-    callers = "mutect2,octopus,lofreq,muse,sage,vardict,varscan"
+    callers = "mutect2,octopus,strelka,lofreq,muse,sage,vardict,varscan"
     tonlycallers = "mutect2,octopus,vardict,varscan"
     cnvcallers = "purple,sequenza,freec"
     svcallers = "manta,svaba"
diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf
index d8b98af..783d0fc 100644
--- a/subworkflows/local/workflows.nf
+++ b/subworkflows/local/workflows.nf
@@ -97,22 +97,20 @@ workflow ALIGN {
     take:
         fastqinput
         sample_sheet
-    main:
-    fastp(fastqinput)
 
+    main:
     if (params.intervals){
         intervalbedin = Channel.fromPath(params.intervals)
     }else{
         intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
     }
-
     splitinterval(intervalbedin)
 
+    fastp(fastqinput)
     bwamem2(fastp.out)
     bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten())
     bambyinterval=bwamem2.out.combine(splitinterval.out.flatten())
 
-
     bqsr(bqsrbambyinterval)
     bqsrs=bqsr.out.groupTuple()
         .map { samplename,beds -> tuple( samplename,
@@ -123,7 +121,6 @@ workflow ALIGN {
     tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0)
     applybqsr(tobqsr)
 
-    //sample_sheet.view()
     bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)}
 
     emit:
@@ -287,7 +284,7 @@ workflow VC {
         strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1])
         | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}",
         vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex,
-        indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)}
+        indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() },indelindex)}
         | combineVariants_strelka | join(sample_sheet_paired)
         | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)}
         | convert_strelka
@@ -308,7 +305,7 @@ workflow VC {
 
         vc_all=vc_all|concat(vardict_in)
 
-        //VarDict TOnly
+        //Vardict TOnly
         if (!params.no_tonly){
         vardict_in_tonly=vardict_tonly(bambyinterval_t) 
         | groupTuple()
diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf
index d691039..02d8698 100644
--- a/subworkflows/local/workflows_tonly.nf
+++ b/subworkflows/local/workflows_tonly.nf
@@ -49,7 +49,7 @@ workflow INPUT_TONLY {
     if(params.fastq_input){
         fastqinput=Channel.fromFilePairs(params.fastq_input)
     }else if(params.fastq_file_input){
-        fastqinput=Channel.fromPath(params.fastq_file_input).view()
+        fastqinput=Channel.fromPath(params.fastq_file_input)
                         .splitCsv(header: false, sep: "\t", strip:true)
                         .map{ sample,fq1,fq2 -> 
                             tuple(sample, tuple(file(fq1),file(fq2))) 
@@ -80,14 +80,15 @@ workflow ALIGN_TONLY {
         sample_sheet
 
     main:
-        fastp(fastqinput)
-            if (params.intervals){
-                intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file')
-            }else{
-                intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
-            }
-            splitinterval(intervalbedin)
+
+    if (params.intervals){
+        intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file')
+    }else{
+        intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
+        }
     
+    splitinterval(intervalbedin)
+    fastp(fastqinput)
     bwamem2(fastp.out)
     //indelrealign(bwamem2.out) Consider indelreaglinement using ABRA?