diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e6e4582..401dbae 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,9 +13,10 @@ on: jobs: build: runs-on: ubuntu-latest + timeout-minutes: 2 strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] steps: - uses: actions/checkout@v3 @@ -25,22 +26,29 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" - name: Install nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 + with: + version: "24.04.4" - name: Install dependencies run: | python -m pip install --upgrade pip setuptools pip install .[dev,test] python -c 'from logan.src.util import chmod_bins_exec; chmod_bins_exec()' - - name: Test stub run + - name: Check CLI basics run: | - mkdir tmp && cd tmp which logan - logan init - logan run -profile ci_stub,docker \ - --fastq_input "/opt2/.tests/*R{1,2}.fastq.gz" \ - --vc --cnv --sv \ + logan --version + logan --citation + - name: Test stub run for Fastqs + run: | + logan init + logan run -profile ci_stub \ + --sample_sheet .tests/pairs.tsv \ + --fastq_input ".tests/*R{1,2}_001.fastq.gz" \ + --vc --cnv --sv --gl --qc \ + --split_regions 2 \ --genome hg38 \ - --outdir /opt2/output_tn_fqs \ - --interval /opt2/.tests/interval.bed \ - -stub + --outdir output_tn_fqs \ + --intervals .tests/interval.bed \ + -preview diff --git a/.tests/README.md b/.tests/README.md index 97b6f4c..540c385 100644 --- a/.tests/README.md +++ b/.tests/README.md @@ -2,5 +2,5 @@ These input files are used for continuous integration purposes, specificially to dry run the pipeline whenever commits have been made to the main, master, or unified branches. -**Please Note:** Each of the provided FastQ files and BAM files are empty and are not suitable input to the CCBR GATK4 pipeline! 
+**Please Note:** Each of the provided FastQ files and BAM files have only headers and will not work for the LOGAN pipeline diff --git a/.tests/Sample10_ARK1_S37.R1.fastq.gz b/.tests/Sample10_ARK1_S37.R1.fastq.gz deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample10_ARK1_S37.R2.fastq.gz b/.tests/Sample10_ARK1_S37.R2.fastq.gz deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample10_ARK1_S37.recal.bam b/.tests/Sample10_ARK1_S37.recal.bam deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample11_ACI_158_S38.R1.fastq.gz b/.tests/Sample11_ACI_158_S38.R1.fastq.gz deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample11_ACI_158_S38.R2.fastq.gz b/.tests/Sample11_ACI_158_S38.R2.fastq.gz deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample11_ACI_158_S38.recal.bam b/.tests/Sample11_ACI_158_S38.recal.bam deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample4_CRL1622_S31.R1.fastq.gz b/.tests/Sample4_CRL1622_S31.R1.fastq.gz deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample4_CRL1622_S31.R2.fastq.gz b/.tests/Sample4_CRL1622_S31.R2.fastq.gz deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/Sample4_CRL1622_S31.recal.bam b/.tests/Sample4_CRL1622_S31.recal.bam deleted file mode 100644 index e69de29..0000000 diff --git a/.tests/WGS_NC_N.R1_001.fastq.gz b/.tests/WGS_NC_N.R1_001.fastq.gz new file mode 100644 index 0000000..23a6c16 Binary files /dev/null and b/.tests/WGS_NC_N.R1_001.fastq.gz differ diff --git a/.tests/WGS_NC_N.R2_001.fastq.gz b/.tests/WGS_NC_N.R2_001.fastq.gz new file mode 100644 index 0000000..57d5fc6 Binary files /dev/null and b/.tests/WGS_NC_N.R2_001.fastq.gz differ diff --git a/.tests/WGS_NC_N.bam b/.tests/WGS_NC_N.bam new file mode 100644 index 0000000..02704a1 Binary files /dev/null and b/.tests/WGS_NC_N.bam differ diff --git a/.tests/WGS_NC_T.R1_001.fastq.gz b/.tests/WGS_NC_T.R1_001.fastq.gz new file 
mode 100644 index 0000000..92f3a83 Binary files /dev/null and b/.tests/WGS_NC_T.R1_001.fastq.gz differ diff --git a/.tests/WGS_NC_T.R2_001.fastq.gz b/.tests/WGS_NC_T.R2_001.fastq.gz new file mode 100644 index 0000000..39b1ba2 Binary files /dev/null and b/.tests/WGS_NC_T.R2_001.fastq.gz differ diff --git a/.tests/WGS_NC_T.bam b/.tests/WGS_NC_T.bam new file mode 100644 index 0000000..02704a1 Binary files /dev/null and b/.tests/WGS_NC_T.bam differ diff --git a/.tests/interval.bed b/.tests/interval.bed index e69de29..91f2661 100644 --- a/.tests/interval.bed +++ b/.tests/interval.bed @@ -0,0 +1,10 @@ +chr22 10510000 10784643 . intersection ACGTmer 500 + +chr22 10834643 10874572 . intersection ACGTmer 500 + +chr22 10924572 10966724 . intersection ACGTmer 500 + +chr22 11016724 11068987 . intersection ACGTmer 500 + +chr22 11118987 11160921 . intersection ACGTmer 500 + +chr22 11210921 11378056 . intersection ACGTmer 500 + +chr22 11428056 11497337 . intersection ACGTmer 500 + +chr22 11547337 11631288 . intersection ACGTmer 500 + +chr22 11681288 11724629 . intersection ACGTmer 500 + +chr22 11774629 11977555 . 
intersection ACGTmer 500 + diff --git a/.tests/pairs.tsv b/.tests/pairs.tsv index 86f1bc9..fdccfb3 100644 --- a/.tests/pairs.tsv +++ b/.tests/pairs.tsv @@ -1,3 +1,2 @@ -Tumor Normal -Sample10_ARK1_S37 Sample4_CRL1622_S31 -Sample11_ACI_158_S38 Sample4_CRL1622_S31 +Tumor Normal +WGS_NC_T WGS_NC_N diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e2f526..9663d18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # LOGAN development version +## LOGAN 0.2.0 +### New features +- Added additional SV callers(GRIDSS) and annotation for SV (GRIPSS) + CNV Callers (ASCAT, CNVKit) + SNV (Deepsomatic) +- Bugfixes for hg19 by fixing references +- Updated PON for hg38 using TCGA/GDC references +- In development: adding exome support by using bed file to restrict calling regions +- Refactored modules to be similar to nf-core + +## LOGAN 0.1.0 +### Features - Changed over to Nextflow CCBR template and pip packaging - Processes moved to `modules/local` directory - Workflows under the `subworkflows/local` directory diff --git a/CITATION.cff b/CITATION.cff index 6d81de7..72984e8 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,6 +9,10 @@ authors: given-names: Kelly orcid: https://orcid.org/0000-0003-3283-829X affiliation: Advanced Biomedical Computational Science, Frederick National Laboratory for Cancer Research, Frederick, MD 21702, USA + - family-names: Mathur + given-names: Samarth + orcid: https://orcid.org/0000-0002-6446-5718 + affiliation: Advanced Biomedical Computational Science, Frederick National Laboratory for Cancer Research, Frederick, MD 21702, USA - family-names: Koparde given-names: Vishal orcid: https://orcid.org/0000-0001-8978-8495 diff --git a/README.md b/README.md index fe3b0c8..9c276f2 100644 --- a/README.md +++ b/README.md @@ -19,19 +19,20 @@ Original pipelining and code forked from the CCBR Exome-seek Pipeline [Exome-see [singularity](https://singularity.lbl.gov/all-releases) must be installed on the target system. 
Snakemake orchestrates the execution of each step in the pipeline. To guarantee the highest level of reproducibility, each step relies on versioned images from [DockerHub](https://hub.docker.com/orgs/nciccbr/repositories). Nextflow uses singularity to pull these images onto the local filesystem prior to job execution, and as so, nextflow and singularity are the only two dependencies. ## Setup -LOGAN can be used with the Nextflow pipelining software +LOGAN can be used with the Nextflow pipelining software in Please clone this repository to your local filesystem using the following command on Biowulf: + ```bash # start an interactive node sinteractive --mem=2g --cpus-per-task=2 --gres=lscratch:200 + git clone https://github.com/CCBR/LOGAN module load nextflow ##Example run -nextflow run /data/LOGAN//main.nf +nextflow run LOGAN/main.nf -profile ci_stub -preview ``` ## Usage -LOGAN supports ### Input Files LOGAN supports inputs of either @@ -63,15 +64,16 @@ c130889189_PBMC /data/nousomedr/c130889189_PBMC.bam /data/nousomedr/c130889189 ``` ### Genome -`--genome` - A flag to indicate which genome to run for alignment/variant calling/etc. Like `--genome hg38` to run the hg38 genome +`--genome` - A flag to indicate which genome to run. hg38, hg19 and mm10 are supported. 
+Example: `--genome hg38` to run the hg38 genome `--genome hg19` and `--genome mm10` are also supported #### hg38 has options for either -`--genome hg38` - Based off the GRCh38.d1.vd1.fa which is consistent with TCGA and other GDC processing pipelines +`--genome hg38` - Based off the GRCh38.d1.vd1.fa which is consistent with TCGA/GDC processing pipelines `--genome hg38_sf` - Based off the Homo_sapiens_assembly38.fasta which is derived from the Broad Institute/NCI Sequencing Facility -The biggest difference between the two is that GRCh38.d1.vd1.fa has fewer contigs (especially related to HLA regions), so reads should map to chr6 vs the HLA contig directly +The biggest difference between the two is that GRCh38.d1.vd1.fa only the GCA_000001405.15_GRCh38_no_alt_analysis_set, Sequence Decoys (GenBank Accession GCA_000786075), and Virus Sequences. Homo_sapiens_assembly38.fasta has HLA specific contigs which may not be compatible with certain downstream tools. ### Operating Modes @@ -97,51 +99,58 @@ No addtional flags for sample sheet are required as all samples will be used to Adding flags determines SNV (germline and/or somatic), SV, and/or CNV calling modes -`--vc`- Enables somatic SNV calling using mutect2, vardict, varscan, octopus, strelka (TN only), MUSE (TN only), and lofreq (TN only) +`--vc` or `--snv` - Enables somatic SNV calling using mutect2, vardict, varscan, octopus, deepsomatic, strelka (TN only), MUSE (TN only), and lofreq (TN only) -`--germline`- Enables germline using Deepvariant +`--gl` or `--germline` - Enables germline calling using Deepvariant -`--sv`- Enables somatic SV calling using Manta, SVABA, and GRIDSS (coming soon) - -`--cnv`- Enables somatic CNV calling using FREEC, Sequenza, and Purple (hg19/hg38 only) +`--sv` or `--structural`- Enables somatic SV calling using Manta, GRIDSS, and SVABA +`--cnv` or `--copynumber`- Enables somatic CNV calling using FREEC, Sequenza, ASCAT, CNVKit, and Purple (hg19/hg38 only) #### Optional Arguments 
-`--indelrealign` - Enables indel realignment when running alignment steps. May be helpful for certain callers (VarScan, VarDict) - -`--callers`- Comma separated argument for callers, the default is to use all available. +`--callers` - Comma separated argument for selecting only specified callers, the default is to use all. Example: `--callers mutect2,octopus` -`--cnvcallers`- - Comma separated argument for CNV callers to use. Adding flag allows only certain callers to run. +`--cnvcallers` - Comma separated argument for selecting only specified CNV callers, the default is to use all. Example: `--cnvcallers purple` -`--svcallers`- - Comma separated argument for SV callers. Adding flag allows only certain callers to run. -Example: `--cnvcallers manta` +`--svcallers` - Comma separated argument for selecting only specified SV callers, the default is to use all. +Example: `--svcallers gridss` +`--ffpe` - Adds additional filtering for FFPE by detecting strand orientation bias using SOBDetector. + +`--intervals` - Limits calling to intervals provided in target bed file (target bed should have three columns of chr, start, and end) + +`--exome` - When using exome data, this flag limits calling to intervals provided in target bed to reduce time and to account for exome sequencing specific parameters. + +`--indelrealign` - Enables indel realignment using the GATK pipeline when running alignment steps. May be helpful for certain callers (VarScan, VarDict) that do not have local haplotype reassembly. 
## Running LOGAN Example of Tumor_Normal calling mode ```bash # preview the logan jobs that will run -nextflow run /data/LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -preview --vc --sv --cnv +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -preview --vc --sv --cnv # run a stub/dryrun of the logan jobs -nextflow run /data/LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -stub --vc --sv --cnv +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -stub --vc --sv --cnv # launch a logan run on slurm with the test dataset -nextflow run /data/LOGAN/main.nf --mode slurm -profile biowulf,slurm --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" --vc --sv --cnv +nextflow run LOGAN/main.nf --mode slurm -profile biowulf,slurm --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" --vc --sv --cnv ``` Example of Tumor only calling mode ```bash # preview the logan jobs that will run -nextflow run /data/LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -preview --vc --sv --cnv +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -preview --vc --sv --cnv # run a stub/dryrun of the logan jobs -nextflow run /data/LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -stub --vc --sv --cnv +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" 
--callers octopus,mutect2 -stub --vc --sv --cnv # launch a logan run on slurm with the test dataset -nextflow run /data/LOGAN/main.nf --mode slurm -profile biowulf,slurm --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 --vc --sv --cnv +nextflow run LOGAN/main.nf --mode slurm -profile biowulf,slurm --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 --vc --sv --cnv ``` +### Pipeline Tools and Overview +![alt text](docs/LOGAN.png) + ## Contribute diff --git a/VERSION b/VERSION index 49ffebc..341cf11 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.0-dev \ No newline at end of file +0.2.0 \ No newline at end of file diff --git a/bin/ascat.R b/bin/ascat.R old mode 100644 new mode 100755 index 334afe2..a275194 --- a/bin/ascat.R +++ b/bin/ascat.R @@ -10,38 +10,60 @@ library(RColorBrewer) args = commandArgs(trailingOnly=TRUE) tumor_bam=args[1] -normal_bam=args[2] +tumor_name=args[2] +normal_bam=args[3] +normal_name=args[4] +genome=args[5] +bed=args[6] +exome=args[7] +#chroms=scan(text=args[4],sep=",",quiet=T) +cpus=as.numeric(Sys.getenv("SLURM_CPUS_PER_TASK")) +cpus=ifelse(is.na(cpus),2,cpus) -genome="hg38" +if (exists(exome)){ + genomebasedir=sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT/WES",genome) +}else{ + genomebasedir=sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT",genome) + +} + +##DETERMINE SEX +system(sprintf('alleleCounter -l %s/gender_chr.loci -b %s -c chrX -o %s_temp_gender.out', + genomebasedir,normal_bam,normal_name)) +s=read.table(sprintf("%s_temp_gender.out",normal_name)) +gender=ifelse(sum(s$V7)>5,"XY","XX") +print(gender) ascat.prepareHTS( tumourseqfile = tumor_bam, normalseqfile = normal_bam, tumourname = tumor_name, normalname = normal_name, - allelecounter_exe = "/PATH/TO/allelecounter", - alleles.prefix = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/ASCAT/G1000_alleles", - loci.prefix = 
"/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/ASCAT_G1000_loci", - nthreads = 10 - gender = "XX", + allelecounter_exe = "alleleCounter", + alleles.prefix = sprintf("%s/G1000_alleles/G1000_alleles_%s_chr",genomebasedir,genome), + loci.prefix = sprintf("%s/G1000_loci/G1000_loci_%s_chr",genomebasedir,genome), + gender = gender, genomeVersion = genome, - nthreads = 8, - tumourLogR_file = "Tumor_LogR.txt", - tumourBAF_file = "Tumor_BAF.txt", - normalLogR_file = "Germline_LogR.txt", - normalBAF_file = "Germline_BAF.txt") + nthreads = cpus, + tumourLogR_file = sprintf("%s_LogR.txt",tumor_name), + tumourBAF_file = sprintf("%s_BAF.txt",tumor_name), + normalLogR_file = sprintf("%s_LogR.txt",normal_name), + normalBAF_file = sprintf("%s_BAF.txt",normal_name), + BED_file=bed) -ascat.bc = ascat.loadData(Tumor_LogR_file = "Tumor_LogR.txt", Tumor_BAF_file = "Tumor_BAF.txt", - Germline_LogR_file = "Germline_LogR.txt", Germline_BAF_file = "Germline_BAF.txt", gender = 'XX', genomeVersion = "hg19") +ascat.bc = ascat.loadData(Tumor_LogR_file = sprintf("%s_LogR.txt",tumor_name), + Tumor_BAF_file = sprintf("%s_BAF.txt",tumor_name), + Germline_LogR_file = sprintf("%s_LogR.txt",normal_name), Germline_BAF_file = sprintf("%s_BAF.txt",normal_name), + gender = gender, genomeVersion = genome) ascat.plotRawData(ascat.bc, img.prefix = "Before_correction_") -ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = "GC_file.txt", replictimingfile = "RT_file.txt") +ascat.bc = ascat.correctLogR(ascat.bc, + GCcontentfile = sprintf("%s/GC_G1000/GC_G1000_%s.txt",genomebasedir,genome), + replictimingfile = sprintf("%s/RT_G1000/RT_G1000_%s.txt",genomebasedir,genome)) ascat.plotRawData(ascat.bc, img.prefix = "After_correction_") ascat.bc = ascat.aspcf(ascat.bc) ascat.plotSegmentedData(ascat.bc) ascat.output = ascat.runAscat(ascat.bc, gamma=1, write_segments = T) QC = ascat.metrics(ascat.bc,ascat.output) -save(ascat.bc, ascat.output, QC, file = 'ASCAT_objects.Rdata') - - -##### \ No newline at end of 
file +write.table(QC,sprintf("%s.qc.txt",paste0(tumor_name,"_vs_",normal_name))) +save(ascat.bc, ascat.output, QC, file = sprintf('%s_vs_%s_ascat.Rdata',tumor_name,normal_name)) diff --git a/bin/assess_significance.R b/bin/assess_significance.R old mode 100644 new mode 100755 diff --git a/bin/combineAllSampleCompareResults.R b/bin/combineAllSampleCompareResults.R old mode 100644 new mode 100755 diff --git a/bin/flowcell_lane.py b/bin/flowcell_lane.py old mode 100644 new mode 100755 diff --git a/bin/hello-world.py b/bin/hello-world.py new file mode 100644 index 0000000..a5df617 --- /dev/null +++ b/bin/hello-world.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +""" +Example script that could be used by a nextflow process +""" + +import sys + + +def main(): + print("Hello world!") + + +if __name__ == "__main__": + main(sys.argv) \ No newline at end of file diff --git a/bin/logan b/bin/logan new file mode 100644 index 0000000..07b7af3 --- /dev/null +++ b/bin/logan @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# script that allows the CLI to work out-of-the-box +# without the need to install it via pip first + +TOOLDIR=$(realpath $(dirname $(dirname ${BASH_SOURCE}))) + +${TOOLDIR}/main.py "$@" \ No newline at end of file diff --git a/bin/makeGraph.R b/bin/makeGraph.R old mode 100644 new mode 100755 diff --git a/bin/make_freec_exome_paired.pl b/bin/make_freec_exome_paired.pl new file mode 100644 index 0000000..eb192a2 --- /dev/null +++ b/bin/make_freec_exome_paired.pl @@ -0,0 +1,50 @@ +#!/usr/bin/perl -w +use strict; +use List::Util 'shuffle'; + +#INPUT + +#my $mergedmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_merged.maf'; #to fix... +#open C, ">$mergedmaf"; + +my $outfile = $ARGV[0] . 
'/freec_exome_config.txt'; +my $chrLenFile = $ARGV[1]; +my $chrFiles = $ARGV[2]; +my $tumormateFile = $ARGV[3]; +my $controlmateFile = $ARGV[4]; +my $makePileup = $ARGV[5]; +my $fastaFile = $ARGV[6]; +my $SNPfile = $ARGV[7]; +my $targets = $ARGV[8]; + +open C, ">$outfile"; + +print C '[general]' . "\n\n"; + +print C "BedGraphOutput = TRUE\ndegree = 1\nforceGCcontentNormalization = 1\nminCNAlength = 3\nnoisyData = TRUE\nreadCountThreshold = 50\n"; +print C "chrLenFile = $chrLenFile\n"; +print C "ploidy = 2,3,4\nbreakPointThreshold = 0.8\nwindow = 0\nbreakPointType=4\n"; +print C "chrFiles = $chrFiles\n"; +print C "minimalSubclonePresence = 30\nprintNA = FALSE\ncontaminationAdjustment = TRUE\nmaxThreads = 8\n"; +print C "outputDir = $ARGV[0]\n\n"; + +print C '[sample]' . "\n\n"; + +print C "mateFile = $tumormateFile\n"; +print C "inputFormat = BAM\nmateOrientation = FR\n\n"; + +print C '[control]' . "\n\n"; + +print C "mateFile = $controlmateFile\n"; +print C "inputFormat = BAM\nmateOrientation = FR\n\n"; + +print C '[target]' . "\n\n"; + +print C "captureRegions = $targets\n\n"; + +print C '[BAF]' . "\n\n"; + +print C "makePileup = $makePileup\n"; +print C "fastaFile = $fastaFile\n"; +print C "minimalCoveragePerPosition = 10\n"; +print C "SNPfile = $SNPfile"; \ No newline at end of file diff --git a/bin/make_freec_genome.pl b/bin/make_freec_genome.pl old mode 100644 new mode 100755 diff --git a/bin/make_freec_genome_paired.pl b/bin/make_freec_genome_paired.pl old mode 100644 new mode 100755 index 474dfaf..e1861bb --- a/bin/make_freec_genome_paired.pl +++ b/bin/make_freec_genome_paired.pl @@ -40,5 +40,5 @@ print C '[BAF]' . 
"\n\n"; print C "makePileup = $makePileup\n"; print C "fastaFile = $fastaFile\n"; -print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n"; +print C "minimalCoveragePerPosition = 5\nminimalQualityPerPosition = 5\n"; print C "SNPfile = $SNPfile"; \ No newline at end of file diff --git a/bin/predictGender.R b/bin/predictGender.R old mode 100644 new mode 100755 diff --git a/bin/reformat_bed.py b/bin/reformat_bed.py new file mode 100644 index 0000000..fc64c5b --- /dev/null +++ b/bin/reformat_bed.py @@ -0,0 +1,85 @@ +######################################################## +## This script is designed to take a "covered" targets bed file for exome seq +## and make it compatible for downstream analysis +## In particular, it does two things to work with FREEC +## (1) Add a 'chr' prefix to chromosome names if genome is hg19 +## (2) Remove repeated regions with the same start site (the first one is kept) + +from __future__ import print_function +import argparse +import sys + +parser = argparse.ArgumentParser() +parser.add_argument("-i", "--input_bed", help="Input BED file to be reformatted") +# parser.add_argument("-g","--genome", help="Only used if equals 'hg19'; otherwise ignored", default="hg38") +parser.add_argument( + "-o", + "--output_bed", + help="Reformatted output BED file", + default="exome_targets.bed", +) +parser.add_argument( + "-f", + "--output_fields", + help="Number of fields in output BED ('3' or '6')", + default="6", +) +args = parser.parse_args() + +infile = args.input_bed +outfile = args.output_bed +nfield = str(args.output_fields) + +last_start = "-1" ## Position of the last start site + +### Open input bed file for reading +with open(infile, "r") as inputFile: + ### Open output exome targets bed file for writing + with open(outfile, "w") as exome_bed: + ### Step through each line of input + for line in inputFile: + ### Skip comments + if ( + not line.startswith("#") + and not line.startswith("track") + and not 
line.startswith("browser") + ): + curr_cols = line.strip().split("\t") + if len(curr_cols) < 3: + sys.exit( + "Targets BED file must contain at least three columns: chr, start, end" + ) + + if len(curr_cols) < 4: + curr_cols.append(".") + + min_output = curr_cols[0] + "\t" + curr_cols[1] + "\t" + curr_cols[2] + + extra_fields = "" + if not (nfield == "3"): + extra_fields = "\t" + curr_cols[3] + "\t0\t.\n" + + bed_output = min_output + extra_fields + + ### Add 'chr' prefix if genome is hg19 + # if (args.genome=="hg19"): + # freec_bed_output="chr" + curr_cols[0].lstrip("chr") + "\t" + curr_cols[1] + "\t" + curr_cols[2] + "\n" + # else: + # freec_bed_output=curr_cols[0] + "\t" + curr_cols[1] + "\t" + curr_cols[2] + "\n" + + ### If current start location is same as previous, output empty string + if curr_cols[1] == last_start: + print( + curr_cols[1] + + " IS equal to " + + last_start + + " so skipping it..." + ) + bed_output = "" + + ### Write to both files + exome_bed.write(bed_output) + # freec_bed.write(freec_bed_output) + + ### Update loop variables + last_start = curr_cols[1] \ No newline at end of file diff --git a/bin/run_sequenza.R b/bin/run_sequenza.R old mode 100644 new mode 100755 diff --git a/bin/sampleCompareAncestoryPlots.R b/bin/sampleCompareAncestoryPlots.R old mode 100644 new mode 100755 diff --git a/conf/base.config b/conf/base.config index 50baa58..e1a58a6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -69,8 +69,13 @@ process { time = { check_max( 72.h * task.attempt, 'time' ) } } withName:bwamem2 { - cpus = { check_max( 20 * task.attempt, 'cpus' ) } - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + cpus = { check_max( 22 * task.attempt, 'cpus' ) } + memory = { check_max( 220.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + } + withName:'gridss_somatic|gridss_tonly' { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 96.GB * task.attempt, 'memory' ) } time = { 
check_max( 72.h * task.attempt, 'time' ) } } withLabel:error_ignore { diff --git a/conf/biowulf.config b/conf/biowulf.config index d28a1ac..84295ab 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -22,16 +22,41 @@ singularity { autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' - runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/vf/users,/gpfs,/fdb' } env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" process { - clusterOptions = ' --gres=lscratch:200 ' + clusterOptions = ' --gres=lscratch:64 ' scratch = '/lscratch/$SLURM_JOBID' stageInMode = 'symlink' stageOutMode = 'rsync' // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps cache = 'lenient' + + //Additional Process for BWAMEM2 + withName:bwamem2 { + cpus = { check_max( 22 * task.attempt, 'cpus' ) } + memory = { check_max( 220.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + clusterOptions = ' --gres=lscratch:300 ' + } + + withName:"gridss_somatic|gridss_tonly" { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 96.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + clusterOptions = ' --gres=lscratch:300 ' + } + + withName:applybqsr { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 24.GB * task.attempt, 'memory' ) } + time = { check_max( 120.h * task.attempt, 'time' ) } + clusterOptions = ' --gres=lscratch:300 ' + } + } + + diff --git a/conf/ci_stub.config b/conf/ci_stub.config index aa74e29..90f9d7c 100644 --- a/conf/ci_stub.config +++ b/conf/ci_stub.config @@ -12,13 +12,21 @@ params { publish_dir_mode = "symlink" } +singularity { + enabled = true + autoMounts = true + cacheDir = 
"/data/CCBR_Pipeliner/SIFS" + envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' + runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/vf/users,/gpfs,/fdb' +} + +env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" + + process { cpus = 1 memory = '1.GB' scratch = false - singularity { - enabled = false - } } stubRun = true \ No newline at end of file diff --git a/conf/containers.config b/conf/containers.config index fa3cd71..1e587d0 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -2,11 +2,18 @@ params { containers { base = 'docker://nciccbr/ccbr_ubuntu_base_20.04:v6.1' - logan = 'docker://dnousome/ccbr_logan_base:v0.3.6' + logan = 'docker://dnousome/ccbr_logan_base:v0.3.8' vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1' octopus = 'docker://dancooke/octopus:latest' annotsv = "docker://quay.io/biocontainers/annotsv:3.4.2--py312hdfd78af_0" annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:v0.0.2' + loganqc = 'docker://dnousome/ccbr_logan_qc:v0.0.1' + multiqc = 'docker://multiqc/multiqc:v1.23' + sv = 'docker://dnousome/ccbr_logan_sv:v0.0.1' + cnv = 'docker://dnousome/ccbr_logan_cnv:v0.0.1' + deepsomatic = 'docker://google/deepsomatic:1.8.0' + deepvariant = 'docker://google/deepvariant:1.6.1' + } } diff --git a/conf/genomes.config b/conf/genomes.config index e6e2e2c..aed4ddf 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -3,24 +3,25 @@ params { 'hg38' { genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa" genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa.fai" - bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" - genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" + bwagenome= 
"/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem/GRCh38.d1.vd1.fa" + genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem/GRCh38.d1.vd1.dict" wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" fullinterval = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/genomes/hg38_main.bed" - INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" - KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' - 
dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" - gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' - pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} - germline_resource = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz" - KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + dbsnp = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/GATK_GRCh38.d1.vd1/dbsnp_138.hg38.vcf.gz" + gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/GATK_GRCh38.d1.vd1/somatic-hg38-af-only-gnomad.hg38.vcf.gz' + tonly_PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/gatk4_mutect2_4136_pon.vcf.gz" + PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/MuTect2.PON.5210.vcf.gz" + germline_resource = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/GATK_GRCh38.d1.vd1/somatic-hg38-af-only-gnomad.hg38.vcf.gz" + KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh38.86" - snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config" - snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/" - sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz" - somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/snpEff/4.3t/snpEff.config" + snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/snpEff/4.3t/" + 
sites_vcf= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/sites.hg38.vcf.gz" + somalier_ancestrydb = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/1kg-somalier" vepcache = "/fdb/VEP/102/cache" vepspecies = "homo_sapiens" vepbuild = "GRCh38" @@ -37,18 +38,33 @@ params { PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" + GRIDSSBLACKLIST= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/gridss_blacklist.38.bed.gz' //PURPLE GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz" GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/GC_profile.1000bp.38.cnp" DIPLODREG = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/DiploidRegions.38.bed.gz" DRIVERS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/DriverGenePanel.38.tsv" + //GRIPSS + PONSGL = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/sgl_pon.38.bed.gz' + PONSV = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/sv_pon.38.bedpe.gz' + SVHOTSPOT = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/known_fusions.38.bedpe' + REPEATMASK= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/repeat_mask_data.38.fa.gz' + FREEC { + FREECLENGTHS = 
"/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/hg38.filtered.fa.fai" + FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/Chromosomes" + FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/dbsnp_146.hg38.SingleDiNucl.IDs.vcf" + FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/dbsnp_146.hg38.SingleDiNucl.IDs.vcf" + } + //CNVKIT + REFFLAT = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/cnvkit/refFlat.txt" + ACCESS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/cnvkit/access-10kb.hg38.bed" } 'hg19' { - genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa" - genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai" + genome = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/genome/bwamem2/hg19.with_extra.fa" + genomefai = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/genome/bwamem2/hg19.with_extra.fa.fai" bwagenome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" - genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.dict" + genomedict= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/genome/bwamem2/hg19.with_extra.dict" intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed" INDELREF = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz" KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz -known /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/1000G_phase1.indels.hg19.vcf.gz" @@ -57,18 +73,18 @@ params { germline_resource = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz" gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz' pon = 
"/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" - KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh37.75" snpeff_config = "/usr/local/apps/snpEff/4.3t/snpEff.config" - snpeff_bundle = "/usr/local/apps/snpEff/4.3t/snpEff.confi" - sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz" - somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + snpeff_bundle = "/usr/local/apps/snpEff/4.3t/" + sites_vcf= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/somalier/sites.hg19.vcf.gz" + somalier_ancestrydb = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/1kg-somalier" //Works for both hg19/hg38 vepcache = "/fdb/VEP/102/cache" vepspecies = "homo_sapiens" vepbuild = "GRCh37" annotsvgenome = "GRCh37" - octopus_sforest= "" //"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" - octopus_gforest= "" //"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" + octopus_sforest= "" //NO hg19 somaticforest"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" + octopus_gforest= "" //no hg19 gforest"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] //HMFTOOLS @@ -76,6 +92,7 @@ params { HMFGENOME = "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" SOMATICHOTSPOTS = 
"/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/KnownHotspots.somatic.37.vcf.gz" GERMLINEHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/KnownHotspots.germline.37.vcf.gz" + GRIDSSBLACKLIST= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/sv/gridss_blacklist.37.bed.gz' PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/ActionableCodingPanel.37.bed.gz" HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/variants/NA12878_GIAB_highconf_IllFB-IllGATKHC-CG-Ion-Solid_ALLCHROM_v3.2.2_highconf.bed.gz" ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/common/ensembl_data" @@ -83,6 +100,19 @@ params { GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/GC_profile.1000bp.37.cnp" DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/copy_number/DiploidRegions.37.bed.gz' DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/common/DriverGenePanel.37.tsv' + PONSGL = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/sv/sgl_pon.37.bed.gz' + PONSV = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/sv/sv_pon.37.bedpe.gz' + SVHOTSPOT = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/sv/known_fusions.37.bedpe' + REPEATMASK= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hmftools/v5_34/ref/37/sv/repeat_mask_data.37.fa.gz' + FREEC { + FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/FREEC/hg19.fa.fai" + FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/FREEC/Chromosomes" + FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/FREEC/hg19_snp142.SingleDiNucl.1based.txt" + 
FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/FREEC/hg19_snp142.SingleDiNucl.1based.txt" + } + //CNVKIT + REFFLAT = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/cnvkit/refFlat.txt" + ACCESS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/cnvkit/access-10kb.hg19.bed" } 'mm10' { @@ -95,16 +125,17 @@ params { INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz" KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz" dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" - pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz" + tonly_PON = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz" + PON = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz" germline_resource = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz" gnomad= "--germline-resource /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" - KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCm38.86" snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/snpEff.config" snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/" sites_vcf = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/somalier/mm10.sites.vcf.gz" //EDIT SOMALIER ANCESTRY AFTER! 
- somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + somalier_ancestrydb = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/1kg-somalier" vepcache = "/fdb/VEP/102/cache" vepspecies = "mus_musculus" vepbuild= "GRCm38" @@ -116,32 +147,36 @@ params { FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10.fa.fai" FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/Chromosomes" FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.bed" - FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt.gz" + FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt" } chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM'] + //CNVKIT + REFFLAT = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/mm10/cnvkit/refFlat.txt" + ACCESS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/mm10/cnvkit/access-10kb.mm10.bed" } 'hg38_sf' { genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome/bwamem2/Homo_sapiens_assembly38.fasta" genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome/bwamem2/Homo_sapiens_assembly38.fasta.fai" - bwagenome= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" - genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" + bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome/Homo_sapiens_assembly38.fasta" + genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome/Homo_sapiens_assembly38.dict" wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals= 
"${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" fullinterval = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/genomes/hg38_main.bed" - INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" - KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' - dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" - gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' - pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" 
//pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} - germline_resource = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz" - KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" + gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' + tonly_PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/gatk4_mutect2_4136_pon.vcf.gz" + PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/MuTect2.PON.5210.vcf.gz" + germline_resource = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz" + KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh38.86" - snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config" - snpeff_bundle = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/" - sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz" - somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier" + snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/snpEff/4.3t/snpEff.config" + snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/snpEff/4.3t/" + sites_vcf= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/sites.hg38.vcf.gz" + somalier_ancestrydb = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/1kg-somalier" vepcache = "/fdb/VEP/102/cache" vepspecies = "homo_sapiens" vepbuild = "GRCh38" @@ -158,11 +193,80 @@ params { PANELBED = "-panel_bed 
/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" + GRIDSSBLACKLIST= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/gridss_blacklist.38.bed.gz' //PURPLE GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz" GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/GC_profile.1000bp.38.cnp" DIPLODREG = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/DiploidRegions.38.bed.gz" DRIVERS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/DriverGenePanel.38.tsv" + FREEC { + FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/hg38.filtered.fa.fai" + FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/Chromosomes" + FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/dbsnp_146.hg38.SingleDiNucl.IDs.vcf" + FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/dbsnp_146.hg38.SingleDiNucl.IDs.vcf" + } + //CNVKIT + REFFLAT = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/cnvkit/refFlat.txt" + ACCESS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/cnvkit/access-10kb.hg38.bed" } + 'hg38_noalt' { + genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwamem2/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta" + genomefai = 
"/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwamem2/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta" + genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwa/GCA_000001405.15_GRCh38_no_alt_analysis_set.dict" + bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwa/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta" + wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" + intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" + fullinterval = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/genomes/hg38_main.bed" + INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' + dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" + gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' + tonly_PON = 
"/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/gatk4_mutect2_4136_pon.vcf.gz" + PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/MuTect2.PON.5210.vcf.gz" + germline_resource = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz" + KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" + snpeff_genome = "GRCh38.86" + snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/snpEff/4.3t/snpEff.config" + snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/snpEff/4.3t/" + sites_vcf= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/sites.hg38.vcf.gz" + somalier_ancestrydb = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/somalier/1kg-somalier" + vepcache = "/fdb/VEP/102/cache" + vepspecies = "homo_sapiens" + vepbuild = "GRCh38" + annotsvgenome = "GRCh38" + octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" + octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + //HMFTOOLS + GENOMEVER = "38" + HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwamem2/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta" + SOMATICHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz" + GERMLINEHOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.germline.38.vcf.gz" + PANELBED = "-panel_bed 
/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" + HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" + ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" + GRIDSSBLACKLIST= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/gridss_blacklist.38.bed.gz' + //PURPLE + GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz" + GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/GC_profile.1000bp.38.cnp" + DIPLODREG = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/DiploidRegions.38.bed.gz" + DRIVERS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/DriverGenePanel.38.tsv" + //GRIPSS + PONSGL = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/sgl_pon.38.bed.gz' + PONSV = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/sv_pon.38.bedpe.gz' + SVHOTSPOT = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/known_fusions.38.bedpe' + REPEATMASK= '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/sv/repeat_mask_data.38.fa.gz' + FREEC { + FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/hg38.filtered.fa.fai" + FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/Chromosomes" + FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/dbsnp_146.hg38.SingleDiNucl.IDs.vcf" + FREECSNPS= 
"/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/FREEC/dbsnp_146.hg38.SingleDiNucl.IDs.vcf" + } + //CNVKIT + REFFLAT = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/cnvkit/refFlat.txt" + ACCESS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/cnvkit/access-10kb.hg38.bed" + } } } diff --git a/conf/modules.config b/conf/modules.config index a1e595f..996b26a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,7 +15,7 @@ process { ] } - withName: 'freec_paired' { + withName: 'freec_paired|freec_paired_exome' { publishDir = [ path: { "${params.outdir}/cnv/freec_paired" }, mode: 'copy' @@ -50,16 +50,26 @@ process { ] } - withName: 'deepvariant_step3|deepvariant_combined|glnexus' { + withName: 'ascat_tn|ascat_tn_exome' { publishDir = [ - path: { "${params.outdir}/germline/deepvariant" }, + path: { "${params.outdir}/cnv/ascat" }, + mode: 'copy' + ] + } + + withName: 'cnvkit|cnvkit_exome' { + publishDir = [ + path: { "${params.outdir}/cnv/cnvkit" }, mode: 'copy' ] - module=['deepvariant/1.4.0'] } - withName: 'deepvariant_step1|deepvariant_step2' { - module = ['deepvariant/1.4.0'] + + withName: 'deepvariant_combined|glnexus|bcfconcat_vcf|bcfconcat_gvcf' { + publishDir = [ + path: { "${params.outdir}/germline/deepvariant" }, + mode: 'copy' + ] } withName: 'fc_lane' { @@ -188,6 +198,13 @@ process { ] } + withName: 'gridss_somatic' { + publishDir = [ + path: { "${params.outdir}/SV/gridss" }, + mode: 'copy' + ] + } + withName: 'annotsv_tn' { publishDir = [ path: { "${params.outdir}/SV/annotated" }, @@ -217,7 +234,7 @@ process { ] } - withName: 'applybqsr|samtoolsindex' { + withName: 'applybqsr|gatherbqsr|samtoolsindex' { publishDir = [ path: { "${params.outdir}/bams/BQSR" }, mode: 'copy' @@ -246,6 +263,14 @@ process { errorStrategy='ignore' } + withName: 'sobdetect_pass1|sobdetect_pass2|sobdetect_cohort_params|sobdetect_metrics' { + publishDir = [ + path: { "${params.outdir}/ffpe" }, + mode: 'copy' + ] + errorStrategy='ignore' + } + withName: 
'combinemafs_tonly' { publishDir = [ path: { "${params.outdir}/mafs/tumor_only" }, diff --git a/conf/slurm.config b/conf/slurm.config index b63e01f..0c8982f 100644 --- a/conf/slurm.config +++ b/conf/slurm.config @@ -13,7 +13,7 @@ executor { process { executor = 'slurm' - maxRetries = 1 + maxRetries = 2 clusterOptions = ' --gres=lscratch:200 ' diff --git a/docker/annotate_cnvsv/Dockerfile b/docker/annotate_cnvsv/Dockerfile index 308ad14..4a3b4c5 100644 --- a/docker/annotate_cnvsv/Dockerfile +++ b/docker/annotate_cnvsv/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v5 +FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v6 # build time variables ARG BUILD_DATE="000000" @@ -40,9 +40,8 @@ RUN wget https://github.com/fritzsedlazeck/SURVIVOR/archive/refs/tags/v1.0.6.tar && rm /opt2/v1.0.6.tar.gz \ && cd /opt2/SURVIVOR-1.0.6/Debug \ && make -ENV PATH="/opt2/SURVIVOR-1.0.6/:$PATH" +ENV PATH="/opt2/SURVIVOR-1.0.6/Debug:$PATH" COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} -RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} - +RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} \ No newline at end of file diff --git a/docker/annotate_cnvsv/build.sh b/docker/annotate_cnvsv/build.sh index e1bc935..e98686f 100644 --- a/docker/annotate_cnvsv/build.sh +++ b/docker/annotate_cnvsv/build.sh @@ -1,10 +1,10 @@ ##BUILD cnv/sv -docker build --platform linux/amd64 --tag ccbr_annotate_cnvsv:v0.0.1 -f Dockerfile . +docker build --platform linux/amd64 --tag ccbr_annotate_cnvsv:v0.0.2 -f Dockerfile . 
-docker tag ccbr_annotate_cnvsv:v0.0.1 dnousome/ccbr_annotate_cnvsv:v0.0.1 -docker tag ccbr_annotate_cnvsv:v0.0.1 dnousome/ccbr_annotate_cnvsv +docker tag ccbr_annotate_cnvsv:v0.0.2 dnousome/ccbr_annotate_cnvsv:v0.0.2 +docker tag ccbr_annotate_cnvsv:v0.0.2 dnousome/ccbr_annotate_cnvsv:latest -docker push dnousome/ccbr_annotate_cnvsv:v0.0.1 +docker push dnousome/ccbr_annotate_cnvsv:v0.0.2 docker push dnousome/ccbr_annotate_cnvsv:latest diff --git a/docker/cnv/Dockerfile b/docker/cnv/Dockerfile new file mode 100644 index 0000000..340fe72 --- /dev/null +++ b/docker/cnv/Dockerfile @@ -0,0 +1,46 @@ +FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v6 + +LABEL maintainer= + +WORKDIR /opt2 + +RUN apt-get update + +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gnupg \ + dirmngr \ + ca-certificates \ + apt-transport-https \ + software-properties-common + +#Install R for ASCAT +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ + && add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' \ + && add-apt-repository --enable-source --yes 'ppa:c2d4u.team/c2d4u4.0+' \ + && apt-get -y install r-base r-base-core r-recommended r-base-dev \ + && apt-get -y install libcurl4-openssl-dev libssl-dev libboost-dev libxml2-dev \ + && apt-get -y install r-cran-biocmanager r-cran-devtools r-bioc-genomicranges + +#ASCAT +RUN Rscript -e 'devtools::install_github("VanLoo-lab/ascat/ASCAT")' +RUN Rscript -e 'install.packages(c("argparse"), repos="http://cran.r-project.org")' +RUN Rscript -e 'BiocManager::install("DNAcopy")' + +#Allelecounter +RUN git clone https://github.com/cancerit/alleleCount ac \ +&& cd ac \ +&& ./setup.sh /opt2/alleleCount +ENV PATH="/opt2/alleleCount/bin:$PATH" +ENV LD_LIBARY_PATH="/opt2/alleleCount/lib" + +#CNVKIT +RUN git clone https://github.com/etal/cnvkit \ +&& cd cnvkit/ \ +&& pip3 install --upgrade pip \ +&& pip install -e . 
+ +##Clean up folders +WORKDIR /opt2 +RUN rm -R ac + + diff --git a/docker/cnv/build.sh b/docker/cnv/build.sh new file mode 100644 index 0000000..bd707fb --- /dev/null +++ b/docker/cnv/build.sh @@ -0,0 +1,12 @@ +##Build cnv +docker build --platform=linux/amd64 --tag ccbr_logan_cnv:v0.0.1 -f Dockerfile . + +docker tag ccbr_logan_cnv:v0.0.1 dnousome/ccbr_logan_cnv:v0.0.1 +docker tag ccbr_logan_cnv:v0.0.1 dnousome/ccbr_logan_cnv:latest + +docker push dnousome/ccbr_logan_cnv:v0.0.1 +docker push dnousome/ccbr_logan_cnv:latest + + +#singularity pull dnousome-ccbr_logan_cnv-v0.0.1.img docker://dnousome/ccbr_logan_cnv:v0.0.1 +#docker run -it ccbr_logan_cnv:v0.0.1 diff --git a/docker/ffpe/Dockerfile b/docker/ffpe/Dockerfile new file mode 100644 index 0000000..8ef615d --- /dev/null +++ b/docker/ffpe/Dockerfile @@ -0,0 +1,45 @@ +FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v6 + +# build time variables +ARG BUILD_DATE="000000" +ENV BUILD_DATE=${BUILD_DATE} +ARG BUILD_TAG="000000" +ENV BUILD_TAG=${BUILD_TAG} +ARG REPONAME="000000" +ENV REPONAME=${REPONAME} + +LABEL maintainer + +# Create Container filesystem specific +# working directory and opt directories +WORKDIR /opt2 + +# This section installs system packages required for your project +# If you need extra system packages add them here. 
+# python/3.8.0 and python/2.7.16 (strelka and manta) +RUN apt-get update \ + && apt-get -y upgrade + +# Common bioinformatics tools +# bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1 +# bedtools/2.27.1 bedops/2.4.37 +# vcftools/0.1.16 +# Previous tools already installed +# tabix/1.10.2 + +# Install SOB +RUN wget https://github.com/mikdio/SOBDetector/releases/download/v1.0.4/SOBDetector_v1.0.4.jar +ENV SOB_JAR="/opt2/SOBDetector_v1.0.4.jar" + +WORKDIR /data2 + +# Clean-up step to reduce size +# and install GNU awk to calculate mean and standard +# deviation, ensures backward compatibility with +# biowulf installation of awk is a pointer to gawk, +# and install pandoc (>= 1.12.3 required for Rmarkdown) +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gawk \ + pandoc \ + && apt-get clean && apt-get purge \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ No newline at end of file diff --git a/docker/ffpe/build.sh b/docker/ffpe/build.sh new file mode 100644 index 0000000..806887c --- /dev/null +++ b/docker/ffpe/build.sh @@ -0,0 +1,13 @@ +#Build.sh +docker build --platform=linux/amd64 --tag ccbr_logan_ffpe:v0.0.1 -f Dockerfile . 
+ +#Test Docker Build +#docker run -it ccbr_logan_ffpe:v0.0.1 +# + +docker tag ccbr_logan_ffpe:v0.0.1 dnousome/ccbr_logan_ffpe:v0.0.1 +docker tag ccbr_logan_ffpe:v0.0.1 dnousome/ccbr_logan_ffpe:latest + +docker push dnousome/ccbr_logan_ffpe:v0.0.1 +docker push dnousome/ccbr_logan_ffpe:latest + diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index d11804b..b77aa0a 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -21,24 +21,47 @@ RUN apt-get update \ && apt-get -y upgrade \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ bc \ - openjdk-17-jdk #Needed for GATK >4 + openjdk-17-jdk # Common bioinformatics tools # bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1 -# bedtools/2.27.1 bedops/2.4.37 samtools/1.10 -# bcftools/1.10.2 vcftools/0.1.16 -# Previous tools already installed tabix/1.10.2 trimmomatic/0.39 +# bedtools/2.27.1 bedops/2.4.37 +# vcftools/0.1.16 +# Previous tools already installed +# tabix/1.10.2 RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ tabix \ libhts-dev - # Install BWA-MEM2 v2.2.1 RUN wget https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2 \ && tar -xvjf /opt2/bwa-mem2-2.2.1_x64-linux.tar.bz2 \ - && rm /opt2/bwa-mem2-2.2.1_x64-linux.tar.bz2 + && rm /opt2/bwa-mem2-2.2.1_x64-linux.tar.bz2 ENV PATH="/opt2/bwa-mem2-2.2.1_x64-linux:$PATH" +# samtools/1.10 # bcftools/1.10.2 are dated in package +RUN wget https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2 \ + && tar -vxjf htslib-1.20.tar.bz2 \ + && cd htslib-1.20 \ + && make \ + && rm /opt2/htslib-1.20.tar.bz2 +ENV PATH="/opt2/htslib-1.20:$PATH" + +RUN wget https://github.com/samtools/samtools/releases/download/1.20/samtools-1.20.tar.bz2 \ + && tar -vxjf samtools-1.20.tar.bz2 \ + && cd samtools-1.20 \ + && make \ + && rm /opt2/samtools-1.20.tar.bz2 +ENV PATH="/opt2/samtools-1.20:$PATH" + +RUN wget 
https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2 \ + && tar -vxjf bcftools-1.20.tar.bz2 \ + && cd bcftools-1.20 \ + && make \ + && rm /opt2/bcftools-1.20.tar.bz2 +ENV PATH="/opt2/bcftools-1.20:$PATH" + + # Install Sambamba/0.8.1 for CTRL-Freec # not available to apt-get on Ubuntu 20.04 RUN wget https://github.com/biod/sambamba/releases/download/v0.8.1/sambamba-0.8.1-linux-amd64-static.gz \ @@ -46,7 +69,7 @@ RUN wget https://github.com/biod/sambamba/releases/download/v0.8.1/sambamba-0.8. && mv /opt2/sambamba-0.8.1-linux-amd64-static /opt2/sambamba \ && chmod a+rx /opt2/sambamba -# Install GATK4 (GATK/4.5.0.0) +# Install GATK4 (GATK/4.6.0.0) # Requires Java17 RUN wget https://github.com/broadinstitute/gatk/releases/download/4.6.0.0/gatk-4.6.0.0.zip \ && unzip /opt2/gatk-4.6.0.0.zip \ @@ -54,6 +77,10 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.6.0.0/gatk-4 && /opt2/gatk-4.6.0.0/gatk --list ENV PATH="/opt2/gatk-4.6.0.0:$PATH" +# Picard +RUN mkdir picard \ + && wget -O picard/picard.jar https://github.com/broadinstitute/picard/releases/download/3.2.0/picard.jar +ENV PICARDJARPATH="/opt2/picard" #Use DISCVRSeq For CombineVariants Replacement #RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.62/DISCVRSeq-1.3.62.jar @@ -117,14 +144,6 @@ ENV PATH="/opt2/FREEC-11.6/src:$PATH" WORKDIR /opt2 -# Install Somalier/v0.2.19 -# download static binary -RUN mkdir somalier \ - && wget -O somalier/somalier https://github.com/brentp/somalier/releases/download/v0.2.19/somalier \ - && chmod a+rx /opt2/somalier/somalier -ENV PATH="/opt2/somalier:$PATH" - - # Install VarScan/v2.4.4 # Works with java8 # and each wrapper script similar to HPC module @@ -178,13 +197,13 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3 ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH" # Fastp From Opengene github -RUN wget http://opengene.org/fastp/fastp.0.23.4 \ +RUN wget 
http://opengene.org/fastp/fastp.0.24.0 \ && mkdir fastp \ - && mv fastp.0.23.4 fastp/fastp \ + && mv fastp.0.24.0 fastp/fastp \ && chmod a+x fastp/fastp ENV PATH="/opt2/fastp:$PATH" -# ASCAT +#ASCAT RUN Rscript -e 'devtools::install_github("VanLoo-lab/ascat/ASCAT")' # SvABA @@ -206,7 +225,7 @@ WORKDIR /opt2 RUN wget https://github.com/hartwigmedical/hmftools/releases/download/amber-v4.0.1/amber_v4.0.1.jar \ && wget https://github.com/hartwigmedical/hmftools/releases/download/cobalt-v1.16/cobalt_v1.16.jar \ && wget https://github.com/hartwigmedical/hmftools/releases/download/purple-v4.0.2/purple_v4.0.2.jar \ - && wget https://github.com/hartwigmedical/hmftools/releases/download/sage-v3.4.4/sage_v3.4.4.jar \ + && wget https://github.com/hartwigmedical/hmftools/releases/download/sage-v3.4.4/sage_v3.4.4.jar \ && mkdir hmftools \ && mv amber_v4.0.1.jar hmftools/amber.jar \ && mv cobalt_v1.16.jar hmftools/cobalt.jar \ @@ -214,6 +233,11 @@ RUN wget https://github.com/hartwigmedical/hmftools/releases/download/amber-v4.0 && mv sage_v3.4.4.jar hmftools/sage.jar \ && chmod a+x hmftools/amber.jar ENV PATH="/opt2/hmftools:$PATH" +WORKDIR /opt2 + +#GLNEXUS +RUN wget https://github.com/dnanexus-rnd/GLnexus/releases/download/v1.4.1/glnexus_cli \ + && chmod +x glnexus_cli # Add Dockerfile and argparse.bash script # and export environment variables diff --git a/docker/logan_base/build.sh b/docker/logan_base/build.sh index 90b4446..ea8d8f7 100644 --- a/docker/logan_base/build.sh +++ b/docker/logan_base/build.sh @@ -5,22 +5,15 @@ #docker buildx inspect upbeat_ganguly #docker buildx build --platform linux/amd64 -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --push . -docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.6 -f Dockerfile . +docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.8 -f Dockerfile . 
-docker tag ccbr_logan_base:v0.3.6 dnousome/ccbr_logan_base:v0.3.6 -docker tag ccbr_logan_base:v0.3.6 dnousome/ccbr_logan_base +docker tag ccbr_logan_base:v0.3.8 dnousome/ccbr_logan_base:v0.3.8 +docker tag ccbr_logan_base:v0.3.8 dnousome/ccbr_logan_base:latest -docker push dnousome/ccbr_logan_base:v0.3.6 +docker push dnousome/ccbr_logan_base:v0.3.8 docker push dnousome/ccbr_logan_base:latest - - - -# Tag image with version and reset latest -#docker tag ccbr_wgs_base:v0.1.0 nciccbr/ccbr_wgs_base:v0.1.0 -#docker tag ccbr_wgs_base:v0.1.0 nciccbr/ccbr_wgs_base - -# Push image to DockerHub -#docker push nciccbr/ccbr_wgs_base:v0.1.0 -#docker push nciccbr/ccbr_wgs_base:latest +#Pull to CCBR +cd /data/CCBR_Pipeliner/SIFS +singularity pull dnousome-ccbr_logan_base-v0.3.8.img docker://dnousome/ccbr_logan_base:v0.3.8 diff --git a/docker/logan_base/meta.yml b/docker/logan_base/meta.yml index 8e76b0a..07bb557 100644 --- a/docker/logan_base/meta.yml +++ b/docker/logan_base/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: dnousome image_name: ccbr_logan_base -version: v0.3.5 +version: v0.3.8 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/qc/Dockerfile b/docker/qc/Dockerfile new file mode 100644 index 0000000..eab35f0 --- /dev/null +++ b/docker/qc/Dockerfile @@ -0,0 +1,90 @@ +FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v6 + +# build time variables +ARG BUILD_DATE="000000" +ENV BUILD_DATE=${BUILD_DATE} +ARG BUILD_TAG="000000" +ENV BUILD_TAG=${BUILD_TAG} +ARG REPONAME="000000" +ENV REPONAME=${REPONAME} + +LABEL maintainer= + +# Create Container filesystem specific +# working directory and opt directories +WORKDIR /opt2 + +# This section installs system packages required for your project +# If you need extra system packages add them here. 
+# python/3.8.0 and python/2.7.16 (strelka and manta) +RUN apt-get update \ + && apt-get -y upgrade \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + bc \ + libgd-perl + +#FASTQ Screen 'fastq_screen/0.15.3:bowtie/2-2.5.3' +RUN yes | perl -MCPAN -e "install GD" +RUN yes | perl -MCPAN -e "install GD::Graph" + +RUN wget https://github.com/StevenWingett/FastQ-Screen/archive/refs/tags/v0.15.3.tar.gz \ + && tar -xvzf /opt2/v0.15.3.tar.gz \ + && rm /opt2/v0.15.3.tar.gz +ENV PATH="/opt2/FastQ-Screen-0.15.3:$PATH" + +##FASTQC 'fastqc/0.12.1' +RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.12.1.zip \ + && unzip fastqc_v0.12.1.zip \ + && rm fastqc_v0.12.1.zip +ENV PATH="/opt2/FastQC:$PATH" + +##QUALIMAP 'qualimap/2.3' +RUN wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip \ + && unzip qualimap_v2.3.zip \ + && rm qualimap_v2.3.zip +ENV PATH="/opt2/qualimap_v2.3:$PATH" + +## MOSDEPTH 'mosdepth/0.3.8' +RUN mkdir mosdepth \ + && wget -O mosdepth/mosdepth https://github.com/brentp/mosdepth/releases/download/v0.3.8/mosdepth \ + && chmod a+rx /opt2/mosdepth/mosdepth +ENV PATH="/opt2/mosdepth:$PATH" + +# Install Somalier/v0.2.19 +# download static binary +RUN mkdir somalier \ + && wget -O somalier/somalier https://github.com/brentp/somalier/releases/download/v0.2.19/somalier \ + && chmod a+rx /opt2/somalier/somalier +ENV PATH="/opt2/somalier:$PATH" + +##KRAKEN kraken/2.1.2','kronatools/2.8.1'] +RUN wget https://github.com/DerrickWood/kraken2/archive/refs/tags/v2.1.3.tar.gz \ + && tar -xzvf v2.1.3.tar.gz \ + && cd kraken2-2.1.3 \ + && ./install_kraken2.sh . \ + && cd /opt2 \ + && rm v2.1.3.tar.gz +ENV PATH="/opt2/kraken2-2.1.3:$PATH" + +RUN wget https://github.com/marbl/Krona/releases/download/v2.8.1/KronaTools-2.8.1.tar \ + && tar -xf KronaTools-2.8.1.tar \ + && cd KronaTools-2.8.1 \ + && ./install.pl --prefix . 
\ + && ./updateTaxonomy.sh \ + && chmod 775 bin/ -R \ + && chmod 775 lib/ -R \ + && chmod 775 src/ -R \ + && chmod 775 scripts/ -R \ + && cd /opt2 \ + && rm KronaTools-2.8.1.tar +ENV PATH="/opt2/KronaTools-2.8.1/bin:$PATH" + +# Clean-up step to reduce size +# and install GNU awk to calculate mean and standard +# deviation, ensures backward compatibility with +# biowulf installation of awk is a pointer to gawk, +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gawk \ + && apt-get clean && apt-get purge \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + diff --git a/docker/qc/build.sh b/docker/qc/build.sh new file mode 100644 index 0000000..8244adf --- /dev/null +++ b/docker/qc/build.sh @@ -0,0 +1,11 @@ +#Build.sh +docker build --platform=linux/amd64 --tag ccbr_logan_qc:v0.0.1 -f Dockerfile . + +docker tag ccbr_logan_qc:v0.0.1 dnousome/ccbr_logan_qc:v0.0.1 +docker tag ccbr_logan_qc:v0.0.1 dnousome/ccbr_logan_qc:latest + +docker push dnousome/ccbr_logan_qc:v0.0.1 +docker push dnousome/ccbr_logan_qc:latest + +## +#docker run -it ccbr_logan_qc:v0.0.1 \ No newline at end of file diff --git a/docker/sv/Dockerfile b/docker/sv/Dockerfile new file mode 100644 index 0000000..df38225 --- /dev/null +++ b/docker/sv/Dockerfile @@ -0,0 +1,64 @@ +FROM --platform=linux/amd64 nciccbr/ccbr_ubuntu_base_20.04:v6 + +LABEL maintainer= + +WORKDIR /opt2 + +RUN apt-get update + +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gnupg \ + dirmngr \ + ca-certificates \ + apt-transport-https \ + software-properties-common \ + openjdk-17-jdk + +# Create Container filesystem specific +# working directory and opt directories + +##Install R +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ + && add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' \ + && add-apt-repository --enable-source --yes 'ppa:c2d4u.team/c2d4u4.0+' \ + && apt-get -y install r-base r-base-core r-recommended r-base-dev \ + 
&& apt-get -y install libcurl4-openssl-dev libssl-dev libboost-dev libxml2-dev +ENV PATH="/usr/bin/Rscript:$PATH" + +RUN wget https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2 \ + && tar -vxjf htslib-1.20.tar.bz2 \ + && cd htslib-1.20 \ + && make \ + && rm /opt2/htslib-1.20.tar.bz2 +ENV PATH="/opt2/htslib-1.20:$PATH" + +RUN wget https://github.com/samtools/samtools/releases/download/1.20/samtools-1.20.tar.bz2 \ + && tar -vxjf samtools-1.20.tar.bz2 \ + && cd samtools-1.20 \ + && make \ + && rm /opt2/samtools-1.20.tar.bz2 +ENV PATH="/opt2/samtools-1.20:$PATH" + +#Grab GRIDSS +RUN mkdir gridss +WORKDIR /opt2/gridss +RUN wget https://github.com/PapenfussLab/gridss/releases/download/v2.13.2/gridss-2.13.2-gridss-jar-with-dependencies.jar \ + && wget https://github.com/PapenfussLab/gridss/releases/download/v2.13.2/gridss \ + && wget https://github.com/PapenfussLab/gridss/releases/download/v2.13.2/gridss.config.R \ + && wget https://github.com/PapenfussLab/gridss/releases/download/v2.13.2/libgridss.R \ + && wget https://github.com/PapenfussLab/gridss/releases/download/v2.13.2/gridss_somatic_filter + +ENV GRIDSS_JAR=/opt/gridss2/gridss-2.13.2-gridss-jar-with-dependencies.jar + +RUN chmod +x /opt2/gridss/* \ + && chmod +x /opt2/gridss/*.R + +WORKDIR /opt2 + + +##Add GRIPSS for SOMATIC FILTERING +RUN wget https://github.com/hartwigmedical/hmftools/releases/download/gripss-v2.3.4/gripss_v2.3.4.jar \ + && mkdir hmftools \ + && mv gripss_v2.3.4.jar hmftools/gripss.jar +ENV PATH="/opt2/gridss:/opt2/hmftools:$PATH" + diff --git a/docker/sv/build.sh b/docker/sv/build.sh new file mode 100644 index 0000000..b1cf22e --- /dev/null +++ b/docker/sv/build.sh @@ -0,0 +1,13 @@ +#Build.sh +docker build --platform=linux/amd64 --tag ccbr_logan_sv:v0.0.1 -f Dockerfile . 
+ +docker tag ccbr_logan_sv:v0.0.1 dnousome/ccbr_logan_sv:v0.0.1 +docker tag ccbr_logan_sv:v0.0.1 dnousome/ccbr_logan_sv:latest + +docker push dnousome/ccbr_logan_sv:v0.0.1 +docker push dnousome/ccbr_logan_sv:latest + +## +#docker run -it ccbr_logan_sv:v0.0.1 +#gridss --jar /opt2/gridss/gridss-2.13.2-gridss-jar-with-dependencies.jar \ +#--reference test.fa --output t.vcf.gz s.bam \ No newline at end of file diff --git a/docs/LOGAN.png b/docs/LOGAN.png new file mode 100644 index 0000000..12b23da Binary files /dev/null and b/docs/LOGAN.png differ diff --git a/docs/index.md b/docs/index.md index 21f6deb..9900632 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,12 +1,2 @@ -# Welcome to WGS-Seek - -## Overview - -Guide for running wgs-seek for WGS data! - -* `wgs-seek` - Builds a submission script for slurm - -### References -Forked from [Exome-seek](https://github.com/mtandon09/CCBR_GATK4_Exome_Seq_Pipeline) - +# **_LOGAN-whoLe genOme-sequencinG Analysis pipeliNe_**. Call germline and somatic variants, CNVs, and SVs and annotate variants! diff --git a/docs/user-guide/pipeline.md b/docs/user-guide/pipeline.md index c5a31bf..ba1d82a 100644 --- a/docs/user-guide/pipeline.md +++ b/docs/user-guide/pipeline.md @@ -1,37 +1,118 @@ -# How to run WGS-Seek +# How to run LOGAN ## Guide -* `./wgs-seek` - Starts a next nextflow run -Supports runs from Fastq and either Tumor-Normal or Tumor-only Sequencing +### Input Files +LOGAN supports inputs of either +1) paired end fastq files -## Running Nextflow -Multiple options required for running +`--fastq_input`- A glob can be used to include all FASTQ files. Like `--fastq_input "*R{1,2}.fastq.gz"`. Globbing requires quotes. -## Code -`./wgs-seek --fastq "Samples/Sample_R{1,2}.fastq.gz" --output 'B2' --sample_sheet sample.tsv --paired T --profile biowulf` +2) Pre aligned BAM files with BAI indices +`--bam_input`- A glob can be used to include all FASTQ files. Like `--bam_input "*.bam"`. Globbing requires quotes. 
-### Arguments -Input selection can either be -`--fastq` -1) A wildcard expansion of Fastq files - "Samples/Sample_*_R{1,2}.fastq.gz" which finds all Samples in the directory with the head Sample_ -OR -`--filelist` -2a) A tab separated file with 3 columns Sample Name, Fastq1 Full path, Fastq2 Full Path if using fastq files or -2b) A tab separated file with 2 columns Sample Name, BAM file path +3) A sheet that indicates the sample name and either FASTQs or BAM file locations -`--output` - Output Directory +`--fastq_file_input`- A headerless tab delimited sheet that has the sample name, R1, and R2 file locations -`--sample_sheet`- Tab separated file for Normal and Tumor delination with a header for "Normal" and "Tumor" +Example +```bash +c130863309_TUMOR /data/nousomedr/c130863309_TUMOR.R1_001.fastq.gz /data/nousomedr/c130863309_TUMOR.R2_001.fastq.gz +c130889189_PBMC /data/nousomedr/c130889189_PBMC.R1_001.fastq.gz /data/nousomedr/c130889189_PBMC.R2_001.fastq.gz +``` -`--profile` Biowulf or Local Run +`--bam_file_input` - A headerless Tab delimited sheet that has the sample name, bam, and bam index (bai) file locations -`--resume` Resume previous nextflow run +Example +```bash +c130863309_TUMOR /data/nousomedr/c130863309_TUMOR.bam /data/nousomedr/c130863309_TUMOR.bam.bai +c130889189_PBMC /data/nousomedr/c130889189_PBMC.bam /data/nousomedr/c130889189_PBMC.bam.bai +``` -`--submit`- Submit job to Biowulf? +### Genome +`--genome` - A flag to indicate which genome to run. hg38, hg19 and mm10 are supported. 
+Example: `--genome hg38` to run the hg38 genome -`--paired`- Are Samples paired Tumor-Normal +`--genome hg19` and `--genome mm10` are also supported +#### hg38 has options for either +`--genome hg38` - Based off the GRCh38.d1.vd1.fa which is consistent with TCGA/GDC processing pipelines + +`--genome hg38_sf` - Based off the Homo_sapiens_assembly38.fasta which is derived from the Broad Institute/NCI Sequencing Facility +The biggest difference between the two is that GRCh38.d1.vd1.fa includes the GCA_000001405.15_GRCh38_no_alt_analysis_set, Sequence Decoys (GenBank Accession GCA_000786075), and Virus Sequences. Homo_sapiens_assembly38.fasta has HLA specific contigs which may not be compatible with certain downstream tools. + +### Operating Modes + +#### 1. Paired Tumor/Normal Mode + +Required for Paired Tumor/Normal Mode + +`--sample_sheet` In Paired mode a sample sheet must be provided with the basename of the Tumor and Normal samples. This sheet must be Tab separated with a header for Tumor and Normal. + +Example +```bash +Tumor Normal +c130863309_TUMOR c130863309_PBMC +c130889189_TUMOR c130889189_PBMC +``` + +#### 2. Tumor only mode + +No addtional flags for sample sheet are required as all samples will be used to call variants + +#### Calling Mode + +Adding flags determines SNV (germline and/or somatic), SV, and/or CNV calling modes + +`--vc` or `--snv` - Enables somatic SNV calling using mutect2, vardict, varscan, octopus, deepsomatic, strelka (TN only), MUSE (TN only), and lofreq (TN only) + +`--gl` or `--germline` - Enables germline calling using Deepvariant + +`--sv` or `--structural`- Enables somatic SV calling using Manta, GRIDSS, and SVABA + +`--cnv` or `--copynumber`- Enables somatic CNV calling using FREEC, Sequenza, ASCAT, CNVKit, and Purple (hg19/hg38 only) + + + +#### Optional Arguments +`--callers` - Comma separated argument for selecting only specified callers, the default is to use all. 
+Example: `--callers mutect2,octopus` + +`--cnvcallers` - Comma separated argument for selecting only specified CNV callers, the default is to use all. +Example: `--cnvcallers purple` + +`--svcallers` - Comma separated argument for selecting only specified SV callers, the default is to use all. +Example: `--svcallers gridss` + +`--ffpe` - Adds additional filtering for FFPE by detecting strand orientation bias using SOBDetector. + +`--exome` - Limits calling to intervals provided in target bed to reduce time and to account for exome sequencing specific parameters. + +`--indelrealign` - Enables indel realignment using the GATK pipeline when running alignment steps. May be helpful for certain callers (VarScan, VarDict) that do not have local haplotype reassembly. + + +## Running LOGAN +Example of Tumor_Normal calling mode +```bash +# preview the logan jobs that will run +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -preview --vc --sv --cnv +# run a stub/dryrun of the logan jobs +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -stub --vc --sv --cnv +# launch a logan run on slurm with the test dataset +nextflow run LOGAN/main.nf --mode slurm -profile biowulf,slurm --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" --vc --sv --cnv +``` + +Example of Tumor only calling mode +```bash +# preview the logan jobs that will run +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -preview --vc --sv --cnv + +# run a stub/dryrun of the logan jobs + +nextflow run LOGAN/main.nf --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -stub --vc --sv --cnv + +# launch a logan run on slurm with the 
test dataset +nextflow run LOGAN/main.nf --mode slurm -profile biowulf,slurm --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 --vc --sv --cnv +``` diff --git a/docs/user-guide/tool_comparisons.md b/docs/user-guide/tool_comparisons.md new file mode 100644 index 0000000..cd4051a --- /dev/null +++ b/docs/user-guide/tool_comparisons.md @@ -0,0 +1,43 @@ +# LOGAN Tools and Tools Tested + + +## SNV +| Tools |Pros | Cons | Used in Logan| +|----|---|---|--- +|Mutect2 |Part of GATK best practices| | x | +|Strelka | Fast| Paired only|x| +|Muse | Fast| Paired only, can't be parallelized|x| +|Lofreq | Low frequency variants| Slow,Paired only|x| +|Vardict | Fast | Lower accuracy|x| +|Varscan | Fast| Lower accuracy|x|| +|Octopus | Accurate| Slow,High memory|x| +|Deepsomatic|Relatively fast|Trained on human data|x| + + +## Structural Variants +| Tools |Pros | Cons | Approach| Used in Logan| +|----|---|---|---|---| +|Manta |Accurate, fast| |graph-based| x | +|SVABA | Deletion detection||local assembly+ multiple alignment|x| +|GRIDSS | Provides blacklist| Slow, part of HMFtools pipeline|Break end assembly (discordant +split)|x| + +Manta, GridSS, and SvABA are based on read-pairs, split-reads, and local-assemblies. 
+References [Joe et al](https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-024-10239-9) + +## Copy Number + +| Tools |Pros | Cons | Used in Logan| +|----|---|---|---| +|Purple |Complete workflow|Doesn't support mm10, requires SV,SNV calls as well | x | +|Sequenza | Purity/Ploidy||x| +|FREEC | Fast | No Purity/Ploidy Estimatation|x| +|ASCAT | Fast, Purity/Ploidy| |x| +|CNVkit |Fast | No Purity/Ploidy Estimatation|x| +|PureCN|Tumor only|Needs Panel of Normals on Sequencing| + + + +## Germline +| Tools |Pros | Cons | Used in Logan| +|----|---|---|---| +|Deepvariant |Fast, most accurate| Model trained on human genomes (May not support mm10)| x| diff --git a/main.nf b/main.nf index 1541830..ef61d01 100644 --- a/main.nf +++ b/main.nf @@ -9,8 +9,8 @@ log.info """\ ============================= genome: ${params.genome} outdir: ${params.outdir} + Samples: ${params.fastq_input ?: ''} ${params.fastq_file_input ?: ''} ${params.bam_input ?: ''} ${params.bam_file_input ?: ''} Sample Sheet: ${params.sample_sheet} - Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} """ .stripIndent() @@ -18,12 +18,12 @@ log.info """\ include {DETERMINEBAM; INPUT; INPUT_BAM; ALIGN; GL; VC; SV; CNVmouse; CNVhuman; CNVhuman_novc; - QC_GL; QC_NOGL} from "./subworkflows/local/workflows.nf" + QC_GL; QC_NOGL; QC_GL_BAM; QC_NOGL_BAM} from "./subworkflows/local/workflows.nf" include {INPUT_TONLY; INPUT_TONLY_BAM; ALIGN_TONLY; VC_TONLY; SV_TONLY; CNVmouse_tonly; CNVhuman_tonly; CNVhuman_novc_tonly; - QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" + QC_TONLY; QC_TONLY_BAM} from "./subworkflows/local/workflows_tonly.nf" workflow.onComplete { @@ -35,27 +35,28 @@ workflow.onComplete { } } -//Final Workflow +//All Workflows workflow { - if ([params.fastq_input,params.fastq_file_input].any() && params.sample_sheet){ + //Tumor-Normal FASTQ + if ([params.fastq_input,params.fastq_file_input].any() && params.sample_sheet){ println 
"Tumor-Normal FASTQ" INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) - //Germline - if (params.gl){ - GL(ALIGN.out.bambyinterval) + //Germline + if (params.gl || params.germline){ + GL(ALIGN.out.sample_sheet,ALIGN.out.bambyinterval) } //Tumor-Normal VC, SV, CNV - if (params.vc){ + if (params.vc || params.snv){ VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) } - if (params.sv){ + if (params.sv || params.structural){ SV(ALIGN.out.bamwithsample) } - if (params.cnv){ - if (params.genome == "mm10"){ + if (params.cnv || params.copynumber){ + if (params.genome.matches("mm10")){ CNVmouse(ALIGN.out.bamwithsample) - } else if (params.genome== "hg38" |params.genome== "hg19"){ + } else if (params.genome.matches("hg38(.*)") | params.genome.matches("hg19(.*)")){ if (!params.vc){ CNVhuman_novc(ALIGN.out.bamwithsample) } else { @@ -63,35 +64,47 @@ workflow { } } } - if (params.qc && params.gl){ + //QC Steps + if (params.qc && (params.gl || params.germline)){ QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout) - } else if (params.qc){ + }else if (params.qc){ QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout) } } - //TUMOR-NOMRAL BAM INPUT + //TUMOR-NORMAL BAM INPUT if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){ println "Tumor-Normal BAM" INPUT_BAM() - if (params.vc){ + if (params.vc || params.snv ){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) } - if (params.sv){ + if (params.sv || params.structural){ SV(INPUT_BAM.out.bamwithsample) } - if (params.cnv){ - if (params.genome == "mm10"){ + if (params.gl || params.germline){ + GL(INPUT_BAM.out.sample_sheet,INPUT_BAM.out.bambyinterval) + } + if (params.cnv || params.copynumber){ + if (params.genome.matches("mm10")){ CNVmouse(INPUT_BAM.out.bamwithsample) - } else if (params.genome == "hg38"|params.genome== "hg19"){ - if (!params.vc){ + } else if (params.genome.matches("hg38(.*)") | 
params.genome.matches("hg19(.*)")){ + if (!(params.vc || params.snv)){ CNVhuman_novc(INPUT_BAM.out.bamwithsample) }else { CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) } } } + //QC Steps + if (params.qc && (params.gl || params.germline)){ + QC_GL_BAM(INPUT_BAM.out.allbam,GL.out.glnexusout,GL.out.bcfout) + }else if(params.qc){ + QC_NOGL_BAM(INPUT_BAM.out.allbam) + } + + } ///Tumor Only Pipelines @@ -99,17 +112,17 @@ workflow { println "Tumor-Only FASTQ" INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) - if (params.vc){ + if (params.vc || params.snv){ VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) } - if (params.sv){ + if (params.sv || params.structural){ SV_TONLY(ALIGN_TONLY.out.bamwithsample) } - if (params.cnv){ - if (params.genome == "mm10"){ + if (params.cnv || params.copynumber){ + if (params.genome.matches("mm10")){ CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample) - } else if (params.genome== "hg38"|params.genome== "hg19"){ - if (!params.vc){ + } else if (params.genome.matches("hg38(.*)") | params.genome.matches("hg19(.*)")){ + if (!(params.vc || params.snv)){ VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) } else { @@ -118,7 +131,7 @@ workflow { } } if (params.qc){ - QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout) + QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout) } } @@ -126,24 +139,27 @@ workflow { if ([params.bam_input,params.bam_file_input].any() && !params.sample_sheet){ println "Tumor-Only BAM" INPUT_TONLY_BAM() - if (params.vc){ + if (params.vc || params.snv){ VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) } - if (params.sv){ + if (params.sv || params.structural){ SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) } - 
if (params.cnv){ - if (params.genome == "mm10"){ + if (params.cnv || params.copynumber){ + if (params.genome.matches("mm10")){ CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) - } else if (params.genome== "hg38" | params.genome== "hg19"){ - if (!params.vc){ + }else if (params.genome.matches("hg38(.*)") | params.genome.matches("hg19(.*)")){ + if (!(params.vc || params.snv)){ VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) } else { CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) } - + } } + if (params.qc){ + QC_TONLY_BAM(INPUT_TONLY_BAM.out.bamwithsample) } + } } diff --git a/modules/local/annotsv.nf b/modules/local/annotsv.nf new file mode 100644 index 0000000..e83aeb4 --- /dev/null +++ b/modules/local/annotsv.nf @@ -0,0 +1,132 @@ +GENOMEREF=file(params.genomes[params.genome].genome) +ANNOTSVGENOME=params.genomes[params.genome].annotsvgenome + + +process annotsv_tn { + //AnnotSV for Manta/Svaba/GRIDSS works with either vcf.gz or .vcf files + //Requires bedtools,bcftools + errorStrategy = 'ignore' + container = "${params.containers.annotcnvsv}" + + input: + tuple val(tumorname), path(somaticvcf), val(sv) + + output: + tuple val(tumorname), + path("${sv}/${tumorname}.tsv"), + path("${sv}/${tumorname}.unannotated.tsv") + + + script: + """ + mkdir ${sv} + + AnnotSV -SVinputFile ${somaticvcf} \ + -genomeBuild $ANNOTSVGENOME \ + -SVinputInfo 1 -outputFile ${tumorname} \ + -outputDir ${sv} + + """ + + stub: + """ + mkdir ${sv} + + touch "${sv}/${tumorname}.tsv" + touch "${sv}/${tumorname}.unannotated.tsv" + """ +} + + + + +process gunzip { + label 'process_single' + + input: + tuple val(tumorname), + path(vcf), val(sv) + + output: + tuple val(tumorname), + path("${tumorname}.tumorSV_${sv}.vcf"), val(sv) + + script: + """ + gunzip -f ${vcf} > ${tumorname}.tumorSV_${sv}.vcf + """ + + stub: + + """ + touch 
${tumorname}.tumorSV_${sv}.vcf + """ + +} + + +process survivor_sv { + container = "${params.containers.annotcnvsv}" + + input: + tuple val(tumorname), + path(vcfs), val(svs) + + output: + tuple val(tumorname), + path("${tumorname}_merged.vcf"), + val("survivor") + + + script: + strin = vcfs.join("\\n") + + """ + echo -e '$strin' > filelistin + SURVIVOR merge filelistin 1000 2 1 1 1 30 ${tumorname}_merged.vcf + """ + + stub: + strin = vcfs.join("\\n") + """ + echo -e '$strin' > filelistin + touch "${tumorname}_merged.vcf" + """ +} + + +process annotsv_tonly { + //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files + //Requires bedtools,bcftools + errorStrategy = 'ignore' + + container = "${params.containers.annotcnvsv}" + + input: + tuple val(tumorname), path(somaticvcf), val(sv) + + output: + tuple val(tumorname), + path("${sv}/${tumorname}.tsv"), + path("${sv}/${tumorname}.unannotated.tsv") + + + script: + """ + mkdir ${sv} + + AnnotSV -SVinputFile ${somaticvcf} \ + -genomeBuild $ANNOTSVGENOME \ + -SVinputInfo 1 -outputFile ${tumorname} \ + -outputDir ${sv} + + """ + + stub: + """ + mkdir ${sv} + + touch "${sv}/${tumorname}.tsv" + touch "${sv}/${tumorname}.unannotated.tsv" + """ +} diff --git a/modules/local/annotvep.nf b/modules/local/annotvep.nf new file mode 100644 index 0000000..502cad5 --- /dev/null +++ b/modules/local/annotvep.nf @@ -0,0 +1,139 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) + +//VEP +VEPCACHEDIR=file(params.genomes[params.genome].vepcache) +VEPSPECIES=params.genomes[params.genome].vepspecies +VEPBUILD=params.genomes[params.genome].vepbuild + +process annotvep_tn { + label 'process_medium' + container "${params.containers.vcf2maf}" + + input: + tuple val(tumorsample), val(normalsample), + val(vc), path(tumorvcf), path(vcfindex) + + output: + path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") + + shell: + + ''' + VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf})) + TID_IDX=0 + NID_IDX="" + VCF_NID="" 
+ NORM_VCF_ID_ARG="" + NSAMPLES=${#VCF_SAMPLE_IDS[@]} + if [ $NSAMPLES -gt 1 ]; then + # Assign tumor, normal IDs + # Look through column names and + # see if they match provided IDs + for (( i = 0; i < $NSAMPLES; i++ )); do + echo "${VCF_SAMPLE_IDS[$i]}" + if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then + TID_IDX=$i + fi + + if [ "${VCF_SAMPLE_IDS[$i]}" == !{normalsample} ]; then + NID_IDX=$i + fi + done + + if [ ! -z $NID_IDX ]; then + VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]} + NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID" + fi + fi + VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} + + zcat !{tumorvcf} > !{tumorvcf.baseName} + + mkdir -p paired/!{vc} + + vcf2maf.pl \ + --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ + --output-maf paired/!{vc}/!{tumorsample}_vs_!{normalsample}.maf \ + --tumor-id !{tumorsample} \ + --normal-id !{normalsample} \ + --vep-path /opt/vep/src/ensembl-vep \ + --vep-data !{VEPCACHEDIR} \ + --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ + --retain-info "set" \ + --vep-overwrite + + ''' + + stub: + """ + mkdir -p paired/${vc} + touch paired/${vc}/${tumorsample}_vs_${normalsample}.maf + """ +} + + +process annotvep_tonly { + container "${params.containers.vcf2maf}" + label 'process_medium' + + input: + tuple val(tumorsample), + val(vc), path(tumorvcf), + path(vcfindex) + + + output: + path("tumor_only/${vc}/${tumorsample}.tonly.maf") + + shell: + + ''' + VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf})) + TID_IDX=0 + NID_IDX="" + VCF_NID="" + NORM_VCF_ID_ARG="" + NSAMPLES=${#VCF_SAMPLE_IDS[@]} + if [ $NSAMPLES -gt 1 ]; then + # Assign tumor, normal IDs + # Look through column names and + # see if they match provided IDs + for (( i = 0; i < $NSAMPLES; i++ )); do + echo "${VCF_SAMPLE_IDS[$i]}" + if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then + TID_IDX=$i + fi + + done + + if [ ! 
-z $NID_IDX ]; then + VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]} + NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID" + fi + fi + VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} + + zcat !{tumorvcf} > !{tumorvcf.baseName} + + mkdir -p tumor_only/!{vc} + + vcf2maf.pl \ + --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ + --output-maf tumor_only/!{vc}/!{tumorsample}.tonly.maf \ + --tumor-id !{tumorsample} \ + --vep-path /opt/vep/src/ensembl-vep \ + --vep-data !{VEPCACHEDIR} \ + --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ + --retain-info "set" \ + --vep-overwrite + + + ''' + + stub: + """ + mkdir -p tumor_only/${vc} + touch tumor_only/${vc}/${tumorsample}.tonly.maf + """ +} diff --git a/modules/local/ascat.nf b/modules/local/ascat.nf new file mode 100644 index 0000000..c37aae5 --- /dev/null +++ b/modules/local/ascat.nf @@ -0,0 +1,108 @@ +ascatR = params.script_ascat +if (params.genome.matches("hg38(.*)")){ + GENOMEVER="hg38" +} else if (params.genome.matches("hg19(.*)")){ + GENOMEVER="hg19" +} + + +process ascat_tn { + container = "${params.containers.cnv}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), + path("After_correction_${tumorname}.germline.png"), + path("After_correction_${tumorname}.tumour.png"), + path("Before_correction_${tumorname}.germline.png"), + path("Before_correction_${tumorname}.tumour.png"), + path("${tumorname}.ASCATprofile.png"), + path("${tumorname}.ASPCF.png"), + path("${tumorname}.sunrise.png"), + path("${tumorname}_BAF.txt"), + path("${tumorname}_LogR.txt"), + path("${tumorname}.segments_raw.txt"), + path("${tumorname}.segments.txt"), + path("${tumorname}_vs_${normalname}.qc.txt"), + path("${tumorname}_vs_${normalname}_ascat.Rdata") + + script: + """ + Rscript $ascatR ${tumor} ${tumorname} ${normal} ${normalname} $GENOMEVER + """ + + stub: + """ + touch After_correction_${tumorname}.germline.png + 
touch After_correction_${tumorname}.tumour.png + touch Before_correction_${tumorname}.germline.png + touch Before_correction_${tumorname}.tumour.png + touch ${tumorname}.ASCATprofile.png + touch ${tumorname}.ASPCF.png + touch ${tumorname}.sunrise.png + touch ${tumorname}_BAF.txt + touch ${tumorname}_LogR.txt + touch ${tumorname}.segments_raw.txt + touch ${tumorname}.segments.txt + touch ${tumorname}_vs_${normalname}.qc.txt + touch ${tumorname}_vs_${normalname}_ascat.Rdata + + """ + +} + + +process ascat_tn_exome { + container = "${params.containers.cnv}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), + path(bed) + + output: + tuple val(tumorname), + path("After_correction_${tumorname}.germline.png"), + path("After_correction_${tumorname}.tumour.png"), + path("Before_correction_${tumorname}.germline.png"), + path("Before_correction_${tumorname}.tumour.png"), + path("${tumorname}.ASCATprofile.png"), + path("${tumorname}.ASPCF.png"), + path("${tumorname}.sunrise.png"), + path("${tumorname}_BAF.txt"), + path("${tumorname}_LogR.txt"), + path("${tumorname}.segments_raw.txt"), + path("${tumorname}.segments.txt"), + path("${tumorname}_vs_${normalname}.qc.txt"), + path("${tumorname}_vs_${normalname}_ascat.Rdata") + + script: + """ + sed 's/^chr//' ${bed} > nochrtemp.bed + Rscript $ascatR ${tumor} ${tumorname} ${normal} ${normalname} $GENOMEVER nochrtemp.bed wes + """ + + stub: + """ + touch After_correction_${tumorname}.germline.png + touch After_correction_${tumorname}.tumour.png + touch Before_correction_${tumorname}.germline.png + touch Before_correction_${tumorname}.tumour.png + touch ${tumorname}.ASCATprofile.png + touch ${tumorname}.ASPCF.png + touch ${tumorname}.sunrise.png + touch ${tumorname}_BAF.txt + touch ${tumorname}_LogR.txt + touch ${tumorname}.segments_raw.txt + touch ${tumorname}.segments.txt + touch ${tumorname}_vs_${normalname}.qc.txt + touch 
${tumorname}_vs_${normalname}_ascat.Rdata + + """ + +} diff --git a/modules/local/bcftools_stats.nf b/modules/local/bcftools_stats.nf new file mode 100644 index 0000000..cadd490 --- /dev/null +++ b/modules/local/bcftools_stats.nf @@ -0,0 +1,33 @@ + +process bcftools_stats { + /* + Quality-control step to collect summary statistics from bcftools stats. + When bcftools stats is run with one VCF file then stats by non-reference + allele frequency, depth distribution, stats by quality and per-sample + counts, singleton statsistics are calculated. Please see bcftools' + documentation for more information: + http://samtools.github.io/bcftools/bcftools.html#stats + @Input: + Per sample gVCF file (scatter) + @Output: + Text file containing a collection of summary statistics + */ + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(samplename), path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi") + output: + path("${samplename}.germline.bcftools_stats.txt") + + script: + """ + bcftools stats ${samplename}.gvcf.gz > ${samplename}.germline.bcftools_stats.txt + """ + + stub: + """ + touch ${samplename}.germline.bcftools_stats.txt + """ + +} \ No newline at end of file diff --git a/modules/local/cnvkit.nf b/modules/local/cnvkit.nf new file mode 100644 index 0000000..28856b9 --- /dev/null +++ b/modules/local/cnvkit.nf @@ -0,0 +1,137 @@ + + +//CNVKIT +GENOMEREF = file(params.genomes[params.genome].genome) + +REFFLAT = file(params.genomes[params.genome].REFFLAT) +ACCESS = file(params.genomes[params.genome].ACCESS) + +process cnvkit { + container = "${params.containers.cnv}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + + output: + tuple val(tumorname), path("${tumorname}") + + + script: + """ + cnvkit.py batch ${tumor} --normal ${normal} \ + --annotate $REFFLAT \ + --fasta $GENOMEREF --access $ACCESS \ + --output-reference 
${tumorname}.cnn --output-dir ${tumorname}/ \ + --diagram --scatter \ + -m wgs -p $task.cpus + """ + + stub: + """ + mkdir ${tumorname} + touch ${tumorname}/${normalname}.antitargetcoverage.cnn ${tumorname}/${normalname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.antitargetcoverage.cnn ${tumorname}/${tumorname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.bintest.cns ${tumorname}/${tumorname}.call.cns ${tumorname}/${tumorname}.cnr ${tumorname}/${tumorname}.cns ${tumorname}/${tumorname}-diagram.pdf ${tumorname}/${tumorname}-scatter.png + """ + +} + +process cnvkit_exome { + container = "${params.containers.cnv}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), + path(bed) + + output: + tuple val(tumorname), path("${tumorname}") + + script: + """ + cnvkit.py batch ${tumor} --normal ${normal} \ + --targets ${bed} --annotate $REFFLAT \ + --fasta $GENOMEREF --access $ACCESS \ + --output-reference ${tumorname}.cnn --output-dir ${tumorname}/ \ + --diagram --scatter -p $task.cpus + """ + + stub: + """ + mkdir ${tumorname} + touch ${tumorname}/${normalname}.antitargetcoverage.cnn ${tumorname}/${normalname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.antitargetcoverage.cnn ${tumorname}/${tumorname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.bintest.cns ${tumorname}/${tumorname}.call.cns ${tumorname}/${tumorname}.cnr ${tumorname}/${tumorname}.cns ${tumorname}/${tumorname}-diagram.pdf ${tumorname}/${tumorname}-scatter.png + + """ + +} + + + +process cnvkit_tonly { + container = "${params.containers.cnv}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), path("${tumorname}") + + + script: + """ + cnvkit.py batch ${tumor} -n \ + --annotate $REFFLAT \ + --fasta $GENOMEREF --access $ACCESS \ + --output-reference ${tumorname}.cnn --output-dir ${tumorname}/ \ + --diagram --scatter \ + 
-m wgs -p $task.cpus + """ + + stub: + """ + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.antitargetcoverage.cnn ${tumorname}/${tumorname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.bintest.cns ${tumorname}/${tumorname}.call.cns ${tumorname}/${tumorname}.cnr ${tumorname}/${tumorname}.cns ${tumorname}/${tumorname}-diagram.pdf ${tumorname}/${tumorname}-scatter.png + """ + +} + +process cnvkit_exome_tonly { + container = "${params.containers.cnv}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), + path(bed) + + output: + tuple val(tumorname), path("${tumorname}") + + script: + """ + cnvkit.py batch ${tumor} --normal \ + --targets $bed --annotate $REFFLAT \ + --fasta $GENOMEREF --access $ACCESS \ + --output-reference ${tumorname}.cnn --output-dir ${tumorname}/ \ + --diagram --scatter -p $task.cpus + """ + + stub: + """ + mkdir ${tumorname} + touch ${tumorname}/${normalname}.antitargetcoverage.cnn ${tumorname}/${normalname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.antitargetcoverage.cnn ${tumorname}/${tumorname}.targetcoverage.cnn + touch ${tumorname}/${tumorname}.bintest.cns ${tumorname}/${tumorname}.call.cns ${tumorname}/${tumorname}.cnr ${tumorname}/${tumorname}.cns ${tumorname}/${tumorname}-diagram.pdf ${tumorname}/${tumorname}-scatter.png + + """ + +} + diff --git a/modules/local/combinefilter.nf b/modules/local/combinefilter.nf new file mode 100644 index 0000000..b6fc6ce --- /dev/null +++ b/modules/local/combinefilter.nf @@ -0,0 +1,282 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) +GENOMEDICT=file(params.genomes[params.genome].genomedict) + + +process combineVariants { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(sample), path(inputvcf), val(vc) + + output: + tuple val(sample), + 
path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") + + script: + vcfin = inputvcf.join(" -I ") + //Create Tumor Normal here + samplist=sample.split('_vs_') + if(samplist.size()>1){ + samporder = samplist.join(",") + }else{ + samporder = sample + } + + """ + mkdir ${vc} + gatk --java-options "-Xmx48g" SortVcf \ + -O ${sample}.${vc}.markedtemp.vcf.gz \ + -SD $GENOMEDICT \ + -I $vcfin + + bcftools view ${sample}.${vc}.markedtemp.vcf.gz -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools index -t ${sample}.${vc}.marked.vcf.gz + + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' > ${sample}.${vc}.temp.vcf + + bcftools view ${sample}.${vc}.temp.vcf -f PASS -s $samporder -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t + + mv ${sample}.${vc}.marked.vcf.gz ${vc} + mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc} + + """ + + stub: + + """ + mkdir ${vc} + touch ${vc}/${sample}.${vc}.marked.vcf.gz + touch ${vc}/${sample}.${vc}.norm.vcf.gz + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi + """ + +} + + +process combineVariants_alternative { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(sample), path(vcfs), path(vcfsindex), val(vc) + + output: + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") + + script: + vcfin = vcfs.join(" ") + samplist=sample.split('_vs_') + if (vc.contains("lofreq") | vc.contains('deepsomatic')) { + samporder = samplist[0] + }else if(samplist.size()>1){ + samporder = 
samplist.join(",") + }else{ + samporder = sample + } + + if (vc.contains("octopus")) { + """ + mkdir ${vc} + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf + bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -i "INFO/SOMATIC==1" -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' > ${sample}.${vc}.temp.vcf + + bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t + """ + + }else{ + """ + mkdir ${vc} + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf + bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' > ${sample}.${vc}.temp.vcf + + bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t + """ + } + + stub: + + """ + mkdir ${vc} + touch ${vc}/${sample}.${vc}.marked.vcf.gz + touch ${vc}/${sample}.${vc}.norm.vcf.gz + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi + + """ + +} + + + +process combinemafs_tn { + container "${params.containers.logan}" + label 'process_low' + + input: + path(allmafs) + + 
output: + path("final_tn.maf") + + shell: + mafin= allmafs.join(" ") + + """ + echo "Combining MAFs..." + head -2 ${allmafs[0]} > final_tn.maf + awk 'FNR>2 {{print}}' ${mafin} >> final_tn.maf + """ + + stub: + """ + touch final_tn.maf + """ +} + + + +process combinemafs_tonly { + container "${params.containers.logan}" + label 'process_low' + + input: + path(allmafs) + + output: + path("final_tonly.maf") + + shell: + mafin= allmafs.join(" ") + + """ + echo "Combining MAFs..." + head -2 ${allmafs[0]} > final_tonly.maf + awk 'FNR>2 {{print}}' ${mafin} >> final_tonly.maf + """ + + stub: + """ + touch final_tonly.maf + """ +} + + + +process somaticcombine { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), val(normal), + val(caller), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") + + script: + vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + callerin=caller.join(",") + """ + /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ + -R $GENOMEREF \ + --genotypemergeoption PRIORITIZE \ + --rod_priority_list $callerin \ + --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ + -o ${tumorsample}_vs_${normal}_combined.vcf.gz \ + $vcfin2 + + """ + + stub: + vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + callerin=caller.join(",") + + """ + touch ${tumorsample}_vs_${normal}_combined.vcf.gz + touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi + """ + +} + + + + +process somaticcombine_tonly { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), + val(caller), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), + path("${tumorsample}_combined_tonly.vcf.gz"), + 
path("${tumorsample}_combined_tonly.vcf.gz.tbi") + + script: + vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + callerin=caller.join(",")//.replaceAll("_tonly","") + + """ + /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ + -R $GENOMEREF \ + --genotypemergeoption PRIORITIZE \ + --rod_priority_list $callerin \ + --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ + -o ${tumorsample}_combined_tonly.vcf.gz \ + $vcfin2 + """ + + stub: + + vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + callerin=caller.join(",")//.replaceAll("_tonly","") + + """ + touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi + """ + +} + diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf deleted file mode 100644 index c1239ec..0000000 --- a/modules/local/copynumber.nf +++ /dev/null @@ -1,661 +0,0 @@ -GENOMEREF = file(params.genomes[params.genome].genome) -SEQUENZAGC = file(params.genomes[params.genome].SEQUENZAGC) -SEQUENZA_SCRIPT = params.script_sequenza - -if (params.genome=="mm10"){ - FREECLENGTHS = params.genomes[params.genome].FREEC.FREECLENGTHS - FREECCHROMS = params.genomes[params.genome].FREEC.FREECCHROMS - FREECPILEUP = params.genomes[params.genome].FREEC.FREECPILEUP - FREECSNPS = params.genomes[params.genome].FREEC.FREECSNPS - FREECTARGETS = params.genomes[params.genome].intervals - FREECSCRIPT = params.script_freec - FREECPAIR_SCRIPT = params.script_freecpaired - FREECSIGNIFICANCE = params.freec_significance - FREECPLOT = params.freec_plot -} - -if (params.genome=="hg38" | params.genome=="hg19"){ - HMFGENOMEREF = file(params.genomes[params.genome].HMFGENOME) - GENOMEVER = params.genomes[params.genome].GENOMEVER - GCPROFILE = file(params.genomes[params.genome].GCPROFILE) - GERMLINEHET = file(params.genomes[params.genome].GERMLINEHET) - DIPLODREG = 
file(params.genomes[params.genome].DIPLODREG) - ENSEMBLCACHE = params.genomes[params.genome].ENSEMBLCACHE - DRIVERS = file(params.genomes[params.genome].DRIVERS) - SOMATICHOTSPOTS = file(params.genomes[params.genome].SOMATICHOTSPOTS) - GERMLINEHOTSPOTS = file(params.genomes[params.genome].GERMLINEHOTSPOTS) -} - -//mm10 Paired-Sequenza, FREEC-tumor only -process seqz_sequenza_bychr { - container = "${params.containers.logan}" - label 'process_long' - - input: - tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), val(chr) - - output: - tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") - - script: - """ - sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ - -F $GENOMEREF \ - -C ${chr} \ - -n ${normal} \ - -t ${tumor} | gzip > "${tumorname}_${normalname}_${chr}.seqz.gz" - - """ - - stub: - """ - touch "${tumorname}_${normalname}_${chr}.seqz.gz" - """ -} - -process pileup_sequenza { - container = "${params.containers.logan}" - label 'process_low' - - input: - tuple val(pairid), val(name), - path(bam), path(bai), path(bed) - - output: - tuple val(pairid), path("${name}_${bed}.mpileup.gz"), path("${name}_${bed}.mpileup.gz.tbi") - - script: - //Q20 is default in sequenza - """ - samtools mpileup -f $GENOMEREF -R ${bed} -Q 20 ${bam} |gzip > ${name}_${bed}.mpileup.gz - tabix -s1 -b2 -e2 ${name}_${bed}.mpileup.gz - """ - - stub: - """ - touch "${name}_${bed}.mpileup.gz" - touch "${name}_${bed}.mpileup.gz.tbi" - """ -} - -process seqz_sequenza_reg { - container = "${params.containers.logan}" - label 'process_low' - - input: - tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") - - script: - """ - sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ - -p \ - -F $GENOMEREF \ - -n ${normal} \ - -t ${tumor} | gzip > 
"${tumorname}_${normalname}_${bed}.seqz.gz" - - """ - - stub: - """ - touch "${tumorname}_${normalname}_${chr}.seqz.gz" - """ -} - -process seqz_sequenza { - container = "${params.containers.logan}" - label 'process_low' - - input: - tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") - - script: - """ - sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ - -p \ - -F $GENOMEREF \ - -n ${normal} \ - -t ${tumor} | gzip > "${tumorname}_${normalname}_${bed}.seqz.gz" - - """ - - stub: - """ - touch "${tumorname}_${normalname}_${chr}.seqz.gz" - """ -} - - - - -process sequenza { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(pairid), path(seqz) - - output: - tuple val(pairid), - path("${pairid}_alternative_solutions.txt"), - path("${pairid}_alternative_fit.pdf"), - path("${pairid}_model_fit.pdf"), - path("${pairid}_confints_CP.txt"), - path("${pairid}_CN_bars.pdf"), - path("${pairid}_genome_view.pdf"), - path("${pairid}_chromosome_view.pdf"), - path("${pairid}_mutations.txt"), - path("${pairid}_segments.txt"), - path("${pairid}_CP_contours.pdf"), - path("${pairid}_sequenza_cp_table.RData"), - path("${pairid}_chromosome_depths.pdf"), - path("${pairid}_gc_plots.pdf"), - path("${pairid}_sequenza_extract.RData") - - - shell: - ''' - - zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ - sequenza-utils seqz_binning \ - -w 100 \ - -s - > !{pairid}.bin100.seqz - - Rscript !{SEQUENZA_SCRIPT} \ - !{pairid}.bin100.seqz \ - . 
\ - !{pairid} \ - !{task.cpus} - - ''' - - stub: - - """ - touch "${pairid}_alternative_solutions.txt" - touch "${pairid}_alternative_fit.pdf" - touch "${pairid}_model_fit.pdf" - touch "${pairid}_confints_CP.txt" - touch "${pairid}_CN_bars.pdf" - touch "${pairid}_genome_view.pdf" - touch "${pairid}_chromosome_view.pdf" - touch "${pairid}_mutations.txt" - touch "${pairid}_segments.txt" - touch "${pairid}_CP_contours.pdf" - touch "${pairid}_sequenza_cp_table.RData" - touch "${pairid}_chromosome_depths.pdf" - touch "${pairid}_gc_plots.pdf" - touch "${pairid}_sequenza_extract.RData" - - """ - -} - - -process freec_paired { - container = "${params.containers.logan}" - label 'process_long' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}_CNVs.p.value.txt"), - path("${tumorname}_vs_${normalname}_ratio.txt"), - path("${tumorname}_vs_${normalname}_BAF.txt"), - path("${tumorname}_vs_${normalname}_ratio.txt.log2.png"), - path("${tumorname}_vs_${normalname}_ratio.txt.png") - - shell: - """ - - perl $FREECPAIR_SCRIPT \ - . 
\ - $FREECLENGTHS \ - $FREECCHROMS \ - ${tumor} \ - ${normal} \ - $FREECPILEUP \ - $GENOMEREF \ - $FREECSNPS \ - $FREECTARGETS - - freec -conf freec_genome_config.txt - - cat $FREECSIGNIFICANCE | \ - R --slave \ - --args ${tumor}_CNVs \ - ${tumor}_ratio.txt - - cat $FREECPLOT | \ - R --slave \ - --args 2 \ - ${tumor}_ratio.txt \ - ${tumor}_BAF.txt - - mv ${tumor}_CNVs.p.value.txt ${tumorname}_vs_${normalname}_CNVs.p.value.txt - mv ${tumor}_ratio.txt ${tumorname}_vs_${normalname}_ratio.txt - mv ${tumor}_BAF.txt ${tumorname}_vs_${normalname}_BAF.txt - mv ${tumor}_BAF.txt.png ${tumorname}_vs_${normalname}_BAF.txt.png - mv ${tumor}_ratio.txt.log2.png ${tumorname}_vs_${normalname}_ratio.txt.log2.png - mv ${tumor}_ratio.txt.png ${tumorname}_vs_${normalname}_ratio.txt.png - - """ - - stub: - """ - touch ${tumorname}_vs_${normalname}_CNVs.p.value.txt - touch ${tumorname}_vs_${normalname}_ratio.txt - touch ${tumorname}_vs_${normalname}_BAF.txt - touch ${tumorname}_vs_${normalname}_BAF.txt.png - touch ${tumorname}_vs_${normalname}_ratio.txt.log2.png - touch ${tumorname}_vs_${normalname}_ratio.txt.png - - """ -} - - -process freec { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorname), path(tumor), path(tumorbai) - - output: - tuple val(tumorname), - path("${tumorname}_CNVs.p.value.txt"), - path("${tumorname}_ratio.txt"), - path("${tumorname}_BAF.txt"), - path("${tumorname}_ratio.txt.log2.png"), - path("${tumorname}_ratio.txt.png") - - - shell: """ - - perl $FREECSCRIPT \ - . 
\ - $FREECLENGTHS \ - $FREECCHROMS \ - ${tumor} \ - $FREECPILEUP \ - $GENOMEREF \ - $FREECSNPS \ - $FREECTARGETS - - freec -conf freec_genome_config.txt - - cat $FREECSIGNIFICANCE | \ - R --slave \ - --args ${tumor}_CNVs \ - ${tumor}_ratio.txt - - cat $FREECPLOT | \ - R --slave \ - --args 2 \ - ${tumor}_ratio.txt \ - ${tumor}_BAF.txt - - mv ${tumor}_CNVs.p.value.txt ${tumorname}_CNVs.p.value.txt - mv ${tumor}_ratio.txt ${tumorname}_ratio.txt - mv ${tumor}_BAF.txt ${tumorname}_BAF.txt - mv ${tumor}_BAF.txt.png ${tumorname}_BAF.txt.png - mv ${tumor}_ratio.txt.log2.png ${tumorname}_ratio.txt.log2.png - mv ${tumor}_ratio.txt.png ${tumorname}_ratio.txt.png - - """ - - stub: - """ - touch ${tumorname}_CNVs.p.value.txt - touch ${tumorname}_ratio.txt - touch ${tumorname}_BAF.txt - touch ${tumorname}_BAF.txt.png - touch ${tumorname}_ratio.txt.log2.png - touch ${tumorname}_ratio.txt.png - - """ -} - - -process amber_tonly { - container = "${params.containers.logan}" - - label 'process_medium' - - input: - tuple val(tumorname), path(tumor), path(tumorbai) - - - output: - tuple val(tumorname), path("${tumorname}_amber") - - script: - - """ - - java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \ - -tumor ${tumorname} -tumor_bam ${tumor} \ - -output_dir ${tumorname}_amber \ - -threads $task.cpus \ - -ref_genome_version $GENOMEVER \ - -loci $GERMLINEHET - - """ - - stub: - - """ - mkdir ${tumorname}_amber - touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc - """ -} - -process amber_tn { - container = "${params.containers.logan}" - - label 'process_medium' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai) - - output: - tuple val("${tumorname}_vs_${normalname}"), - val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_amber") - - script: - - """ - - java -Xmx32G -cp 
/opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \ - -tumor ${tumorname} -tumor_bam ${tumor} \ - -reference ${normalname} -reference_bam ${normal} \ - -output_dir ${tumorname}_vs_${normalname}_amber \ - -threads $task.cpus \ - -ref_genome_version $GENOMEVER \ - -loci $GERMLINEHET - - """ - - stub: - - """ - mkdir ${tumorname}_vs_${normalname}_amber - touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc - """ -} - -process cobalt_tonly { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorname), path(tumor), path(tumorbai) - - output: - tuple val(tumorname), path("${tumorname}_cobalt") - - script: - - """ - - java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ - -tumor ${tumorname} -tumor_bam ${tumor} \ - -output_dir ${tumorname}_cobalt \ - -threads $task.cpus \ - -tumor_only_diploid_bed $DIPLODREG \ - -gc_profile $GCPROFILE - - """ - - stub: - - """ - mkdir ${tumorname}_cobalt - touch ${tumorname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_cobalt/${tumorname}.cobalt.gc.median.tsv - """ -} - -process cobalt_tn { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai) - - output: - tuple val("${tumorname}_vs_${normalname}"), - val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_cobalt") - - script: - - """ - java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ - -tumor ${tumorname} -tumor_bam ${tumor} \ - -reference ${normalname} -reference_bam ${normal} \ - -output_dir ${tumorname}_vs_${normalname}_cobalt \ - -threads $task.cpus \ - -gc_profile $GCPROFILE - - """ - - stub: - - """ - mkdir ${tumorname}_vs_${normalname}_cobalt - touch 
${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.gc.median.tsv - """ -} - - -process purple { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(id), val(tumorname), val(normalname), - path(amberin), path(cobaltin), - path(somaticvcf), path(somaticvcfindex) - - output: - tuple val(id), path("${id}") - - script: - - """ - java -jar /opt2/hmftools/purple.jar \ - -tumor ${tumorname} \ - -reference ${normalname} \ - -amber ${amberin} \ - -cobalt ${cobaltin} \ - -gc_profile $GCPROFILE \ - -ref_genome_version $GENOMEVER \ - -ref_genome $GENOMEREF \ - $ENSEMBLCACHE \ - -somatic_vcf ${somaticvcf} \ - -driver_gene_panel $DRIVERS \ - -somatic_hotspots $SOMATICHOTSPOTS \ - -threads $task.cpus \ - -output_dir ${id} - """ - - stub: - - """ - mkdir ${id} - touch ${id}/${id}.purple.cnv.somatic.tsv ${id}/${id}.purple.cnv.gene.tsv ${id}/${id}.driver.catalog.somatic.tsv - """ - -} - - -process purple_novc { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(id), val(tumorname), val(normalname), - path(amberin), path(cobaltin) - - output: - tuple val(id), val(tumorname), val(normalname), path("${id}") - - script: - - """ - java -jar /opt2/hmftools/purple.jar \ - -tumor ${tumorname} \ - -reference ${normalname} \ - -amber ${amberin} \ - -cobalt ${cobaltin} \ - -gc_profile $GCPROFILE \ - -ref_genome_version $GENOMEVER \ - -ref_genome $HMFGENOMEREF \ - $ENSEMBLCACHE \ - -threads $task.cpus \ - -output_dir ${id} - - """ - - stub: - - """ - mkdir ${tumorname} - touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv - """ - -} - - -process purple_tonly { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorname), - path(amberin), 
path(cobaltin), - path(somaticvcf), path(somaticvcfindex) - - output: - tuple val(tumorname), path("${tumorname}") - - script: - - """ - java -jar /opt2/hmftools/purple.jar \ - -tumor ${tumorname} \ - -amber ${amberin} \ - -cobalt ${cobaltin} \ - -gc_profile $GCPROFILE \ - -ref_genome_version $GENOMEVER \ - -ref_genome $GENOMEREF \ - $ENSEMBLCACHE \ - -somatic_vcf ${somaticvcf} \ - -driver_gene_panel $DRIVERS \ - -somatic_hotspots $HOTSPOTS \ - -threads $task.cpus \ - -output_dir ${tumorname} - """ - - stub: - - """ - mkdir ${tumorname} - touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv - """ - -} - - -process purple_tonly_novc { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorname), val(normalname), - path(cobaltin), path(amberin) - - output: - tuple val(tumorname), path("${tumorname}") - - script: - - """ - java -jar /opt2/hmftools/purple.jar \ - -tumor ${tumorname} \ - -amber ${amberin} \ - -cobalt ${cobaltin} \ - -gc_profile $GCPROFILE \ - -ref_genome_version $GENOMEVER \ - -ref_genome $GENOMEREF \ - $ENSEMBLCACHE \ - -threads $task.cpus \ - -output_dir ${tumorname} - """ - - stub: - - """ - mkdir ${tumorname} - touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv - """ - -} - -/* -process ascat_tn { - module=["java/12.0.1","R/3.6.3"] - - input: - tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz") - - output: - tuple val(samplename), path("${samplename}") - - script: - - """ - Rscript ${ascatR} - """ - - stub: - - """ - touch ${prefix}.after_correction.gc_rt.test.tumour.germline.png - touch ${prefix}.after_correction.gc_rt.test.tumour.tumour.png - touch ${prefix}.before_correction.test.tumour.germline.png - touch ${prefix}.before_correction.test.tumour.tumour.png 
- touch ${prefix}.cnvs.txt - touch ${prefix}.metrics.txt - touch ${prefix}.normal_alleleFrequencies_chr21.txt - touch ${prefix}.normal_alleleFrequencies_chr22.txt - touch ${prefix}.purityploidy.txt - touch ${prefix}.segments.txt - touch ${prefix}.tumour.ASPCF.png - touch ${prefix}.tumour.sunrise.png - touch ${prefix}.tumour_alleleFrequencies_chr21.txt - touch ${prefix}.tumour_alleleFrequencies_chr22.txt - touch ${prefix}.tumour_normalBAF.txt - touch ${prefix}.tumour_normalLogR.txt - touch ${prefix}.tumour_tumourBAF.txt - touch ${prefix}.tumour_tumourLogR.txt - """ - -} - -*/ diff --git a/modules/local/deepsomatic.nf b/modules/local/deepsomatic.nf new file mode 100644 index 0000000..529254d --- /dev/null +++ b/modules/local/deepsomatic.nf @@ -0,0 +1,210 @@ +GENOMEREF=file(params.genomes[params.genome].genome) + +if(params.exome && params.ffpe) { + DS_MODEL = "/opt/models/deepsomatic/ffpe_wes" +}else if(params.ffpe){ + DS_MODEL = "/opt/models/deepsomatic/ffpe_wgs" +}else if(params.exome){ + DS_MODEL = "/opt/models/deepsomatic/wes" +}else{ + DS_MODEL = "/opt/models/deepsomatic/wgs" +} + +process deepsomatic_tn_step1 { + container = "${params.containers.deepsomatic}" + label 'process_somaticcaller' + + input: + tuple val(tname), path(tbam), path(tbai), + val(nname), path(nbam), path(nbai), + path(bed) + + output: + tuple val(tname), val(nname), + path("${tname}_vs_${nname}.tfrecord_${bed}.gz"), + path("${tname}_vs_${nname}.tfrecord_${bed}.gz.example_info.json"), + path(bed) + + script: + """ + make_examples_somatic \ + --mode calling \ + --ref $GENOMEREF \ + --regions ${bed} \ + --checkpoint $DS_MODEL \ + --population_vcfs "/opt/models/deepsomatic/pons/AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz" \ + --vsc_max_fraction_indels_for_non_target_sample "0.5" --vsc_max_fraction_snps_for_non_target_sample "0.5" --vsc_min_fraction_indels "0.05" --vsc_min_fraction_snps "0.029" \ + --reads_tumor ${tbam} \ + --reads_normal ${nbam} \ + --sample_name_tumor ${tname} \ + 
--sample_name_normal ${nname} \ + --examples ${tname}_vs_${nname}.tfrecord_${bed}.gz \ + """ + + stub: + """ + touch ${tname}_vs_${nname}.tfrecord_${bed}.gz + touch ${tname}_vs_${nname}.tfrecord_${bed}.gz.example_info.json + """ + +} + + +process deepsomatic_tonly_step1 { + container = "${params.containers.deepsomatic}" + label 'process_somaticcaller' + + input: + tuple val(tname), path(tbam), path(tbai), + path(bed) + + output: + tuple val(tname), + path("${tname}.tfrecord_${bed}.gz"), + path("${tname}.tfrecord_${bed}.gz.example_info.json"), + path(bed) + + script: + """ + make_examples_somatic \ + --mode calling \ + --ref $GENOMEREF \ + --regions ${bed} \ + --checkpoint /opt/models/deepsomatic/wgs_tumor_only \ + --population_vcfs "/opt/models/deepsomatic/pons/AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz" \ + --vsc_max_fraction_indels_for_non_target_sample "0.5" --vsc_max_fraction_snps_for_non_target_sample "0.5" --vsc_min_fraction_indels "0.07" --vsc_min_fraction_snps "0.05" \ + --reads_tumor ${tbam} \ + --sample_name_tumor ${tname} \ + --examples ${tname}.tfrecord_${bed}.gz + """ + + stub: + """ + touch ${tname}.tfrecord_${bed}.gz + touch ${tname}.tfrecord_${bed}.gz.example_info.json + """ + +} + + +//Step 2 can run in CPU or GPU mode for now use only CPUs +process deepsomatic_step2 { + container = "${params.containers.deepsomatic}" + label 'process_somaticcaller' + errorStrategy { task.exitStatus == 1 ? 
'ignore' : 'terminate' } + + input: + tuple val(samplename), path(tfrecords), path(json), path(bed) + + output: + tuple val(samplename), + path(tfrecords), + path("outds/*"), path(bed) + + script: + sub_cpus = "$task.cpus".toInteger() - 1 + + """ + mkdir -p outds/ + call_variants \ + --examples $tfrecords \ + --outfile outds/${samplename}_call_variants_output.tfrecord.gz \ + --checkpoint $DS_MODEL \ + --num_readers $sub_cpus + """ + + stub: + """ + mkdir -p outds + touch "outds/${samplename}_call_variants_output.tfrecord.gz" + """ +} + +process deepsomatic_tonly_step2 { + container = "${params.containers.deepsomatic}" + label 'process_somaticcaller' + errorStrategy { task.exitStatus == 1 ? 'ignore' : 'terminate' } + + input: + tuple val(samplename), path(tfrecords), path(json), path(bed) + + output: + tuple val(samplename), + path(tfrecords), + path("outds/*"), path(bed) + + script: + sub_cpus = "$task.cpus".toInteger() - 1 + + """ + mkdir -p outds/ + call_variants \ + --examples $tfrecords \ + --outfile outds/${samplename}_call_variants_output.tfrecord.gz \ + --checkpoint /opt/models/deepsomatic/wgs_tumor_only \ + --num_readers $sub_cpus + """ + + stub: + """ + mkdir -p outds + touch "outds/${samplename}_call_variants_output.tfrecord.gz" + """ +} + + +//Step 3 DV +process deepsomatic_step3 { + container = "${params.containers.deepsomatic}" + label 'process_somaticcaller' + + input: + tuple val(samplename), path(tfrecords), + path("outds/*"), path(bed) + + output: + tuple val(samplename), path("${samplename}_${bed}.vcf.gz"), path("${samplename}_${bed}.vcf.gz.tbi") + + + script: + """ + postprocess_variants \ + --ref $GENOMEREF \ + -j $task.cpus \ + --process_somatic=true --pon_filtering "/opt/models/deepsomatic/pons/PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz" \ + --infile outds/${samplename}_call_variants_output.tfrecord.gz \ + --outfile ${samplename}_${bed}.vcf.gz + """ + + stub: + """ + touch ${samplename}_${bed}.vcf.gz ${samplename}_${bed}.vcf.gz.tbi + """ + +} + 
+ +process bcfconcat { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(samplename), path(vcf), path(index), val(type) + + output: + tuple val(samplename), path("${samplename}.${type}.gz"), path("${samplename}.${type}.gz.tbi") + + script: + vcfin=vcf.join(" ") + + """ + bcftools concat $vcfin --write-index -Oz -o ${samplename}.${type}.gz##idx##${samplename}.${type}.gz.tbi + """ + + stub: + """ + touch ${samplename}.${type}.gz ${samplename}.${type}.gz.tbi + """ + +} + diff --git a/modules/local/deepvariant.nf b/modules/local/deepvariant.nf new file mode 100644 index 0000000..1109655 --- /dev/null +++ b/modules/local/deepvariant.nf @@ -0,0 +1,205 @@ +GENOMEREF=file(params.genomes[params.genome].genome) +MODEL="/opt/models/wgs/" + + +//Processes +//Deep Variant +process deepvariant_step1 { + container = "${params.containers.deepvariant}" + label 'process_somaticcaller' + + input: + tuple val(samplename), path(bam), path(bai), path(bed) + + output: + tuple val(samplename), path("${samplename}.tfrecord_${bed}.gz"), + path("${samplename}.tfrecord_${bed}.gz.example_info.json"), + path("${samplename}.gvcf.tfrecord_${bed}.gz"), path(bed) + + script: + """ + make_examples \ + --mode calling \ + --ref $GENOMEREF \ + --regions ${bed} \ + --reads ${bam} \ + --channels insert_size \ + --examples ${samplename}.tfrecord_${bed}.gz \ + --gvcf ${samplename}.gvcf.tfrecord_${bed}.gz + """ + + stub: + """ + touch ${samplename}.tfrecord_${bed}.gz + touch ${samplename}.tfrecord_${bed}.gz.example_info.json + touch ${samplename}.gvcf.tfrecord_${bed}.gz + """ + +} + +//Step 2 requires GPU +process deepvariant_step2 { + container = "${params.containers.deepvariant}" + //clusterOptions '--gres=lscratch:100,gpu:p100:1 --partition=gpu' + label 'process_somaticcaller' + + input: + tuple val(samplename), path(tfrecords), path(json), path(tfgvcf), path(bed) + + output: + tuple val(samplename), path(tfrecords), + path(tfgvcf), path("outdv/*"), path(bed) + + 
script: + sub_cpus = "$task.cpus".toInteger() - 1 + + """ + mkdir -p outdv/ + call_variants \ + --examples $tfrecords \ + --outfile outdv/${samplename}_call_variants_output.tfrecord.gz \ + --checkpoint $MODEL \ + --writer_threads $sub_cpus + """ + + stub: + """ + mkdir -p outdv + touch "outdv/${samplename}_call_variants_output.tfrecord.gz" + """ +} + + +//Step 3 DV +process deepvariant_step3 { + container = "${params.containers.deepvariant}" + label 'process_somaticcaller' + + input: + tuple val(samplename), path(tfrecords), + path(tfgvcf), path("outdv/*"), path(bed) + + output: + tuple val(samplename), path("${samplename}_${bed}.vcf.gz"), path("${samplename}_${bed}.vcf.gz.tbi"), + path("${samplename}_${bed}.gvcf.gz"), path("${samplename}_${bed}.gvcf.gz.tbi") + + + script: + """ + postprocess_variants \ + --ref $GENOMEREF \ + --infile outdv/${samplename}_call_variants_output.tfrecord.gz \ + --outfile ${samplename}_${bed}.vcf.gz \ + --gvcf_outfile ${samplename}_${bed}.gvcf.gz \ + --nonvariant_site_tfrecord_path . 
+ """ + + stub: + """ + touch ${samplename}_${bed}.vcf.gz ${samplename}_${bed}.vcf.gz.tbi + touch ${samplename}_${bed}.gvcf.gz ${samplename}_${bed}.gvcf.gz.tbi + + """ + +} + + +process bcfconcat { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(samplename), path(vcf), path(index), val(type) + + output: + tuple val(samplename), path("${samplename}.${type}.gz"), path("${samplename}.${type}.gz.tbi") + + script: + vcfin=vcf.join(" ") + + """ + bcftools concat $vcfin --write-index -Oz -o ${samplename}.${type}.gz##idx##${samplename}.${type}.gz.tbi + """ + + stub: + """ + touch ${samplename}.${type}.gz ${samplename}.${type}.gz.tbi + """ + +} + + +process glnexus { + container = "${params.containers.logan}" + label 'process_medium' + + input: + path(gvcfs) + + output: + tuple path("germline.v.bcf"), + path("germline.norm.vcf.gz"), path("germline.norm.vcf.gz.tbi") + + script: + + """ + glnexus_cli --config DeepVariant_unfiltered \ + *.gvcf.gz --threads 8 > germline.v.bcf + + bcftools norm \ + -m - \ + -Oz \ + --threads 8 \ + -f $GENOMEREF \ + -o germline.norm.vcf.gz \ + germline.v.bcf + + bcftools index \ + -f -t \ + --threads 8 \ + germline.norm.vcf.gz + """ + + stub: + """ + touch germline.v.bcf + touch germline.norm.vcf.gz + touch germline.norm.vcf.gz.tbi + """ +} + + + + +//Combined DeepVariant +process deepvariant_combined { + module = ['deepvariant/1.6.0'] + + input: + tuple val(samplename), path(bam), path(bai) + + output: + tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi"), + path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi") + + + script: + """ + run_deepvariant \ + --model_type=WGS \ + --ref=$GENOMEREF \ + --reads=${bam} \ + --output_gvcf=${samplename}.gvcf.gz \ + --output_vcf=${samplename}.vcf.gz \ + --num_shards=16 + """ + + + stub: + """ + touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi + touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi + """ + + +} \ No newline 
at end of file diff --git a/modules/local/fastq_screen.nf b/modules/local/fastq_screen.nf new file mode 100644 index 0000000..9f88fb9 --- /dev/null +++ b/modules/local/fastq_screen.nf @@ -0,0 +1,43 @@ +FASTQ_SCREEN_CONF=file(params.fastq_screen_conf) + +process fastq_screen { + //Uses Trimmed Files + container = "${params.containers.loganqc}" + label 'process_medium' + + input: + tuple val(samplename), + path("${samplename}.R1.trimmed.fastq.gz"), + path("${samplename}.R2.trimmed.fastq.gz"), + path("${samplename}.fastp.json"), + path("${samplename}.fastp.html") + + output: + tuple path("${samplename}.R1.trimmed_screen.html"), + path("${samplename}.R1.trimmed_screen.png"), + path("${samplename}.R1.trimmed_screen.txt"), + path("${samplename}.R2.trimmed_screen.html"), + path("${samplename}.R2.trimmed_screen.png"), + path("${samplename}.R2.trimmed_screen.txt") + + script: + FASTQ_SCREEN_CONF=file(params.fastq_screen_conf) + + """ + fastq_screen --conf $FASTQ_SCREEN_CONF \ + --outdir . \ + --threads 8 \ + --subset 1000000 \ + --aligner bowtie2 \ + --force \ + ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz + + """ + + stub: + """ + touch ${samplename}.R1.trimmed_screen.html ${samplename}.R1.trimmed_screen.png + touch ${samplename}.R1.trimmed_screen.txt ${samplename}.R2.trimmed_screen.html + touch ${samplename}.R2.trimmed_screen.png ${samplename}.R2.trimmed_screen.txt + """ +} \ No newline at end of file diff --git a/modules/local/fastqc.nf b/modules/local/fastqc.nf new file mode 100644 index 0000000..af65c62 --- /dev/null +++ b/modules/local/fastqc.nf @@ -0,0 +1,36 @@ + +process fastqc { + """ + Quality-control step to assess sequencing quality of each sample. + FastQC generates a set of basic statistics to identify problems + that can arise during sequencing or library preparation. 
+ @Input: + Recalibrated BAM file (scatter) + @Output: + FastQC report and zip file containing sequencing quality information + """ + container = "${params.containers.loganqc}" + label 'process_medium' + + input: + tuple val(samplename), path(bam), path(bai) + output: + tuple val(samplename), path("${samplename}_fastqc.html"), path("${samplename}_fastqc.zip") + + script: + + """ + mkdir -p fastqc + fastqc -t 8 \ + -f bam \ + -o fastqc \ + $bam + mv fastqc/${samplename}.bqsr_fastqc.html ${samplename}_fastqc.html + mv fastqc/${samplename}.bqsr_fastqc.zip ${samplename}_fastqc.zip + """ + + stub: + """ + touch ${samplename}_fastqc.html ${samplename}_fastqc.zip + """ +} diff --git a/modules/local/fc_lane.nf b/modules/local/fc_lane.nf new file mode 100644 index 0000000..d5adff5 --- /dev/null +++ b/modules/local/fc_lane.nf @@ -0,0 +1,25 @@ +process fc_lane { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(samplename), path(fqs) + + output: + tuple val(samplename), + path("${samplename}.fastq.info.txt") + + script: + GET_FLOWCELL_LANES=file(params.get_flowcell_lanes) + + """ + python $GET_FLOWCELL_LANES \ + ${fqs[0]} \ + ${samplename} > ${samplename}.fastq.info.txt + """ + + stub: + """ + touch ${samplename}.fastq.info.txt + """ +} diff --git a/modules/local/ffpe.nf b/modules/local/ffpe.nf new file mode 100644 index 0000000..ff6ac78 --- /dev/null +++ b/modules/local/ffpe.nf @@ -0,0 +1,169 @@ + +process sobdetect_pass1 { + container = "${params.containers.ffpe}" + label 'process_medium' + + input: + tuple val(sample), path(vcf), path(bam), val(vc) + + output: + tuple val(sample), + path("${vc}/pass1/${sample}.pass1.sobdetect.vcf"), + path("${vc}/pass1/${sample}.info") + + script: + """ + mkdir -p ${vc}/pass1 + java -jar $SOB_JAR \ + --input-type VCF \ + --input-variants ${vcf} \ + --input-bam ${bam} \ + --output-variants ${sample}.pass1.sobdetect.vcf \ + --only-passed false + + bcftools query \ + -f 
'%INFO/numF1R2Alt\t%INFO/numF2R1Alt\t%INFO/numF1R2Ref\t%INFO/numF2R1Ref\t%INFO/numF1R2Other\t%INFO/numF2R1Other\t%INFO/SOB\n' \ + ${sample}.sobdetect.vcf \ + | awk '{if (\$1 != "."){tum_alt=\$1+\$2; tum_depth=\$1+\$2+\$3+\$4+\$5+\$6; if (tum_depth==0){tum_af=1} else {tum_af=tum_alt/tum_depth }; print tum_alt,tum_depth,tum_af,\$7}}' \ + > ${sample}.info + + mv ${sample}.pass1.sobdetect.vcf ${vc}/pass1 + mv ${sample}.info ${vc}/pass1 + + """ + + stub: + """ + mkdir -p ${vc}/pass1 + touch ${vc}/pass1/${sample}.pass1.sobdetect.vcf + touch ${vc}/pass1/${sample}.info + """ +} + + + +// Cohort parameter calculation +process sobdetect_cohort_params { + container = "${params.containers.ffpe}" + label 'process_medium' + + input: + path info_files + + output: + tuple path("all_samples.info"), path("cohort_params.txt") + + script: + allinfos = info_file.join(" ") + + """ + echo -e "#TUMOR.alt\tTUMOR.depth\tTUMOR.AF\tSOB\tFS\tSOR\tTLOD\tReadPosRankSum" > all_samples.info + cat ${allinfos} >> all_samples.info + + + grep -v '^#' all_samples.info \ + | awk '{ total1 += \$1; ss1 += \$1^2; total2 += \$2; ss2 += \$2^2; total3 += \$3; ss3 += \$3^2; total4 += \$4; ss4 += \$4^2 } END { print total1/NR,total2/NR,total3/NR,total4/NR; print sqrt(ss1/NR-(total1/NR)^2),sqrt(ss2/NR-(total2/NR)^2),sqrt(ss3/NR-(total3/NR)^3),sqrt(ss4/NR-(total4/NR)^2) }' > cohort_params.txt + """ + + stub: + """ + touch all_samples.info cohort_params.txt + """ +} + +process sobdetect_pass2 { + container = "${params.containers.ffpe}" + label 'process_medium' + + input: + tuple val(sample), path(vcf), path(bam), val(vc), path(sample_info), path(params_file) + + output: + tuple val(sample), + path("${vc}/pass2/${sample}.pass2.sobdetect.vcf"), + path("${vc}/pass2/${sample}.info"), + path("${vc}/pass2/${sample}_${vc}.artifact_filtered.vcf.gz"), + path("${vc}/pass2/${sample}_${vc}.artifact_filtered.vcf.gz.tbi") + + script: + """ + mkdir -p ${vc}/pass2 + java -jar $SOB_JAR \ + --input-type VCF \ + --input-variants 
${vcf} \ + --input-bam ${bam} \ + --output-variants pass2_output.vcf \ + --only-passed true \ + --standardization-parameters ${params_file} + + bcftools query \ + -f '%INFO/numF1R2Alt\t%INFO/numF2R1Alt\t%INFO/numF1R2Ref\t%INFO/numF2R1Ref\t%INFO/numF1R2Other\t%INFO/numF2R1Other\t%INFO/SOB\n' \ + pass2_output.vcf \ + | awk '{if (\$1 != "."){tum_alt=\$1+\$2; tum_depth=\$1+\$2+\$3+\$4+\$5+\$6; if (tum_depth==0){tum_af=1} else {tum_af=tum_alt/tum_depth }; print tum_alt,tum_depth,tum_af,\$7}}' \ + > F{sample}.info + + # Artifact filtering + bcftools filter \ + -e 'INFO/pArtifact < 0.05' \ + -Oz \ + -o ${sample}.artifact_filtered.vcf.gz ${sample}.sobdetect.vcf + + bcftools index -f -t ${sample}.artifact_filtered.vcf.gz + + mv ${sample}.pass2.sobdetect.vcf ${vc}/pass2 + mv ${sample}.info ${vc}/pass2 + mv ${sample}.artifact_filtered.vcf.gz ${vc}/pass2 + """ + + stub: + """ + mkdir -p ${vc}/pass2 + touch ${vc}/pass2/${sample}.pass2.sobdetect.vcf + touch ${vc}/pass2/${sample}.info + touch ${vc}/pass2/${sample}_${vc}.artifact_filtered.vcf.gz + touch ${vc}/pass2/${sample}_${vc}.artifact_filtered.vcf.gz.tbi + + """ +} + +// Metrics calculation +process sobdetect_metrics { + container = "${params.containers.ffpe}" + label 'process_medium' + + input: + path (pass1_vcfs) + path (pass2_vcfs) + + output: + tuple path("variant_count_table.txt"), + path("all_metrics.txt") + + script: + """ + echo -e "#ID\tDefaultParam\tCohortParam\tTotalVariants" > variant_count_table.txt + echo -e "#SAMPLE_ID\tParam\tCHROM\tPOS\tnumF1R2Alt\tnumF2R1Alt\tnumF1R2Ref\tnumF2R1Ref\tnumF1R2Other\tnumF2R1Other\tSOB\tpArtifact\tFS\tSOR\tTLOD\tReadPosRankSum" > all_metrics.txt + + P1FILES=(\$(echo ${pass1_vcfs})) + P2FILES=(\$(echo ${pass2_vcfs})) + for (( i=0; i<\${#P1FILES[@]}; i++ )); do + MYID=\$(basename -s ".sobdetect.vcf" \${P1FILES[\$i]}) + + total_count=\$(grep -v ^# \${P1FILES[\$i]} | wc -l) || total_count=0 + count_1p=\$(bcftools query -f '%INFO/pArtifact\n' \${P1FILES[\$i]} | awk '{if (\$1 != "." 
&& \$1 < 0.05){print}}' | wc -l) + count_2p=\$(bcftools query -f '%INFO/pArtifact\n' \${P2FILES[\$i]} | awk '{if (\$1 != "." && \$1 < 0.05){print}}' | wc -l) + + echo -e "\$MYID\t\$count_1p\t\$count_2p\t\$total_count" >> variant_count_table.txt + + bcftools query -f '%CHROM\t%POS\t%INFO/numF1R2Alt\t%INFO/numF2R1Alt\t%INFO/numF1R2Ref\t%INFO/numF2R1Ref\t%INFO/numF1R2Other\t%INFO/numF2R1Other\t%INFO/SOB\t%INFO/pArtifact\n' \${P1FILES[\$i]} | awk -v id=\$MYID 'BEGIN{OFS="\t"}{print id,"PASS_1",\$0}' >> all_metrics.txt + bcftools query -f '%CHROM\t%POS\t%INFO/numF1R2Alt\t%INFO/numF2R1Alt\t%INFO/numF1R2Ref\t%INFO/numF2R1Ref\t%INFO/numF1R2Other\t%INFO/numF2R1Other\t%INFO/SOB\t%INFO/pArtifact\n' \${P2FILES[\$i]} | awk -v id=\$MYID 'BEGIN{OFS="\t"}{print id,"PASS_2",\$0}' >> all_metrics.txt + done + """ + + stub: + """ + touch variant_count_table.txt all_metrics.txt + """ + +} + diff --git a/modules/local/freec.nf b/modules/local/freec.nf new file mode 100644 index 0000000..93b56c3 --- /dev/null +++ b/modules/local/freec.nf @@ -0,0 +1,227 @@ +//FREEC +//mm10 Paired-Sequenza, FREEC-tumor only +GENOMEREF = file(params.genomes[params.genome].genome) + +//CNV Intervals +if (params.intervals){ + CNVTARGETS = file(params.intervals) +}else{ + CNVTARGETS = file(params.genomes[params.genome].intervals) +} + + +REFORMATBED = params.script_reformatbed +FREEC_SCRIPT = params.script_freec +if(params.exome){ + FREECPAIR_SCRIPT = params.script_freecpaired_exome +}else{ + FREECPAIR_SCRIPT = params.script_freecpaired +} +FREECSIGNIFICANCE = params.freec_significance +FREECLENGTHS = file(params.genomes[params.genome].FREEC.FREECLENGTHS) +FREECCHROMS = file(params.genomes[params.genome].FREEC.FREECCHROMS) +FREECPILEUP = file(params.genomes[params.genome].FREEC.FREECPILEUP) +FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS) +FREECPLOT = params.freec_plot + + +process freec_paired { + container = "${params.containers.logan}" + label 'process_long' + errorStrategy 'ignore' + + 
input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_CNVs.p.value.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt"), + path("${tumorname}_vs_${normalname}_BAF.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt.log2.png"), + path("${tumorname}_vs_${normalname}_ratio.txt.png") + + shell: + + """ + + perl $FREECPAIR_SCRIPT \ + . \ + $FREECLENGTHS \ + $FREECCHROMS \ + ${tumor} \ + ${normal} \ + $FREECPILEUP \ + $GENOMEREF \ + $FREECSNPS \ + $CNVTARGETS + + freec -conf freec_genome_config.txt + + cat $FREECSIGNIFICANCE | \ + R --slave \ + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt + + cat $FREECPLOT | \ + R --slave \ + --args 2 \ + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt + + mv ${tumor}_CNVs.p.value.txt ${tumorname}_vs_${normalname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_vs_${normalname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_vs_${normalname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_vs_${normalname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png ${tumorname}_vs_${normalname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_vs_${normalname}_ratio.txt.png + + """ + + stub: + """ + touch ${tumorname}_vs_${normalname}_CNVs.p.value.txt + touch ${tumorname}_vs_${normalname}_ratio.txt + touch ${tumorname}_vs_${normalname}_BAF.txt + touch ${tumorname}_vs_${normalname}_BAF.txt.png + touch ${tumorname}_vs_${normalname}_ratio.txt.log2.png + touch ${tumorname}_vs_${normalname}_ratio.txt.png + + """ +} + + + + +process freec_paired_exome { + container = "${params.containers.logan}" + label 'process_long' + errorStrategy 'ignore' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_CNVs.p.value.txt"), + 
path("${tumorname}_vs_${normalname}_ratio.txt"), + path("${tumorname}_vs_${normalname}_BAF.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt.log2.png"), + path("${tumorname}_vs_${normalname}_ratio.txt.png") + + shell: + + """ + python $REFORMATBED -i $CNVTARGETS + perl $FREECPAIR_SCRIPT \ + . \ + $FREECLENGTHS \ + $FREECCHROMS \ + ${tumor} \ + ${normal} \ + $FREECPILEUP \ + $GENOMEREF \ + $FREECSNPS \ + exome_targets.bed + + freec -conf freec_exome_config.txt + + cat $FREECSIGNIFICANCE | \ + R --slave \ + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt + + cat $FREECPLOT | \ + R --slave \ + --args 2 \ + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt + + mv ${tumor}_CNVs.p.value.txt ${tumorname}_vs_${normalname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_vs_${normalname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_vs_${normalname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_vs_${normalname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png ${tumorname}_vs_${normalname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_vs_${normalname}_ratio.txt.png + + """ + + stub: + """ + touch ${tumorname}_vs_${normalname}_CNVs.p.value.txt + touch ${tumorname}_vs_${normalname}_ratio.txt + touch ${tumorname}_vs_${normalname}_BAF.txt + touch ${tumorname}_vs_${normalname}_BAF.txt.png + touch ${tumorname}_vs_${normalname}_ratio.txt.log2.png + touch ${tumorname}_vs_${normalname}_ratio.txt.png + + """ +} + + + +process freec { + container = "${params.containers.logan}" + label 'process_medium' + errorStrategy 'ignore' + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), + path("${tumorname}_CNVs.p.value.txt"), + path("${tumorname}_ratio.txt"), + path("${tumorname}_BAF.txt"), + path("${tumorname}_ratio.txt.log2.png"), + path("${tumorname}_ratio.txt.png") + + + shell: + + """ + perl $FREEC_SCRIPT \ + . 
\ + $FREECLENGTHS \ + $FREECCHROMS \ + ${tumor} \ + $FREECPILEUP \ + $GENOMEREF \ + $FREECSNPS \ + $CNVTARGETS + + freec -conf freec_genome_config.txt + + cat $FREECSIGNIFICANCE | \ + R --slave \ + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt + + cat $FREECPLOT | \ + R --slave \ + --args 2 \ + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt + + mv ${tumor}_CNVs.p.value.txt ${tumorname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png ${tumorname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_ratio.txt.png + + """ + + stub: + """ + touch ${tumorname}_CNVs.p.value.txt + touch ${tumorname}_ratio.txt + touch ${tumorname}_BAF.txt + touch ${tumorname}_BAF.txt.png + touch ${tumorname}_ratio.txt.log2.png + touch ${tumorname}_ratio.txt.png + + """ +} diff --git a/modules/local/gatk_varianteval.nf b/modules/local/gatk_varianteval.nf new file mode 100644 index 0000000..be84526 --- /dev/null +++ b/modules/local/gatk_varianteval.nf @@ -0,0 +1,78 @@ +GENOMEREF=file(params.genomes[params.genome].genome) +DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz" + + +process gatk_varianteval { + /* + Quality-control step to calculate various quality control metrics from a + variant callset. These metrics include the number of raw or filtered SNP + counts; ratio of transition mutations to transversions; concordance of a + particular sample's calls to a genotyping chip; number of s per sample. 
+ Please see GATK's documentation for more information: + https://gatk.broadinstitute.org/hc/en-us/articles/360040507171-VariantEval + @Input: + Per sample gVCF file (scatter) + @Output: + Evaluation table containing a collection of summary statistics + */ + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") + output: + path("${samplename}.germline.eval.grp") + script: + """ + gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \ + -R $GENOMEREF \ + -O ${samplename}.germline.eval.grp \ + --dbsnp $DBSNP \ + --eval ${samplename}.gvcf.gz + """ + + stub: + + """ + touch ${samplename}.germline.eval.grp + """ + +} + +process collectvariantcallmetrics { + /* + Quality-control step to collect summary metrics about snps and indels + called in a multisample VCF file. Please see the Broad's documentation + for more information about each field in the generated log file: + https://broadinstitute.github.io/picard/picard-metric-definitions.html + @Input: + Multi-sample gVCF file (indirect-gather-due-to-aggregation) + @Output: + Text file containing a collection of metrics relating to snps and indels + */ + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple path(germlinevcf),path(germlinetbi) + + output: + tuple path("raw_variants.variant_calling_detail_metrics"), + path("raw_variants.variant_calling_summary_metrics") + + + script: + """ + java -Xmx24g -jar \${PICARDJARPATH}/picard.jar \ + CollectVariantCallingMetrics \ + INPUT=${germlinevcf} \ + OUTPUT= "raw_variants" \ + DBSNP=$DBSNP Validation_Stringency=SILENT + """ + + stub: + """ + touch raw_variants.variant_calling_detail_metrics raw_variants.variant_calling_summary_metrics + """ + +} diff --git a/modules/local/germline.nf b/modules/local/germline.nf deleted file mode 100644 index 285a0f3..0000000 --- a/modules/local/germline.nf +++ /dev/null @@ -1,168 +0,0 @@ 
-GENOMEREF=file(params.genomes[params.genome].genome) -MODEL="/opt/models/wgs/model.ckpt" - - -//Processes -//Deep Variant -process deepvariant_step1 { - - input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) - - output: - tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"), - path("gvcf/${samplename}.gvcf.tfrecord_${bed}.gz") - - script: - """ - mkdir -p outputshard - mkdir -p gvcf - make_examples \ - --mode calling \ - --ref $GENOMEREF \ - --regions ${bed} \ - --reads ${samplename}.bam \ - --channels insert_size \ - --examples outputshard/${samplename}.tfrecord_${bed}.gz \ - --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz - """ - - stub: - """ - mkdir -p outputshard - mkdir -p gvcf - touch outputshard/${samplename}.tfrecord_${bed}.gz - touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz - """ - -} - -//Step 2 requires GPU -process deepvariant_step2 { - - input: - tuple val(samplename), path(tfrecords), path(tfgvcf) - - output: - tuple val(samplename), path(tfrecords), - path("${samplename}_call_variants_output.tfrecord.gz"), path(tfgvcf) - - script: - - """ - call_variants \ - --examples "${samplename}.tfrecord_*.gz" \ - --outfile ${samplename}_call_variants_output.tfrecord.gz \ - --checkpoint $MODEL \ - --num_readers 16 - """ - - stub: - """ - touch ${samplename}_call_variants_output.tfrecord.gz - """ - -} - - -//Step 3 DV -process deepvariant_step3 { - - input: - tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"), - path(tfgvcf) - - output: - tuple val(samplename), path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi"), - path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") - - - script: - """ - postprocess_variants \ - --ref $GENOMEREF \ - --infile ${samplename}_call_variants_output.tfrecord.gz \ - --outfile ${samplename}.vcf.gz \ - --gvcf_outfile ${samplename}.gvcf.gz \ - --nonvariant_site_tfrecord_path . 
- """ - - stub: - """ - touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi - touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi - - """ - -} - -//Combined DeepVariant -process deepvariant_combined { - - input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - - output: - tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi"), - path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi") - - - script: - """ - run_deepvariant \ - --model_type=WGS \ - --ref=$GENOMEREF \ - --reads=${samplename}.bam \ - --output_gvcf= ${samplename}.gvcf.gz \ - --output_vcf=${samplename}.vcf.gz \ - --num_shards=16 - """ - - - stub: - """ - touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi - touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi - - """ - - -} - -process glnexus { - - input: - path(gvcfs) - - output: - tuple path("germline.v.bcf"), - path("germline.norm.vcf.gz"),path("germline.norm.vcf.gz.tbi") - - script: - - """ - glnexus_cli --config DeepVariant_unfiltered \ - *.gvcf.gz --threads 8 > germline.v.bcf - - bcftools norm \ - -m - \ - -Oz \ - --threads 8 \ - -f $GENOMEREF \ - -o germline.norm.vcf.gz \ - germline.v.bcf - - bcftools index \ - -f -t \ - --threads 8 \ - germline.norm.vcf.gz - - """ - - stub: - """ - touch germline.v.bcf - touch germline.norm.vcf.gz - touch germline.norm.vcf.gz.tbi - """ -} diff --git a/modules/local/gridss.nf b/modules/local/gridss.nf new file mode 100644 index 0000000..ab91c03 --- /dev/null +++ b/modules/local/gridss.nf @@ -0,0 +1,130 @@ +BWAGENOME=file(params.genomes[params.genome].bwagenome) +BLACKLIST=file(params.genomes[params.genome].GRIDSSBLACKLIST) +GENOMEREF=file(params.genomes[params.genome].genome) + +if (params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ + GENOMEVER = params.genomes[params.genome].GENOMEVER + PONSGL = file(params.genomes[params.genome].PONSGL) + PONSV = file(params.genomes[params.genome].PONSV) + SVHOTSPOT = 
file(params.genomes[params.genome].SVHOTSPOT) + REPEATMASK = file(params.genomes[params.genome].REPEATMASK) +} + + + +process gridss_somatic { + container = "${params.containers.sv}" + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.vcf.gz"), + path("${tumorname}_vs_${normalname}.vcf.gz.tbi"), + path("${tumorname}_vs_${normalname}.vcf.gz.assembly.bam"), + path("${tumorname}.gripss.vcf.gz"), + path("${tumorname}.gripss.vcf.gz.tbi"), + path("${tumorname}.gripss.filtered.vcf.gz"), + path("${tumorname}.gripss.filtered.vcf.gz.tbi") + + script: + """ + gridss --jar /opt2/gridss/gridss-2.13.2-gridss-jar-with-dependencies.jar \ + -r $BWAGENOME \ + -l ${normalname},${tumorname} \ + -o ${tumorname}_vs_${normalname}.vcf.gz -b $BLACKLIST \ + --picardoptions VALIDATION_STRINGENCY=LENIENT \ + --jvmheap 90g \ + --otherjvmheap 64g \ + -t $task.cpus \ + ${normal} ${tumor} + + mkdir -p ${tumorname}_vs_${normalname} + + java -jar /opt2/hmftools/gripss.jar \ + -sample ${tumorname} \ + -reference ${normalname} \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + -pon_sgl_file $PONSGL \ + -pon_sv_file $PONSV \ + -known_hotspot_file $SVHOTSPOT \ + -repeat_mask_file $REPEATMASK \ + -vcf ${tumorname}_vs_${normalname}.vcf.gz \ + -output_dir ${tumorname}_vs_${normalname} + + mv ${tumorname}_vs_${normalname}/* . 
+ """ + + + stub: + + """ + touch "${tumorname}_vs_${normalname}.vcf.gz" + touch "${tumorname}_vs_${normalname}.vcf.gz.tbi" + touch "${tumorname}_vs_${normalname}.vcf.gz.assembly.bam" + touch "${tumorname}.gripss.vcf.gz" + touch "${tumorname}.gripss.vcf.gz.tbi" + touch "${tumorname}.gripss.filtered.vcf.gz" + touch "${tumorname}.gripss.filtered.vcf.gz.tbi" + """ +} + + + +process gridss_tonly { + container = "${params.containers.sv}" + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), + path("${tumorname}.vcf.gz"), + path("${tumorname}.vcf.gz.tbi"), + path("${tumorname}.vcf.gz.assembly.bam"), + path("${tumorname}.gripss.vcf.gz"), + path("${tumorname}.gripss.vcf.gz.tbi"), + path("${tumorname}.gripss.filtered.vcf.gz"), + path("${tumorname}.gripss.filtered.vcf.gz.tbi") + + script: + """ + gridss --jar /opt2/gridss/gridss-2.13.2-gridss-jar-with-dependencies.jar \ + -r $BWAGENOME \ + -l ${tumorname} \ + -o ${tumorname}.vcf.gz -b $BLACKLIST \ + --picardoptions VALIDATION_STRINGENCY=LENIENT \ + ${tumor} -t $task.cpus + + mkdir -p ${tumorname} + + java -jar /opt2/hmftools/gripss.jar \ + -sample ${tumorname} \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + -pon_sgl_file $PONSGL \ + -pon_sv_file $PONSV \ + -known_hotspot_file $SVHOTSPOT \ + -repeat_mask_file $REPEATMASK \ + -vcf ${tumorname}.vcf.gz \ + -output_dir ${tumorname} + + mv ${tumorname}/* . 
+ """ + + + stub: + + """ + touch "${tumorname}.vcf.gz" + touch "${tumorname}.vcf.gz.tbi" + touch "${tumorname}.vcf.gz.assembly.bam" + touch "${tumorname}.gripss.vcf.gz" + touch "${tumorname}.gripss.vcf.gz.tbi" + touch "${tumorname}.gripss.filtered.vcf.gz" + touch "${tumorname}.gripss.filtered.vcf.gz.tbi" + """ +} diff --git a/modules/local/kraken.nf b/modules/local/kraken.nf new file mode 100644 index 0000000..752475f --- /dev/null +++ b/modules/local/kraken.nf @@ -0,0 +1,54 @@ +BACDB=file(params.genomes[params.genome].KRAKENBACDB) + +process kraken { + /* + Quality-control step to assess for potential sources of microbial contamination. + If there are high levels of microbial contamination, Kraken will provide an + estimation of the taxonomic composition. Kraken is used in conjunction with + Krona to produce an interactive reports. + @Input: + Trimmed FastQ files (scatter) + @Output: + Kraken logfile and interactive krona report + */ + container = "${params.containers.loganqc}" + label 'process_high' + + input: + tuple val(samplename), + path(fqs) + + output: + tuple val(samplename), + //path("${samplename}.trimmed.kraken_bacteria.out.txt"), + path("${samplename}.trimmed.kraken_bacteria.taxa.txt"), + path("${samplename}.trimmed.kraken_bacteria.krona.html") + + + script: + """ + #Setups temporary directory for + #intermediate files with built-in + #mechanism for deletion on exit + + + # Copy kraken2 db to local node storage to reduce filesystem strain + cp -rv $BACDB . 
+ kdb_base=\$(basename $BACDB) + + kraken2 --db $BACDB \ + --threads 16 --report ${samplename}.trimmed.kraken_bacteria.taxa.txt \ + --output - \ + --gzip-compressed \ + --paired ${fqs[0]} ${fqs[1]} + # Generate Krona Report + cut -f2,3 ${samplename}.trimmed.kraken_bacteria.taxa.txt | \ + ktImportTaxonomy - -o ${samplename}.trimmed.kraken_bacteria.krona.html + """ + + stub: + """ + touch ${samplename}.trimmed.kraken_bacteria.taxa.txt ${samplename}.trimmed.kraken_bacteria.krona.html + """ + +} \ No newline at end of file diff --git a/modules/local/lofreq.nf b/modules/local/lofreq.nf new file mode 100644 index 0000000..b1df228 --- /dev/null +++ b/modules/local/lofreq.nf @@ -0,0 +1,66 @@ +GENOMEREF=file(params.genomes[params.genome].genome) + +//DBSNP for LOFREQ/MUSE +DBSNP=file(params.genomes[params.genome].dbsnp) +//HelperScripts +LOFREQ_CONVERT=params.lofreq_convert + +process lofreq_tn { + container "${params.containers.lofreq}" + label 'process_somaticcaller' + errorStrategy 'ignore' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi") + + script: + + """ + lofreq somatic -f $GENOMEREF -n ${normal} -t ${tumor} \ + -d $DBSNP \ + --threads $task.cpus \ + -l ${bed} \ + --call-indels \ + -o ${tumorname}_vs_${normalname}_${bed.simpleName}_ + + bcftools concat 
${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz + + $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \ + -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz + + bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead + + sed 's/^##FORMAT=/##FORMAT=/' temphead > temphead1 + bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\ + bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + + """ + + stub: + + """ + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi" + + """ +} + + diff --git a/modules/local/manta.nf b/modules/local/manta.nf new file mode 100644 index 0000000..3de341a --- /dev/null +++ b/modules/local/manta.nf @@ -0,0 +1,98 @@ +GENOMEREF=file(params.genomes[params.genome].genome) + +process manta_somatic { + container = "${params.containers.logan}" + label 'process_high' + + input: + tuple val(tumorname), path(tumorbam), path(tumorbai), + val(normalname), path(normalbam), path(normalbai) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.diplodSV.vcf.gz"), 
path("${tumorname}_vs_${normalname}.diplodSV.vcf.gz.tbi"), + path("${tumorname}_vs_${normalname}.somaticSV.vcf.gz"), path("${tumorname}_vs_${normalname}.somaticSV.vcf.gz.tbi"), + path("${tumorname}_vs_${normalname}.candidateSV.vcf.gz"), path("${tumorname}_vs_${normalname}.candidateSV.vcf.gz.tbi"), + path("${tumorname}_vs_${normalname}.candidateSmallIndels.vcf.gz"), path("${tumorname}_vs_${normalname}.candidateSmallIndels.vcf.gz.tbi") + + script: + """ + mkdir -p wd + + configManta.py \ + --normalBam=${normalbam} \ + --tumorBam=${tumorbam} \ + --referenceFasta=$GENOMEREF \ + --runDir=wd + + wd/runWorkflow.py -m local -j $task.cpus + + mv wd/results/variants/diploidSV.vcf.gz ${tumorname}_vs_${normalname}.diplodSV.vcf.gz + mv wd/results/variants/somaticSV.vcf.gz ${tumorname}_vs_${normalname}.somaticSV.vcf.gz + mv wd/results/variants/candidateSV.vcf.gz ${tumorname}_vs_${normalname}.candidateSV.vcf.gz + mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumorname}_vs_${normalname}.candidateSmallIndels.vcf.gz + + bcftools index -t ${tumorname}_vs_${normalname}.diplodSV.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}.somaticSV.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}.candidateSV.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}.candidateSmallIndels.vcf.gz + """ + + stub: + + """ + touch ${tumorname}_vs_${normalname}.diplodSV.vcf.gz ${tumorname}_vs_${normalname}.diplodSV.vcf.gz.tbi + touch ${tumorname}_vs_${normalname}.somaticSV.vcf.gz ${tumorname}_vs_${normalname}.somaticSV.vcf.gz.tbi + touch ${tumorname}_vs_${normalname}.candidateSV.vcf.gz ${tumorname}_vs_${normalname}.candidateSV.vcf.gz.tbi + touch ${tumorname}_vs_${normalname}.candidateSmallIndels.vcf.gz ${tumorname}_vs_${normalname}.candidateSmallIndels.vcf.gz.tbi + """ +} + + + + + +process manta_tonly { + container = "${params.containers.logan}" + label 'process_high' + + input: + tuple val(tumorname), path(tumorbam), path(tumorbai) + + output: + tuple val(tumorname), + 
path("${tumorname}.candidateSV.vcf.gz"), path("${tumorname}.candidateSV.vcf.gz.tbi"), + path("${tumorname}.candidateSmallIndels.vcf.gz"), path("${tumorname}.candidateSmallIndels.vcf.gz.tbi"), + path("${tumorname}.tumorSV.vcf.gz"), path("${tumorname}.tumorSV.vcf.gz.tbi") + + + script: + """ + mkdir -p wd + + configManta.py \ + --tumorBam=${tumorbam} \ + --referenceFasta=$GENOMEREF \ + --runDir=wd + + wd/runWorkflow.py -m local -j $task.cpus + + mv wd/results/variants/candidateSV.vcf.gz ${tumorname}.candidateSV.vcf.gz + mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumorname}.candidateSmallIndels.vcf.gz + mv wd/results/variants/tumorSV.vcf.gz ${tumorname}.tumorSV.vcf.gz + + bcftools index -t ${tumorname}.candidateSV.vcf.gz + bcftools index -t ${tumorname}.candidateSmallIndels.vcf.gz + bcftools index -t ${tumorname}.tumorSV.vcf.gz + + """ + + stub: + + """ + touch ${tumorname}.candidateSV.vcf.gz ${tumorname}.candidateSV.vcf.gz.tbi + touch ${tumorname}.candidateSmallIndels.vcf.gz ${tumorname}.candidateSmallIndels.vcf.gz.tbi + touch ${tumorname}.tumorSV.vcf.gz ${tumorname}.tumorSV.vcf.gz.tbi + + """ +} diff --git a/modules/local/mosdepth.nf b/modules/local/mosdepth.nf new file mode 100644 index 0000000..7edc122 --- /dev/null +++ b/modules/local/mosdepth.nf @@ -0,0 +1,41 @@ + +process mosdepth { + /* + Quality-control step to assess depth + @Input: + Recalibrated BAM file (scatter) + @Output: + `{prefix}.mosdepth.global.dist.txt` + `{prefix}.mosdepth.summary.txt` + `{prefix}.mosdepth.region.dist.txt` (if --by is specified) + `{prefix}.per-base.bed.gz|per-base.d4` (unless -n/--no-per-base is specified) + `{prefix}.regions.bed.gz` (if --by is specified) + `{prefix}.quantized.bed.gz` (if --quantize is specified) + `{prefix}.thresholds.bed.gz` (if --thresholds is specified) + */ + container = "${params.containers.loganqc}" + label 'process_medium' + + input: + tuple val(samplename), path(bam), path(bai) + + output: + tuple 
path("${samplename}.mosdepth.region.dist.txt"), + path("${samplename}.mosdepth.summary.txt"), + path("${samplename}.regions.bed.gz"), + path("${samplename}.regions.bed.gz.csi") + + + script: + """ + mosdepth -n --fast-mode --by 500 ${samplename} ${bam} -t $task.cpus + """ + + stub: + """ + touch "${samplename}.mosdepth.region.dist.txt" + touch "${samplename}.mosdepth.summary.txt" + touch "${samplename}.regions.bed.gz" + touch "${samplename}.regions.bed.gz.csi" + """ +} \ No newline at end of file diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf new file mode 100644 index 0000000..b6404ef --- /dev/null +++ b/modules/local/multiqc.nf @@ -0,0 +1,36 @@ + +process multiqc { + """ + Reporting step to aggregate sample summary statistics and quality-control + information across all samples. This will be one of the last steps of the + pipeline. The inputs listed here are to ensure that this step runs last. + During runtime, MultiQC will recursively crawl through the working directory + and parse files that it supports. + @Input: + List of files to ensure this step runs last (gather) + @Output: + Interactive MulitQC report and a QC metadata table + """ + container = "${params.containers.multiqc}" + label 'process_low' + + input: + path(allqcin) + + output: + path("MultiQC_Report.html") + + script: + + """ + multiqc . 
\ + -f --interactive \ + -n "MultiQC_Report.html" \ + """ + + stub: + + """ + touch MultiQC_Report.html + """ +} diff --git a/modules/local/muse.nf b/modules/local/muse.nf new file mode 100644 index 0000000..129c6fb --- /dev/null +++ b/modules/local/muse.nf @@ -0,0 +1,44 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) +GENOMEDICT=file(params.genomes[params.genome].genomedict) +GERMLINE_RESOURCE=file(params.genomes[params.genome].germline_resource) + +//DBSNP for LOFREQ/MUSE +DBSNP=file(params.genomes[params.genome].dbsnp) + +process muse_tn { + container "${params.containers.logan}" + label 'process_somaticcaller' + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.vcf.gz") + + script: + + """ + MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal + MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ + -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G + + bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz + + """ + + stub: + + """ + touch "${tumorname}_vs_${normalname}.vcf.gz" + """ + +} diff --git a/modules/local/mutect2.nf b/modules/local/mutect2.nf new file mode 100644 index 0000000..4fbf00b --- /dev/null +++ b/modules/local/mutect2.nf @@ -0,0 +1,518 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEDICT=file(params.genomes[params.genome].genomedict) +GERMLINE_RESOURCE=file(params.genomes[params.genome].germline_resource) +GNOMADGERMLINE=params.genomes[params.genome].gnomad +//PON Mutect2 
+PON=file(params.genomes[params.genome].PON) +TONLYPON=file(params.genomes[params.genome].tonly_PON) + + + +process mutect2 { + container "${params.containers.logan}" + label 'process_somaticcaller' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), + path(bed) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz.stats") + + + script: + """ + gatk Mutect2 \ + --reference $GENOMEREF \ + --intervals ${bed} \ + --input ${tumor} \ + --input ${normal} \ + --normal-sample ${normalname} \ + --tumor-sample ${tumorname} \ + $GNOMADGERMLINE \ + --panel-of-normals ${PON} \ + --output ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz \ + --f1r2-tar-gz ${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz \ + --independent-mates + """ + + stub: + """ + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz.stats + """ +} + +process pileup_paired { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(tumorname), + path(bam), path(bai), + path(bed), val(pilename) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.${pilename}.table") + + script: + """ + gatk --java-options -Xmx48g GetPileupSummaries \ + -I ${bam} \ + -V $GERMLINE_RESOURCE \ + -L ${bed} \ + -O ${tumorname}_${bed.simpleName}.${pilename}.table + + """ + + stub: + """ + touch ${tumorname}_${bed.simpleName}.${pilename}.table + """ + +} + + +process pileup_paired_t { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), 
path(normalbai), path(bed) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_${bed.simpleName}.tpileup.table") + + script: + """ + gatk --java-options -Xmx48g GetPileupSummaries \ + -I ${tumor} \ + -V $GERMLINE_RESOURCE \ + -L ${bed} \ + -O ${tumorname}_${bed.simpleName}.tpileup.table + + """ + + stub: + """ + touch ${tumorname}_${bed.simpleName}.tpileup.table + """ + +} + +process pileup_paired_n { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(tumorname), + val(normalname), + path("${normalname}_${bed.simpleName}.npileup.table") + + script: + """ + gatk --java-options -Xmx48g GetPileupSummaries \ + -I ${normal} \ + -V $GERMLINE_RESOURCE \ + -L ${bed} \ + -O ${normalname}_${bed.simpleName}.npileup.table + + """ + + stub: + """ + touch ${normalname}_${bed.simpleName}.npileup.table + """ +} + + +process contamination_paired { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(tumorname), val(normalname), + path(tumor_pileups), + path(normal_pileups) + + output: + tuple val(tumorname), + path("${tumorname}_allpileups.table"), + path("${tumorname}_normal.allpileups.table"), + path("${tumorname}.contamination.table"), + path("${tumorname}_normal.contamination.table") + + script: + //Gather all the Pileup summaries first for Tumor and Also for NORMAL and then run! 
+ alltumor = tumor_pileups.join(" -I ") + allnormal = normal_pileups.join(" -I ") + + + """ + gatk GatherPileupSummaries \ + --sequence-dictionary $GENOMEDICT \ + -I ${alltumor} -O ${tumorname}_allpileups.table + + gatk GatherPileupSummaries \ + --sequence-dictionary $GENOMEDICT \ + -I ${allnormal} -O ${tumorname}_normal.allpileups.table + + gatk CalculateContamination \ + -I ${tumorname}_allpileups.table \ + --matched-normal ${tumorname}_normal.allpileups.table \ + -O ${tumorname}.contamination.table + + gatk CalculateContamination \ + -I ${tumorname}_normal.allpileups.table \ + -O ${tumorname}_normal.contamination.table + + """ + + stub: + """ + touch ${tumorname}_allpileups.table + touch ${tumorname}_normal.allpileups.table + touch ${tumorname}.contamination.table + touch ${tumorname}_normal.contamination.table + """ + + +} + + +process learnreadorientationmodel { + container "${params.containers.logan}" + label 'process_highmem' + + input: + tuple val(sample), path(f1r2) + + output: + tuple val(sample), path("${sample}.read-orientation-model.tar.gz") + + script: + f1r2in = f1r2.join(" --input ") + + """ + gatk LearnReadOrientationModel \ + --output ${sample}.read-orientation-model.tar.gz \ + --input ${f1r2in} + """ + + stub: + """ + touch ${sample}.read-orientation-model.tar.gz + """ +} + + +process mergemut2stats { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(sample), path(stats) + + output: + tuple val(sample), path("${sample}.final.stats") + + script: + statsin = stats.join(" --stats ") + + """ + gatk MergeMutectStats \ + --stats ${statsin} \ + -O ${sample}.final.stats + """ + + stub: + """ + touch ${sample}.final.stats + """ + +} + + +process mutect2filter { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), + path(pileups), path(normal_pileups), path(tumorcontamination), path(normalcontamination) + + output: + tuple 
val("${tumor}_vs_${normal}"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") + + script: + mut2in = mutvcfs.join(" -I ") + + """ + gatk SortVcf -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz --CREATE_INDEX + gatk FilterMutectCalls \ + -R $GENOMEREF \ + -V ${tumor}_vs_${normal}.concat.vcf.gz \ + --ob-priors ${obs} \ + --contamination-table ${tumorcontamination} \ + --stats ${stats} \ + -O ${tumor}_vs_${normal}.mut2.marked.vcf.gz + gatk SelectVariants \ + -R $GENOMEREF \ + --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ + --exclude-filtered \ + --output ${tumor}_vs_${normal}.mut2.final.vcf.gz + + bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ + bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz + bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz + """ + + stub: + """ + touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz ${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi + touch ${tumor}_vs_${normal}.mut2.norm.vcf.gz ${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi + touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv + """ + + +} + +process pileup_paired_tonly { + container "${params.containers.logan}" + + label 'process_highmem' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), path(bed) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.tpileup.table") + + script: + + """ + gatk --java-options -Xmx48g GetPileupSummaries \ + -I ${tumor} \ + -V $GERMLINE_RESOURCE \ + -L ${bed} \ + -O ${tumorname}_${bed.simpleName}.tpileup.table + + """ + + stub: + """ + touch ${tumorname}_${bed.simpleName}.tpileup.table + 
+ """ + +} + + +process contamination_tumoronly { + container "${params.containers.logan}" + + label 'process_highmem' + + input: + tuple val(tumorname), + path(tumor_pileups) + + output: + tuple val(tumorname), + path("${tumorname}_allpileups.table"), + path("${tumorname}.contamination.table") + + script: + //Gather all the Pileup summaries first for Tumor and Also for NORMAL and then run! + alltumor = tumor_pileups.join(" -I ") + + + """ + gatk GatherPileupSummaries \ + --sequence-dictionary $GENOMEDICT \ + -I ${alltumor} -O ${tumorname}_allpileups.table + + gatk CalculateContamination \ + -I ${tumorname}_allpileups.table \ + -O ${tumorname}.contamination.table + + """ + + stub: + """ + touch ${tumorname}_allpileups.table + touch ${tumorname}.contamination.table + """ + +} + + + +process learnreadorientationmodel_tonly { + container "${params.containers.logan}" + + label 'process_highmem' + + input: + tuple val(sample), path(f1r2) + + output: + tuple val(sample), path("${sample}.read-orientation-model.tar.gz") + + script: + f1r2in = f1r2.join(" --input ") + + """ + gatk LearnReadOrientationModel \ + --output ${sample}.read-orientation-model.tar.gz \ + --input ${f1r2in} + """ + + stub: + """ + touch ${sample}.read-orientation-model.tar.gz + """ +} + + + + + +process mergemut2stats_tonly { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(sample), path(stats) + + output: + tuple val(sample), path("${sample}.final.stats") + + script: + statsin = stats.join(" --stats ") + + """ + gatk MergeMutectStats \ + --stats ${statsin} \ + -O ${sample}.final.stats + """ + + stub: + """ + touch ${sample}.final.stats + """ + +} + + + +process mutect2_t_tonly { + container "${params.containers.logan}" + label 'process_somaticcaller' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), path(bed) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz"), + 
path("${tumorname}_${bed.simpleName}.f1r2.tar.gz"), + path("${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz.stats") + + script: + + """ + gatk Mutect2 \ + --reference $GENOMEREF \ + --intervals ${bed} \ + --input ${tumor} \ + --tumor-sample ${tumorname} \ + $GNOMADGERMLINE \ + --panel-of-normals $TONLYPON \ + --output ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz \ + --f1r2-tar-gz ${tumorname}_${bed.simpleName}.f1r2.tar.gz \ + --independent-mates + """ + + stub: + """ + touch ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz + touch ${tumorname}_${bed.simpleName}.f1r2.tar.gz + touch ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz.stats + """ + + +} + + + +process mutect2filter_tonly { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(tumorcontamination) + output: + tuple val(sample), + path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), + path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), + path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") + + script: + //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz + mut2in = mutvcfs.join(" -I ") + + """ + gatk SortVcf -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz --CREATE_INDEX + gatk FilterMutectCalls \ + -R $GENOMEREF \ + -V ${sample}.tonly.concat.vcf.gz \ + --ob-priors ${obs} \ + --contamination-table ${tumorcontamination} \ + --stats ${stats} \ + -O ${sample}.tonly.mut2.marked.vcf.gz + + gatk SelectVariants \ + -R $GENOMEREF \ + --variant ${sample}.tonly.mut2.marked.vcf.gz \ + --exclude-filtered \ + --output ${sample}.tonly.mut2.final.vcf.gz + + bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ + bcftools norm --threads ${task.cpus} --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' |\ + bcftools view - -Oz -o 
${sample}.tonly.mut2.norm.vcf.gz + bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz + + """ + + stub: + """ + touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi + touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi + touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv + """ +} + + + diff --git a/modules/local/newmodules.nf b/modules/local/newmodules.nf new file mode 100644 index 0000000..90c8eec --- /dev/null +++ b/modules/local/newmodules.nf @@ -0,0 +1,81 @@ + + +/*DISCVR +process somaticcombine { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), val(normal), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + """ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_vs_${normal}_combined.vcf.gz \ + $vcfin2 + """ + + stub: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + + """ + touch ${tumorsample}_vs_${normal}_combined.vcf.gz + touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi + """ + +} +*/ + + + +/*DISCVRSeq +process somaticcombine_tonly { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), + path("${tumorsample}_combined_tonly.vcf.gz"), + path("${tumorsample}_combined_tonly.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + 
vcfin1.join(" -V:") + + """ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_combined_tonly.vcf.gz \ + $vcfin2 + """ + + stub: + """ + touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi + """ + +} +*/ diff --git a/modules/local/octopus.nf b/modules/local/octopus.nf new file mode 100644 index 0000000..4d66f11 --- /dev/null +++ b/modules/local/octopus.nf @@ -0,0 +1,158 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) + +//Octopus +SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest +GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest + + + +process octopus_tn { + container "${params.containers.octopus}" + label 'process_somaticcaller_high' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val("${tumorname}_vs_${normalname}"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") + + script: + """ + octopus -R $GENOMEREF -I ${normal} ${tumor} --normal-sample ${normalname} \ + -C cancer \ + --annotations AF AC AD DP SB -t ${bed} \ + --threads $task.cpus \ + $GERMLINE_FOREST \ + $SOMATIC_FOREST \ + -B 92Gb \ + -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz + """ + + stub: + """ + touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" + """ + +} + + + +process bcftools_index_octopus { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(tumor), + path(vcf) + + output: + tuple val(tumor), + path(vcf), + path("${vcf}.tbi") + + script: + """ + bcftools index -t ${vcf} + """ + + stub: + """ + touch ${vcf} + touch ${vcf}.tbi + """ + +} + + + +process 
octopus_convertvcf { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(tumor), val(normal), + val(oct), path(vcf), path(vcfindex) + + output: + tuple val(tumor), val(normal), path("${tumor}.octopus.norm.vcf.gz"), + path("${tumor}.octopus.norm.vcf.gz.tbi") + + + script: + """ + zcat ${vcf} | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/' > ${tumor}_temp.octopus.norm.vcf + bgzip ${tumor}_temp.octopus.norm.vcf + mv ${tumor}_temp.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz + bcftools index -t ${tumor}.octopus.norm.vcf.gz -f + """ + + stub: + """ + touch ${tumor}.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz.tbi + """ +} + + + + +process octopus_tonly { + container "${params.containers.octopus}" + label 'process_somaticcaller_high' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), path(bed) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") + + script: + """ + octopus -R $GENOMEREF -C cancer -I ${tumor} \ + --annotations AF AC AD DP SB \ + -B 92Gb \ + -t ${bed} \ + --threads ${task.cpus}\ + $SOMATIC_FOREST \ + -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz + """ + + stub: + """ + touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz + """ +} + + + + +process octopus_convertvcf_tonly { + container "${params.containers.logan}" + label 'process_low' + + input: + tuple val(tumor), val(oct), path(vcf), path(vcfindex) + + output: + tuple val(tumor), path("${tumor}.octopus_tonly.norm.vcf.gz"), + path("${tumor}.octopus_tonly.norm.vcf.gz.tbi") + + + script: + """ + zcat ${vcf} | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/' > ${tumor}_temp.octopus_tonly.norm.vcf + bgzip ${tumor}_temp.octopus_tonly.norm.vcf + mv ${tumor}_temp.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz + bcftools index -t ${tumor}.octopus_tonly.norm.vcf.gz -f + """ + + stub: + """ + touch ${tumor}.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz.tbi + """ +} 
+ diff --git a/modules/local/purple.nf b/modules/local/purple.nf new file mode 100644 index 0000000..2a58dda --- /dev/null +++ b/modules/local/purple.nf @@ -0,0 +1,303 @@ +//PURPLE +GENOMEREF=file(params.genomes[params.genome].genome) +HMFGENOMEREF = file(params.genomes[params.genome].HMFGENOME) +GENOMEVER = params.genomes[params.genome].GENOMEVER +GCPROFILE = file(params.genomes[params.genome].GCPROFILE) +GERMLINEHET = file(params.genomes[params.genome].GERMLINEHET) +DIPLODREG = file(params.genomes[params.genome].DIPLODREG) +ENSEMBLCACHE = params.genomes[params.genome].ENSEMBLCACHE +DRIVERS = file(params.genomes[params.genome].DRIVERS) +SOMATICHOTSPOTS = file(params.genomes[params.genome].SOMATICHOTSPOTS) +GERMLINEHOTSPOTS = file(params.genomes[params.genome].GERMLINEHOTSPOTS) +//if (params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ +//} + + + +process amber_tonly { + container = "${params.containers.logan}" + + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + + output: + tuple val(tumorname), path("${tumorname}_amber") + + script: + + """ + + java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -output_dir ${tumorname}_amber \ + -threads $task.cpus \ + -ref_genome_version $GENOMEVER \ + -loci $GERMLINEHET + + """ + + stub: + + """ + mkdir ${tumorname}_amber + touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc + """ +} + +process amber_tn { + container = "${params.containers.logan}" + + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val("${tumorname}_vs_${normalname}"), + val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_amber") + + script: + + """ + + java -Xmx32G -cp /opt2/hmftools/amber.jar 
com.hartwig.hmftools.amber.AmberApplication \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -reference ${normalname} -reference_bam ${normal} \ + -output_dir ${tumorname}_vs_${normalname}_amber \ + -threads $task.cpus \ + -ref_genome_version $GENOMEVER \ + -loci $GERMLINEHET + + """ + + stub: + + """ + mkdir ${tumorname}_vs_${normalname}_amber + touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc + """ +} + +process cobalt_tonly { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), path("${tumorname}_cobalt") + + script: + + """ + + java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -output_dir ${tumorname}_cobalt \ + -threads $task.cpus \ + -tumor_only_diploid_bed $DIPLODREG \ + -gc_profile $GCPROFILE + + """ + + stub: + + """ + mkdir ${tumorname}_cobalt + touch ${tumorname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_cobalt/${tumorname}.cobalt.gc.median.tsv + """ +} + +process cobalt_tn { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val("${tumorname}_vs_${normalname}"), + val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_cobalt") + + script: + + """ + java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -reference ${normalname} -reference_bam ${normal} \ + -output_dir ${tumorname}_vs_${normalname}_cobalt \ + -threads $task.cpus \ + -gc_profile $GCPROFILE + + """ + + stub: + + """ + mkdir ${tumorname}_vs_${normalname}_cobalt + touch ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz 
${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.gc.median.tsv + """ +} + + +process purple { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(id), val(tumorname), val(normalname), + path(amberin), path(cobaltin), + path(somaticvcf), path(somaticvcfindex) + + output: + tuple val(id), path("${id}") + + script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -reference ${normalname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + $ENSEMBLCACHE \ + -somatic_vcf ${somaticvcf} \ + -driver_gene_panel $DRIVERS \ + -somatic_hotspots $SOMATICHOTSPOTS \ + -threads $task.cpus \ + -output_dir ${id} + """ + + stub: + + """ + mkdir ${id} + touch ${id}/${id}.purple.cnv.somatic.tsv ${id}/${id}.purple.cnv.gene.tsv ${id}/${id}.driver.catalog.somatic.tsv + """ + +} + + +process purple_novc { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(id), val(tumorname), val(normalname), + path(amberin), path(cobaltin) + + output: + tuple val(id), val(tumorname), val(normalname), + path("${id}") + + script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -reference ${normalname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version $GENOMEVER \ + -ref_genome $HMFGENOMEREF \ + $ENSEMBLCACHE \ + -threads $task.cpus \ + -output_dir ${id} + + """ + + stub: + + """ + mkdir ${id} + touch ${id}/${id}.purple.cnv.somatic.tsv ${id}/${id}.purple.cnv.gene.tsv ${id}/${id}.driver.catalog.somatic.tsv + """ + +} + + +process purple_tonly { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), + path(amberin), path(cobaltin), + path(somaticvcf), path(somaticvcfindex) + + output: + tuple val(tumorname), path("${tumorname}") + 
+ script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + $ENSEMBLCACHE \ + -somatic_vcf ${somaticvcf} \ + -driver_gene_panel $DRIVERS \ + -somatic_hotspots $HOTSPOTS \ + -threads $task.cpus \ + -output_dir ${tumorname} + """ + + stub: + + """ + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv + """ + +} + + +process purple_tonly_novc { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorname), val(normalname), + path(cobaltin), path(amberin) + + output: + tuple val(tumorname), path("${tumorname}") + + script: + + """ + java -jar /opt2/hmftools/purple.jar \ + -tumor ${tumorname} \ + -amber ${amberin} \ + -cobalt ${cobaltin} \ + -gc_profile $GCPROFILE \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + $ENSEMBLCACHE \ + -threads $task.cpus \ + -output_dir ${tumorname} + """ + + stub: + + """ + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv + """ + +} + diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 501fce6..dde0a42 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -1,5 +1,6 @@ ///References to assign GENOMEREF=file(params.genomes[params.genome].genome) + DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz" FASTQ_SCREEN_CONF=file(params.fastq_screen_conf) BACDB=file(params.genomes[params.genome].KRAKENBACDB) @@ -45,6 +46,9 @@ process fc_lane { process fastq_screen { //Uses Trimmed Files + container = "${params.containers.loganqc}" + label 'process_medium' + input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), @@ -93,6 +97,8 @@ process 
kraken { @Output: Kraken logfile and interactive krona report */ + container = "${params.containers.loganqc}" + label 'process_high' input: tuple val(samplename), @@ -122,7 +128,7 @@ process kraken { --gzip-compressed \ --paired ${fqs[0]} ${fqs[1]} # Generate Krona Report - cut -f2,3 ${samplename}.trimmed.kraken_bacteria.taxa.txt} | \ + cut -f2,3 ${samplename}.trimmed.kraken_bacteria.taxa.txt | \ ktImportTaxonomy - -o ${samplename}.trimmed.kraken_bacteria.krona.html """ @@ -143,8 +149,11 @@ process fastqc { @Output: FastQC report and zip file containing sequencing quality information """ + container = "${params.containers.loganqc}" + label 'process_medium' + input: - tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") + tuple val(samplename), path(bam), path(bai) output: tuple val(samplename), path("${samplename}_fastqc.html"), path("${samplename}_fastqc.zip") @@ -155,7 +164,7 @@ process fastqc { fastqc -t 8 \ -f bam \ -o fastqc \ - ${samplename}.bqsr.bam + $bam mv fastqc/${samplename}.bqsr_fastqc.html ${samplename}_fastqc.html mv fastqc/${samplename}.bqsr_fastqc.zip ${samplename}_fastqc.zip """ @@ -177,6 +186,8 @@ process qualimap_bamqc { @Output: Report containing post-aligment quality-control metrics */ + container = "${params.containers.loganqc}" + label 'process_medium' input: tuple val(samplename), path(bam), path(bai) @@ -218,6 +229,7 @@ process samtools_flagstats { @Output: Text file containing alignment statistics */ + container = "${params.containers.logan}" label 'process_medium' input: @@ -252,11 +264,14 @@ process mosdepth { `{prefix}.quantized.bed.gz` (if --quantize is specified) `{prefix}.thresholds.bed.gz` (if --thresholds is specified) */ + container = "${params.containers.loganqc}" + label 'process_medium' + input: tuple val(samplename), path(bam), path(bai) output: - path("${samplename}.mosdepth.region.dist.txt"), + tuple path("${samplename}.mosdepth.region.dist.txt"), path("${samplename}.mosdepth.summary.txt"), 
path("${samplename}.regions.bed.gz"), path("${samplename}.regions.bed.gz.csi") @@ -264,7 +279,7 @@ process mosdepth { script: """ - mosdepth -n --fast-mode --by 500 ${samplename} ${bam} -t $task.cpus + mosdepth -n --fast-mode --by 500 $samplename $bam -t $task.cpus """ stub: @@ -288,11 +303,12 @@ process vcftools { @Output: Text file containing a measure of heterozygosity */ + container = "${params.containers.logan}" label 'process_medium' - input: tuple path(germlinevcf),path(germlinetbi) + output: path("variants_raw_variants.het") @@ -319,6 +335,9 @@ process collectvariantcallmetrics { @Output: Text file containing a collection of metrics relating to snps and indels */ + container = "${params.containers.logan}" + label 'process_medium' + input: tuple path(germlinevcf),path(germlinetbi) @@ -357,7 +376,7 @@ process bcftools_stats { @Output: Text file containing a collection of summary statistics */ - + container = "${params.containers.logan}" label 'process_medium' input: @@ -390,6 +409,7 @@ process gatk_varianteval { @Output: Evaluation table containing a collection of summary statistics */ + container = "${params.containers.logan}" label 'process_medium' input: @@ -424,6 +444,7 @@ process snpeff { @Output: Evaluation table containing a collection of summary statistics */ + container = "${params.containers.logan}" label 'process_medium' input: @@ -466,10 +487,12 @@ process somalier_extract { rname = 'somalier_extract' container: config['images']['wes_base'] */ + container = "${params.containers.loganqc}" label 'process_low' input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") + tuple val(samplename), path(bam), path(bai) + output: path("output/${samplename}.somalier") @@ -480,7 +503,7 @@ process somalier_extract { -d output \ --sites $SITES_VCF \ -f $GENOMEREF \ - ${samplename}.bam + $bam """ stub: @@ -501,8 +524,9 @@ process somalier_analysis_human { Separate tab-separated value (TSV) files with relatedness and ancestry outputs */ 
+ container = "${params.containers.loganqc}" label 'process_low' - + errorStrategy='ignore' input: path(somalierin) @@ -565,7 +589,10 @@ process somalier_analysis_mouse { Separate tab-separated value (TSV) files with relatedness and ancestry outputs */ + container = "${params.containers.loganqc}" label 'process_low' + errorStrategy='ignore' + input: path(somalierin) @@ -605,7 +632,6 @@ process somalier_analysis_mouse { } process multiqc { - """ Reporting step to aggregate sample summary statistics and quality-control information across all samples. This will be one of the last steps of the @@ -617,6 +643,8 @@ process multiqc { @Output: Interactive MulitQC report and a QC metadata table """ + container = "${params.containers.multiqc}" + label 'process_low' input: path(allqcin) diff --git a/modules/local/qualimap.nf b/modules/local/qualimap.nf new file mode 100644 index 0000000..8013f3a --- /dev/null +++ b/modules/local/qualimap.nf @@ -0,0 +1,47 @@ +if (params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ + SPECIES="HUMAN" +}else if (params.genome.matches("mm10")){ + SPECIES="MOUSE" +} +process qualimap_bamqc { + /* + Quality-control step to assess various post-alignment metrics + and a secondary method to calculate insert size. 
Please see + QualiMap's website for more information about BAM QC: + http://qualimap.conesalab.org/ + @Input: + Recalibrated BAM file (scatter) + @Output: + Report containing post-aligment quality-control metrics + */ + container = "${params.containers.loganqc}" + label 'process_high' + + input: + tuple val(samplename), path(bam), path(bai) + + output: + tuple path("${samplename}_genome_results.txt"), path("${samplename}_qualimapReport.html") + + script: + """ + unset DISPLAY + qualimap bamqc -bam ${bam} \ + --java-mem-size=70G \ + -c -ip \ + -outdir ${samplename} \ + -outformat HTML \ + -nt $task.cpus \ + --gd $SPECIES \ + --skip-duplicated \ + -nw 500 \ + -p NON-STRAND-SPECIFIC + mv ${samplename}/genome_results.txt ${samplename}_genome_results.txt + mv ${samplename}/qualimapReport.html ${samplename}_qualimapReport.html + """ + + stub: + """ + touch ${samplename}_genome_results.txt ${samplename}_qualimapReport.html + """ +} \ No newline at end of file diff --git a/modules/local/sage.nf b/modules/local/sage.nf new file mode 100644 index 0000000..38f1595 --- /dev/null +++ b/modules/local/sage.nf @@ -0,0 +1,78 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) + +//HMFTOOLS +SOMATICHOTSPOTS=params.genomes[params.genome].SOMATICHOTSPOTS +PANELBED=params.genomes[params.genome].PANELBED +HCBED=params.genomes[params.genome].HCBED +ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE +GENOMEVER=params.genomes[params.genome].GENOMEVER + + + +process sage_tn { + container "${params.containers.logan}" + label 'process_high' + + input: + tuple val(tumorname), path(tumorbam), path(tumorbai), + val(normalname), path(normalbam), path(normalbai) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.sage.vcf.gz"), + path("${tumorname}_vs_${normalname}.sage.vcf.gz.tbi") + + + script: + """ + java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar \ + -tumor ${tumorname} 
-tumor_bam ${tumorbam} \ + -reference ${normalname} -reference_bam ${normalbam} \ + -threads $task.cpus \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + -hotspots $SOMATICHOTSPOTS \ + $PANELBED $HCBED $ENSEMBLCACHE \ + -output_vcf ${tumorname}_vs_${normalname}.sage.vcf.gz + """ + + stub: + """ + touch "${tumorname}_vs_${normalname}.sage.vcf.gz" "${tumorname}_vs_${normalname}.sage.vcf.gz.tbi" + """ +} + + + +process sage_tonly { + container "${params.containers.logan}" + label 'process_somaticcaller' + + input: + tuple val(tumorname), path(tumorbam), path(tumorbai) + + output: + tuple val(tumorname), + path("${tumorname}.tonly.sage.vcf.gz"), + path("${tumorname}.tonly.sage.vcf.gz.tbi") + + script: + """ + java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar \ + -tumor ${tumorname} -tumor_bam ${tumorbam} \ + -threads $task.cpus \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + -hotspots $HOTSPOTS \ + $PANELBED $HCBED $ENSEMBLCACHE \ + -output_vcf ${tumorname}.tonly.sage.vcf.gz + """ + + stub: + """ + touch "${tumorname}.tonly.sage.vcf.gz" "${tumorname}.tonly.sage.vcf.gz.tbi" + """ + +} diff --git a/modules/local/samtools_flagstats.nf b/modules/local/samtools_flagstats.nf new file mode 100644 index 0000000..daf388f --- /dev/null +++ b/modules/local/samtools_flagstats.nf @@ -0,0 +1,31 @@ + +process samtools_flagstats { + /* + Quality-control step to assess alignment quality. Flagstat provides + counts for each of 13 categories based primarily on bit flags in the + FLAG field. 
Information on the meaning of the flags is given in the + SAM specification: https://samtools.github.io/hts-specs/SAMv1.pdf + @Input: + Recalibrated BAM file (scatter) + @Output: + Text file containing alignment statistics + */ + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(samplename), path(bam), path(bai) + + output: + path("${samplename}.samtools_flagstat.txt") + + script: + """ + samtools flagstat ${bam} > ${samplename}.samtools_flagstat.txt + """ + + stub: + """ + touch ${samplename}.samtools_flagstat.txt + """ +} diff --git a/modules/local/sequenza.nf b/modules/local/sequenza.nf new file mode 100644 index 0000000..d276810 --- /dev/null +++ b/modules/local/sequenza.nf @@ -0,0 +1,181 @@ +GENOMEREF = file(params.genomes[params.genome].genome) + +//SEQUENZA +SEQUENZAGC = file(params.genomes[params.genome].SEQUENZAGC) +SEQUENZA_SCRIPT = params.script_sequenza + + +process seqz_sequenza_bychr { + container = "${params.containers.logan}" + label 'process_long' + + input: + tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), val(chr) + + output: + tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") + + script: + """ + sequenza-utils bam2seqz \ + -gc ${SEQUENZAGC} \ + -F $GENOMEREF \ + -C ${chr} \ + -n ${normal} \ + -t ${tumor} | gzip > "${tumorname}_${normalname}_${chr}.seqz.gz" + + """ + + stub: + """ + touch "${tumorname}_${normalname}_${chr}.seqz.gz" + """ +} + +process pileup_sequenza { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(pairid), val(name), + path(bam), path(bai), path(bed) + + output: + tuple val(pairid), path("${name}_${bed}.mpileup.gz"), path("${name}_${bed}.mpileup.gz.tbi") + + script: + //Q20 is default in sequenza + """ + samtools mpileup -f $GENOMEREF -R ${bed} -Q 20 ${bam} |gzip > ${name}_${bed}.mpileup.gz + tabix -s1 -b2 -e2 ${name}_${bed}.mpileup.gz + """ + + stub: + """ + touch 
"${name}_${bed}.mpileup.gz" + touch "${name}_${bed}.mpileup.gz.tbi" + """ +} + +process seqz_sequenza_reg { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") + + script: + """ + sequenza-utils bam2seqz \ + -gc ${SEQUENZAGC} \ + -p \ + -F $GENOMEREF \ + -n ${normal} \ + -t ${tumor} | gzip > "${tumorname}_${normalname}_${bed}.seqz.gz" + + """ + + stub: + """ + touch "${tumorname}_${normalname}_${chr}.seqz.gz" + """ +} + +process seqz_sequenza { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") + + script: + """ + sequenza-utils bam2seqz \ + -gc ${SEQUENZAGC} \ + -p \ + -F $GENOMEREF \ + -n ${normal} \ + -t ${tumor} | gzip > "${tumorname}_${normalname}_${bed}.seqz.gz" + + """ + + stub: + """ + touch "${tumorname}_${normalname}_${chr}.seqz.gz" + """ +} + + + + +process sequenza { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(pairid), path(seqz) + + output: + tuple val(pairid), + path("${pairid}_alternative_solutions.txt"), + path("${pairid}_alternative_fit.pdf"), + path("${pairid}_model_fit.pdf"), + path("${pairid}_confints_CP.txt"), + path("${pairid}_CN_bars.pdf"), + path("${pairid}_genome_view.pdf"), + path("${pairid}_chromosome_view.pdf"), + path("${pairid}_mutations.txt"), + path("${pairid}_segments.txt"), + path("${pairid}_CP_contours.pdf"), + path("${pairid}_sequenza_cp_table.RData"), + path("${pairid}_chromosome_depths.pdf"), + path("${pairid}_gc_plots.pdf"), + path("${pairid}_sequenza_extract.RData") + + + shell: + ''' + + zcat !{seqz} | awk '{if (NR==1) 
{print $0} else {if ($1!="chromosome"){print $0}}}' |\ + sequenza-utils seqz_binning \ + -w 100 \ + -s - > !{pairid}.bin100.seqz + + Rscript !{SEQUENZA_SCRIPT} \ + !{pairid}.bin100.seqz \ + . \ + !{pairid} \ + !{task.cpus} + + ''' + + stub: + + """ + touch "${pairid}_alternative_solutions.txt" + touch "${pairid}_alternative_fit.pdf" + touch "${pairid}_model_fit.pdf" + touch "${pairid}_confints_CP.txt" + touch "${pairid}_CN_bars.pdf" + touch "${pairid}_genome_view.pdf" + touch "${pairid}_chromosome_view.pdf" + touch "${pairid}_mutations.txt" + touch "${pairid}_segments.txt" + touch "${pairid}_CP_contours.pdf" + touch "${pairid}_sequenza_cp_table.RData" + touch "${pairid}_chromosome_depths.pdf" + touch "${pairid}_gc_plots.pdf" + touch "${pairid}_sequenza_extract.RData" + + """ + +} diff --git a/modules/local/snpeff.nf b/modules/local/snpeff.nf new file mode 100644 index 0000000..e475377 --- /dev/null +++ b/modules/local/snpeff.nf @@ -0,0 +1,43 @@ +SNPEFF_GENOME = params.genomes[params.genome].snpeff_genome +SNPEFF_CONFIG = file(params.genomes[params.genome].snpeff_config) +SNPEFF_BUNDLE = file(params.genomes[params.genome].snpeff_bundle) + +process snpeff { + /* + Data processing and quality-control step to annotate variants, predict its + functional effects, and collect various summary statistics about variants and + their annotations. 
Please see SnpEff's documentation for more information: + https://pcingola.github.io/SnpEff/ + @Input: + Per sample gVCF file (scatter) + @Output: + Evaluation table containing a collection of summary statistics + */ + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") + output: + tuple path("${samplename}.germline.snpeff.ann.vcf"), + path("${samplename}.germline.snpeff.ann.csv"), + path("${samplename}.germline.snpeff.ann.html") + + script: + """ + java -Xmx12g -jar \$SNPEFF_JAR \ + -v -canon -c $SNPEFF_CONFIG \ + -csvstats ${samplename}.germline.snpeff.ann.csv \ + -stats ${samplename}.germline.snpeff.ann.html \ + $SNPEFF_GENOME \ + ${samplename}.gvcf.gz > ${samplename}.germline.snpeff.ann.vcf + """ + + stub: + + """ + touch ${samplename}.germline.snpeff.ann.vcf + touch ${samplename}.germline.snpeff.ann.csv + touch ${samplename}.germline.snpeff.ann.html + """ +} diff --git a/modules/local/somalier.nf b/modules/local/somalier.nf new file mode 100644 index 0000000..fdf9aa9 --- /dev/null +++ b/modules/local/somalier.nf @@ -0,0 +1,161 @@ +//SOMALIER +GENOMEREF=file(params.genomes[params.genome].genome) + +SITES_VCF= file(params.genomes[params.genome].sites_vcf) +ANCESTRY_DB=file(params.genomes[params.genome].somalier_ancestrydb) +SCRIPT_PATH_GENDER = file(params.script_genderPrediction) +SCRIPT_PATH_SAMPLES = file(params.script_combineSamples) +SCRIPT_PATH_PCA = file(params.script_ancestry) + + +process somalier_extract { + /* + To estimate ancestry, Somalier first extracts known sites from mapped reads + @Input: + Mapped and pre-processed BAM file + @Output: + Exracted sites in (binary) somalier format + */ + container = "${params.containers.loganqc}" + label 'process_low' + + input: + tuple val(samplename), path(bam), path(bai) + + output: + path("output/${samplename}.somalier") + + script: + """ + mkdir -p output + somalier extract \ + -d output \ + 
--sites $SITES_VCF \ + -f $GENOMEREF \ + $bam + """ + + stub: + """ + mkdir -p output + touch output/${samplename}.somalier + """ +} + +process somalier_analysis_human { + /* + To estimate relatedness, Somalier uses extracted site information to + compare across all samples. This step also runs the ancestry estimation + function in Somalier. + @Input: + Exracted sites in (binary) somalier format for ALL samples in the cohort + @Output: + Separate tab-separated value (TSV) files with relatedness and ancestry outputs + + */ + container = "${params.containers.loganqc}" + label 'process_low' + errorStrategy='ignore' + + input: + path(somalierin) + + output: + tuple path("relatedness.pairs.tsv"), path("relatedness.samples.tsv"), + path("ancestry.somalier-ancestry.tsv"), path("predicted.genders.tsv"), + path("predicted.pairs.tsv"), + path("sampleAncestryPCAPlot.html"), + path("predictedPairsAncestry.pdf") + + script: + """ + echo "Estimating relatedness" + somalier relate \ + -o "relatedness" \ + $somalierin + + echo "Estimating ancestry" + somalier ancestry \ + -o "ancestry" \ + --labels $ANCESTRY_DB/ancestry-labels-1kg.tsv \ + $ANCESTRY_DB/*.somalier ++ \ + $somalierin + + Rscript $SCRIPT_PATH_GENDER \ + relatedness.samples.tsv \ + predicted.genders.tsv + + Rscript $SCRIPT_PATH_SAMPLES \ + relatedness.pairs.tsv \ + predicted.pairs.tsv + + Rscript $SCRIPT_PATH_PCA \ + ancestry.somalier-ancestry.tsv \ + predicted.pairs.tsv \ + sampleAncestryPCAPlot.html \ + predictedPairsAncestry.pdf + """ + + stub: + + """ + touch relatedness.pairs.tsv + touch relatedness.samples.tsv + touch ancestry.somalier-ancestry.tsv predicted.genders.tsv + touch predicted.pairs.tsv sampleAncestryPCAPlot.html + touch predictedPairsAncestry.pdf + """ +} + +process somalier_analysis_mouse { + /* + To estimate relatedness, Somalier uses extracted site information to + compare across all samples. This step also runs the ancestry estimation + function in Somalier. 
+ @Input: + Exracted sites in (binary) somalier format for ALL samples in the cohort + @Output: + Separate tab-separated value (TSV) files with relatedness and ancestry outputs + + */ + container = "${params.containers.loganqc}" + label 'process_low' + errorStrategy='ignore' + + + input: + path(somalierin) + + output: + tuple path("relatedness.pairs.tsv"), + path("relatedness.samples.tsv"), + path("predicted.genders.tsv"), + path("predicted.pairs.tsv") + + script: + """ + echo "Estimating relatedness" + somalier relate \ + -o "relatedness" \ + $somalierin + + Rscript $SCRIPT_PATH_GENDER \ + relatedness.samples.tsv \ + predicted.genders.tsv + + Rscript $SCRIPT_PATH_SAMPLES \ + relatedness.pairs.tsv \ + predicted.pairs.tsv + + """ + + stub: + + """ + touch relatedness.pairs.tsv + touch relatedness.samples.tsv + touch predicted.genders.tsv + touch predicted.pairs.tsv + + """ +} diff --git a/modules/local/splitbed.nf b/modules/local/splitbed.nf index c39ee31..66ea180 100644 --- a/modules/local/splitbed.nf +++ b/modules/local/splitbed.nf @@ -1,33 +1,54 @@ SPLIT_BED=file(params.splitbed) SPLIT_REGIONS=params.split_regions +GENOMEFAI = file(params.genomes[params.genome].genomefai) + // Split Bed Step to create the path process splitinterval { - //Keep Process Local - executor="local" - cpus= '2' - memory=2.GB + container "${params.containers.logan}" + label "process_single" input: - path(BED_IN) + path(BED_IN) output: - path('bedout/*.bed') + path('bedout/*.bed') script: - """ mkdir -p bedout python $SPLIT_BED -infile ${BED_IN} -num ${SPLIT_REGIONS} -out 'bedout/bed' """ } +process matchbed { + container "${params.containers.logan}" + label "process_single" + + input: + path(bed) + + output: + path('target.bed') + + script: + """ + awk -F '\\t' '{printf("%s\\t0\\t%s\\n",\$1,\$2);}' $GENOMEFAI >temp.bed + bedtools intersect -a ${bed} -b temp.bed > target.bed + """ + +} + + + + /* Code to convert beds to interval list +#Subset current bed +#hg38 awk -F '\t' 
'{printf("%s\t0\t%s\n",$1,$2);}' genome.fa.fai bedtools subtract -a GRCh38.primary_assembly.genome.bed -b ../hg38.blacklist.bed > GRCh38.primary_assembly.genome.interval.bed - gatk BedToIntervalList -I GRCh38.primary_assembly.genome.interval.bed -O \ GRCh38.primary_assembly.genome.interval_list -SD GRCh38.primary_assembly.genome.dict diff --git a/modules/local/strelka.nf b/modules/local/strelka.nf new file mode 100644 index 0000000..0b24fc5 --- /dev/null +++ b/modules/local/strelka.nf @@ -0,0 +1,140 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) + +//HelperScripts +STRELKA_CONVERT=params.strelka_convert + + +process strelka_tn { + container "${params.containers.logan}" + label 'process_high' + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") + + script: + + """ + mkdir -p wd + + bgzip ${bed} + tabix ${bed}.gz + + configureStrelkaSomaticWorkflow.py \ + --ref=$GENOMEREF \ + --tumor=${tumor} \ + --normal=${normal} \ + --runDir=wd \ + --callRegions ${bed}.gz + ./wd/runWorkflow.py -m local -j $task.cpus + mv wd/results/variants/somatic.snvs.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz + mv wd/results/variants/somatic.indels.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz + + printf %s "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname + + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ + | bcftools view -Oz -o 
${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz + + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz + + """ + + stub: + + """ + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz.tbi + + """ + +} + +process convert_strelka { + //Add GT/AD column to Strelka Variants + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumor), val(normal), val(vc), + path(strelkavcf), path(strelkaindex) + + output: + tuple val(tumor), val(normal), val("strelka"), + path("${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz"), + path("${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi") + + + script: + + """ + python $STRELKA_CONVERT ${strelkavcf} ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz + bcftools index -t ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz + """ + + stub: + + """ + touch ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi + """ + +} + +process combineVariants_strelka { + //Concat all somatic snvs/indels across all files, strelka separates snv/indels + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(sample), + path(strelkasnvs), path(snvindex), + path(strelkaindels), path(indelindex) + + output: + tuple val(sample), + path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"), + path("${sample}.filtered.strelka.vcf.gz"), 
path("${sample}.filtered.strelka.vcf.gz.tbi") + + + script: + + vcfin = strelkasnvs.join(" ") + indelsin = strelkaindels.join(" ") + samplist=sample.split('_vs_') + if(samplist.size()>1){ + samporder = samplist.join(",") + }else{ + samporder = sample + } + """ + bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a + bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz + + bcftools sort ${sample}.temp1.strelka.vcf.gz |bcftools view - -s $samporder -Oz -o ${sample}.strelka.vcf.gz + + bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz + + bcftools index ${sample}.strelka.vcf.gz -t + bcftools index ${sample}.filtered.strelka.vcf.gz -t + """ + + stub: + + """ + touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi + touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi + + """ + +} diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf deleted file mode 100644 index 3b1038e..0000000 --- a/modules/local/structural_variant.nf +++ /dev/null @@ -1,309 +0,0 @@ -GENOMEREF=file(params.genomes[params.genome].genome) -ANNOTSVGENOME=params.genomes[params.genome].annotsvgenome -BWAGENOME=file(params.genomes[params.genome].bwagenome) -INDELREF=file(params.genomes[params.genome].INDELREF) - - - -process svaba_somatic { - container = "${params.containers.logan}" - label 'process_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) - - output: - tuple val(tumorname), - path("${tumor.simpleName}.bps.txt.gz"), - path("${tumor.simpleName}.contigs.bam"), - path("${tumor.simpleName}.discordant.txt.gz"), - path("${tumor.simpleName}.alignments.txt.gz"), - 
path("${tumor.simpleName}.svaba.germline.indel.vcf"), - path("${tumor.simpleName}.svaba.germline.sv.vcf"), - path("${tumor.simpleName}.svaba.somatic.indel.vcf"), - path("${tumor.simpleName}.svaba.somatic.sv.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.germline.indel.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.germline.sv.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf"), - path("${tumor.simpleName}.log") - - - script: - """ - svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME - """ - - stub: - - """ - touch "${tumor.simpleName}.bps.txt.gz" - touch "${tumor.simpleName}.contigs.bam" - touch "${tumor.simpleName}.discordant.txt.gz" - touch "${tumor.simpleName}.alignments.txt.gz" - touch "${tumor.simpleName}.svaba.germline.indel.vcf" - touch "${tumor.simpleName}.svaba.germline.sv.vcf" - touch "${tumor.simpleName}.svaba.somatic.indel.vcf" - touch "${tumor.simpleName}.svaba.somatic.sv.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.germline.indel.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.germline.sv.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf" - touch "${tumor.simpleName}.log" - - """ -} - - - -process manta_somatic { - container = "${params.containers.logan}" - label 'process_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai) - - output: - tuple val(tumorname), - path("${tumor.simpleName}.diplodSV.vcf.gz"), - path("${tumor.simpleName}.somaticSV.vcf.gz"), - path("${tumor.simpleName}.candidateSV.vcf.gz"), - path("${tumor.simpleName}.candidateSmallIndels.vcf.gz") - - script: - """ - mkdir -p wd - - configManta.py \ - --normalBam=${normal} \ - --tumorBam=${tumor} \ - --referenceFasta=$GENOMEREF \ - --runDir=wd - - wd/runWorkflow.py -m local -j $task.cpus - - mv 
wd/results/variants/diploidSV.vcf.gz ${tumor.simpleName}.diplodSV.vcf.gz - mv wd/results/variants/somaticSV.vcf.gz ${tumor.simpleName}.somaticSV.vcf.gz - mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz - mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz - - """ - - stub: - - """ - touch ${tumor.simpleName}.diplodSV.vcf.gz - touch ${tumor.simpleName}.somaticSV.vcf.gz - touch ${tumor.simpleName}.candidateSV.vcf.gz - touch ${tumor.simpleName}.candidateSmallIndels.vcf.gz - """ -} - - -process annotsv_tn { - //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files - //Requires bedtools,bcftools - container = "${params.containers.annotcnvsv}" - - input: - tuple val(tumorname), path(somaticvcf), val(sv) - - output: - tuple val(tumorname), - path("${sv}/${tumorname}.tsv"), - path("${sv}/${tumorname}.unannotated.tsv") - - - script: - """ - mkdir ${sv} - - AnnotSV -SVinputFile ${somaticvcf} \ - -genomeBuild $ANNOTSVGENOME \ - -SVinputInfo 1 -outputFile ${tumorname} \ - -outputDir ${sv} - - """ - - stub: - """ - mkdir ${sv} - - touch "${sv}/${tumorname}.tsv" - touch "${sv}/${tumorname}.unannotated.tsv" - """ -} - - -process manta_tonly { - container = "${params.containers.logan}" - label 'process_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai) - - output: - tuple val(tumorname), - path("${tumor.simpleName}.candidateSV.vcf.gz"), - path("${tumor.simpleName}.candidateSmallIndels.vcf.gz"), - path("${tumor.simpleName}.tumorSV.vcf.gz") - - - script: - """ - mkdir -p wd - - configManta.py \ - --tumorBam=${tumor} \ - --referenceFasta=$GENOMEREF \ - --runDir=wd - - wd/runWorkflow.py -m local -j $task.cpus - - mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz - mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz - mv wd/results/variants/tumorSV.vcf.gz ${tumor.simpleName}.tumorSV.vcf.gz - - """ - - 
stub: - - """ - touch ${tumor.simpleName}.candidateSV.vcf.gz - touch ${tumor.simpleName}.candidateSmallIndels.vcf.gz - touch ${tumor.simpleName}.tumorSV.vcf.gz - - """ -} - - - -process svaba_tonly { - container = "${params.containers.logan}" - label 'process_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai) - - output: - tuple val(tumorname), - path("${tumor.simpleName}.bps.txt.gz"), - path("${tumor.simpleName}.contigs.bam"), - path("${tumor.simpleName}.discordant.txt.gz"), - path("${tumor.simpleName}.alignments.txt.gz"), - path("${tumor.simpleName}.svaba.indel.vcf"), - path("${tumor.simpleName}.svaba.sv.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.indel.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.sv.vcf"), - path("${tumor.simpleName}.log") - - - script: - """ - svaba run -t ${tumor} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME - """ - - stub: - - """ - touch "${tumor.simpleName}.bps.txt.gz" - touch "${tumor.simpleName}.contigs.bam" - touch "${tumor.simpleName}.discordant.txt.gz" - touch "${tumor.simpleName}.alignments.txt.gz" - touch "${tumor.simpleName}.svaba.indel.vcf" - touch "${tumor.simpleName}.svaba.sv.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.indel.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.sv.vcf" - touch "${tumor.simpleName}.log" - - """ -} - - -process gunzip { - - input: - tuple val(tumorname), - path(vcf), val(sv) - - output: - tuple val(tumorname), - path("${tumorname}.tumorSV.vcf"), val(sv) - - script: - """ - gunzip -f ${vcf} > ${tumorname}.tumorSV.vcf - """ - - stub: - - """ - touch ${tumorname}.tumorSV.vcf - """ - -} - - -process survivor_sv { - container = "${params.containers.annotcnvsv}" - - input: - tuple val(tumorname), - path(vcfs),val(svs) - - output: - tuple val(tumorname), - path("${tumorname}_merged.vcf"), - val("survivor") - - - script: - strin = vcfs.join("\\n") - - """ - echo -e '$strin' > filelistin - SURVIVOR merge filelistin 1000 2 1 1 1 30 
${tumorname}_merged.vcf - """ - - stub: - strin = vcfs.join("\\n") - """ - echo -e '$strin' > filelistin - touch "${tumorname}_merged.vcf" - """ -} - - -process annotsv_tonly { - //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files - //Requires bedtools,bcftools - container = "${params.containers.annotcnvsv}" - - input: - tuple val(tumorname), path(somaticvcf), val(sv) - - output: - tuple val(tumorname), - path("${sv}/${tumorname}.tsv"), - path("${sv}/${tumorname}.unannotated.tsv") - - - script: - """ - mkdir ${sv} - - AnnotSV -SVinputFile ${somaticvcf} \ - -genomeBuild $ANNOTSVGENOME \ - -SVinputInfo 1 -outputFile ${tumorname} \ - -outputDir ${sv} - - """ - - stub: - """ - mkdir ${sv} - - touch "${sv}/${tumorname}.tsv" - touch "${sv}/${tumorname}.unannotated.tsv" - """ -} diff --git a/modules/local/svaba.nf b/modules/local/svaba.nf new file mode 100644 index 0000000..33d6fa3 --- /dev/null +++ b/modules/local/svaba.nf @@ -0,0 +1,95 @@ +BWAGENOME=file(params.genomes[params.genome].bwagenome) +INDELREF=file(params.genomes[params.genome].INDELREF) + + +process svaba_somatic { + container = "${params.containers.logan}" + label 'process_high' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), + path("${tumor.simpleName}.bps.txt.gz"), + path("${tumor.simpleName}.contigs.bam"), + path("${tumor.simpleName}.discordant.txt.gz"), + path("${tumor.simpleName}.alignments.txt.gz"), + path("${tumor.simpleName}.svaba.germline.indel.vcf"), + path("${tumor.simpleName}.svaba.germline.sv.vcf"), + path("${tumor.simpleName}.svaba.somatic.indel.vcf"), + path("${tumor.simpleName}.svaba.somatic.sv.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.germline.indel.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.germline.sv.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf"), + 
path("${tumor.simpleName}.log") + + + script: + """ + svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME + """ + + stub: + + """ + touch "${tumor.simpleName}.bps.txt.gz" + touch "${tumor.simpleName}.contigs.bam" + touch "${tumor.simpleName}.discordant.txt.gz" + touch "${tumor.simpleName}.alignments.txt.gz" + touch "${tumor.simpleName}.svaba.germline.indel.vcf" + touch "${tumor.simpleName}.svaba.germline.sv.vcf" + touch "${tumor.simpleName}.svaba.somatic.indel.vcf" + touch "${tumor.simpleName}.svaba.somatic.sv.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.germline.indel.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.germline.sv.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf" + touch "${tumor.simpleName}.log" + + """ +} + + + +process svaba_tonly { + container = "${params.containers.logan}" + label 'process_high' + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), + path("${tumor.simpleName}.bps.txt.gz"), + path("${tumor.simpleName}.contigs.bam"), + path("${tumor.simpleName}.discordant.txt.gz"), + path("${tumor.simpleName}.alignments.txt.gz"), + path("${tumor.simpleName}.svaba.indel.vcf"), + path("${tumor.simpleName}.svaba.sv.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.indel.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.sv.vcf"), + path("${tumor.simpleName}.log") + + + script: + """ + svaba run -t ${tumor} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME + """ + + stub: + + """ + touch "${tumor.simpleName}.bps.txt.gz" + touch "${tumor.simpleName}.contigs.bam" + touch "${tumor.simpleName}.discordant.txt.gz" + touch "${tumor.simpleName}.alignments.txt.gz" + touch "${tumor.simpleName}.svaba.indel.vcf" + touch "${tumor.simpleName}.svaba.sv.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.indel.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.sv.vcf" + 
touch "${tumor.simpleName}.log" + + """ +} diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index f91168e..9d4c729 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -44,7 +44,6 @@ process bwamem2 { container = "${params.containers.logan}" tag { name } - input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), @@ -53,26 +52,25 @@ process bwamem2 { path("${samplename}.fastp.html") output: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") + tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bam.bai") script: - """ + sub_cpus = "$task.cpus".toInteger()/2 - bwa-mem2 mem -M \ + """ + mkdir -p tmp + bwa-mem2 mem -M \ -R '@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \ -t $task.cpus \ ${GENOMEREF} \ ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \ samblaster -M | \ - samtools sort -@ $task.cpus -m 4G - -o ${samplename}.bam - - samtools index -@ $task.cpus ${samplename}.bam ${samplename}.bai - + samtools sort -T tmp/ -@ $sub_cpus -m 10G - --write-index -o ${samplename}.bam##idx##${samplename}.bam.bai """ stub: """ - touch ${samplename}.bam ${samplename}.bai + touch ${samplename}.bam ${samplename}.bam.bai """ } @@ -83,10 +81,10 @@ process indelrealign { label 'process_long' input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") + tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bam.bai") output: - tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai") + tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bam.bai") script: @@ -108,7 +106,7 @@ process indelrealign { stub: """ - touch ${samplename}.ir.bam ${samplename}.ir.bai + touch ${samplename}.ir.bam ${samplename}.ir.bam.bai """ } @@ -118,17 +116,18 @@ process bqsr_ir { /* Base quality recalibration for all samples */ + 
errorStrategy 'ignore' container = "${params.containers.logan}" label 'process_low' input: - tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai"), path(bed) + tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bam.bai"), path(bed) output: tuple val(samplename), path("${samplename}_${bed.simpleName}.recal_data.grp") script: """ - gatk --java-options '-Xmx16g' BaseRecalibrator \ + gatk --java-options '-Xmx10g' BaseRecalibrator \ --input ${samplename}.ir.bam \ --reference ${GENOMEREF} \ ${KNOWNRECAL} \ @@ -146,18 +145,19 @@ process bqsr { /* Base quality recalibration for all samples */ + errorStrategy 'ignore' container = "${params.containers.logan}" label 'process_low' input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) + tuple val(samplename), path(bam), path(bai), path(bed) output: - tuple val(samplename),path("${samplename}_${bed.simpleName}.recal_data.grp"), emit: bqsrby + tuple val(samplename), path("${samplename}_${bed.simpleName}.recal_data.grp"), optional: true script: """ - gatk --java-options '-Xmx16g' BaseRecalibrator \ - --input ${samplename}.bam \ + gatk --java-options '-Xmx10g' BaseRecalibrator \ + --input ${bam} \ --reference ${GENOMEREF} \ ${KNOWNRECAL} \ --output ${samplename}_${bed.simpleName}.recal_data.grp \ @@ -173,23 +173,24 @@ process bqsr { process gatherbqsr { container = "${params.containers.logan}" label 'process_low' + input: tuple val(samplename), path(recalgroups) + output: tuple val(samplename), path("${samplename}.recal_data.grp") - script: + script: strin = recalgroups.join(" --input ") """ - gatk --java-options '-Xmx32g' GatherBQSRReports \ + gatk --java-options '-Xmx10g' GatherBQSRReports \ --input ${strin} \ --output ${samplename}.recal_data.grp """ stub: - """ touch ${samplename}.recal_data.grp """ @@ -206,24 +207,25 @@ process applybqsr { tuple val(samplename), path(bam), path(bai), path("${samplename}.recal_data.grp") output: - 
tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") + tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bam.bai") script: """ - gatk --java-options '-Xmx32g' ApplyBQSR \ + gatk --java-options '-Xmx16g' ApplyBQSR \ --reference ${GENOMEREF} \ --input ${bam} \ --bqsr-recal-file ${samplename}.recal_data.grp \ --output ${samplename}.bqsr.bam \ --use-jdk-inflater \ --use-jdk-deflater + mv ${samplename}.bqsr.bai ${samplename}.bqsr.bam.bai """ stub: """ - touch ${samplename}.bqsr.bam ${samplename}.bqsr.bai + touch ${samplename}.bqsr.bam ${samplename}.bqsr.bam.bai """ } @@ -237,11 +239,11 @@ process samtoolsindex { tuple val(bamname), path(bam) output: - tuple val(bamname), path(bam), path("${bam}.bai") + tuple val(bamname), path(bam), path("${bam.simpleName}.bam.bai") script: """ - samtools index -@ $task.cpus ${bam} ${bam}.bai + samtools index -@ $task.cpus ${bam} ${bam.simpleName}.bam.bai """ stub: @@ -257,24 +259,45 @@ process bamtocram_tonly { label 'process_medium' input: - tuple val(tumorname), path(tumor), path(tumorbai) + tuple val(id), path(bam), path(bai) output: - path("${tumorname}.cram"), path("${tumorname}.cram.crai") + tuple val(id), path("${id}.cram"), path("${id}.cram.crai") script: """ - samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram $tumor - samtools index ${tumorname}.cram -@ $task.cpus + samtools view -@ $task.cpus -C -T $GENOMEREF -o ${id}.cram $tumor + samtools index ${id}.cram -@ $task.cpus """ stub: """ - touch ${tumorname}.cram ${tumorname}.cram.crai + touch ${id}.cram ${id}.cram.crai """ } +process samtools2fq { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(id), path(bam), path(bai) + + output: + tuple val(id), path("${id}.R1.fastq"), path("${id}.R2.fastq") + script: + """ + samtools fastq -@ $task.cpus \ + -1 ${id}.R1.fastq -2 ${id}.R2.fastq -0 /dev/null -s /dev/null \ + -n $bam + """ + + stub: + """ + touch ${id}.R1.fastq 
${id}.R2.fastq + """ +} diff --git a/modules/local/vardict.nf b/modules/local/vardict.nf new file mode 100644 index 0000000..e085e56 --- /dev/null +++ b/modules/local/vardict.nf @@ -0,0 +1,98 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) + + +process vardict_tn { + container "${params.containers.logan}" + label 'process_somaticcaller_high' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz") + //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and + //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” + script: + + """ + bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} + + VarDict -G $GENOMEREF \ + -f 0.01 \ + --nosv \ + -b "${tumor}|${normal}" --fisher \ + -t -Q 20 -c 1 -S 2 -E 3 \ + --th $task.cpus temp_${bed} \ + | var2vcf_paired.pl \ + -N "${tumor}|${normal}" \ + -Q 20 \ + -d 10 \ + -v 6 \ + -S \ + -f 0.05 > ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf + + printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz + + + """ + + stub: + + """ + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz + """ +} + + + +process vardict_tonly { + container "${params.containers.logan}" + label 'process_somaticcaller_high' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), path(bed) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.tonly.vardict.vcf.gz") + + script: + + """ + bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} + + VarDict -G $GENOMEREF \ + -f 0.01 \ + -x 
500 \ + --nosv \ + -b ${tumor} --fisher \ + -t -Q 20 -c 1 -S 2 -E 3 --th ${task.cpus} \ + temp_${bed} | var2vcf_valid.pl \ + -N ${tumor} \ + -Q 20 \ + -d 10 \ + -v 6 \ + -S \ + -E \ + -f 0.05 > ${tumorname}_${bed.simpleName}_temp.tonly.vardict.vcf + + printf "${tumor.Name}\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumorname}_${bed.simpleName}_temp.tonly.vardict.vcf \ + | bcftools view -Oz -o ${tumorname}_${bed.simpleName}.tonly.vardict.vcf.gz + + """ + + stub: + + """ + touch ${tumorname}_${bed.simpleName}.tonly.vardict.vcf.gz + + """ + +} \ No newline at end of file diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf deleted file mode 100644 index c23215f..0000000 --- a/modules/local/variant_calling.nf +++ /dev/null @@ -1,1066 +0,0 @@ -GENOMEREF=file(params.genomes[params.genome].genome) -GENOMEFAI=file(params.genomes[params.genome].genomefai) -GENOMEDICT=file(params.genomes[params.genome].genomedict) -GERMLINE_RESOURCE=file(params.genomes[params.genome].germline_resource) -GNOMADGERMLINE=params.genomes[params.genome].gnomad -DBSNP=file(params.genomes[params.genome].dbsnp) -PON=file(params.genomes[params.genome].pon) -VEPCACHEDIR=file(params.genomes[params.genome].vepcache) -VEPSPECIES=params.genomes[params.genome].vepspecies -VEPBUILD=params.genomes[params.genome].vepbuild -LOFREQ_CONVERT=params.lofreq_convert -//Octopus -SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest -GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest -//HMFTOOLS -HOTSPOTS=params.genomes[params.genome].HOTSPOTS -PANELBED=params.genomes[params.genome].PANELBED -HCBED=params.genomes[params.genome].HCBED -ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE -GENOMEVER=params.genomes[params.genome].GENOMEVER - -process mutect2 { - container "${params.containers.logan}" - label 'process_somaticcaller' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), - 
path(bed) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz.stats") - - - script: - """ - gatk Mutect2 \ - --reference $GENOMEREF \ - --intervals ${bed} \ - --input ${tumor} \ - --input ${normal} \ - --normal-sample ${normalname} \ - --tumor-sample ${tumorname} \ - $GNOMADGERMLINE \ - --panel-of-normals ${PON} \ - --output ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz \ - --f1r2-tar-gz ${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz \ - --independent-mates - """ - - stub: - """ - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz.stats - """ -} - -process pileup_paired_t { - container "${params.containers.logan}" - label 'process_highmem' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_${bed.simpleName}.tpileup.table") - - script: - """ - gatk --java-options -Xmx48g GetPileupSummaries \ - -I ${tumor} \ - -V $GERMLINE_RESOURCE \ - -L ${bed} \ - -O ${tumorname}_${bed.simpleName}.tpileup.table - - """ - - stub: - """ - touch ${tumorname}_${bed.simpleName}.tpileup.table - """ - -} - -process pileup_paired_n { - container "${params.containers.logan}" - label 'process_highmem' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val(tumorname), - val(normalname), - path("${normalname}_${bed.simpleName}.npileup.table") - - script: - """ - gatk --java-options -Xmx48g GetPileupSummaries \ - -I ${normal} \ - -V $GERMLINE_RESOURCE \ - -L ${bed} \ - -O 
${normalname}_${bed.simpleName}.npileup.table - - """ - - stub: - """ - touch ${normalname}_${bed.simpleName}.npileup.table - """ -} - - -process contamination_paired { - container "${params.containers.logan}" - label 'process_highmem' - - input: - tuple val(tumorname), val(normalname), - path(tumor_pileups), - path(normal_pileups) - - output: - tuple val(tumorname), - path("${tumorname}_allpileups.table"), - path("${tumorname}_normal.allpileups.table"), - path("${tumorname}.contamination.table"), - path("${tumorname}_normal.contamination.table") - - script: - //Gather all the Pileup summaries first for Tumor and Also for NORMAL and then run! - alltumor = tumor_pileups.join(" -I ") - allnormal = normal_pileups.join(" -I ") - - - """ - gatk GatherPileupSummaries \ - --sequence-dictionary $GENOMEDICT \ - -I ${alltumor} -O ${tumorname}_allpileups.table - - gatk GatherPileupSummaries \ - --sequence-dictionary $GENOMEDICT \ - -I ${allnormal} -O ${tumorname}_normal.allpileups.table - - gatk CalculateContamination \ - -I ${tumorname}_allpileups.table \ - --matched-normal ${tumorname}_normal.allpileups.table \ - -O ${tumorname}.contamination.table - - gatk CalculateContamination \ - -I ${tumorname}_normal.allpileups.table \ - -O ${tumorname}_normal.contamination.table - - """ - - stub: - """ - touch ${tumorname}_allpileups.table - touch ${tumorname}_normal.allpileups.table - touch ${tumorname}.contamination.table - touch ${tumorname}_normal.contamination.table - """ - - -} - - -process learnreadorientationmodel { - container "${params.containers.logan}" - label 'process_highmem' - - input: - tuple val(sample), path(f1r2) - - output: - tuple val(sample), path("${sample}.read-orientation-model.tar.gz") - - script: - f1r2in = f1r2.join(" --input ") - - """ - gatk LearnReadOrientationModel \ - --output ${sample}.read-orientation-model.tar.gz \ - --input ${f1r2in} - """ - - stub: - """ - touch ${sample}.read-orientation-model.tar.gz - """ -} - - -process mergemut2stats { - 
container "${params.containers.logan}" - label 'process_low' - - input: - tuple val(sample), path(stats) - - output: - tuple val(sample), path("${sample}.final.stats") - - script: - statsin = stats.join(" --stats ") - - """ - gatk MergeMutectStats \ - --stats ${statsin} \ - -O ${sample}.final.stats - """ - - stub: - """ - touch ${sample}.final.stats - """ - -} - -process octopus_convertvcf { - container "${params.containers.logan}" - label 'process_low' - - input: - tuple val(tumor), val(normal), - val(oct), path(vcf), path(vcfindex) - - output: - tuple val(tumor), val(normal), path("${tumor}.octopus.norm.vcf.gz"), - path("${tumor}.octopus.norm.vcf.gz.tbi") - - - script: - """ - zcat ${vcf} | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/' > ${tumor}_temp.octopus.norm.vcf - bgzip ${tumor}_temp.octopus.norm.vcf - mv ${tumor}_temp.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz - bcftools index -t ${tumor}.octopus.norm.vcf.gz -f - """ - - stub: - """ - touch ${tumor}.octopus.norm.vcf.gz ${tumor}.octopus.norm.vcf.gz.tbi - """ -} - -process mutect2filter { - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), - path(pileups), path(normal_pileups), path(tumorcontamination), path(normalcontamination) - - output: - tuple val("${tumor}_vs_${normal}"), - path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), - path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), - path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") - - script: - mut2in = mutvcfs.join(" -I ") - - """ - gatk SortVcf -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz --CREATE_INDEX - gatk FilterMutectCalls \ - -R $GENOMEREF \ - -V ${tumor}_vs_${normal}.concat.vcf.gz \ - --ob-priors ${obs} \ - --contamination-table ${tumorcontamination} \ - --stats ${stats} \ - -O 
${tumor}_vs_${normal}.mut2.marked.vcf.gz - gatk SelectVariants \ - -R $GENOMEREF \ - --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ - --exclude-filtered \ - --output ${tumor}_vs_${normal}.mut2.final.vcf.gz - - bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ - bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz - bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz - """ - - stub: - """ - touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz ${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi - touch ${tumor}_vs_${normal}.mut2.norm.vcf.gz ${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi - touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv - """ - - -} - - -process strelka_tn { - container "${params.containers.logan}" - label 'process_high' - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") - - script: - - """ - mkdir -p wd - - bgzip ${bed} - tabix ${bed}.gz - - configureStrelkaSomaticWorkflow.py \ - --ref=$GENOMEREF \ - --tumor=${tumor} \ - --normal=${normal} \ - --runDir=wd \ - --callRegions ${bed}.gz - ./wd/runWorkflow.py -m local -j $task.cpus - mv wd/results/variants/somatic.snvs.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz - mv wd/results/variants/somatic.indels.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz - - printf %s 
"NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname - - bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ - | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz - bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ - | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz - - bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz - bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz - - """ - - stub: - - """ - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz.tbi - - """ - -} - - -process vardict_tn { - container "${params.containers.logan}" - label 'process_somaticcaller_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz") - //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and - //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” - script: - - """ - bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} - - VarDict -G $GENOMEREF \ - -f 0.01 \ - --nosv \ - -b "${tumor}|${normal}" --fisher \ - -t -Q 20 -c 1 -S 2 -E 3 \ - --th $task.cpus temp_${bed} \ - | var2vcf_paired.pl \ - -N "${tumor}|${normal}" \ - -Q 20 \ - -d 10 \ - -v 6 \ - -S \ - -f 0.05 > ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf - - printf 
"${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname - - bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf \ - | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz - - - """ - - stub: - - """ - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz - - """ - - -} - - -process varscan_tn { - container "${params.containers.logan}" - label 'process_somaticcaller' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed), - path(tumorpileup), path(normalpileup), - path(tumor_con_table), path(normal_con_table) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.varscan.vcf.gz") - - shell: - ''' - tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) - normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l) - dual_pileup="samtools mpileup -d 10000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{normal} !{tumor}" - varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity" - varscan_cmd="varscan somatic <($dual_pileup) !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" - eval "$varscan_cmd" - - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.indel \ - | sed '/^$/d' | bcftools view - -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.indel_temp.vcf.gz - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.snp \ - | sed '/^$/d' | bcftools view - -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.snp_temp.vcf.gz - - gatk SortVcf -I 
!{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ - -I !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ - -R !{GENOMEREF} -SD !{GENOMEDICT} \ - -O !{tumorname}_vs_!{normalname}_!{bed.simpleName}_temp.varscan.vcf - - printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname - - bcftools reheader -s sampname !{tumorname}_vs_!{normalname}_!{bed.simpleName}_temp.varscan.vcf \ - | bcftools view -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.gz - - ''' - - stub: - """ - touch ${tumorname}_vs_${normalname}_${bed.simpleName}.varscan.vcf.gz - """ - -} - - -process octopus_tn { - container "${params.containers.octopus}" - label 'process_somaticcaller_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - output: - tuple val("${tumorname}_vs_${normalname}"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") - - script: - """ - octopus -R $GENOMEREF -I ${normal} ${tumor} --normal-sample ${normalname} \ - -C cancer \ - --annotations AF AC AD DP SB -t ${bed} \ - --threads $task.cpus \ - $GERMLINE_FOREST \ - $SOMATIC_FOREST \ - -B 92Gb \ - -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz - """ - - stub: - """ - touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" - """ - -} - - -process sage_tn { - container "${params.containers.logan}" - label 'process_high' - - input: - tuple val(tumorname), path(tumorbam), path(tumorbai), - val(normalname), path(normalbam), path(normalbai) - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}.sage.vcf.gz"), - path("${tumorname}_vs_${normalname}.sage.vcf.gz.tbi") - -script: - """ - java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar com.hartwig.hmftools.sage.SageApplication \ - -tumor ${tumorname} -tumor_bam ${tumorbam} \ - -reference ${normalname} -reference_bam ${normalbam} \ - -threads $task.cpus \ 
- -ref_genome_version $GENOMEVER \ - -ref_genome $GENOMEREF \ - -hotspots $HOTSPOTS \ - $PANELBED $HCBED $ENSEMBLCACHE \ - -output_vcf ${tumorname}_vs_${normalname}.sage.vcf.gz - """ - - stub: - """ - touch "${tumorname}_vs_${normalname}.sage.vcf.gz" "${tumorname}_vs_${normalname}.sage.vcf.gz.tbi" - """ -} - - -process lofreq_tn { - container "${params.containers.lofreq}" - label 'process_somaticcaller' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), path(bed) - - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi") - - script: - - """ - lofreq somatic -f $GENOMEREF -n ${normal} -t ${tumor} \ - -d $DBSNP \ - --threads $task.cpus \ - -l ${bed} \ - --call-indels \ - -o ${tumorname}_vs_${normalname}_${bed.simpleName}_ - - bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ - ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ - ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz - - $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \ - -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz - - bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead - - sed 's/^##FORMAT=/##FORMAT=/' temphead > temphead1 - bcftools reheader 
${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\ - bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz - - bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz - - """ - - stub: - - """ - touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz" - touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" - touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" - touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" - touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi" - - """ -} - - - -process muse_tn { - container "${params.containers.logan}" - label 'process_somaticcaller' - input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai) - - - output: - tuple val(tumorname), val(normalname), - path("${tumorname}_vs_${normalname}.vcf.gz") - - script: - - """ - MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal - MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ - -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G - - bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz - - printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname - - bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \ - | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz - - """ - - stub: - - """ - touch "${tumorname}_vs_${normalname}.vcf.gz" - """ - -} - - -process combineVariants { - container "${params.containers.logan}" - label 'process_highmem' - - input: - tuple val(sample), path(inputvcf), val(vc) - - output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), - 
path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), - path("${vc}/${sample}.${vc}.norm.vcf.gz"), - path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") - - script: - vcfin = inputvcf.join(" -I ") - //Create Tumor Normal here - samplist=sample.split('_vs_') - if(samplist.size()>1){ - samporder = samplist.join(",") - }else{ - samporder = sample - } - - """ - mkdir ${vc} - gatk --java-options "-Xmx48g" SortVcf \ - -O ${sample}.${vc}.markedtemp.vcf.gz \ - -SD $GENOMEDICT \ - -I $vcfin - - bcftools view ${sample}.${vc}.markedtemp.vcf.gz -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools index -t ${sample}.${vc}.marked.vcf.gz - - bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' > ${sample}.${vc}.temp.vcf - - bcftools view ${sample}.${vc}.temp.vcf -f PASS -s $samporder -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz - bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t - - mv ${sample}.${vc}.marked.vcf.gz ${vc} - mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc} - - """ - - stub: - - """ - mkdir ${vc} - touch ${vc}/${sample}.${vc}.marked.vcf.gz - touch ${vc}/${sample}.${vc}.norm.vcf.gz - touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi - touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi - """ - -} - - - - -process combineVariants_alternative { - container "${params.containers.logan}" - label 'process_highmem' - - input: - tuple val(sample), path(vcfs), path(vcfsindex), val(vc) - - output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), - path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), - path("${vc}/${sample}.${vc}.norm.vcf.gz"), - path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") - - script: - vcfin = vcfs.join(" ") - samplist=sample.split('_vs_') - if (vc.contains("lofreq")) { - samporder = samplist[0] - }else if(samplist.size()>1){ - samporder = samplist.join(",") - }else{ - samporder = sample - } - - if (vc.contains("octopus")) 
{ - """ - mkdir ${vc} - bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz - bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf - bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -i "INFO/SOMATIC==1" -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' > ${sample}.${vc}.temp.vcf - - bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz - mv ${sample}.${vc}.marked.vcf.gz ${vc} - - bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t - bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t - """ - - }else{ - """ - mkdir ${vc} - bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz - bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf - bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' > ${sample}.${vc}.temp.vcf - - bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz - mv ${sample}.${vc}.marked.vcf.gz ${vc} - - bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t - bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t - """ - } - - stub: - - """ - mkdir ${vc} - touch ${vc}/${sample}.${vc}.marked.vcf.gz - touch ${vc}/${sample}.${vc}.norm.vcf.gz - touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi - touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi - - """ - -} - - -process bcftools_index_octopus { - container "${params.containers.logan}" - label 'process_low' - - input: - tuple val(tumor), - path(vcf) - - output: - tuple val(tumor), - path(vcf), - path("${vcf}.tbi") 
- - script: - """ - bcftools index -t ${vcf} - """ - - stub: - """ - touch ${vcf} - touch ${vcf}.tbi - """ - -} - - -process combineVariants_strelka { - //Concat all somatic snvs/indels across all files, strelka separates snv/indels - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(sample), - path(strelkasnvs), path(snvindex), - path(strelkaindels), path(indelindex) - - output: - tuple val(sample), - path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"), - path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi") - - - script: - - vcfin = strelkasnvs.join(" ") - indelsin = strelkaindels.join(" ") - samplist=sample.split('_vs_') - if(samplist.size()>1){ - samporder = samplist.join(",") - }else{ - samporder = sample - } - """ - bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a - bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz - - bcftools sort ${sample}.temp1.strelka.vcf.gz |bcftools view - -s $samporder -Oz -o ${sample}.strelka.vcf.gz - - bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz - - bcftools index ${sample}.strelka.vcf.gz -t - bcftools index ${sample}.filtered.strelka.vcf.gz -t - """ - - stub: - - """ - touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi - touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi - - """ - -} - - -process convert_strelka { - //Add GT/AD column to Strelka Variants - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumor), val(normal), val(vc), - path(strelkavcf), path(strelkaindex) - - output: - tuple val(tumor), val(normal), val("strelka"), - 
path("${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz"), - path("${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi") - - - script: - - """ - python /data/nousomedr/wgs/LOGAN/bin/strelka_convert.py ${strelkavcf} ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz - bcftools index -t ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz - """ - - stub: - - """ - touch ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi - """ - -} - -process somaticcombine { - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorsample), val(normal), - val(caller), - path(vcfs), path(vcfindex) - - output: - tuple val(tumorsample), val(normal), - path("${tumorsample}_vs_${normal}_combined.vcf.gz"), - path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") - - script: - vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - - callerin=caller.join(",") - """ - /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ - -R $GENOMEREF \ - --genotypemergeoption PRIORITIZE \ - --rod_priority_list $callerin \ - --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ - -o ${tumorsample}_vs_${normal}_combined.vcf.gz \ - $vcfin2 - - """ - - stub: - vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - - callerin=caller.join(",") - - """ - touch ${tumorsample}_vs_${normal}_combined.vcf.gz - touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi - """ - -} - - -process ffpe_1 { - - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorsample), val(normal), - path("${tumorsample}_vs_${normal}_combined.vcf.gz"), - path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi"), - bam, bamindex - tuple val(tumorsample), val(normal), - val(caller), - path(vcfs), path(vcfindex) - - output: - tuple val(tumorsample), val(normal), - 
path("${tumorsample}_vs_${normal}_combined.vcf.gz"), - path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") - - script: - - - stub: - """ - touch "${tumorsample}_vs_${normal}_combined_ffpolish.vcf.gz" - """ -} - - - -/*DISCVR -process somaticcombine { - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorsample), val(normal), - val(callers), - path(vcfs), path(vcfindex) - - output: - tuple val(tumorsample), val(normal), - path("${tumorsample}_vs_${normal}_combined.vcf.gz"), - path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") - - script: - vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - - """ - java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ - -R $GENOMEREF \ - --genotypeMergeOption PRIORITIZE \ - --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ - --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ - -O ${tumorsample}_vs_${normal}_combined.vcf.gz \ - $vcfin2 - """ - - stub: - vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - - """ - touch ${tumorsample}_vs_${normal}_combined.vcf.gz - touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi - """ - -} -*/ - -process annotvep_tn { - label 'process_medium' - container "${params.containers.vcf2maf}" - - input: - tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf), path(vcfindex) - - output: - path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") - - shell: - - ''' - VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf})) - TID_IDX=0 - NID_IDX="" - VCF_NID="" - NORM_VCF_ID_ARG="" - NSAMPLES=${#VCF_SAMPLE_IDS[@]} - if [ $NSAMPLES -gt 1 ]; then - # Assign tumor, normal IDs - # Look through column names and - # see if they match provided IDs - for (( i = 0; i < $NSAMPLES; i++ )); do - echo "${VCF_SAMPLE_IDS[$i]}" - if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then - TID_IDX=$i - fi - - if [ "${VCF_SAMPLE_IDS[$i]}" == 
!{normalsample} ]; then - NID_IDX=$i - fi - done - - if [ ! -z $NID_IDX ]; then - VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]} - NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID" - fi - fi - VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} - - zcat !{tumorvcf} > !{tumorvcf.baseName} - - mkdir -p paired/!{vc} - - vcf2maf.pl \ - --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ - --output-maf paired/!{vc}/!{tumorsample}_vs_!{normalsample}.maf \ - --tumor-id !{tumorsample} \ - --normal-id !{normalsample} \ - --vep-path /opt/vep/src/ensembl-vep \ - --vep-data !{VEPCACHEDIR} \ - --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ - --retain-info "set" \ - --vep-overwrite - - ''' - - stub: - """ - mkdir -p paired/${vc} - touch paired/${vc}/${tumorsample}_vs_${normalsample}.maf - """ -} - - -process combinemafs_tn { - container "${params.containers.logan}" - label 'process_low' - - input: - path(allmafs) - - output: - path("final_tn.maf") - - shell: - mafin= allmafs.join(" ") - - """ - echo "Combining MAFs..." 
- head -2 ${allmafs[0]} > final_tn.maf - awk 'FNR>2 {{print}}' ${mafin} >> final_tn.maf - """ - - stub: - """ - touch final_tn.maf - """ -} diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf deleted file mode 100644 index 7c30eb8..0000000 --- a/modules/local/variant_calling_tonly.nf +++ /dev/null @@ -1,586 +0,0 @@ -GENOMEREF=file(params.genomes[params.genome].genome) -GENOMEFAI=file(params.genomes[params.genome].genomefai) -GENOMEDICT=file(params.genomes[params.genome].genomedict) -GERMLINE_RESOURCE=file(params.genomes[params.genome].germline_resource) -GNOMADGERMLINE=params.genomes[params.genome].gnomad -DBSNP=file(params.genomes[params.genome].dbsnp) -PON=file(params.genomes[params.genome].pon) -VEPCACHEDIR=file(params.genomes[params.genome].vepcache) -VEPSPECIES=params.genomes[params.genome].vepspecies -VEPBUILD=params.genomes[params.genome].vepbuild -//Octopus -SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest -GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest -//HMFTOOLS -HOTSPOTS=params.genomes[params.genome].HOTSPOTS -PANELBED=params.genomes[params.genome].PANELBED -HCBED=params.genomes[params.genome].HCBED -ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE -GENOMEVER=params.genomes[params.genome].GENOMEVER - - -process pileup_paired_tonly { - container "${params.containers.logan}" - - label 'process_highmem' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - - output: - tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.tpileup.table") - - script: - - """ - gatk --java-options -Xmx48g GetPileupSummaries \ - -I ${tumor} \ - -V $GERMLINE_RESOURCE \ - -L ${bed} \ - -O ${tumorname}_${bed.simpleName}.tpileup.table - - """ - - stub: - """ - touch ${tumorname}_${bed.simpleName}.tpileup.table - - """ - -} - - -process contamination_tumoronly { - container "${params.containers.logan}" - - label 'process_highmem' - - input: - tuple val(tumorname), - 
path(tumor_pileups) - - output: - tuple val(tumorname), - path("${tumorname}_allpileups.table"), - path("${tumorname}.contamination.table") - - script: - //Gather all the Pileup summaries first for Tumor and Also for NORMAL and then run! - alltumor = tumor_pileups.join(" -I ") - - - """ - gatk GatherPileupSummaries \ - --sequence-dictionary $GENOMEDICT \ - -I ${alltumor} -O ${tumorname}_allpileups.table - - gatk CalculateContamination \ - -I ${tumorname}_allpileups.table \ - -O ${tumorname}.contamination.table - - """ - - stub: - """ - touch ${tumorname}_allpileups.table - touch ${tumorname}.contamination.table - """ - -} - - - -process learnreadorientationmodel_tonly { - container "${params.containers.logan}" - - label 'process_highmem' - - input: - tuple val(sample), path(f1r2) - - output: - tuple val(sample), path("${sample}.read-orientation-model.tar.gz") - - script: - f1r2in = f1r2.join(" --input ") - - """ - gatk LearnReadOrientationModel \ - --output ${sample}.read-orientation-model.tar.gz \ - --input ${f1r2in} - """ - - stub: - """ - touch ${sample}.read-orientation-model.tar.gz - """ -} - - - - - -process mergemut2stats_tonly { - container "${params.containers.logan}" - label 'process_low' - - input: - tuple val(sample), path(stats) - - output: - tuple val(sample), path("${sample}.final.stats") - - script: - statsin = stats.join(" --stats ") - - """ - gatk MergeMutectStats \ - --stats ${statsin} \ - -O ${sample}.final.stats - """ - - stub: - """ - touch ${sample}.final.stats - """ - -} - - - -process mutect2_t_tonly { - container "${params.containers.logan}" - label 'process_somaticcaller' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - - output: - tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz"), - path("${tumorname}_${bed.simpleName}.f1r2.tar.gz"), - path("${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz.stats") - - script: - - """ - gatk Mutect2 \ - --reference $GENOMEREF \ - --intervals ${bed} 
\ - --input ${tumor} \ - --tumor-sample ${tumorname} \ - $GNOMADGERMLINE \ - --panel-of-normals $PON \ - --output ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz \ - --f1r2-tar-gz ${tumorname}_${bed.simpleName}.f1r2.tar.gz \ - --independent-mates - """ - - stub: - """ - touch ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz - touch ${tumorname}_${bed.simpleName}.f1r2.tar.gz - touch ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz.stats - """ - - -} - - - -process mutect2filter_tonly { - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(tumorcontamination) - output: - tuple val(sample), - path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), - path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), - path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") - - script: - //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz - mut2in = mutvcfs.join(" -I ") - - """ - gatk SortVcf -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz --CREATE_INDEX - gatk FilterMutectCalls \ - -R $GENOMEREF \ - -V ${sample}.tonly.concat.vcf.gz \ - --ob-priors ${obs} \ - --contamination-table ${tumorcontamination} \ - --stats ${stats} \ - -O ${sample}.tonly.mut2.marked.vcf.gz - - gatk SelectVariants \ - -R $GENOMEREF \ - --variant ${sample}.tonly.mut2.marked.vcf.gz \ - --exclude-filtered \ - --output ${sample}.tonly.mut2.final.vcf.gz - - bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ - bcftools norm --threads ${task.cpus} --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' |\ - bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz - bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz - - """ - - stub: - """ - touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi - touch 
${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi - touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv - """ -} - - -process varscan_tonly { - container "${params.containers.logan}" - - label 'process_somaticcaller' - input: - tuple val(tumorname), path(tumor), path(tumorbai), - path(bed), - path(tumorpileup), path(tumor_con_table) - - output: - tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.tonly.varscan.vcf.gz") - - shell: - - ''' - varscan_opts="--strand-filter 0 --min-var-freq 0.01 --output-vcf 1 --variants 1" - pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{tumor}" - varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" - - eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp" - - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ - | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf - - printf "Sample1\t!{tumorname}\n" > sampname - - bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \ - | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz - - ''' - - stub: - """ - touch ${tumorname}_${bed.simpleName}.tonly.varscan.vcf.gz - """ - -} - - -process vardict_tonly { - container "${params.containers.logan}" - label 'process_somaticcaller_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - - output: - tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.tonly.vardict.vcf.gz") - - script: - - """ - bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} - - VarDict -G $GENOMEREF \ - -f 0.01 \ - -x 500 \ - --nosv \ - -b ${tumor} --fisher \ - -t -Q 20 -c 1 -S 2 -E 3 --th ${task.cpus} \ - temp_${bed} | var2vcf_valid.pl \ - -N ${tumor} \ - -Q 20 \ - -d 10 \ - -v 6 \ - -S \ - -E \ - -f 0.05 > 
${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf - - printf "${tumor.Name}\t${tumorname}\n" > sampname - - bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \ - | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz - - """ - - stub: - - """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz - - """ - -} - - -process octopus_tonly { - container "${params.containers.octopus}" - label 'process_somaticcaller_high' - - input: - tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - - output: - tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") - - script: - """ - octopus -R $GENOMEREF -C cancer -I ${tumor} \ - --annotations AF AC AD DP SB \ - -B 92Gb \ - -t ${bed} \ - --threads ${task.cpus}\ - $SOMATIC_FOREST \ - -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz - """ - - stub: - """ - touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz - """ -} - - - -process octopus_convertvcf_tonly { - container "${params.containers.logan}" - label 'process_low' - - input: - tuple val(tumor), val(oct), path(vcf), path(vcfindex) - - output: - tuple val(tumor), path("${tumor}.octopus_tonly.norm.vcf.gz"), - path("${tumor}.octopus_tonly.norm.vcf.gz.tbi") - - - script: - """ - zcat ${vcf} | sed 's/^##fileformat=VCFv4.3/##fileformat=VCFv4.2/' > ${tumor}_temp.octopus_tonly.norm.vcf - bgzip ${tumor}_temp.octopus_tonly.norm.vcf - mv ${tumor}_temp.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz - bcftools index -t ${tumor}.octopus_tonly.norm.vcf.gz -f - """ - - stub: - """ - touch ${tumor}.octopus_tonly.norm.vcf.gz ${tumor}.octopus_tonly.norm.vcf.gz.tbi - """ -} - - -process sage_tonly { - container "${params.containers.logan}" - label 'process_somaticcaller' - - input: - tuple val(tumorname), path(tumorbam), path(tumorbai) - - output: - tuple val(tumorname), - path("${tumorname}.tonly.sage.vcf.gz"), - path("${tumorname}.tonly.sage.vcf.gz.tbi") 
- - script: - """ - java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar com.hartwig.hmftools.sage.SageApplication \ - -tumor ${tumorname} -tumor_bam ${tumorbam} \ - -threads $task.cpus \ - -ref_genome_version $GENOMEVER \ - -ref_genome $GENOMEREF \ - -hotspots $HOTSPOTS \ - $PANELBED $HCBED $ENSEMBLCACHE \ - -output_vcf ${tumorname}.tonly.sage.vcf.gz - """ - - stub: - """ - touch "${tumorname}.tonly.sage.vcf.gz" "${tumorname}.tonly.sage.vcf.gz.tbi" - """ - -} - - -process somaticcombine_tonly { - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorsample), - val(caller), - path(vcfs), path(vcfindex) - - output: - tuple val(tumorsample), - path("${tumorsample}_combined_tonly.vcf.gz"), - path("${tumorsample}_combined_tonly.vcf.gz.tbi") - - script: - vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - - callerin=caller.join(",")//.replaceAll("_tonly","") - - """ - /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ - -R $GENOMEREF \ - --genotypemergeoption PRIORITIZE \ - --rod_priority_list $callerin \ - --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ - -o ${tumorsample}_combined_tonly.vcf.gz \ - $vcfin2 - """ - - stub: - - vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - callerin=caller.join(",")//.replaceAll("_tonly","") - - """ - touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi - """ - -} - - -/*DISCVRSeq -process somaticcombine_tonly { - container "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(tumorsample), - val(callers), - path(vcfs), path(vcfindex) - - output: - tuple val(tumorsample), - path("${tumorsample}_combined_tonly.vcf.gz"), - path("${tumorsample}_combined_tonly.vcf.gz.tbi") - - script: - vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - - """ - java -jar 
\$DISCVRSeq_JAR MergeVcfsAndGenotypes \ - -R $GENOMEREF \ - --genotypeMergeOption PRIORITIZE \ - --priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \ - --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ - -O ${tumorsample}_combined_tonly.vcf.gz \ - $vcfin2 - """ - - stub: - """ - touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi - """ - -} -*/ -process annotvep_tonly { - container "${params.containers.vcf2maf}" - label 'process_medium' - - input: - tuple val(tumorsample), - val(vc), path(tumorvcf), - path(vcfindex) - - - output: - path("tumor_only/${vc}/${tumorsample}.tonly.maf") - - shell: - - ''' - VCF_SAMPLE_IDS=($(bcftools query -l !{tumorvcf})) - TID_IDX=0 - NID_IDX="" - VCF_NID="" - NORM_VCF_ID_ARG="" - NSAMPLES=${#VCF_SAMPLE_IDS[@]} - if [ $NSAMPLES -gt 1 ]; then - # Assign tumor, normal IDs - # Look through column names and - # see if they match provided IDs - for (( i = 0; i < $NSAMPLES; i++ )); do - echo "${VCF_SAMPLE_IDS[$i]}" - if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then - TID_IDX=$i - fi - - done - - if [ ! 
-z $NID_IDX ]; then - VCF_NID=${VCF_SAMPLE_IDS[$NID_IDX]} - NORM_VCF_ID_ARG="--vcf-normal-id $VCF_NID" - fi - fi - VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} - - zcat !{tumorvcf} > !{tumorvcf.baseName} - - mkdir -p tumor_only/!{vc} - - vcf2maf.pl \ - --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ - --output-maf tumor_only/!{vc}/!{tumorsample}.tonly.maf \ - --tumor-id !{tumorsample} \ - --vep-path /opt/vep/src/ensembl-vep \ - --vep-data !{VEPCACHEDIR} \ - --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ - --retain-info "set" \ - --vep-overwrite - - - ''' - - stub: - """ - mkdir -p tumor_only/${vc} - touch tumor_only/${vc}/${tumorsample}.tonly.maf - """ -} - -process combinemafs_tonly { - container "${params.containers.logan}" - label 'process_low' - - input: - path(allmafs) - - output: - path("final_tonly.maf") - - shell: - mafin= allmafs.join(" ") - - """ - echo "Combining MAFs..." - head -2 ${allmafs[0]} > final_tonly.maf - awk 'FNR>2 {{print}}' ${mafin} >> final_tonly.maf - """ - - stub: - """ - touch final_tonly.maf - """ -} diff --git a/modules/local/varscan.nf b/modules/local/varscan.nf new file mode 100644 index 0000000..81a3e2c --- /dev/null +++ b/modules/local/varscan.nf @@ -0,0 +1,95 @@ +//References +GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEDICT=file(params.genomes[params.genome].genomedict) + + +process varscan_tn { + container "${params.containers.logan}" + label 'process_somaticcaller' + errorStrategy 'ignore' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed), + path(tumorpileup), path(normalpileup), + path(tumor_con_table), path(normal_con_table) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.varscan.vcf.gz") + + shell: + ''' + tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) + normal_purity=$( echo "1-$(printf '%.6f' 
$(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l) + dual_pileup="samtools mpileup -d 10000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{normal} !{tumor}" + varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity" + varscan_cmd="varscan somatic <($dual_pileup) !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" + eval "$varscan_cmd" + + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.indel \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.indel_temp.vcf.gz + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.snp \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.snp_temp.vcf.gz + + gatk SortVcf -I !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ + -I !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ + -R !{GENOMEREF} -SD !{GENOMEDICT} \ + -O !{tumorname}_vs_!{normalname}_!{bed.simpleName}_temp.varscan.vcf + + printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname + + bcftools reheader -s sampname !{tumorname}_vs_!{normalname}_!{bed.simpleName}_temp.varscan.vcf \ + | bcftools view -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.gz + + ''' + + stub: + """ + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.varscan.vcf.gz + """ + +} + + + +process varscan_tonly { + container "${params.containers.logan}" + label 'process_somaticcaller' + errorStrategy 'ignore' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + path(bed), + path(tumorpileup), path(tumor_con_table) + + output: + tuple val(tumorname), + path("${tumorname}_${bed.simpleName}.tonly.varscan.vcf.gz") + + 
shell: + + ''' + varscan_opts="--strand-filter 0 --min-var-freq 0.01 --output-vcf 1 --variants 1" + pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{tumor}" + varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" + + eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp" + + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf + + printf "Sample1\t!{tumorname}\n" > sampname + + bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \ + | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz + + ''' + + stub: + """ + touch ${tumorname}_${bed.simpleName}.tonly.varscan.vcf.gz + """ + +} + diff --git a/modules/local/vcftools.nf b/modules/local/vcftools.nf new file mode 100644 index 0000000..0b3ae87 --- /dev/null +++ b/modules/local/vcftools.nf @@ -0,0 +1,33 @@ + +process vcftools { + /* + Quality-control step to calculates a measure of heterozygosity on + a per-individual basis. The inbreeding coefficient, F, is estimated + for each individual using a method of moments. 
Please see VCFtools + documentation for more information: + https://vcftools.github.io/man_latest.html + @Input: + Multi-sample gVCF file (indirect-gather-due-to-aggregation) + @Output: + Text file containing a measure of heterozygosity + */ + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple path(germlinevcf),path(germlinetbi) + + output: + path("variants_raw_variants.het") + + + script: + """ + vcftools --gzvcf ${germlinevcf} --het --out variants_raw_variants + """ + + stub: + """ + touch variants_raw_variants.het + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 9a8269a..d3e17d6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,33 +17,44 @@ includeConfig 'conf/modules.config' params { fastq_screen_conf = "${projectDir}/conf/fastq_screen.conf" - get_flowcell_lanes = "${projectDir}/bin/scripts/flowcell_lane.py" + get_flowcell_lanes = "${projectDir}/bin/flowcell_lane.py" splitbed= "${projectDir}/bin/split_Bed_into_equal_regions.py" - script_genderPrediction = "${projectDir}/bin/RScripts/predictGender.R" + script_genderPrediction = "${projectDir}/bin/predictGender.R" script_combineSamples = "${projectDir}/bin/combineAllSampleCompareResults.R" script_ancestry = "${projectDir}/bin/sampleCompareAncestoryPlots.R" script_sequenza = "${projectDir}/bin/run_sequenza.R" script_freec = "${projectDir}/bin/make_freec_genome.pl" script_freecpaired = "${projectDir}/bin/make_freec_genome_paired.pl" + script_freecpaired_exome = "${projectDir}/bin/make_freec_exome_paired.pl" + script_reformatbed = "${projectDir}/bin/reformat_bed.py" + script_ascat = "${projectDir}/bin/ascat.R" freec_significance = "${projectDir}/bin/assess_significance.R" freec_plot = "${projectDir}/bin/makeGraph.R" lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" - split_regions = "24" //Number of regions to split by + strelka_convert = "${projectDir}/bin/strelka_convert.py" + split_regions = "110" //Number of regions to split 
by vep_cache = "/fdb/VEP/102/cache" //SUB WORKFLOWS to SPLIT - gl=null - vc=null - sv=null - cnv=null - qc=null + qc = true //QC Default true + gl = null + germline = null + vc = null + snv = null + sv = null + structural = null + cnv = null + copynumber = null + bam=null align=null indelrealign=null no_tonly=null - + ffpe=null + exome=null + //Set all Inputs to null sample_sheet=null fastq_file_input=null @@ -53,11 +64,11 @@ params { fastq_input=null bam_input=null BAMINPUT=null - - callers = "mutect2,octopus,strelka,lofreq,muse,sage,vardict,varscan" - tonlycallers = "mutect2,octopus,vardict,varscan" - cnvcallers = "purple,sequenza,freec" - svcallers = "manta,svaba" + + callers = "mutect2,deepsomatic,octopus,strelka,lofreq,muse,vardict,varscan" + tonlycallers = "mutect2,deepsomatic,octopus,vardict,varscan" + cnvcallers = "purple,sequenza,freec,ascat,cnvkit" + svcallers = "manta,gridss,svaba" intervals = null publish_dir_mode = 'symlink' outdir = 'results' @@ -81,7 +92,7 @@ profiles { autoMounts = true cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/vf/users,/gpfs,/fdb' } biowulf { includeConfig 'conf/biowulf.config' diff --git a/pyproject.toml b/pyproject.toml index 8f9c593..3ba17e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ keywords = ["bioinformatics", "nextflow"] authors = [ {name = 'Darryl Nousome', email = 'darryl.nousome@nih.gov'}, {name = "Kelly Sovacool", email = "kelly.sovacool@nih.gov"}, + {name = 'Samarth Mathur', email = 'samarth.mathur@nih.gov'}, {name = "Vishal Koparde", email = "vishal.koparde@nih.gov"}, ] maintainers = [ @@ -32,11 +33,12 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Topic :: 
Scientific/Engineering :: Bio-Informatics", ] -requires-python = ">=3.8" +requires-python = ">=3.10" dependencies = [ - "pyyaml >= 6.0", + "ccbr_tools@git+https://github.com/CCBR/Tools", + "cffconvert >= 2.0.0", "Click >= 8.1.3", - "cffconvert" + "pyyaml >= 6.0" ] [project.optional-dependencies] diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 783d0fc..bd939bc 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -1,62 +1,128 @@ -include {fc_lane; fastq_screen;kraken;qualimap_bamqc;fastqc; - samtools_flagstats;vcftools;collectvariantcallmetrics; - bcftools_stats;gatk_varianteval; - snpeff; - somalier_extract;somalier_analysis_human;somalier_analysis_mouse; - multiqc} from '../../modules/local/qc.nf' +include {splitinterval; matchbed; matchbed as matchbed_ascat; matchbed as matchbed_cnvkit} from '../../modules/local/splitbed.nf' + +//QC +include {fc_lane} from '../../modules/local/fc_lane.nf' +include {fastq_screen} from '../../modules/local/fastq_screen.nf' +include {kraken} from '../../modules/local/kraken.nf' +include {qualimap_bamqc} from '../../modules/local/qualimap.nf' +include {fastqc} from '../../modules/local/fastqc.nf' +include {samtools_flagstats} from '../../modules/local/samtools_flagstats.nf' +include {vcftools} from '../../modules/local/vcftools.nf' +include {bcftools_stats} from '../../modules/local/bcftools_stats.nf' +include {gatk_varianteval; collectvariantcallmetrics} from '../../modules/local/gatk_varianteval.nf' +include {snpeff} from '../../modules/local/snpeff.nf' +include {somalier_extract;somalier_analysis_human;somalier_analysis_mouse} from '../../modules/local/somalier.nf' +include {mosdepth} from '../../modules/local/mosdepth.nf' +include {multiqc} from '../../modules/local/multiqc.nf' include {fastp; bwamem2; indelrealign; bqsr_ir; bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf' include {deepvariant_step1; deepvariant_step2; 
deepvariant_step3; - deepvariant_combined;glnexus} from '../../modules/local/germline.nf' - -include {pileup_paired_t; pileup_paired_n; - mutect2; mutect2filter; - contamination_paired; learnreadorientationmodel;mergemut2stats; - strelka_tn; - varscan_tn; vardict_tn; lofreq_tn; muse_tn; sage_tn; - octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; - combineVariants_strelka; convert_strelka; - combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; + deepvariant_combined; glnexus; + bcfconcat as bcfconcat_vcf; bcfconcat as bcfconcat_gvcf} from '../../modules/local/deepvariant.nf' + +include {pileup_paired as pileup_paired_t; pileup_paired as pileup_paired_n; + learnreadorientationmodel; + mutect2; mutect2filter; contamination_paired; mergemut2stats; + mutect2_t_tonly; mutect2filter_tonly; + contamination_tumoronly; + learnreadorientationmodel_tonly; + mergemut2stats_tonly} from '../../modules/local/mutect2.nf' + +include {sage_tn; sage_tonly} from '../../modules/local/sage.nf' +include {vardict_tn; vardict_tonly} from '../../modules/local/vardict.nf' +include {varscan_tn; varscan_tonly} from '../../modules/local/varscan.nf' +include {octopus_tn; bcftools_index_octopus; + bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; + octopus_tonly; octopus_convertvcf_tonly} from '../../modules/local/octopus.nf' +include {lofreq_tn} from '../../modules/local/lofreq.nf' +include {strelka_tn; combineVariants_strelka; convert_strelka} from '../../modules/local/strelka.nf' +include {muse_tn} from '../../modules/local/muse.nf' +include {deepsomatic_tn_step1; deepsomatic_step2; deepsomatic_step3; + deepsomatic_tonly_step1; deepsomatic_tonly_step2; + deepsomatic_step3 as deepsomatic_tonly_step3 } from "../../modules/local/deepsomatic.nf" + +include {combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; combineVariants as 
combineVariants_varscan; combineVariants as combineVariants_varscan_tonly; + combineVariants_alternative as combineVariants_deepsomatic; combineVariants_alternative as combineVariants_deepsomatic_tonly; combineVariants as combineVariants_sage; combineVariants as combineVariants_sage_tonly; combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse; - combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly; - annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; + combineVariants_alternative as combineVariants_octopus; + combineVariants_alternative as combineVariants_octopus_tonly; + combinemafs_tn; somaticcombine; somaticcombine as somaticcombine_ffpe; + combinemafs_tonly;somaticcombine_tonly; somaticcombine_tonly as somaticcombine_tonly_ffpe} from '../../modules/local/combinefilter.nf' + +include {sobdetect_pass1 as sobdetect_pass1_mutect2; sobdetect_pass2 as sobdetect_pass2_mutect2; + sobdetect_metrics as sobdetect_metrics_mutect2; sobdetect_cohort_params as sobdetect_cohort_params_mutect2; + sobdetect_pass1 as sobdetect_pass1_octopus; sobdetect_pass2 as sobdetect_pass2_octopus; + sobdetect_metrics as sobdetect_metrics_octopus; sobdetect_cohort_params as sobdetect_cohort_params_octopus; + sobdetect_pass1 as sobdetect_pass1_strelka; sobdetect_pass2 as sobdetect_pass2_strelka; + sobdetect_metrics as sobdetect_metrics_strelka; sobdetect_cohort_params as sobdetect_cohort_params_strelka; + sobdetect_pass1 as sobdetect_pass1_lofreq; sobdetect_pass2 as sobdetect_pass2_lofreq; + sobdetect_metrics as sobdetect_metrics_lofreq; sobdetect_cohort_params as sobdetect_cohort_params_lofreq; + sobdetect_pass1 as sobdetect_pass1_muse; sobdetect_pass2 as sobdetect_pass2_muse; + sobdetect_metrics as sobdetect_metrics_muse; sobdetect_cohort_params as sobdetect_cohort_params_muse; + sobdetect_pass1 as sobdetect_pass1_vardict; sobdetect_pass2 as sobdetect_pass2_vardict; + 
sobdetect_metrics as sobdetect_metrics_vardict; sobdetect_cohort_params as sobdetect_cohort_params_vardict; + sobdetect_pass1 as sobdetect_pass1_varscan; sobdetect_pass2 as sobdetect_pass2_varscan; + sobdetect_metrics as sobdetect_metrics_varscan; sobdetect_cohort_params as sobdetect_cohort_params_varscan; + //Tumor Only + sobdetect_pass1 as sobdetect_pass1_mutect2_tonly; sobdetect_pass2 as sobdetect_pass2_mutect2_tonly; + sobdetect_metrics as sobdetect_metrics_mutect2_tonly; sobdetect_cohort_params as sobdetect_cohort_params_mutect2_tonly; + sobdetect_pass1 as sobdetect_pass1_octopus_tonly; sobdetect_pass2 as sobdetect_pass2_octopus_tonly; + sobdetect_metrics as sobdetect_metrics_octopus_tonly; sobdetect_cohort_params as sobdetect_cohort_params_octopus_tonly; + sobdetect_pass1 as sobdetect_pass1_vardict_tonly; sobdetect_pass2 as sobdetect_pass2_vardict_tonly; + sobdetect_metrics as sobdetect_metrics_vardict_tonly; sobdetect_cohort_params as sobdetect_cohort_params_vardict_tonly; + sobdetect_pass1 as sobdetect_pass1_varscan_tonly; sobdetect_pass2 as sobdetect_pass2_varscan_tonly; + sobdetect_metrics as sobdetect_metrics_varscan_tonly; sobdetect_cohort_params as sobdetect_cohort_params_varscan_tonly + + } from "../../modules/local/ffpe.nf" + + +include {annotvep_tn as annotvep_tn_mut2_ffpe; annotvep_tn as annotvep_tn_strelka_ffpe; + annotvep_tn as annotvep_tn_varscan_ffpe; annotvep_tn as annotvep_tn_vardict_ffpe; annotvep_tn as annotvep_tn_octopus_ffpe; + annotvep_tn as annotvep_tn_lofreq_ffpe; annotvep_tn as annotvep_tn_muse_ffpe; annotvep_tn as annotvep_tn_sage_ffpe; + annotvep_tn as annotvep_tn_deepsomatic_ffpe; + annotvep_tn as annotvep_tn_combined_ffpe; + annotvep_tonly as annotvep_tonly_varscan_ffpe; annotvep_tonly as annotvep_tonly_vardict_ffpe; + annotvep_tonly as annotvep_tonly_mut2_ffpe; annotvep_tonly as annotvep_tonly_octopus_ffpe; + annotvep_tonly as annotvep_tonly_sage_ffpe; annotvep_tonly as annotvep_tonly_deepsomatic_ffpe; + annotvep_tonly as 
annotvep_tonly_combined_ffpe} from '../../modules/local/annotvep.nf' + + +include {annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; annotvep_tn as annotvep_tn_sage; - annotvep_tn as annotvep_tn_combined; - combinemafs_tn; somaticcombine} from '../../modules/local/variant_calling.nf' - -include {mutect2_t_tonly; mutect2filter_tonly; - varscan_tonly; vardict_tonly; octopus_tonly; sage_tonly; - contamination_tumoronly; - learnreadorientationmodel_tonly; - mergemut2stats_tonly; octopus_convertvcf_tonly; + annotvep_tn as annotvep_tn_deepsomatic; + annotvep_tn as annotvep_tn_combined; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; - annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; annotvep_tonly as annotvep_tonly_sage; - annotvep_tonly as annotvep_tonly_combined; - combinemafs_tonly;somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' - -include {svaba_somatic; manta_somatic; - survivor_sv; gunzip; + annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; + annotvep_tonly as annotvep_tonly_sage; annotvep_tonly as annotvep_tonly_deepsomatic; + annotvep_tonly as annotvep_tonly_combined} from '../../modules/local/annotvep.nf' + +include {svaba_somatic} from '../../modules/local/svaba.nf' +include {manta_somatic} from '../../modules/local/manta.nf' +include {gridss_somatic} from '../../modules/local/gridss.nf' +include {survivor_sv; + gunzip as gunzip_manta; gunzip as gunzip_gridss; annotsv_tn as annotsv_survivor_tn - annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from '../../modules/local/structural_variant.nf' + annotsv_tn as annotsv_gridss; annotsv_tn as annotsv_svaba; annotsv_tn as annotsv_manta} from '../../modules/local/annotsv.nf' -include {amber_tn; 
cobalt_tn; purple; purple_novc; - sequenza; seqz_sequenza_bychr; freec; freec_paired } from '../../modules/local/copynumber.nf' - -include {splitinterval} from '../../modules/local/splitbed.nf' +include {amber_tn; cobalt_tn; purple; purple_novc} from '../../modules/local/purple.nf' +include {sequenza; seqz_sequenza_bychr} from '../../modules/local/sequenza.nf' +include {freec; freec_paired; freec_paired_exome} from '../../modules/local/freec.nf' +include {ascat_tn; ascat_tn_exome} from '../../modules/local/ascat.nf' +include {cnvkit; cnvkit_exome } from '../../modules/local/cnvkit.nf' +//Workflows workflow DETERMINEBAM { if(params.bam_input){ params.BAMINPUT=true }else if(params.file_input){ file(params.file_input).text - } } @@ -74,10 +140,10 @@ workflow INPUT { } if(params.sample_sheet){ - sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true).view() - .ifEmpty { "sample sheet not found" } - .splitCsv(header:true, sep: "\t", strip:true) - .map { row -> tuple( + sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true) | view() + | ifEmpty("sample sheet not found" ) + | splitCsv(header:true, sep: "\t", strip:true) + | map { row -> tuple( row.Tumor, row.Normal ) @@ -104,24 +170,47 @@ workflow ALIGN { }else{ intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') } - splitinterval(intervalbedin) + matchbed(intervalbedin) | splitinterval fastp(fastqinput) bwamem2(fastp.out) - bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) - bambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) + + //Indel Realignment + if (params.indelrealign){ + bwaindelre = bwamem2.out | indelrealign + bqsrbambyinterval = bwaindelre.combine(splitinterval.out.flatten()) + bambyinterval = bwaindelre.combine(splitinterval.out.flatten()) - bqsr(bqsrbambyinterval) - bqsrs=bqsr.out.groupTuple() - .map { samplename,beds -> tuple( samplename, - beds.toSorted{ it -> (it.name =~ 
/${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } ) - } - gatherbqsr(bqsrs) + bqsr_ir(bqsrbambyinterval) + + bqsrs = bqsr_ir.out + | groupTuple + | map { samplename,beds -> + tuple( samplename, beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } )} + gatherbqsr(bqsrs) - tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) - applybqsr(tobqsr) + tobqsr=bwaindelre.combine(gatherbqsr.out,by:0) + applybqsr(tobqsr) - bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)} + bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)} + + }else{ + bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) + bambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) + + bqsr(bqsrbambyinterval) + bqsrs=bqsr.out | groupTuple + | map { samplename,beds -> + tuple( samplename, + beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } )} + gatherbqsr(bqsrs) + + tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) + applybqsr(tobqsr) + + bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)} + + } emit: bamwithsample @@ -135,23 +224,31 @@ workflow ALIGN { } workflow GL { - //GERMLINE REQUIRES only BAMBYINTERVAL take: + sample_sheet bambyinterval + main: - deepvariant_step1(bambyinterval) - deepvariant_1_sorted=deepvariant_step1.out.groupTuple() - .map { samplename,tfbeds,gvcfbed -> tuple( samplename, - tfbeds.toSorted{ it -> (it.name =~ /${samplename}.tfrecord_(.*?).bed.gz/)[0][1].toInteger() } , - gvcfbed.toSorted{ it -> (it.name =~ /${samplename}.gvcf.tfrecord_(.*?).bed.gz/)[0][1].toInteger() } ) - } - deepvariant_step2(deepvariant_1_sorted) | deepvariant_step3 - glin=deepvariant_step3.out.map{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> gvcf}.collect() + //Keep Only the NormalSamples + 
bambyinterval_normonly=sample_sheet | map{t,n -> tuple(n)} | combine(bambyinterval,by:[0]) + | unique{it -> it[0]+ '~' + it[3] } + + deepvariant_step1(bambyinterval_normonly) | deepvariant_step2 + | deepvariant_step3 | groupTuple + | multiMap{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> + vcf: tuple(samplename,vcf.toSorted{it -> (it.name =~ /${samplename}_(.*?).bed.vcf.gz/)[0][1].toInteger()},vcf_tbi,"vcf") + gvcf: tuple(samplename,gvcf.toSorted{it -> (it.name =~ /${samplename}_(.*?).bed.gvcf.gz/)[0][1].toInteger()},gvcf_tbi,"gvcf") + } + | set{dv_out} + dv_out.vcf | bcfconcat_vcf + dv_out.gvcf | bcfconcat_gvcf | map{sample,gvcf,index -> gvcf} + | collect + | glnexus + deepvariant_out=bcfconcat_vcf.out | join(bcfconcat_gvcf.out) - glnexus(glin) emit: glnexusout=glnexus.out - bcfout=deepvariant_step3.out + bcfout=deepvariant_out } @@ -164,8 +261,8 @@ workflow VC { main: //Create Pairing for TN (in case of dups) - sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} |view() - bambyinterval=bamwithsample.combine(splitout.flatten()) + sample_sheet_paired=sample_sheet | map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} | view() + bambyinterval=bamwithsample.combine(splitout.flatten()) bambyinterval | multiMap {tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed -> @@ -175,44 +272,58 @@ workflow VC { | set{bambyinterval_tonly} bambyinterval_t=bambyinterval_tonly.t1 | - concat(bambyinterval_tonly.n1) |unique() + concat(bambyinterval_tonly.n1) | unique() //Prep Pileups call_list = params.callers.split(',') as List call_list_tonly = params.tonlycallers.split(',') as List call_list_tonly = call_list.intersect(call_list_tonly) - + + //Drop MUSE if using Exome + if (params.exome && "muse" in call_list){ + call_list.removeIf { it == 'muse' } + } + vc_all=Channel.empty() vc_tonly=Channel.empty() //Common for Mutect2/Varscan if ("mutect2" in call_list | "varscan" in call_list){ - pileup_paired_t(bambyinterval) - pileup_paired_n(bambyinterval) - - 
pileup_paired_t.out.groupTuple(by:[0,1]) - | multiMap { samplename, normalname, pileups -> - tout: tuple( samplename, normalname, + bambyinterval | + map{tumorname,tumor,tumorbai,normalname,normal,normalbai,bed -> tuple(tumorname,tumor,tumorbai,bed,"tpileup")} | + unique | + pileup_paired_t + bambyinterval | + map{tumorname,tumor,tumorbai,normalname,normal,normalbai,bed -> tuple(normalname,normal,normalbai,bed,"npileup")} | + unique | + pileup_paired_n + + pileup_paired_t.out | groupTuple | + multiMap { samplename, pileups -> + tout: tuple( samplename, pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tpileup.table/)[0][1].toInteger() } ) tonly: tuple( samplename, pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tpileup.table/)[0][1].toInteger() } ) } | set{pileup_paired_tout} - pileup_paired_n.out.groupTuple(by:[0,1]) - | multiMap { samplename, normalname, pileups-> - nout: tuple (samplename,normalname, + pileup_paired_n.out | groupTuple | + multiMap { normalname, pileups -> + nout: tuple (normalname, pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).npileup.table/)[0][1].toInteger() } ) nonly: tuple (normalname, pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).npileup.table/)[0][1].toInteger() } ) } | set{pileup_paired_nout} - pileup_paired_match=pileup_paired_tout.tout.join(pileup_paired_nout.nout,by:[0,1]) + pileup_paired_match=sample_sheet_paired |map{id,t,n-> tuple(t,id,n)} | combine(pileup_paired_tout.tout,by:0) | + map{it.swap(2,0)} | combine(pileup_paired_nout.nout,by:0) |map{no,id,tu,tpi,npi->tuple(tu,no,tpi,npi)} + + //pileup_paired_match=pileup_paired_tout.tout.join(pileup_paired_nout.nout,by:[0,1]) contamination_paired(pileup_paired_match) if (!params.no_tonly){ - pileup_all=pileup_paired_tout.tonly.concat(pileup_paired_nout.nonly) + pileup_all=pileup_paired_tout.tonly | concat(pileup_paired_nout.nonly) contamination_tumoronly(pileup_all) } } @@ -292,7 +403,6 @@ workflow VC { vc_all=vc_all|concat(strelka_in) - } if 
("vardict" in call_list){ @@ -330,10 +440,9 @@ workflow VC { vc_all=vc_all|concat(varscan_in) - if (!params.no_tonly){ //VarScan TOnly - varscan_in_tonly=bambyinterval_t.combine(contamination_tumoronly.out,by:0) + varscan_in_tonly=bambyinterval_t.combine(contamination_tumoronly.out,by:0) | varscan_tonly | groupTuple | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} | combineVariants_varscan_tonly @@ -382,6 +491,44 @@ workflow VC { } + //DeepSomatic TN + if ("deepsomatic" in call_list){ + deepsomatic_in = deepsomatic_tn_step1(bambyinterval) + | map{tname,nname,tf,tfjson,bed -> tuple("${tname}_vs_${nname}",tf,tfjson,bed)} + | deepsomatic_step2 + | deepsomatic_step3 | groupTuple + | map{samplename,vcf,vcf_tbi -> + tuple(samplename,vcf.toSorted{it -> (it.name =~ /${samplename}_(.*?).bed.vcf.gz/)[0][1].toInteger()},vcf_tbi,"deepsomatic") + } + | combineVariants_deepsomatic + | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"deepsomatic",normvcf,normindex)} + annotvep_tn_deepsomatic(deepsomatic_in) + + vc_all=vc_all|concat(deepsomatic_in) + + //DeepSomatic TOnly + if (!params.no_tonly){ + deepsomatic_tonly_in = deepsomatic_tonly_step1(bambyinterval_t) + | deepsomatic_tonly_step2 + | deepsomatic_tonly_step3 | groupTuple + + | map{samplename,vcf,vcf_tbi -> + tuple(samplename,vcf.toSorted{it -> (it.name =~ /${samplename}_(.*?).bed.vcf.gz/)[0][1].toInteger()},vcf_tbi,"deepsomatic_tonly") + } + + | combineVariants_deepsomatic_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal->tuple(tumor,"deepsomatic_tonly",normvcf,normindex)} + + annotvep_tonly_deepsomatic(deepsomatic_tonly_in) + + vc_tonly=vc_tonly | concat(deepsomatic_tonly_in) + + } + + } + //MuSE TN if ("muse" in call_list){ muse_in=muse_tn(bamwithsample) @@ -402,9 +549,9 @@ workflow VC { | 
map{samplename,marked,markedindex,normvcf,normindex -> tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} annotvep_tn_octopus(octopus_in) - octopus_in_sc = octopus_in | octopus_convertvcf + octopus_in = octopus_in | octopus_convertvcf | map{tumor,normal,vcf,vcfindex ->tuple(tumor,normal,"octopus",vcf,vcfindex)} - vc_all=vc_all|concat(octopus_in_sc) + vc_all=vc_all|concat(octopus_in) //Octopus TOnly if (!params.no_tonly){ @@ -423,6 +570,253 @@ workflow VC { } + + //FFPE Steps + if(params.ffpe){ + vc_ffpe_paired=Channel.empty() + vc_ffpe_tonly=Channel.empty() + bamwithsample1=bamwithsample | map{tumor,tbam,tbai,norm,nbam,nbai ->tuple(tumor,norm,tbam,tbai,nbam,nbai)} + + if('mutect2' in call_list){ + mutect2_p1=bamwithsample1 | join(mutect2_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_mutect2 + mutect2_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_mutect2 + + mutect2_p2 = bamwithsample1 + | join(mutect2_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_mutect2.out) + | sobdetect_pass2_mutect2 + mutect2_p1_vcfs=mutect2_p1 | map{sample,vcf,info->vcf} | collect + mutect2_p2_vcfs=mutect2_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} | collect + sobdetect_metrics_mutect2(mutect2_p1_vcfs,mutect2_p2_vcfs) + + mutect2_ffpe_out=mutect2_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"mutect2",filtvcf,vcftbi)} + annotvep_tn_mut2_ffpe(mutect2_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired |concat(mutect2_ffpe_out) + + if (!params.no_tonly){ + mutect2_tonly_p1=bamwithsample1 | join(mutect2_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_mutect2_tonly + mutect2_tonly_p1 | map{sample,vcf,info->info} + | collect + 
| sobdetect_cohort_params_mutect2_tonly + + mutect2_tonly_p2 = bamwithsample1 + | join(mutect2_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_mutect2_tonly.out) + | sobdetect_pass2_mutect2_tonly + mutect2_tonly_p1_vcfs=mutect2_tonly_p1 | map{sample,vcf,info->vcf} |collect + mutect2_tonly_p2_vcfs=mutect2_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_mutect2_tonly(mutect2_tonly_p1_vcfs,mutect2_tonly_p2_vcfs) + + mutect2_tonly_ffpe_out=mutect2_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"mutect2_tonly",filtvcf,vcftbi)} + annotvep_tonly_mut2_ffpe(mutect2_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(mutect2_tonly_ffpe_out) + + } + + } + + if('octopus' in call_list){ + octopus_p1=bamwithsample1 | join(octopus_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_octopus + octopus_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_octopus + octopus_p2 = bamwithsample1 + | join(octopus_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_octopus.out) + | sobdetect_pass2_octopus + octopus_p1_vcfs=octopus_p1 | map{sample,vcf,info->vcf} | collect + octopus_p2_vcfs=octopus_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} | collect + sobdetect_metrics_octopus(octopus_p1_vcfs,octopus_p2_vcfs) + + octopus_ffpe_out=octopus_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"octopus",filtvcf,vcftbi)} + annotvep_tn_octopus_ffpe(octopus_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired |concat(octopus_ffpe_out) + + if (!params.no_tonly){ + octopus_tonly_p1=bamwithsample1 | join(octopus_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | 
sobdetect_pass1_octopus_tonly + octopus_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_octopus_tonly + + octopus_tonly_p2 = bamwithsample1 + | join(octopus_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_octopus_tonly.out) + | sobdetect_pass2_octopus_tonly + octopus_tonly_p1_vcfs=octopus_tonly_p1 | map{sample,vcf,info->vcf} |collect + octopus_tonly_p2_vcfs=octopus_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_octopus_tonly(octopus_tonly_p1_vcfs,octopus_tonly_p2_vcfs) + + octopus_tonly_ffpe_out=octopus_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"octopus_tonly",filtvcf,vcftbi)} + annotvep_tonly_octopus_ffpe(octopus_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(octopus_tonly_ffpe_out) + } + } + + if('strelka' in call_list){ + strelka_p1=bamwithsample1 | join(strelka_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_strelka + strelka_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_strelka + strelka_p2 = bamwithsample1 + | join(strelka_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_strelka.out) + | sobdetect_pass2_strelka + + strelka_p1_vcfs=strelka_p1 | map{sample,vcf,info->vcf} |collect + strelka_p2_vcfs=strelka_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} | collect + sobdetect_metrics_strelka(strelka_p1_vcfs,strelka_p2_vcfs) + + strelka_ffpe_out=strelka_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"strelka",filtvcf,vcftbi)} + annotvep_tn_strelka_ffpe(strelka_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired |concat(strelka_ffpe_out) + + } + + if('lofreq' in call_list){ + lofreq_p1=bamwithsample1 | join(lofreq_in,by:[0,1]) + | 
map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_lofreq + lofreq_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_lofreq + lofreq_p2 = bamwithsample1 + | join(lofreq_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_lofreq.out) + | sobdetect_pass2_lofreq + lofreq_p1_vcfs=lofreq_p1 | map{sample,vcf,info->vcf} |collect + lofreq_p2_vcfs=lofreq_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_lofreq(lofreq_p1_vcfs,lofreq_p2_vcfs) + + lofreq_ffpe_out=lofreq_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"lofreq",filtvcf,vcftbi)} + annotvep_tn_lofreq_ffpe(lofreq_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired |concat(lofreq_ffpe_out) + } + + if('muse' in call_list){ + muse_p1=bamwithsample1 | join(muse_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_muse + muse_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_muse + muse_p2 = bamwithsample1 + | join(muse_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_muse.out) + | sobdetect_pass2_muse + muse_p1_vcfs=muse_p1 | map{sample,vcf,info->vcf} |collect + muse_p2_vcfs=muse_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_muse(muse_p1_vcfs,muse_p2_vcfs) + + muse_ffpe_out=muse_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"muse",filtvcf,vcftbi)} + annotvep_tn_muse_ffpe(muse_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired |concat(muse_ffpe_out) + } + + if('vardict' in call_list){ + vardict_p1=bamwithsample1 | join(vardict_in,by:[0,1]) + | 
map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_vardict + vardict_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_vardict + vardict_p2 = bamwithsample1 + | join(vardict_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_vardict.out) + | sobdetect_pass2_vardict + vardict_p1_vcfs=vardict_p1 | map{sample,vcf,info->vcf} |collect + vardict_p2_vcfs=vardict_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_vardict(vardict_p1_vcfs,vardict_p2_vcfs) + + vardict_ffpe_out=vardict_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"vardict",filtvcf,vcftbi)} + annotvep_tn_vardict_ffpe(vardict_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired |concat(vardict_ffpe_out) + + if (!params.no_tonly){ + vardict_tonly_p1=bamwithsample1 | join(vardict_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_vardict_tonly + vardict_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_vardict_tonly + + vardict_tonly_p2 = bamwithsample1 + | join(vardict_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_vardict_tonly.out) + | sobdetect_pass2_vardict_tonly + vardict_tonly_p1_vcfs=vardict_tonly_p1 | map{sample,vcf,info->vcf} |collect + vardict_tonly_p2_vcfs=vardict_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_vardict_tonly(vardict_tonly_p1_vcfs,vardict_tonly_p2_vcfs) + + vardict_tonly_ffpe_out=vardict_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"vardict_tonly",filtvcf,vcftbi)} + annotvep_tonly_vardict_ffpe(vardict_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(vardict_tonly_ffpe_out) + } + } + + if('varscan' in 
call_list){ + varscan_p1=bamwithsample1 | join(varscan_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | sobdetect_pass1_varscan + varscan_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_varscan + varscan_p2 = bamwithsample1 + | join(varscan_in,by:[0,1]) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple("${tumor}_vs_${normal}",normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_varscan.out) + | sobdetect_pass2_varscan + varscan_p1_vcfs=varscan_p1 | map{sample,vcf,info->vcf} |collect + varscan_p2_vcfs=varscan_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_varscan(varscan_p1_vcfs,varscan_p2_vcfs) + + varscan_ffpe_out=varscan_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample.split("_vs_")[0],sample.split("_vs_")[1],"varscan",filtvcf,vcftbi)} + annotvep_tn_varscan_ffpe(varscan_ffpe_out) + vc_ffpe_paired=vc_ffpe_paired | concat(varscan_ffpe_out) + + if (!params.no_tonly){ + varscan_tonly_p1=bamwithsample1 | join(varscan_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_varscan_tonly + varscan_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_varscan_tonly + + varscan_tonly_p2 = bamwithsample1 + | join(varscan_in_tonly) + | map{tumor,normal,tbam,tbai,nbam,nbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_varscan_tonly.out) + | sobdetect_pass2_varscan_tonly + varscan_tonly_p1_vcfs=varscan_tonly_p1 | map{sample,vcf,info->vcf} |collect + varscan_tonly_p2_vcfs=varscan_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_varscan_tonly(varscan_tonly_p1_vcfs,varscan_tonly_p2_vcfs) + + varscan_tonly_ffpe_out=varscan_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"varscan_tonly",filtvcf,vcftbi)} + annotvep_tonly_varscan_ffpe(varscan_tonly_ffpe_out) + 
vc_ffpe_tonly=vc_ffpe_tonly |concat(varscan_tonly_ffpe_out) + } + } + } //Combine All Variants Using VCF -> Annotate if (call_list.size()>1){ @@ -430,25 +824,37 @@ workflow VC { | somaticcombine | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} | annotvep_tn_combined - + if(params.ffpe){ + vc_ffpe_paired | groupTuple(by:[0,1]) + | somaticcombine_ffpe + | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined_ffpe",vcf,index)} + | annotvep_tn_combined_ffpe + } if (!params.no_tonly & call_list_tonly.size()>1){ - vc_tonly | groupTuple() - | somaticcombine_tonly - | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} - | annotvep_tonly_combined + vc_tonly | groupTuple() + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined + } + if (!params.no_tonly & call_list_tonly.size()>1 & params.ffpe){ + vc_ffpe_tonly | groupTuple() + | somaticcombine_tonly_ffpe + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly_ffpe",vcf,index)} + | annotvep_tonly_combined_ffpe } } + if("sage" in call_list){ somaticcall_input=sage_in }else if("mutect2" in call_list){ somaticcall_input=mutect2_in + }else if("mutect2" in call_list & params.ffpe){ + somaticcall_input=mutect2_ffpe_out }else{ somaticcall_input=Channel.empty() } - - //Implement PCGR Annotator/CivIC Next emit: somaticcall_input @@ -461,28 +867,57 @@ workflow SV { main: svcall_list = params.svcallers.split(',') as List + svout=Channel.empty() - if ("svaba" in svcall_list){ //Svaba + if ("svaba" in svcall_list){ svaba_out=svaba_somatic(bamwithsample) - .map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log -> + | map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log -> tuple(tumor,so_sv,"svaba")} - //annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated") + svout=svout | concat(svaba_out) } - if 
("manta" in svcall_list){ + //Manta + if ("manta" in svcall_list){ manta_out=manta_somatic(bamwithsample) - .map{tumor,gsv,so_sv,unfil_sv,unfil_indel -> - tuple(tumor,so_sv,"manta")} + manta_out_forsv=manta_out + | map{tumor,normal,gsv,gsv_tbi,so_sv,so_sv_tbi,unfil_sv,unfil_sv_tbi,unfil_indel,unfil_indel_tbi -> + tuple(tumor,so_sv,"manta")} | gunzip_manta //annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated") + svout=svout | concat(manta_out_forsv) } - if ("manta" in svcall_list & "svaba" in svcall_list){ + //GRIDSS + if ("gridss" in svcall_list){ + gridss_out=gridss_somatic(bamwithsample) + gridss_out_forsv=gridss_out + | map{tumor,normal,vcf,index,bam,gripssvcf,gripsstbi,gripssfilt,filttbi -> + tuple(tumor,gripssfilt,"gridss")} | gunzip_gridss + svout=svout | concat(gridss_out_forsv) + } + + if (svcall_list.size()>1){ //Survivor - gunzip(manta_out).concat(svaba_out).groupTuple() + svout | groupTuple | survivor_sv - | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated") + | annotsv_survivor_tn + | ifEmpty("Empty SV input--No SV annotated") } + + if("gridss" in svcall_list){ + somaticsv_input=gridss_out + | map{tumor,normal,vcf,index,bam,gripssvcf,gripsstbi,gripssfilt,filttbi -> + tuple(tumor,normal,vcf,index,gripssfilt,filttbi)} + }else if("manta" in svcall_list){ + somaticsv_input=manta_out + | map{tumor,normal,gsv,gsv_tbi,so_sv,so_sv_tbi,unfil_sv,unfil_sv_tbi,unfil_indel,unfil_indel_tbi -> + tuple(tumor,normal,unfil_sv,unfil_sv_tbi,so_sv,so_sv_tbi)} + }else{ + somaticsv_input=Channel.empty() + } + + emit: + somaticsv_input } workflow CNVmouse { @@ -492,9 +927,8 @@ workflow CNVmouse { main: cnvcall_list = params.cnvcallers.split(',') as List - if ("sequenza" in cnvcall_list){ - //Sequenza (Preferred for Paired) + if ("sequenza" in cnvcall_list){ chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} @@ 
-504,15 +938,31 @@ workflow CNVmouse { | sequenza } - if ("freec" in cnvcall_list){ //FREEC Paired Mode - bamwithsample | freec_paired + if ("freec" in cnvcall_list){ + if(params.exome){ + FREECPAIR_SCRIPT = params.script_freecpaired_exome + bamwithsample | freec_paired_exome + }else{ + FREECPAIR_SCRIPT = params.script_freecpaired + bamwithsample | freec_paired + } //FREEC Unpaired Mode bamwithsample | map{tname,tumor,tbai,nname,norm,nbai->tuple(tname,tumor,tbai)} | freec } + + //CNVKIT + if ("cnvkit" in cnvcall_list){ + if(params.exome){ + matchbed_cnvkit(intervalbedin) + bamwithsample | combine(matchbed_cnvkit.out) | cnvkit_exome + }else{ + bamwithsample | cnvkit + } + } } workflow CNVhuman { @@ -521,8 +971,20 @@ workflow CNVhuman { somaticcall_input main: + if (params.intervals){ + intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file') + }else{ + intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + } + cnvcall_list = params.cnvcallers.split(',') as List scinput = somaticcall_input|map{t1,n1,cal,vcf,ind -> tuple("${t1}_vs_${n1}",cal,vcf,ind)} + + //Drop Purple if using Exome + if (params.exome && "purple" in cnvcall_list){ + cnvcall_list.removeIf { it == 'purple' } + } + if ("purple" in cnvcall_list){ //Purple bamwithsample | amber_tn @@ -533,8 +995,8 @@ workflow CNVhuman { | purple } + //Sequenza if ("sequenza" in cnvcall_list){ - //Sequenza chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} @@ -544,6 +1006,36 @@ workflow CNVhuman { | sequenza } + //FREEC + if ("freec" in cnvcall_list){ + if(params.exome){ + FREECPAIR_SCRIPT = params.script_freecpaired_exome + bamwithsample | freec_paired_exome + }else{ + FREECPAIR_SCRIPT = params.script_freecpaired + bamwithsample | freec_paired + } + } + + //ASCAT + if ("ascat" in cnvcall_list){ + 
if(params.exome){ + matchbed_ascat(intervalbedin) + bamwithsample | combine(matchbed_ascat.out) | ascat_tn_exome + }else{ + bamwithsample | ascat_tn + } + } + + //CNVKIT + if ("cnvkit" in cnvcall_list){ + if(params.exome){ + matchbed_cnvkit(intervalbedin) + bamwithsample | combine(matchbed_cnvkit.out) | cnvkit_exome + }else{ + bamwithsample | cnvkit + } + } } @@ -551,9 +1043,20 @@ workflow CNVhuman_novc { take: bamwithsample - main: + main: + if (params.intervals){ + intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file') + }else{ + intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + } + cnvcall_list = params.cnvcallers.split(',') as List + //Drop Purple if using Exome + if (params.exome && "purple" in cnvcall_list){ + cnvcall_list.removeIf { it == 'purple' } + } + if ("purple" in cnvcall_list){ //Purple bamwithsample | amber_tn @@ -563,6 +1066,7 @@ workflow CNVhuman_novc { | purple_novc } + if ("sequenza" in cnvcall_list){ //Sequenza chrs=Channel.fromList(params.genomes[params.genome].chromosomes) @@ -573,6 +1077,38 @@ workflow CNVhuman_novc { .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} | sequenza } + + if ("freec" in cnvcall_list){ + //FREEC + if(params.exome){ + FREECPAIR_SCRIPT = params.script_freecpaired_exome + bamwithsample | freec_paired_exome + }else{ + FREECPAIR_SCRIPT = params.script_freecpaired + bamwithsample | freec_paired + } + } + + if ("ascat" in cnvcall_list){ + //ASCAT + if(params.exome){ + matchbed_ascat(intervalbedin) + bamwithsample |combine(matchbed_ascat.out) | ascat_tn_exome + }else{ + bamwithsample | ascat_tn + } + } + + //CNVKIT + if ("cnvkit" in cnvcall_list){ + if(params.exome){ + matchbed_cnvkit(intervalbedin) + bamwithsample | combine(matchbed_cnvkit.out) | cnvkit_exome + }else{ + bamwithsample | cnvkit + } + } + } @@ -592,6 +1128,7 @@ workflow QC_NOGL { qualimap_bamqc(applybqsr) samtools_flagstats(applybqsr) fastqc(applybqsr) + 
mosdepth(applybqsr) //Somalier somalier_extract(applybqsr) @@ -603,20 +1140,22 @@ workflow QC_NOGL { kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect() qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() + mosdepth_out=mosdepth.out.collect() fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect() samtools_flagstats_out=samtools_flagstats.out.collect() - if(params.genome=="hg38"){ + if(params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ somalier_analysis_human(som_in) somalier_analysis_out=somalier_analysis_human.out.collect() } - else if(params.genome=="mm10"){ + else if(params.genome.matches("mm10")){ somalier_analysis_mouse(som_in) somalier_analysis_out=somalier_analysis_mouse.out.collect() } - conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out, + conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out, + samtools_flagstats_out,fastqc_out,mosdepth_out, somalier_analysis_out).flatten().toList() multiqc(conall) } @@ -638,7 +1177,9 @@ workflow QC_GL { kraken(fastqin) qualimap_bamqc(applybqsr) samtools_flagstats(applybqsr) + mosdepth(applybqsr) fastqc(applybqsr) + //Cohort VCF glout=glnexusout.map{germlinev,germlinenorm,tbi->tuple(germlinenorm,tbi)} vcftools(glout) @@ -652,14 +1193,12 @@ workflow QC_GL { somalier_extract(applybqsr) som_in=somalier_extract.out.collect() - - //Prep for MultiQC input - if(params.genome=="hg38"){ + if(params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ somalier_analysis_human(som_in) somalier_analysis_out=somalier_analysis_human.out.collect() } - else if(params.genome=="mm10"){ + else if(params.genome.matches("mm10")){ somalier_analysis_mouse(som_in) somalier_analysis_out=somalier_analysis_mouse.out.collect() } @@ -671,18 +1210,110 @@ workflow QC_GL { qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect() 
samtools_flagstats_out=samtools_flagstats.out.collect() + mosdepth_out=mosdepth.out.collect() bcftools_stats_out= bcftools_stats.out.collect() gatk_varianteval_out= gatk_varianteval.out.collect() snpeff_out=snpeff.out.collect() vcftools_out=vcftools.out collectvariantcallmetrics_out=collectvariantcallmetrics.out - conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out,bcftools_stats_out, - gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out,somalier_analysis_out).flatten().toList() + conall=fclane_out.concat(fqs_out, + kraken_out,qualimap_out,mosdepth_out, + fastqc_out,samtools_flagstats_out,bcftools_stats_out, + gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out,somalier_analysis_out).flatten().toList() + multiqc(conall) +} + +//QC_GL_BAMS +workflow QC_GL_BAM { + take: + applybqsr + glnexusout + bcfout + + main: + //QC Steps + qualimap_bamqc(applybqsr) + samtools_flagstats(applybqsr) + mosdepth(applybqsr) + fastqc(applybqsr) + + //Cohort VCF + glout=glnexusout.map{germlinev,germlinenorm,tbi->tuple(germlinenorm,tbi)} + vcftools(glout) + collectvariantcallmetrics(glout) + //Per sample VCFs + bcfin=bcfout.map{samplename,vcf,vcf_tbi,gvcf,gvcf_tbi -> tuple(samplename,gvcf,gvcf_tbi)} + bcftools_stats(bcfin) + gatk_varianteval(bcfin) + snpeff(bcfin) + //Somalier + somalier_extract(applybqsr) + som_in=somalier_extract.out.collect() + + //Prep for MultiQC input + if(params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome.matches("mm10")){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } + + qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() + samtools_flagstats_out=samtools_flagstats.out.collect() + mosdepth_out=mosdepth.out.collect() + bcftools_stats_out=bcftools_stats.out.collect() + 
gatk_varianteval_out=gatk_varianteval.out.collect() + snpeff_out=snpeff.out.collect() + vcftools_out=vcftools.out + collectvariantcallmetrics_out=collectvariantcallmetrics.out + + conall=qualimap_out.concat(mosdepth_out, + samtools_flagstats_out,bcftools_stats_out, + gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out, + somalier_analysis_out) + | flatten | toList multiqc(conall) } +//QC NOGL-BAMs +workflow QC_NOGL_BAM { + take: + bams + + main: + //BQSR BAMs + fastqc(bams) + samtools_flagstats(bams) + qualimap_bamqc(bams) + mosdepth(bams) + + somalier_extract(bams) + som_in=somalier_extract.out.collect() + if(params.genome.matches("hg38(.*)")| params.genome.matches("hg19(.*)")){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome.matches("mm10")){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } + + //Prep for MultiQC input + qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() + mosdepth_out=mosdepth.out.collect() + samtools_flagstats_out=samtools_flagstats.out.collect() + + conall=qualimap_out.concat( + samtools_flagstats_out,mosdepth_out, + somalier_analysis_out).flatten().toList() + + multiqc(conall) +} + //Variant Calling from BAM only workflow INPUT_BAM { @@ -695,15 +1326,14 @@ workflow INPUT_BAM { row.Normal ) } - } - + } //Either BAM Input or File sheet input if(params.bam_input){ //Check if Index is .bai or .bam.bai - bambai=params.bam_input +".bai" + bambai = params.bam_input + ".bai" baionly = bambai.replace(".bam", "") - bamcheck1=file(bambai) - bamcheck2=file(baionly) + bamcheck1 = file(bambai) + bamcheck2 = file(baionly) if (bamcheck1.size()>0){ baminputonly=Channel.fromPath(params.bam_input) @@ -716,7 +1346,6 @@ workflow INPUT_BAM { }else if (bamcheck1.size==0 && bamcheck2.size==0){ println "Missing BAM Index" } - }else if(params.bam_file_input) { 
baminputonly=Channel.fromPath(params.bam_file_input) .splitCsv(header: false, sep: "\t", strip:true) @@ -725,11 +1354,12 @@ workflow INPUT_BAM { } } if (params.intervals){ - intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file') + intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true, type: 'file') }else{ - intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true, type: 'file') } - splitinterval(intervalbedin) + matchbed(intervalbedin) | splitinterval + if (params.indelrealign){ bqsrs = baminputonly | indelrealign | combine(splitinterval.out.flatten()) @@ -742,15 +1372,28 @@ workflow INPUT_BAM { baminput2=baminputonly.combine(bqsrs,by:0) |applybqsr - bamwithsample=baminput2.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} + bamwithsample=baminput2.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} + | view() + }else{ + bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} + | view() + } - } else { - bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} - } + bambyinterval_norm=bamwithsample + | map {tum,tubam,tbai,norm,norbam,norbai -> tuple(norm,norbam,norbai)} + bambyinterval_tum=bamwithsample + | map {tum,tubam,tbai,norm,norbam,norbai -> tuple(tum,tubam,tbai)} + bambyinterval=bambyinterval_tum | concat(bambyinterval_norm) | unique + | combine(splitinterval.out.flatten()) emit: bamwithsample + bambyinterval splitout=splitinterval.out sample_sheet + allbam=bambyinterval_tum | concat(bambyinterval_norm) | unique } + + + diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 02d8698..1130e76 100644 --- 
a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -1,59 +1,102 @@ -include {fc_lane; fastq_screen;kraken;qualimap_bamqc; - samtools_flagstats;vcftools;collectvariantcallmetrics; - bcftools_stats;gatk_varianteval; - snpeff;fastqc; - somalier_extract;somalier_analysis_human;somalier_analysis_mouse; - multiqc} from '../../modules/local/qc.nf' - -include {deepvariant_step1;deepvariant_step2;deepvariant_step3; - deepvariant_combined;glnexus} from '../../modules/local/germline.nf' - -include {fastp; bwamem2; +include {splitinterval; matchbed; matchbed as matchbed_ascat; + matchbed as matchbed_cnvkit} from '../../modules/local/splitbed.nf' + +include {fc_lane} from '../../modules/local/fc_lane.nf' +include {fastq_screen} from '../../modules/local/fastq_screen.nf' +include {kraken} from '../../modules/local/kraken.nf' +include {qualimap_bamqc} from '../../modules/local/qualimap.nf' +include {fastqc} from '../../modules/local/fastqc.nf' +include {samtools_flagstats} from '../../modules/local/samtools_flagstats.nf' +include {vcftools} from '../../modules/local/vcftools.nf' +include {bcftools_stats} from '../../modules/local/bcftools_stats.nf' +include {gatk_varianteval; collectvariantcallmetrics} from '../../modules/local/gatk_varianteval.nf' +include {snpeff} from '../../modules/local/snpeff.nf' +include {somalier_extract;somalier_analysis_human;somalier_analysis_mouse} from '../../modules/local/somalier.nf' +include {mosdepth} from '../../modules/local/mosdepth.nf' +include {multiqc} from '../../modules/local/multiqc.nf' + +include {fastp; bwamem2; indelrealign; bqsr_ir; bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf' - -include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; - bcftools_index_octopus; - contamination_paired; learnreadorientationmodel; mergemut2stats; - combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; - combineVariants as 
combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; - combineVariants as combineVariants_sage; combineVariants as combineVariants_sage_tonly; - combineVariants_alternative; - annotvep_tn as annotvep_tn_mut2; - annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; - combinemafs_tn} from '../../modules/local/variant_calling.nf' - -include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; - varscan_tonly; vardict_tonly; - octopus_tonly; sage_tonly; - contamination_tumoronly; - learnreadorientationmodel_tonly; - mergemut2stats_tonly; octopus_convertvcf_tonly; - annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; - annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; - annotvep_tonly as annotvep_tonly_sage; - annotvep_tonly as annotvep_tonly_combined; - combinemafs_tonly; somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' - -include {manta_tonly; svaba_tonly; survivor_sv; gunzip; -annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; -annotsv_tonly as annotsv_survivor_tonly} from '../../modules/local/structural_variant.nf' - -include {freec; amber_tonly; cobalt_tonly; purple_tonly_novc; purple_tonly } from '../../modules/local/copynumber.nf' - -include {splitinterval} from '../../modules/local/splitbed.nf' - - - +include {pileup_paired as pileup_paired_t; pileup_paired as pileup_paired_n; + pileup_paired_tonly; + learnreadorientationmodel; + mutect2; mutect2filter; contamination_paired; mergemut2stats; + mutect2_t_tonly; mutect2filter_tonly; + contamination_tumoronly; + learnreadorientationmodel_tonly; + mergemut2stats_tonly} from '../../modules/local/mutect2.nf' +include {sage_tn; sage_tonly} from '../../modules/local/sage.nf' +include {vardict_tn; vardict_tonly} from '../../modules/local/vardict.nf' +include {varscan_tn; varscan_tonly} from '../../modules/local/varscan.nf' +include {octopus_tn; 
bcftools_index_octopus; + bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; + octopus_tonly; octopus_convertvcf_tonly} from '../../modules/local/octopus.nf' +include {deepsomatic_tonly_step1; deepsomatic_tonly_step2; + deepsomatic_step3 as deepsomatic_tonly_step3 } from "../../modules/local/deepsomatic.nf" + + +include {combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; + combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly; + combineVariants_alternative; + combineVariants_alternative as combineVariants_deepsomatic; combineVariants_alternative as combineVariants_deepsomatic_tonly; + combineVariants as combineVariants_sage; combineVariants as combineVariants_sage_tonly; + combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse; + combineVariants_alternative as combineVariants_octopus; + combineVariants_alternative as combineVariants_octopus_tonly; + combinemafs_tn; somaticcombine;somaticcombine as somaticcombine_ffpe; + combinemafs_tonly;somaticcombine_tonly;somaticcombine_tonly as somaticcombine_tonly_ffpe} from '../../modules/local/combinefilter.nf' + +include {annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; + annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; + annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; annotvep_tn as annotvep_tn_sage; + annotvep_tn as annotvep_tn_deepsomatic; + annotvep_tn as annotvep_tn_combined; + annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; + annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; + annotvep_tonly as annotvep_tonly_sage; annotvep_tonly as annotvep_tonly_deepsomatic; + annotvep_tonly as annotvep_tonly_combined} from '../../modules/local/annotvep.nf' + +include {sobdetect_pass1 as sobdetect_pass1_mutect2_tonly; 
sobdetect_pass2 as sobdetect_pass2_mutect2_tonly; + sobdetect_metrics as sobdetect_metrics_mutect2_tonly; sobdetect_cohort_params as sobdetect_cohort_params_mutect2_tonly; + sobdetect_pass1 as sobdetect_pass1_octopus_tonly; sobdetect_pass2 as sobdetect_pass2_octopus_tonly; + sobdetect_metrics as sobdetect_metrics_octopus_tonly; sobdetect_cohort_params as sobdetect_cohort_params_octopus_tonly; + sobdetect_pass1 as sobdetect_pass1_vardict_tonly; sobdetect_pass2 as sobdetect_pass2_vardict_tonly; + sobdetect_metrics as sobdetect_metrics_vardict_tonly; sobdetect_cohort_params as sobdetect_cohort_params_vardict_tonly; + sobdetect_pass1 as sobdetect_pass1_varscan_tonly; sobdetect_pass2 as sobdetect_pass2_varscan_tonly; + sobdetect_metrics as sobdetect_metrics_varscan_tonly; sobdetect_cohort_params as sobdetect_cohort_params_varscan_tonly + } from "../../modules/local/ffpe.nf" + +include {annotvep_tonly as annotvep_tonly_varscan_ffpe; annotvep_tonly as annotvep_tonly_vardict_ffpe; + annotvep_tonly as annotvep_tonly_mut2_ffpe; annotvep_tonly as annotvep_tonly_octopus_ffpe; + annotvep_tonly as annotvep_tonly_sage_ffpe; annotvep_tonly as annotvep_tonly_deepsomatic_ffpe; + annotvep_tonly as annotvep_tonly_combined_ffpe} from '../../modules/local/annotvep.nf' + +include {svaba_tonly} from '../../modules/local/svaba.nf' +include {manta_tonly} from '../../modules/local/manta.nf' +include {gridss_tonly} from '../../modules/local/gridss.nf' +include {survivor_sv; + gunzip as gunzip_manta; gunzip as gunzip_gridss; + annotsv_tonly as annotsv_survivor_tonly; + annotsv_tonly as annotsv_svaba_tonly; + annotsv_tonly as annotsv_gridss_tonly; + annotsv_tonly as annotsv_manta_tonly} from '../../modules/local/annotsv.nf' + +include {freec} from '../../modules/local/freec.nf' +include {amber_tonly; cobalt_tonly; purple_tonly_novc; purple_tonly} from '../../modules/local/purple.nf' +include {cnvkit_exome_tonly; cnvkit_tonly } from '../../modules/local/cnvkit.nf' + + +//Workflows workflow 
INPUT_TONLY { if(params.fastq_input){ - fastqinput=Channel.fromFilePairs(params.fastq_input) + fastqinput=Channel.fromFilePairs(params.fastq_input) }else if(params.fastq_file_input){ fastqinput=Channel.fromPath(params.fastq_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,fq1,fq2 -> tuple(sample, tuple(file(fq1),file(fq2))) - } + } } if(params.sample_sheet){ @@ -62,12 +105,12 @@ workflow INPUT_TONLY { .splitCsv(header:true, sep: "\t") .map { row -> tuple( row.Tumor - )} + )} | view }else{ sample_sheet=fastqinput.map{samplename,f1 -> tuple ( - samplename)} + samplename)} | view } - + emit: fastqinput sample_sheet @@ -86,29 +129,50 @@ workflow ALIGN_TONLY { }else{ intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') } + matchbed(intervalbedin) | splitinterval - splitinterval(intervalbedin) + //Align fastp(fastqinput) bwamem2(fastp.out) - //indelrealign(bwamem2.out) Consider indelreaglinement using ABRA? - bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) + if (params.indelrealign){ + bwaindelre = bwamem2.out | indelrealign + bqsrbambyinterval=bwaindelre.combine(splitinterval.out.flatten()) + bambyinterval=bwaindelre.combine(splitinterval.out.flatten()) - - bqsr(bqsrbambyinterval) - bqsrs=bqsr.out.groupTuple() - .map { samplename,beds -> tuple( samplename, - beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } ) + bqsr_ir(bqsrbambyinterval) + + bqsrs = bqsr_ir.out + | groupTuple + | map { samplename,beds -> + tuple( samplename, beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } )} + gatherbqsr(bqsrs) + + tobqsr=bwaindelre.combine(gatherbqsr.out,by:0) + applybqsr(tobqsr) + + bamwithsample=applybqsr.out.join(sample_sheet) + | map{samplename,tumor,tumorbai -> tuple( samplename,tumor,tumorbai)} + bambyinterval=bamwithsample.combine(splitinterval.out.flatten()) + + }else{ + 
bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) + + bqsr(bqsrbambyinterval) + bqsrs=bqsr.out | groupTuple + | map { samplename,beds -> + tuple( samplename, + beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } )} + gatherbqsr(bqsrs) + + tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) + applybqsr(tobqsr) + + bamwithsample=applybqsr.out.join(sample_sheet) + | map{samplename,tumor,tumorbai -> tuple( samplename,tumor,tumorbai)} + bambyinterval=bamwithsample.combine(splitinterval.out.flatten()) } - gatherbqsr(bqsrs) - tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) - applybqsr(tobqsr) - - bamwithsample=applybqsr.out.join(sample_sheet) - .map{samplename,tumor,tumorbai -> tuple( samplename,tumor,tumorbai) - } - bambyinterval=bamwithsample.combine(splitinterval.out.flatten()) emit: bamwithsample @@ -138,6 +202,10 @@ workflow VC_TONLY { call_list_tonly = params.tonlycallers.split(',') as List call_list = call_list.intersect(call_list_tonly) + if (params.exome && "muse" in call_list){ + call_list.removeIf { it == 'muse' } + } + vc_tonly=Channel.empty() if ("mutect2" in call_list | "varscan" in call_list){ @@ -217,6 +285,26 @@ workflow VC_TONLY { vc_tonly=vc_tonly|concat(octopus_in_tonly_sc) } + //DeepSomatic Tonly + if ("deepsomatic" in call_list){ + deepsomatic_tonly_in=deepsomatic_tonly_step1(bambyinterval) + | deepsomatic_tonly_step2 + | deepsomatic_tonly_step3 | groupTuple + | map{samplename,vcf,vcf_tbi -> + tuple(samplename,vcf.toSorted{it -> (it.name =~ /${samplename}_(.*?).bed.vcf.gz/)[0][1].toInteger()},vcf_tbi,"deepsomatic_tonly") + } + | combineVariants_deepsomatic_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex->tuple(tumor,"deepsomatic_tonly",normvcf,normindex)} + + annotvep_tonly_deepsomatic(deepsomatic_tonly_in) + + vc_tonly=vc_tonly | concat(deepsomatic_tonly_in) + + } + + + /* //SAGE if ("sage" in call_list){ sage_in_tonly=sage_tonly(bamwithsample) @@ -229,7 +317,102 @@ 
workflow VC_TONLY { vc_tonly=vc_tonly | concat(sage_in_tonly) } - + */ + + //FFPE Steps + if(params.ffpe){ + vc_ffpe_tonly=Channel.empty() + bamwithsample1=bamwithsample + + if('mutect2' in call_list){ + mutect2_tonly_p1=bamwithsample1 | join(mutect2_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_mutect2_tonly + mutect2_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_mutect2_tonly + + mutect2_tonly_p2 = bamwithsample1 + | join(mutect2_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_mutect2_tonly.out) + | sobdetect_pass2_mutect2_tonly + mutect2_tonly_p1_vcfs=mutect2_tonly_p1 | map{sample,vcf,info->vcf} |collect + mutect2_tonly_p2_vcfs=mutect2_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_mutect2_tonly(mutect2_tonly_p1_vcfs,mutect2_tonly_p2_vcfs) + + mutect2_tonly_ffpe_out=mutect2_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"mutect2_tonly",filtvcf,vcftbi)} + annotvep_tonly_mut2_ffpe(mutect2_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(mutect2_tonly_ffpe_out) + } + + if('octopus' in call_list){ + octopus_tonly_p1=bamwithsample1 | join(octopus_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_octopus_tonly + octopus_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_octopus_tonly + + octopus_tonly_p2 = bamwithsample1 + | join(octopus_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_octopus_tonly.out) + | sobdetect_pass2_octopus_tonly + octopus_tonly_p1_vcfs=octopus_tonly_p1 | map{sample,vcf,info->vcf} |collect + octopus_tonly_p2_vcfs=octopus_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_octopus_tonly(octopus_tonly_p1_vcfs,octopus_tonly_p2_vcfs) + + 
octopus_tonly_ffpe_out=octopus_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"octopus_tonly",filtvcf,vcftbi)} + annotvep_tonly_octopus_ffpe(octopus_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(octopus_tonly_ffpe_out) + } + + if('vardict' in call_list){ + vardict_tonly_p1=bamwithsample1 | join(vardict_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_vardict_tonly + vardict_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_vardict_tonly + + vardict_tonly_p2 = bamwithsample1 + | join(vardict_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_vardict_tonly.out) + | sobdetect_pass2_vardict_tonly + vardict_tonly_p1_vcfs=vardict_tonly_p1 | map{sample,vcf,info->vcf} |collect + vardict_tonly_p2_vcfs=vardict_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + sobdetect_metrics_vardict_tonly(vardict_tonly_p1_vcfs,vardict_tonly_p2_vcfs) + + vardict_tonly_ffpe_out=vardict_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"vardict_tonly",filtvcf,vcftbi)} + annotvep_tonly_vardict_ffpe(vardict_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(vardict_tonly_ffpe_out) + } + + if('varscan' in call_list){ + varscan_tonly_p1=bamwithsample1 | join(varscan_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | sobdetect_pass1_varscan_tonly + varscan_tonly_p1 | map{sample,vcf,info->info} + | collect + | sobdetect_cohort_params_varscan_tonly + + varscan_tonly_p2 = bamwithsample1 + | join(varscan_in_tonly) + | map{tumor,tbam,tbai,vc,normvcf,tbi->tuple(tumor,normvcf,tbam,vc)} + | combine(sobdetect_cohort_params_varscan_tonly.out) + | sobdetect_pass2_varscan_tonly + varscan_tonly_p1_vcfs=varscan_tonly_p1 | map{sample,vcf,info->vcf} |collect + varscan_tonly_p2_vcfs=varscan_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->vcf} |collect + 
sobdetect_metrics_varscan_tonly(varscan_tonly_p1_vcfs,varscan_tonly_p2_vcfs) + + varscan_tonly_ffpe_out=varscan_tonly_p2 | map{sample,vcf,info,filtvcf,vcftbi->tuple(sample,"varscan_tonly",filtvcf,vcftbi)} + annotvep_tonly_varscan_ffpe(varscan_tonly_ffpe_out) + vc_ffpe_tonly=vc_ffpe_tonly |concat(varscan_tonly_ffpe_out) + } +} + //Combined Variants and Annotated //Emit for SC downstream, take Oc/Mu2/sage/Vard/Varscan @@ -246,10 +429,12 @@ workflow VC_TONLY { somaticcall_input=sage_in_tonly }else if("mutect2" in call_list){ somaticcall_input=mutect2_in_tonly + }else if("mutect2" in call_list & params.ffpe){ + somaticcall_input=mutect2_ffpe_out }else{ somaticcall_input=Channel.empty() } - + emit: somaticcall_input } @@ -260,23 +445,61 @@ workflow SV_TONLY { bamwithsample main: + svcall_list = params.svcallers.split(',') as List + svout=Channel.empty() + //Svaba - svaba_out=svaba_tonly(bamwithsample) - .map{ tumor,bps,contigs,discord,alignments,so_indel,so_sv,unfil_so_indel,unfil_sv,log -> - tuple(tumor,so_sv,"svaba_tonly")} - annotsv_svaba_tonly(svaba_out).ifEmpty("Empty SV input--No SV annotated") + if ("svaba" in svcall_list){ + svaba_out=svaba_tonly(bamwithsample) + .map{ tumor,bps,contigs,discord,alignments,so_indel,so_sv,unfil_so_indel,unfil_sv,log -> + tuple(tumor,so_sv,"svaba_tonly")} + annotsv_svaba_tonly(svaba_out).ifEmpty("Empty SV input--No SV annotated") + svout=svout | concat(svaba_out) + } //Manta - manta_out=manta_tonly(bamwithsample) - .map{tumor, sv, indel, tumorsv -> - tuple(tumor,tumorsv,"manta_tonly")} - annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") + if ("manta" in svcall_list){ + manta_out=manta_tonly(bamwithsample) + .map{tumor, sv, indel, tumorsv -> + tuple(tumor,tumorsv,"manta_tonly")} + annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") + svout=svout | concat(manta_out) + } - //Delly-WIP + //GRIDSS + if ("gridss" in svcall_list){ + gridss_out=gridss_tonly(bamwithsample) + 
gridss_out_forsv=gridss_out + | map{tumor,vcf,index,bam,gripssvcf,gripsstbi,gripssfilt,filttbi -> + tuple(tumor,gripssfilt,"gridss_tonly")} | gunzip_gridss + annotsv_gridss_tonly(gridss_out_forsv).ifEmpty("Empty SV input--No SV annotated") + svout=svout | concat(gridss_out_forsv) + } //Survivor - gunzip(manta_out).concat(svaba_out).groupTuple() - | survivor_sv | annotsv_survivor_tonly | ifEmpty("Empty SV input--No SV annotated") + if (svcall_list.size()>1){ + //Survivor + svout | groupTuple + | survivor_sv + | annotsv_survivor_tonly + | ifEmpty("Empty SV input--No SV annotated") + } + + if("gridss" in svcall_list){ + somaticsv_input=gridss_out + | map{tumor,vcf,index,bam,gripssvcf,gripsstbi,gripssfilt,filttbi -> + tuple(tumor,vcf,index,gripsstbi,gripssfilt,filttbi)} + }else if("manta" in svcall_list){ + somaticsv_input=manta_out + | map{tumor,gsv,gsv_tbi,so_sv,so_sv_tbi,unfil_sv,unfil_sv_tbi,unfil_indel,unfil_indel_tbi -> + tuple(tumor,unfil_sv,unfil_sv_tbi,so_sv,so_sv_tbi)} + }else{ + somaticsv_input=Channel.empty() + } + + emit: + somaticsv_input + } @@ -291,6 +514,15 @@ workflow CNVmouse_tonly { if ("freec" in cnvcall_list){ freec(bamwithsample) } + //CNVKIT + if ("cnvkit" in cnvcall_list){ + if(params.exome){ + matchbed_cnvkit(intervalbedin) + bamwithsample | combine(matchbed_cnvkit.out) | cnvkit_exome_tonly + }else{ + bamwithsample | cnvkit_tonly + } + } } @@ -307,6 +539,10 @@ workflow CNVhuman_tonly { bamwithsample | freec } + if (params.exome && "purple" in cnvcall_list ){ + cnvcall_list.removeIf { it == 'purple' } + } + if ("purple" in cnvcall_list){ //Purple bamwithsample | amber_tonly @@ -316,6 +552,16 @@ workflow CNVhuman_tonly { map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)} | purple_tonly } + + //CNVKIT + if ("cnvkit" in cnvcall_list){ + if(params.exome){ + matchbed_cnvkit(intervalbedin) + bamwithsample | combine(matchbed_cnvkit.out) | cnvkit_exome_tonly + }else{ + bamwithsample | cnvkit_tonly + } + } } @@ -324,11 +570,17 @@ workflow
CNVhuman_novc_tonly { bamwithsample main: + cnvcall_list = params.cnvcallers.split(',') as List + if ("freec" in cnvcall_list){ //FREEC-Unpaired only bamwithsample | freec } + if (params.exome && "purple" in cnvcall_list){ + cnvcall_list.removeIf { it == 'purple' } + } + if ("purple" in cnvcall_list){ //Purple bamwithsample | amber_tonly @@ -337,6 +589,15 @@ workflow CNVhuman_novc_tonly { map{t1,amber,cobalt -> tuple(t1,amber,cobalt)} | purple_tonly_novc } + + if ("cnvkit" in cnvcall_list){ + if(params.exome){ + matchbed_cnvkit(intervalbedin) + bamwithsample | combine(matchbed_cnvkit.out) | cnvkit_exome_tonly + }else{ + bamwithsample | cnvkit_tonly + } + } } @@ -356,14 +617,15 @@ workflow QC_TONLY { fastqc(bqsrout) samtools_flagstats(bqsrout) qualimap_bamqc(bqsrout) + mosdepth(bqsrout) somalier_extract(bqsrout) som_in=somalier_extract.out.collect() - if(params.genome=="hg38"){ + if(params.genome.matches("hg38(.*)")|| params.genome.matches("hg19(.*)")){ somalier_analysis_human(som_in) somalier_analysis_out=somalier_analysis_human.out.collect() } - else if(params.genome=="mm10"){ + else if(params.genome.matches("mm10")){ somalier_analysis_mouse(som_in) somalier_analysis_out=somalier_analysis_mouse.out.collect() } @@ -375,22 +637,52 @@ workflow QC_TONLY { kraken_out=kraken.out.map{samplename,taxa,krona -> tuple(taxa,krona)}.collect() qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect() - + mosdepth_out=mosdepth.out.collect() samtools_flagstats_out=samtools_flagstats.out.collect() - - conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,fastqc_out, - samtools_flagstats_out, - somalier_analysis_out).flatten().toList() + samtools_flagstats_out,mosdepth_out, + somalier_analysis_out).flatten().toList() multiqc(conall) } +//QC Tumor Only-BAMs +workflow QC_TONLY_BAM { + take: + bams + main: + //BQSR BAMs + fastqc(bams) + samtools_flagstats(bams) + qualimap_bamqc(bams) + 
mosdepth(bams) + somalier_extract(bams) + som_in=somalier_extract.out.collect() + if(params.genome.matches("hg38(.*)")|| params.genome.matches("hg19(.*)")){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome.matches("mm10")){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } + + //Prep for MultiQC input + qualimap_out=qualimap_bamqc.out.map{genome,rep->tuple(genome,rep)}.collect() + mosdepth_out=mosdepth.out.collect() + samtools_flagstats_out=samtools_flagstats.out.collect() + conall=qualimap_out | concat( + samtools_flagstats_out,mosdepth_out, + somalier_analysis_out) + | flatten | toList + + multiqc(conall) +} //Variant Calling from BAM only workflow INPUT_TONLY_BAM { @@ -432,7 +724,7 @@ workflow INPUT_TONLY_BAM { }else{ intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') } - splitinterval(intervalbedin) + matchbed(intervalbedin) | splitinterval bamwithsample=baminputonly @@ -440,6 +732,7 @@ workflow INPUT_TONLY_BAM { bamwithsample splitout=splitinterval.out sample_sheet + }