diff --git a/README.md b/README.md
index 7858e81..3c3e73e 100644
--- a/README.md
+++ b/README.md
@@ -35,17 +35,17 @@ LOGAN supports either
 LOGAN supports inputs of either

 1) paired end fastq files
-`--fastq_input`- A glob can be used to include all FASTQ files. Like `--fastq_input "*R{1,2}.fastq.gz"`. Globbing requires quotes
+`--fastq_input`- A glob can be used to include all FASTQ files, like `--fastq_input "*R{1,2}.fastq.gz"`. Globbing requires quotes.

 2) Pre aligned BAM files with BAI indices
-`--bam_input`- A glob can be used to include all FASTQ files. Like `--bam_input "*.bam"`. Globbing requires quotes
+`--bam_input`- A glob can be used to include all BAM files, like `--bam_input "*.bam"`. Globbing requires quotes.

 3) A sheet that indicates the sample name and either FASTQs or BAM file locations
 `--fastq_file_input`- A headerless tab delimited sheet that has the sample name, R1, and R2 file locations
-`--bam_file_input` - A headerless tab delimited sheet that has the sample name, bam and bai file locations
+`--bam_file_input` - A headerless tab delimited sheet that has the sample name, BAM, and BAM index (BAI) file locations (example sheets are sketched under Optional Arguments below)

 ### Operating Modes
@@ -64,30 +64,50 @@ No flags are required

 Adding flags determines SNV (germline and/or somatic), SV, and/or CNV calling modes

-`--vc`- Enables somatic SNV calling using mutect2, vardict, varscan, octopus, MUSE (TN only), and lofreq (TN only)
+`--vc`- Enables somatic SNV calling using mutect2, vardict, varscan, octopus, sage, MUSE (TN only), and lofreq (TN only)
+
 `--germline`- Enables germline using DV

 `--sv`- Enables somatic SV calling using Manta and SVABA

-`--vc`- Enables somatic CNV calling using FREEC, Sequenza, and Purple (hg38 only)
+`--cnv`- Enables somatic CNV calling using FREEC, Sequenza, and Purple (hg38 only)
+
+

 #### Optional Arguments
 `--indelrealign` - Enables indel realignment when running alignment steps. May be helpful for certain callers (VarScan, VarDict)

-`--callers`- Comma separated argument for callers, the default is to use all available. Example: `--callers mutect2,octopus,vardict,varscan`
+`--callers`- Comma separated list of somatic SNV callers; the default is to use all available callers.
+Example: `--callers mutect2,octopus`
+
+`--cnvcallers`- Comma separated list of CNV callers; adding this flag runs only the listed callers.
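+
+A minimal sketch of the headerless, tab delimited sheets accepted by `--fastq_file_input` and `--bam_file_input` (sample names and paths here are hypothetical placeholders; columns are separated by tabs). Columns for `--fastq_file_input` are sample name, R1, and R2:
+
+```
+Sample1	/data/fastq/Sample1_R1.fastq.gz	/data/fastq/Sample1_R2.fastq.gz
+Sample2	/data/fastq/Sample2_R1.fastq.gz	/data/fastq/Sample2_R2.fastq.gz
+```
+
+Columns for `--bam_file_input` are sample name, BAM, and BAI:
+
+```
+Sample1	/data/bam/Sample1.bam	/data/bam/Sample1.bam.bai
+Sample2	/data/bam/Sample2.bam	/data/bam/Sample2.bam.bai
+```
+
+As with `--callers`, pass `--cnvcallers` a comma separated list.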
+Example: `--cnvcallers purple` + ## Running LOGAN +Example of Tumor_Normal calling mode +```bash +# copy the logan config files to your current directory +logan init +# preview the logan jobs that will run +logan run --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -preview --vc --sv --cnv +# run a stub/dryrun of the logan jobs +logan run --mode local -profile ci_stub --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" -stub --vc --sv --cnv +# launch a logan run on slurm with the test dataset +logan run --mode slurm -profile biowulf,slurm --genome hg38 --sample_sheet samplesheet.tsv --outdir out --fastq_input "*R{1,2}.fastq.gz" --vc --sv --cnv +``` + Example of Tumor only calling mode ```bash # copy the logan config files to your current directory logan init # preview the logan jobs that will run -logan run --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" -preview --vc --sv --cnv +logan run --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -preview --vc --sv --cnv # run a stub/dryrun of the logan jobs -logan run --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" -stub --vc --sv --cnv +logan run --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 -stub --vc --sv --cnv # launch a logan run on slurm with the test dataset -logan run --mode slurm -profile biowulf,slurm --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --vc --sv --cnv +logan run --mode slurm -profile biowulf,slurm --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --callers octopus,mutect2 --vc --sv --cnv ``` We currently support the hg38, hg19 (in progress), and mm10 genomes. diff --git a/bin/flowcell_lane.py b/bin/flowcell_lane.py index 2521398..0839b05 100644 --- a/bin/flowcell_lane.py +++ b/bin/flowcell_lane.py @@ -40,19 +40,6 @@ def usage(message = '', exitcode = 0): sys.exit(exitcode) -def reader(fname): - """Returns correct file object handler or reader for gzipped - or non-gzipped FastQ files based on the file extension. Assumes - gzipped files endwith the '.gz' extension. - """ - if fname.endswith('.gz'): - # Opens up file with gzip handler - return gzip.open - else: - # Opens up file normal, uncompressed handler - return open - - def get_flowcell_lane(sequence_identifer): """Returns flowcell and lane information for different fastq formats. 
FastQ files generated with older versions of Casava or downloaded from @@ -130,10 +117,10 @@ def md5sum(filename, blocksize = 65536): md5 = md5sum(filename) # Get Flowcell and Lane information - handle = reader(filename) + handle = gzip.open if filename.endswith('.gz') else open meta = {'flowcell': [], 'lane': [], 'flowcell_lane': []} i = 0 # keeps track of line number - with handle(filename, 'r') as file: + with handle(filename, 'rt') as file: print('sample_name\ttotal_read_pairs\tflowcell_ids\tlanes\tflowcell_lanes\tmd5_checksum') for line in file: line = line.strip() diff --git a/bin/split_Bed_into_equal_regions.py b/bin/split_Bed_into_equal_regions.py old mode 100644 new mode 100755 diff --git a/conf/genomes.config b/conf/genomes.config index d489f56..3c864de 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -28,15 +28,18 @@ params { octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + //HMFTOOLS + GENOMEVER = "38" + HOTSPOTS = "-hotspots /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz" + PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" + HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" + ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" //PURPLE - GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz" - GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp" - DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz' - ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/' - DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv' - HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz' - - } + GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz" + GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/GC_profile.1000bp.38.cnp" + DIPLODREG = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/DiploidRegions.38.bed.gz" + DRIVERS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/DriverGenePanel.38.tsv" + } 'hg19' { genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa" @@ -65,8 +68,14 @@ params { octopus_gforest= "" //"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = 
['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + //HMFTOOLS + GENOMEVER = "37" + HOTSPOTS = "-hotspots /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.38.vcf.gz" + PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" + HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" + ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" //PURPLE - GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GermlineHetPon.38.vcf.gz" + GERMLINEHET = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/copy_number/AmberGermlineSites.38.tsv.gz" GCPROFILE = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/GC_profile.1000bp.38.cnp" DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz' ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/' diff --git a/docker/lofreq/Dockerfile b/docker/lofreq/Dockerfile deleted file mode 100644 index 01ef8de..0000000 --- a/docker/lofreq/Dockerfile +++ /dev/null @@ -1,59 +0,0 @@ -FROM --platform=linux/amd64 ubuntu:22.04 - -# build time variables -ARG BUILD_DATE="000000" -ENV BUILD_DATE=${BUILD_DATE} -ARG BUILD_TAG="000000" -ENV BUILD_TAG=${BUILD_TAG} -ARG REPONAME="000000" -ENV REPONAME=${REPONAME} - -LABEL maintainer - -# Create Container filesystem specific -# working directory and opt directories - -# This section installs system packages required for your project -# If you need extra system packages add them here. -RUN apt-get update \ - && apt-get -y upgrade \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - automake \ - build-essential \ - curl \ - git \ - gcc \ - libbz2-dev \ - libcurl4-gnutls-dev \ - libgsl0-dev \ - libperl-dev \ - liblzma-dev \ - libncurses5-dev \ - libssl-dev \ - python3-dev \ - zlib1g-dev - -RUN ln -s /usr/bin/python3.10 /usr/bin/python - -WORKDIR /opt2 - -ARG htsversion=1.19 - -RUN curl -L https://github.com/samtools/htslib/releases/download/${htsversion}/htslib-${htsversion}.tar.bz2 | tar xj \ - && cd htslib-${htsversion} \ - && ./configure \ - && make \ - && make install - -RUN curl -L https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj \ - && cd bcftools-${htsversion} \ - && ./configure && make && make install - -RUN git clone https://github.com/CSB5/lofreq \ - && cd lofreq \ - && ./bootstrap \ - && ./configure --with-htslib=/usr/local \ - && make \ - && make install - -ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH diff --git a/docker/lofreq/build.sh b/docker/lofreq/build.sh deleted file mode 100644 index 4f87a24..0000000 --- a/docker/lofreq/build.sh +++ /dev/null @@ -1,11 +0,0 @@ -# Build image - -docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.4 -f Dockerfile . 
- -docker tag ccbr_lofreq:v0.0.1 dnousome/ccbr_lofreq:v0.0.1 -docker push dnousome/ccbr_lofreq:v0.0.1 - -docker push dnousome/ccbr_logan_base:latest - - - diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 844a8b7..b60fadc 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -20,16 +20,18 @@ WORKDIR /opt2 RUN apt-get update \ && apt-get -y upgrade \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - bc + bc \ + openjdk-17-jdk # Common bioinformatics tools # bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1 # bedtools/2.27.1 bedops/2.4.37 samtools/1.10 # bcftools/1.10.2 vcftools/0.1.16 -# Previous tools already installed trimmomatic/0.39 tabix/1.10.2 +# Previous tools already installed tabix/1.10.2 trimmomatic/0.39 RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ - tabix \ - trimmomatic + tabix \ + libhts-dev + # Install BWA-MEM2 v2.2.1 RUN wget https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2 \ @@ -44,13 +46,17 @@ RUN wget https://github.com/biod/sambamba/releases/download/v0.8.1/sambamba-0.8. && mv /opt2/sambamba-0.8.1-linux-amd64-static /opt2/sambamba \ && chmod a+rx /opt2/sambamba -# Install GATK4 (GATK/4.3.0.0) -# Requires Java8 or 1.8 -RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4.3.0.0.zip \ - && unzip /opt2/gatk-4.3.0.0.zip \ - && rm /opt2/gatk-4.3.0.0.zip \ - && /opt2/gatk-4.3.0.0/gatk --list -ENV PATH="/opt2/gatk-4.3.0.0:$PATH" +# Install GATK4 (GATK/4.4.0.0) +# Requires Java17 +RUN wget https://github.com/broadinstitute/gatk/releases/download/4.4.0.0/gatk-4.4.0.0.zip \ + && unzip /opt2/gatk-4.4.0.0.zip \ + && rm /opt2/gatk-4.4.0.0.zip \ + && /opt2/gatk-4.4.0.0/gatk --list +ENV PATH="/opt2/gatk-4.4.0.0:$PATH" + +# Use DISCVRSeq For CombineVariants Replacement +RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.62/DISCVRSeq-1.3.62.jar +ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.62.jar" # Install last release of GATK3 (GATK/3.8-1) # Only being used for the CombineVariants @@ -168,29 +174,57 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3 ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH" # Fastp From Opengene github -RUN wget http://opengene.org/fastp/fastp.0.23.2 \ +RUN wget http://opengene.org/fastp/fastp.0.23.4 \ && mkdir fastp \ - && mv fastp.0.23.2 fastp/fastp \ + && mv fastp.0.23.4 fastp/fastp \ && chmod a+x fastp/fastp ENV PATH="/opt2/fastp:$PATH" -# HMFtools for PURPLE/COBALT/AMBER -RUN wget https://github.com/hartwigmedical/hmftools/releases/download/amber-v3.9/amber-3.9.jar \ - && wget https://github.com/hartwigmedical/hmftools/releases/download/cobalt-v1.15.1/cobalt_v1.15.1.jar \ - && wget https://github.com/hartwigmedical/hmftools/releases/download/purple-v3.9/purple_v3.9.jar \ - && mkdir hmftools \ - && mv amber-3.9.jar hmftools/amber.jar \ - && mv cobalt_v1.15.1.jar hmftools/cobalt.jar \ - && mv purple_v3.9.jar hmftools/purple.jar \ - && chmod a+x hmftools/amber.jar -ENV PATH="/opt2/hmftools:$PATH" +# ASCAT +RUN Rscript -e 'devtools::install_github("VanLoo-lab/ascat/ASCAT")' # SvABA RUN wget -O svaba_1.2.0 https://github.com/walaj/svaba/releases/download/v1.2.0/svaba \ && mkdir svaba \ - && mv svaba_1.2.0 svaba/svaba + && mv svaba_1.2.0 svaba/svaba \ + && chmod a+x svaba/svaba + ENV PATH="/opt2/svaba:$PATH" +# LOFREQ +RUN git clone https://github.com/CSB5/lofreq \ + && cd /opt2/lofreq \ + && ./bootstrap \ + && ./configure --prefix=/opt2/lofreq/ \ + && make \ + && make install + +ENV 
PATH="/opt2/lofreq/bin:$PATH"
+
+# MUSE
+RUN wget -O muse_2.0.4.tar.gz https://github.com/wwylab/MuSE/archive/refs/tags/v2.0.4.tar.gz \
+    && tar -xzf muse_2.0.4.tar.gz \
+    && cd MuSE-2.0.4 \
+    && ./install_muse.sh \
+    && mv MuSE /opt2/ \
+    && chmod a+x /opt2/MuSE \
+    && rm -R /opt2/MuSE-2.0.4 \
+    && rm /opt2/muse_2.0.4.tar.gz
+
+ENV PATH="/opt2/MuSE:$PATH"
+
+# HMFtools for PURPLE/COBALT/AMBER
+RUN wget https://github.com/hartwigmedical/hmftools/releases/download/amber-v4.0/amber-4.0.jar \
+    && wget https://github.com/hartwigmedical/hmftools/releases/download/cobalt-v1.16/cobalt_v1.16.jar \
+    && wget https://github.com/hartwigmedical/hmftools/releases/download/purple-v4.0/purple_v4.0.jar \
+    && wget https://github.com/hartwigmedical/hmftools/releases/download/sage-v3.4/sage_v3.4.jar \
+    && mkdir hmftools \
+    && mv amber-4.0.jar hmftools/amber.jar \
+    && mv cobalt_v1.16.jar hmftools/cobalt.jar \
+    && mv purple_v4.0.jar hmftools/purple.jar \
+    && mv sage_v3.4.jar hmftools/sage.jar \
+    && chmod a+x hmftools/amber.jar
+ENV PATH="/opt2/hmftools:$PATH"

 # Add Dockerfile and argparse.bash script
 # and export environment variables
diff --git a/docker/logan_base/build.sh b/docker/logan_base/build.sh
index 5ed0769..5515cc0 100644
--- a/docker/logan_base/build.sh
+++ b/docker/logan_base/build.sh
@@ -1,14 +1,17 @@
+
 # Build image
 #docker buildx create --platform linux/amd64 --use
 #docker buildx use upbeat_ganguly
 #docker buildx inspect upbeat_ganguly
 #docker buildx build --platform linux/amd64 -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --push .
-docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.0 -f Dockerfile .
-docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base:v0.3.0
-docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base
+docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.5 -f Dockerfile .
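+
+# Optional smoke test (suggested here, untested): before tagging, confirm the rebuilt
+# image exposes the newly added tools. Kept commented out like the buildx notes above.
+#docker run --rm ccbr_logan_base:v0.3.5 gatk --list
+#docker run --rm ccbr_logan_base:v0.3.5 lofreq version
+#docker run --rm ccbr_logan_base:v0.3.5 bash -c 'java -jar $DISCVRSeq_JAR --help'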
+ +docker tag ccbr_logan_base:v0.3.5 dnousome/ccbr_logan_base:v0.3.5 +docker tag ccbr_logan_base:v0.3.5 dnousome/ccbr_logan_base -docker push dnousome/ccbr_logan_base:v0.3.0 + +docker push dnousome/ccbr_logan_base:v0.3.5 docker push dnousome/ccbr_logan_base:latest @@ -21,4 +24,3 @@ docker push dnousome/ccbr_logan_base:latest # Push image to DockerHub #docker push nciccbr/ccbr_wgs_base:v0.1.0 #docker push nciccbr/ccbr_wgs_base:latest - diff --git a/docker/logan_base/meta.yml b/docker/logan_base/meta.yml index b7893e7..8e76b0a 100644 --- a/docker/logan_base/meta.yml +++ b/docker/logan_base/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: dnousome image_name: ccbr_logan_base -version: v0.3.4 +version: v0.3.5 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 225ca65..4a00109 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -15,10 +15,11 @@ if (params.genome=="mm10"){ } if (params.genome=="hg38" | params.genome=="hg19"){ - GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET) + GENOMEVER=params.genomes[params.genome].GENOMEVER GCPROFILE=file(params.genomes[params.genome].GCPROFILE) + GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET) DIPLODREG=file(params.genomes[params.genome].DIPLODREG) - ENSEMBLCACHE=file(params.genomes[params.genome].ENSEMBLCACHE) + ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE DRIVERS=file(params.genomes[params.genome].DRIVERS) HOTSPOTS=file(params.genomes[params.genome].HOTSPOTS) } @@ -401,12 +402,14 @@ process purple { -amber ${amberin} \ -cobalt ${cobaltin} \ -gc_profile $GCPROFILE \ - -ref_genome_version 38 \ - -ref_genome $GENOME \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ -ensembl_data_dir $ENSEMBLCACHE \ -somatic_vcf ${somaticvcf} \ + -run_drivers \ -driver_gene_panel $DRIVERS \ -somatic_hotspots $HOTSPOTS \ + -threads $task.cpus \ -output_dir ${tumorname} """ @@ -440,10 +443,12 @@ process purple_novc { -amber ${amberin} \ -cobalt ${cobaltin} \ -gc_profile $GCPROFILE \ - -ref_genome_version 38 \ - -ref_genome $GENOME \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ -ensembl_data_dir $ENSEMBLCACHE \ + -threads $task.cpus \ -output_dir ${tumorname} + """ stub: @@ -476,12 +481,14 @@ process purple_tonly { -amber ${amberin} \ -cobalt ${cobaltin} \ -gc_profile $GCPROFILE \ - -ref_genome_version 38 \ - -ref_genome $GENOME \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ -ensembl_data_dir $ENSEMBLCACHE \ -somatic_vcf ${somaticvcf} \ + -run_drivers \ -driver_gene_panel $DRIVERS \ -somatic_hotspots $HOTSPOTS \ + -threads $task.cpus \ -output_dir ${tumorname} """ @@ -514,9 +521,10 @@ process purple_tonly_novc { -amber ${amberin} \ -cobalt ${cobaltin} \ -gc_profile $GCPROFILE \ - -ref_genome_version 38 \ - -ref_genome $GENOME \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ -ensembl_data_dir $ENSEMBLCACHE \ + -threads $task.cpus \ -output_dir ${tumorname} """ diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index e697d65..f51d546 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -8,11 +8,16 @@ PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies VEPBUILD=params.genomes[params.genome].vepbuild +LOFREQ_CONVERT=params.lofreq_convert +//Octopus SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest 
GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest -LOFREQ_CONVERT=params.lofreq_convert - - +//HMFTOOLS +HOTSPOTS=params.genomes[params.genome].HOTSPOTS +PANELBED=params.genomes[params.genome].PANELBED +HCBED=params.genomes[params.genome].HCBED +ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE +GENOMEVER=params.genomes[params.genome].GENOMEVER process mutect2 { container "${params.containers.logan}" @@ -463,7 +468,6 @@ process octopus_tn { path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") script: - """ octopus -R $GENOMEREF -I ${normal} ${tumor} --normal-sample ${normalname} \ -C cancer \ @@ -471,13 +475,11 @@ process octopus_tn { --threads $task.cpus \ $GERMLINE_FOREST \ $SOMATIC_FOREST \ - --target-working-memory 92Gb \ - -B 90Gb \ + -B 92Gb \ -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz """ stub: - """ touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" """ @@ -485,8 +487,40 @@ process octopus_tn { } +process sage_tn { + container "${params.containers.hmftools}" + label 'process_somaticcaller' + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}.sage.vcf.gz"), + path("${tumorname}_vs_${normalname}.sage.vcf.gz.tbi") + +script: + """ + java -Xms4G -Xmx32G -cp sage.jar com.hartwig.hmftools.sage.SageApplication \ + -tumor ${tumorname} -tumor_bam ${tumorbam} \ + -reference ${normalname} -reference_bam ${normalbam} \ + -threads $task.cpus \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + $HOTSPOTS $PANELBED $HCBED $ENSEMBLCACHE \ + -output_vcf ${tumorname}_vs_${normalname}.sage.vcf.gz + """ + + stub: + """ + touch "${tumorname}_vs_${normalname}.sage.vcf.gz" "${tumorname}_vs_${normalname}.sage.vcf.gz.tbi" + """ +} + + process lofreq_tn { - container "${params.containers.lofreq}" + container "${params.containers.logan}" label 'process_somaticcaller' input: diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 530836f..0ec9762 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -8,8 +8,15 @@ PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies VEPBUILD=params.genomes[params.genome].vepbuild +//Octopus SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest +//HMFTOOLS +HOTSPOTS=params.genomes[params.genome].HOTSPOTS +PANELBED=params.genomes[params.genome].PANELBED +HCBED=params.genomes[params.genome].HCBED +ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE +GENOMEVER=params.genomes[params.genome].GENOMEVER process pileup_paired_tonly { @@ -380,6 +387,37 @@ process octopus_convertvcf_tonly { } +process sage_tonly { + container "${params.containers.hmftools}" + label 'process_somaticcaller' + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), + path("${tumorname}.tonly.sage.vcf.gz"), + path("${tumorname}.tonly.sage.vcf.gz.tbi") + + script: + """ + java -Xms4G -Xmx32G -cp sage.jar com.hartwig.hmftools.sage.SageApplication \ + -tumor ${tumorname} -tumor_bam ${tumorbam} \ + -threads $task.cpus \ + -ref_genome_version $GENOMEVER \ + -ref_genome $GENOMEREF \ + $HOTSPOTS $PANELBED $HCBED $ENSEMBLCACHE \ + -output_vcf ${tumorname}.tonly.sage.vcf.gz + """ + 
+ stub: + """ + touch "${tumorname}.tonly.sage.vcf.gz" "${tumorname}.tonly.sage.vcf.gz.tbi" + """ + +} + + process somaticcombine_tonly { container "${params.containers.logan}" label 'process_medium' @@ -411,14 +449,14 @@ process somaticcombine_tonly { """ stub: - + vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } vcfin2="-V:" + vcfin1.join(" -V:") - callerin=caller.join(",").replaceAll("_tonly","") + """ touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi - """ + """ } diff --git a/nextflow.config b/nextflow.config index d9dac03..6a35323 100644 --- a/nextflow.config +++ b/nextflow.config @@ -52,7 +52,7 @@ params { bam_input=null BAMINPUT=null - callers = "mutect2,octopus,vardict,varscan" + callers = "mutect2,octopus,lofreq,muse,sage,vardict,varscan" cnvcallers= "purple,sequenza,freec" publish_dir_mode = 'symlink' diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index af9d31f..094283a 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -14,26 +14,28 @@ include {deepvariant_step1; deepvariant_step2; deepvariant_step3; include {pileup_paired_t; pileup_paired_n; mutect2; mutect2filter; contamination_paired; learnreadorientationmodel;mergemut2stats; - strelka_tn; combineVariants_strelka; - varscan_tn; vardict_tn; lofreq_tn; muse_tn; + strelka_tn; + varscan_tn; vardict_tn; lofreq_tn; muse_tn; sage_tn; octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; octopus_convertvcf; + combineVariants_strelka; combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly; + combineVariants as combineVariants_sage; combineVariants as combineVariants_sage_tonly; combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse; combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; - annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; + annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; annotvep_tn as annotvep_tn_sage; annotvep_tn as annotvep_tn_combined; combinemafs_tn; somaticcombine} from '../../modules/local/variant_calling.nf' include {mutect2_t_tonly; mutect2filter_tonly; - varscan_tonly; vardict_tonly; octopus_tonly; + varscan_tonly; vardict_tonly; octopus_tonly; sage_tonly; contamination_tumoronly; learnreadorientationmodel_tonly; mergemut2stats_tonly; octopus_convertvcf_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; - annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; + annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; annotvep_tonly as annotvep_tonly_sage; annotvep_tonly as annotvep_tonly_combined; combinemafs_tonly;somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' @@ -175,7 +177,7 @@ workflow VC { //Prep Pileups call_list = params.callers.split(',') as List - + vc_all=Channel.empty() vc_tonly=Channel.empty() @@ -263,8 +265,8 @@ workflow VC { annotvep_tonly_mut2(mutect2_in_tonly) - vc_all=vc_all|concat(mutect2_in) - vc_tonly=vc_tonly|concat(mutect2_in_tonly) + vc_all = vc_all|concat(mutect2_in) + 
vc_tonly = vc_tonly | concat(mutect2_in_tonly) } @@ -324,7 +326,32 @@ workflow VC { vc_all=vc_all|concat(varscan_in) vc_tonly=vc_tonly|concat(varscan_in_tonly) } - + + + + //SAGE TN + if ("sage" in call_list){ + sage_in=sage_tn(bamwithsample) + | map{tu,no,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf,"sage")} + | combineVariants_sage + | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"sage",normvcf,normindex)} + annotvep_tn_sage(sage_in) + + sage_in_tonly=bamwithsample | map{tumor,tbam,tbai,norm,nbam,nbai -> tuple(tumor,tbam,tbai)} + | sage_tonly + | map{samplename,vcf,vcfindex->tuple(samplename,vcf,"sage_tonly")} + | combineVariants_sage_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"sage_tonly",normvcf,normindex)} + annotvep_tonly_sage(sage_in_tonly) + + vc_all=vc_all | concat(sage_in) + vc_tonly=vc_tonly | concat(sage_in_tonly) + + } + + //Lofreq TN if ("lofreq" in call_list){ lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) @@ -379,28 +406,28 @@ workflow VC { //Combine All Variants Using VCF -> Annotate if (call_list.size()>1){ - vc_all | groupTuple(by:[0,1]) + somaticcall_input=vc_all | groupTuple(by:[0,1]) | somaticcombine | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} - | annotvep_tn_combined + somaticcall_input | annotvep_tn_combined + }else if ("octopus" in call_list){ + somaticcall_input=octopus_in_sc + }else if("mutect2" in call_list){ + somaticcall_input=mutect2_in + }else if("sage" in call_list){ + somaticcall_input=sage_in } if (call_list.size()>1){ - vc_tonly + vc_tonly | groupTuple() | somaticcombine_tonly - | map{tumor,vcf,index ->tuple(tumor,normal,"combined_tonly",vcf,index)} - | annotvep_tn_combined - } + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined + } //Implement PCGR Annotator/CivIC Next - if ("octopus" in call_list){ - somaticcall_input=octopus_in_sc - }else if("mutect2" in call_list){ - somaticcall_input=mutect2_in - } - emit: somaticcall_input @@ -474,10 +501,10 @@ workflow CNVhuman { //Purple bamwithsample | amber_tn bamwithsample | cobalt_tn - purplein=amber_tn.out.join(cobalt_tn.out) - purplein.join(somaticcall_input)| - map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,n1,amber,cobalt,vcf,vcfindex)} - | purple + purplein=amber_tn.out.join(cobalt_tn.out) + purplein.join(somaticcall_input) + | map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,n1,amber,cobalt,vcf,vcfindex)} + | purple } if ("sequenza" in cnvcall_list){ diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index fe5ad17..5c0fdfb 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -16,18 +16,21 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; contamination_paired; learnreadorientationmodel; mergemut2stats; combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; - combineVariants_alternative ; - annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; + combineVariants as combineVariants_sage; combineVariants as combineVariants_sage_tonly; + combineVariants_alternative; + annotvep_tn as annotvep_tn_mut2; + annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; 
combinemafs_tn} from '../../modules/local/variant_calling.nf' include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; varscan_tonly; vardict_tonly; - octopus_tonly; + octopus_tonly; sage_tonly; contamination_tumoronly; learnreadorientationmodel_tonly; mergemut2stats_tonly; octopus_convertvcf_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; + annotvep_tonly as annotvep_tonly_sage; annotvep_tonly as annotvep_tonly_combined; combinemafs_tonly; somaticcombine_tonly} from '../../modules/local/variant_calling_tonly.nf' @@ -203,8 +206,7 @@ workflow VC_TONLY { if ("octopus" in call_list){ octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus | groupTuple() - | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} - ,vcfindex, "octopus_tonly")} + | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name},vcfindex, "octopus_tonly")} | combineVariants_alternative | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} annotvep_tonly_octopus(octopus_in_tonly) @@ -213,26 +215,42 @@ workflow VC_TONLY { vc_tonly=vc_tonly|concat(octopus_in_tonly_sc) } + //SAGE + if ("sage" in call_list){ + sage_in_tonly=sage_tonly(bamwithsample) + | groupTuple() + | map{samplename,vcf,vcfindex -> tuple(samplename,vcf,"sage_tonly")} + | combineVariants_sage_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"sage_tonly",normvcf,normindex)} + annotvep_tonly_sage(sage_in_tonly) + + vc_tonly=vc_tonly | concat(sage_in_tonly) + } + + //Combined Variants and Annotated + //Emit for SC downstream, take Oc/Mu2/sage/Vard/Varscan + if (call_list.size()>1){ - vc_tonly - | groupTuple() | view() - | somaticcombine_tonly - | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} - | annotvep_tonly_combined + somaticcall_input=vc_tonly + | groupTuple() + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + somaticcall_input | annotvep_tonly_combined + }else if("octopus" in call_list){ + somaticcall_input=octopus_in_tonly_sc + }else if("mutect2" in call_list){ + somaticcall_input=mutect2_in_tonly + }else if("sage" in call_list){ + somaticcall_input=sage_in_tonly + }else if("vardict" in call_list){ + somaticcall_input=vardict_in_tonly + }else if("varscan" in call_list){ + somaticcall_input=varscan_in_tonly } - - //Emit for SC downstream, take Oc/Mu2/Vard/Varscan - if("octopus" in call_list){ - somaticcall_input=octopus_in_tonly_sc - }else if("mutect2" in call_list){ - somaticcall_input=mutect2_in_tonly - }else if("vardict" in call_list){ - somaticcall_input=vardict_in_tonly - }else if("varscan" in call_list){ - somaticcall_input=varscan_in_tonly - } + //Emit for SC downstream, take Combined/Oc/Mu2/Vard/Varscan emit: somaticcall_input }
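For orientation, here is a minimal sketch of the paired SAGE call that the new `sage_tn`/`sage_tonly` processes wrap. It is a sketch under stated assumptions, not the pipeline's definitive implementation: the declared `tumor`/`normal` path inputs are assumed to be the BAMs interpolated into the command (so the script variables match the input names), `sage.jar` is assumed to be reachable at `/opt2/hmftools/sage.jar` inside the container (that is where the logan_base Dockerfile above installs it; the `params.containers.hmftools` image may differ), and `GENOMEREF`, `GENOMEVER`, `HOTSPOTS`, `PANELBED`, `HCBED`, and `ENSEMBLCACHE` are the module-level values defined at the top of `variant_calling.nf`.

```nextflow
// Sketch of a paired SAGE process; the input names and jar path are assumptions (see above).
process sage_tn_sketch {
    container "${params.containers.hmftools}"
    label 'process_somaticcaller'

    input:
        tuple val(tumorname), path(tumor), path(tumorbai),
              val(normalname), path(normal), path(normalbai)

    output:
        tuple val(tumorname), val(normalname),
              path("${tumorname}_vs_${normalname}.sage.vcf.gz"),
              path("${tumorname}_vs_${normalname}.sage.vcf.gz.tbi")

    script:
    """
    java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar com.hartwig.hmftools.sage.SageApplication \
        -tumor ${tumorname} -tumor_bam ${tumor} \
        -reference ${normalname} -reference_bam ${normal} \
        -threads $task.cpus \
        -ref_genome_version $GENOMEVER \
        -ref_genome $GENOMEREF \
        $HOTSPOTS $PANELBED $HCBED $ENSEMBLCACHE \
        -output_vcf ${tumorname}_vs_${normalname}.sage.vcf.gz
    """

    stub:
    """
    touch ${tumorname}_vs_${normalname}.sage.vcf.gz ${tumorname}_vs_${normalname}.sage.vcf.gz.tbi
    """
}
```

The tumor-only form would simply drop the `-reference`/`-reference_bam` arguments and write `${tumorname}.tonly.sage.vcf.gz`.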