From f0f6bec2380c08dec48b0c67a5369d57261c0b9c Mon Sep 17 00:00:00 2001 From: Stephen Binaansim Date: Wed, 30 Oct 2024 15:24:39 +0000 Subject: [PATCH 1/3] Adding option for star aligner --- .test/config/config_paired_end.yaml | 5 +- workflow/Snakefile | 9 +- workflow/envs/variants.yaml | 1 + workflow/rules/common.smk | 2 +- workflow/rules/hisat2-freebayes.smk | 35 +++-- workflow/rules/qc.smk | 11 +- workflow/rules/star-freebayes.smk | 126 ++++++++++++++++++ workflow/rules/utilities.smk | 15 ++- .../scripts/starGenerateFreebayesParams.R | 74 ++++++++++ 9 files changed, 245 insertions(+), 33 deletions(-) create mode 100644 workflow/rules/star-freebayes.smk create mode 100755 workflow/scripts/starGenerateFreebayesParams.R diff --git a/.test/config/config_paired_end.yaml b/.test/config/config_paired_end.yaml index 3b54b05..0124add 100644 --- a/.test/config/config_paired_end.yaml +++ b/.test/config/config_paired_end.yaml @@ -9,9 +9,12 @@ results-jupyterbook: activate: True fastq: - auto: True # set auto if your files follow + auto: True # set auto if your files follow paired: True +#specify the seq aligner you want to use +aligner: "STAR" #or "HISAT2" + # Paths for reference files reference: genome: diff --git a/workflow/Snakefile b/workflow/Snakefile index 7092b91..cf8c75b 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -24,11 +24,18 @@ welcome(version="v2.0.2") include: "rules/qc.smk" include: "rules/diffexp.smk" include: "rules/utilities.smk" -include: "rules/hisat2-freebayes.smk" include: "rules/snpEff.smk" include: "rules/variantAnalysis.smk" include: "rules/jupyter-book.smk" +if config['aligner'] == 'STAR': + include: "rules/star-freebayes.smk" + +if config['aligner'] == 'HISAT2': + include: "rules/hisat2-freebayes.smk" + + + if config['pipeline'] == 'parabricks': include: "rules/star-haplotypecaller.smk" diff --git a/workflow/envs/variants.yaml b/workflow/envs/variants.yaml index e788024..a740055 100644 --- a/workflow/envs/variants.yaml +++ b/workflow/envs/variants.yaml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - hisat2 + - star - samtools - freebayes=1.3.8 - bcftools diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index d152188..93609ef 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -283,4 +283,4 @@ def welcome(version): print(f"Author: Sanjay Curtis Nagi") print(f"Workflow Version: {version}") print("Execution time: ", datetime.datetime.now()) - print(f"Dataset: {config['dataset']}", "\n") + print(f"Dataset: {config['dataset']}", "\n") \ No newline at end of file diff --git a/workflow/rules/hisat2-freebayes.smk b/workflow/rules/hisat2-freebayes.smk index 6e5383a..5c43278 100644 --- a/workflow/rules/hisat2-freebayes.smk +++ b/workflow/rules/hisat2-freebayes.smk @@ -1,22 +1,21 @@ - rule HISAT2index: - """ - Make a HISAT2 index of the reference genome - """ - input: - fasta=config["reference"]["genome"].rstrip(".gz"), - output: - "resources/reference/ht2index/idx.1.ht2", - touch("resources/reference/ht2index/.complete"), - log: - "logs/HISAT2/HISAT2index.log", - conda: - "../envs/variants.yaml" - params: - prefix=lambda w, output: output[0].split(os.extsep)[0], - threads: 8 - shell: - "hisat2-build -p {threads} {input.fasta} {params.prefix} 2> {log}" + """ + Make a HISAT2 index of the reference genome + """ + input: + fasta=config["reference"]["genome"].rstrip(".gz"), + output: + "resources/reference/ht2index/idx.1.ht2", + touch("resources/reference/ht2index/.complete"), + log: + "logs/HISAT2/HISAT2index.log", + conda: + "../envs/variants.yaml" + params: + prefix=lambda w, output: output[0].split(os.extsep)[0], + threads: 8 + shell: + "hisat2-build -p {threads} {input.fasta} {params.prefix} 2> {log}" rule HISAT2align: diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 149748c..001af1e 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -37,15 +37,13 @@ rule fastp: wrapper: "v2.2.1/bio/fastp" - - rule BamStats: """ QC alignment statistics """ input: - bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam", - idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai", + bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", + idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai", output: stats="results/qc/alignments/{sample}.flagstat", log: @@ -53,14 +51,13 @@ rule BamStats: wrapper: "v3.12.1/bio/samtools/flagstat" - rule Coverage: """ Calculate coverage with mosdepth """ input: - bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam", - idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai", + bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", + idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai", output: "results/qc/coverage/{sample}.mosdepth.summary.txt", log: diff --git a/workflow/rules/star-freebayes.smk b/workflow/rules/star-freebayes.smk new file mode 100644 index 0000000..c1035ff --- /dev/null +++ b/workflow/rules/star-freebayes.smk @@ -0,0 +1,126 @@ +rule STARindex: + """ + Make a STAR index of the reference genome. + """ + input: + gtfFile=config["reference"]["gff"].rstrip(".gz"), + fasta=config["reference"]["genome"].rstrip(".gz"), + output: + directory("resources/reference/starindex/"), # Flagging the output as a directory + touch("resources/reference/starindex/.complete"), # Completion tracking + log: + "logs/STAR/STARindex.log", + conda: + "../envs/variants.yaml" + params: + prefix="resources/reference/starindex/", + threads: 8 + shell: + """ + STAR --runMode genomeGenerate --genomeDir {params.prefix} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtfFile} --sjdbGTFtagExonParentTranscript Parent --sjdbOverhang 100 --runThreadN {threads} --outFileNamePrefix {params.prefix} 2> {log} + """ + +rule STARalign: + """ + Align reads to the genome with STAR, generate an unsorted BAM, and then sort with samtools. + """ + input: + reads=lambda wildcards: getFASTQs(wildcards=wildcards), + idx="resources/reference/starindex/" + output: + "results/alignments/{sample}.star.bam" + log: + align="logs/STAR/{sample}_align.log", + sort="logs/samtoolsSort/{sample}.log" + conda: + "../envs/variants.yaml" + params: + readflags=lambda wildcards: getFASTQs(wildcards=wildcards), + extra="--outSAMtype BAM Unsorted", + rg="--outSAMattrRGline ID:{wildcards.sample} SM:{wildcards.sample} PL:ILLUMINA" + threads: 12 + shell: + """ + # Generate unsorted BAM with STAR + STAR --genomeDir {input.idx} --readFilesIn {params.readflags} --readFilesCommand zcat {params.extra} {params.rg} --runThreadN {threads} --outFileNamePrefix results/alignments/{wildcards.sample}. 2> {log.align} + + # Sort the BAM file with samtools, following output file convention + samtools sort -@{threads} -o {output} results/alignments/{wildcards.sample}.Aligned.out.bam 2>> {log.sort} + + # Remove intermediate unsorted BAM to save space + rm results/alignments/{wildcards.sample}.Aligned.out.bam + """ + + +chunks = np.arange(1, config["VariantAnalysis"]["chunks"]) + +rule GenerateFreebayesParams: + input: + ref_idx=config["reference"]["genome"].rstrip(".gz"), + index=config["reference"]["genome"].rstrip(".gz") + ".fai", + bams=expand("results/alignments/{sample}.star.bam", sample=samples), + output: + bamlist="results/alignments/bam.list", + pops="results/alignments/populations.tsv", + regions=expand( + "results/variantAnalysis/regions/genome.{contig}.region.{i}.bed", + contig=config["contigs"], + i=chunks, + ), + log: + "logs/GenerateFreebayesParams.log", + params: + aligner='STAR', + metadata=config["metadata"], + contigs=config["contigs"], + chunks=config["VariantAnalysis"]["chunks"], + conda: + "../envs/diffexp.yaml" + script: + "../scripts/starGenerateFreebayesParams.R" + + +rule VariantCallingFreebayes: + """ + Run freebayes on chunks of the genome, splitting the samples by population (strain) + """ + input: + bams=expand("results/alignments/{sample}.star.bam", sample=samples), + index=expand("results/alignments/{sample}.star.bam.bai", sample=samples), + ref=config["reference"]["genome"].rstrip(".gz"), + samples="results/alignments/bam.list", + pops="results/alignments/populations.tsv", + regions="results/variantAnalysis/regions/genome.{contig}.region.{i}.bed", + output: + temp("results/variantAnalysis/vcfs/freebayes/{contig}/variants.{i}.vcf"), + log: + "logs/VariantCallingFreebayes/{contig}.{i}.log", + params: + ploidy=config["VariantAnalysis"]["ploidy"], + conda: + "../envs/variants.yaml" + threads: 8 + shell: + """ + freebayes -f {input.ref} -t {input.regions} --ploidy {params.ploidy} --populations {input.pops} --pooled-discrete --use-best-n-alleles 5 --min-alternate-fraction 0.05 -L {input.samples} > {output} 2> {log} + """ + + +rule ConcatVCFs: + """ + Concatenate VCFs together + """ + input: + calls=expand( + "results/variantAnalysis/vcfs/freebayes/{{contig}}/variants.{i}.vcf", + i=chunks, + ), + output: + temp("results/variantAnalysis/vcfs/freebayes/variants.{contig}.vcf"), + log: + "logs/ConcatVCFs/{contig}.log", + conda: + "../envs/variants.yaml" + threads: 4 + shell: + "bcftools concat {input.calls} | vcfuniq > {output} 2> {log}" diff --git a/workflow/rules/utilities.smk b/workflow/rules/utilities.smk index 740c81c..3fa27e0 100644 --- a/workflow/rules/utilities.smk +++ b/workflow/rules/utilities.smk @@ -44,13 +44,18 @@ rule IndexBams: Index bams with samtools """ input: - bam="results/alignments/{sample}.hisat2.bam", + bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", output: - idx="results/alignments/{sample}.hisat2.bam.bai", + idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", log: "logs/IndexBams/{sample}.log", - wrapper: - "v1.15.0/bio/samtools/index" + conda: + "../envs/variants.yaml" + shell: + """ + samtools index {input.bam} {output.idx} 2> {log} + """ + rule RestrictToSNPs: @@ -66,4 +71,4 @@ rule RestrictToSNPs: params: extra="-v snps", wrapper: - "v1.15.0/bio/bcftools/view" + "v1.15.0/bio/bcftools/view" \ No newline at end of file diff --git a/workflow/scripts/starGenerateFreebayesParams.R b/workflow/scripts/starGenerateFreebayesParams.R new file mode 100755 index 0000000..6c926fc --- /dev/null +++ b/workflow/scripts/starGenerateFreebayesParams.R @@ -0,0 +1,74 @@ +#!/usr/bin/env Rscript +log <- file(snakemake@log[[1]], open="wt") +sink(log) +sink(log, type="message") + +## Generate freebayes params ## +# 1) make bed for parallelising freebayes +library(dplyr) +library(data.table) +library(glue) + + +load_metadata <- function(metadata_path) { + # Check the file extension and load metadata accordingly + if (tools::file_ext(metadata_path) == "xlsx") { + metadata <- readxl::read_excel(metadata_path) + } else if (tools::file_ext(metadata_path) == "tsv") { + metadata <- data.table::fread(metadata_path, sep = "\t") + } else if (tools::file_ext(metadata_path) == "csv") { + metadata <- data.table::fread(metadata_path, sep = ",") + } else { + stop("Metadata file must be .xlsx, .tsv, or .csv") + } + return(metadata) +} + +# read inputs +contigs = snakemake@params[['contigs']] +chunks = snakemake@params[['chunks']] +fai = fread(snakemake@input[['index']]) + +# select contigs we want, and start, end columns +fai = fai[fai$V1 %in% contigs, c(1,2)] + +# for each chromsome +for (contig in contigs){ + #subset index to desired contig + f = fai[fai$V1 == contig] + #get sequence of n chunks from 0 to length of contig + bedseq = round(seq(0, f$V2, length.out = chunks)) + + #for each chunk + for (i in 1:(chunks-1)){ + #write bed file, one for each chunk/interval, which will be passed as input to freebayes + row = c(contig, bedseq[i], bedseq[i+1]) + data.frame(row) %>% t() %>% fwrite(., glue("results/variantAnalysis/regions/genome.{contig}.region.{i}.bed"), sep="\t", col.names = FALSE) + } +} + + + +# 2) Make bamlist and populations.tsv file +metadata = load_metadata(snakemake@params[['metadata']]) + +# # Set the BAM file paths based on the pipeline condition +# metadata$bams = if (snakemake@params[['aligner']] == 'STAR') { +# paste0("results/alignments/", metadata$sampleID, ".star.bam") +# } else { +# paste0("results/alignments/", metadata$sampleID, ".hisat2.bam") +# } +metadata$bams = paste0("results/alignments/", metadata$sampleID, ".star.bam") + +# Write the populations file with BAM paths and strain information +metadata %>% + select(bams, strain) %>% + fwrite(., snakemake@output[['pops']], sep="\t", row.names = FALSE, col.names = FALSE) + +# Write the bamlist file with only BAM paths +metadata %>% + select(bams) %>% + fwrite(., snakemake@output[['bamlist']], sep="\t", row.names = FALSE, col.names = FALSE) + + +sessionInfo() \ No newline at end of file From 91847245d8726d0f95b7920a1a92bbca5ca9fa91 Mon Sep 17 00:00:00 2001 From: Stephen Binaansim Date: Sat, 2 Nov 2024 08:36:48 +0000 Subject: [PATCH 2/3] Updated Related Scripts --- workflow/Snakefile | 7 +++- workflow/rules/common.smk | 38 ++++++++++--------- workflow/rules/qc.smk | 3 +- workflow/rules/star-freebayes.smk | 17 +++++---- workflow/rules/utilities.smk | 1 - .../scripts/starGenerateFreebayesParams.R | 13 +++---- 6 files changed, 43 insertions(+), 36 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index cf8c75b..a33f41e 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -23,13 +23,16 @@ welcome(version="v2.0.2") include: "rules/qc.smk" include: "rules/diffexp.smk" + +if config['aligner'] == 'STAR': + include: "rules/star-freebayes.smk" + include: "rules/utilities.smk" include: "rules/snpEff.smk" include: "rules/variantAnalysis.smk" include: "rules/jupyter-book.smk" -if config['aligner'] == 'STAR': - include: "rules/star-freebayes.smk" + if config['aligner'] == 'HISAT2': include: "rules/hisat2-freebayes.smk" diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 93609ef..d778842 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -17,52 +17,56 @@ def load_metadata(metadata_path): return metadata - def getFASTQs(wildcards, rules=None): """ Get FASTQ files from unit sheet. If there are more than one wildcard (aka, sample), only return one fastq file If the rule is HISAT2align, then return the fastqs with -1 and -2 flags + If the rule is STARalign, return the fastqs without flags. """ metadata = load_metadata(config["metadata"]) - if config['fastq']['paired'] == True: - fastq_cols = ['fq1', 'fq2'] - else: - fastq_cols = ['fq1'] + # Determine if the FASTQs are paired or single-end + fastq_cols = ['fq1', 'fq2'] if config['fastq']['paired'] else ['fq1'] - if config["QualityControl"]["fastp-trim"]["activate"] == True: - if rules in ["KallistoQuant", "HISAT2align", "HISAT2align_input"]: + # Check for fastp trimming activation + if config["QualityControl"]["fastp-trim"]["activate"]: + if rules in ["KallistoQuant", "HISAT2align", "HISAT2align_input", "STARalign", "STARalign_input"]: for i, col in enumerate(fastq_cols): metadata = metadata.assign(**{col: f"resources/reads/trimmed/" + metadata["sampleID"] + f"_{i+1}.fastq.gz"}) metadata = metadata.set_index("sampleID") u = metadata.loc[wildcards.sample, fastq_cols].dropna() if rules == "HISAT2align": - return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] == True else f"-U {u.fq1}" + return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] else f"-U {u.fq1}" + elif rules in ["STARalign", "STARalign_input"]: + return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1] else: - return [u.fq1, u.fq2] if config['fastq']['paired'] == True else [u.fq1] + return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1] + # Auto-generate FASTQ paths if enabled if config["fastq"]["auto"]: for i, col in enumerate(fastq_cols): metadata = metadata.assign(**{col: f"resources/reads/" + metadata["sampleID"] + f"_{i+1}.fastq.gz"}) metadata = metadata.set_index("sampleID") else: - assert ( - "fq1" in metadata.columns - ), f"The fq1 column in the metadata does not seem to exist. Please create one, or use the 'auto' option and name the fastq files as specified in the config/README.md" + assert "fq1" in metadata.columns, "The fq1 column in the metadata does not seem to exist. Please create one, or use the 'auto' option." if config['fastq']['paired']: - assert ( - "fq2" in metadata.columns - ), f"The fq2 column in the metadata does not seem to exist. Please create one, or use the 'auto' option and name the fastq files as specified in the config/README.md" + assert "fq2" in metadata.columns, "The fq2 column in the metadata does not seem to exist. Please create one, or use the 'auto' option." metadata = metadata.set_index("sampleID") + # Retrieve FASTQ paths based on the current rules u = metadata.loc[wildcards.sample, fastq_cols].dropna() if rules == "HISAT2align": - return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] == True else f"-U {u.fq1}" + return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] else f"-U {u.fq1}" + elif rules in ["STARalign", "STARalign_input"]: + return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1] else: - return [u.fq1, u.fq2] if config['fastq']['paired'] == True else [u.fq1] + return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1] + + + def getBAM(wildcards): diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 001af1e..bd860d6 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -23,10 +23,9 @@ rule CheckInputs: "../scripts/checkInputs.py" - rule fastp: input: - sample = getFASTQs, + sample=getFASTQs, output: trimmed=["resources/reads/trimmed/{sample}_1.fastq.gz", "resources/reads/trimmed/{sample}_2.fastq.gz"] if config['fastq']['paired'] else ["resources/reads/trimmed/{sample}_1.fastq.gz"], html="results/qc/{sample}.html", diff --git a/workflow/rules/star-freebayes.smk b/workflow/rules/star-freebayes.smk index c1035ff..92ba10a 100644 --- a/workflow/rules/star-freebayes.smk +++ b/workflow/rules/star-freebayes.smk @@ -25,19 +25,19 @@ rule STARalign: Align reads to the genome with STAR, generate an unsorted BAM, and then sort with samtools. """ input: - reads=lambda wildcards: getFASTQs(wildcards=wildcards), + reads=lambda wildcards: getFASTQs(wildcards=wildcards, rules="STARalign"), idx="resources/reference/starindex/" output: "results/alignments/{sample}.star.bam" log: - align="logs/STAR/{sample}_align.log", - sort="logs/samtoolsSort/{sample}.log" + align="logs/STAR/{sample}_align.log", # Log for alignment + sort="logs/samtoolsSort/{sample}.log" # Log for sorting conda: - "../envs/variants.yaml" + "../envs/variants.yaml" # Conda environment params: - readflags=lambda wildcards: getFASTQs(wildcards=wildcards), - extra="--outSAMtype BAM Unsorted", - rg="--outSAMattrRGline ID:{wildcards.sample} SM:{wildcards.sample} PL:ILLUMINA" + readflags=lambda wildcards: " ".join(getFASTQs(wildcards=wildcards, rules="STARalign")), + extra="--outSAMtype BAM Unsorted", # STAR output type + rg=lambda wildcards: f"--outSAMattrRGline ID:{wildcards.sample} SM:{wildcards.sample} PL:ILLUMINA" threads: 12 shell: """ @@ -52,6 +52,9 @@ rule STARalign: """ + + + chunks = np.arange(1, config["VariantAnalysis"]["chunks"]) rule GenerateFreebayesParams: diff --git a/workflow/rules/utilities.smk b/workflow/rules/utilities.smk index 3fa27e0..3cf3676 100644 --- a/workflow/rules/utilities.smk +++ b/workflow/rules/utilities.smk @@ -57,7 +57,6 @@ rule IndexBams: """ - rule RestrictToSNPs: """" Filter out indels diff --git a/workflow/scripts/starGenerateFreebayesParams.R b/workflow/scripts/starGenerateFreebayesParams.R index 6c926fc..6e4e3a2 100755 --- a/workflow/scripts/starGenerateFreebayesParams.R +++ b/workflow/scripts/starGenerateFreebayesParams.R @@ -52,13 +52,12 @@ for (contig in contigs){ # 2) Make bamlist and populations.tsv file metadata = load_metadata(snakemake@params[['metadata']]) -# # Set the BAM file paths based on the pipeline condition -# metadata$bams = if (snakemake@params[['aligner']] == 'STAR') { -# paste0("results/alignments/", metadata$sampleID, ".star.bam") -# } else { -# paste0("results/alignments/", metadata$sampleID, ".hisat2.bam") -# } -metadata$bams = paste0("results/alignments/", metadata$sampleID, ".star.bam") +# Set the BAM file paths based on the pipeline condition +metadata$bams = if (snakemake@params[['aligner']] == 'STAR') { + paste0("results/alignments/", metadata$sampleID, ".star.bam") +} else { + paste0("results/alignments/", metadata$sampleID, ".hisat2.bam") +} # Write the populations file with BAM paths and strain information metadata %>% From 06c85e7314db60847ab6ce3bd4566c75c2682bd6 Mon Sep 17 00:00:00 2001 From: Stephen Binaansim Date: Mon, 4 Nov 2024 12:24:22 +0000 Subject: [PATCH 3/3] Making STAR the default Aligner & Updating Related Scripts --- .test/config/config_paired_end.yaml | 2 -- workflow/Snakefile | 13 +++---------- workflow/rules/qc.smk | 8 ++++---- workflow/rules/star-freebayes.smk | 4 ---- workflow/rules/utilities.smk | 4 ++-- workflow/rules/variantsOfInterest.smk | 4 ++-- 6 files changed, 11 insertions(+), 24 deletions(-) diff --git a/.test/config/config_paired_end.yaml b/.test/config/config_paired_end.yaml index 0124add..e7afe0f 100644 --- a/.test/config/config_paired_end.yaml +++ b/.test/config/config_paired_end.yaml @@ -12,8 +12,6 @@ fastq: auto: True # set auto if your files follow paired: True -#specify the seq aligner you want to use -aligner: "STAR" #or "HISAT2" # Paths for reference files reference: diff --git a/workflow/Snakefile b/workflow/Snakefile index a33f41e..b7d9a61 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -23,21 +23,14 @@ welcome(version="v2.0.2") include: "rules/qc.smk" include: "rules/diffexp.smk" - -if config['aligner'] == 'STAR': - include: "rules/star-freebayes.smk" - +include: "rules/star-freebayes.smk" include: "rules/utilities.smk" include: "rules/snpEff.smk" include: "rules/variantAnalysis.smk" include: "rules/jupyter-book.smk" - - -if config['aligner'] == 'HISAT2': - include: "rules/hisat2-freebayes.smk" - - +#if config['aligner'] == 'HISAT2': + # include: "rules/hisat2-freebayes.smk" if config['pipeline'] == 'parabricks': include: "rules/star-haplotypecaller.smk" diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index bd860d6..0a72fb9 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -41,8 +41,8 @@ rule BamStats: QC alignment statistics """ input: - bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", - idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai", + bam="results/alignments/{sample}.star.bam", # if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", + idx="results/alignments/{sample}.star.bam.bai", # if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai", output: stats="results/qc/alignments/{sample}.flagstat", log: @@ -55,8 +55,8 @@ rule Coverage: Calculate coverage with mosdepth """ input: - bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", - idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai", + bam="results/alignments/{sample}.star.bam",# if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", + idx="results/alignments/{sample}.star.bam.bai",# if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai", output: "results/qc/coverage/{sample}.mosdepth.summary.txt", log: diff --git a/workflow/rules/star-freebayes.smk b/workflow/rules/star-freebayes.smk index 92ba10a..8500e32 100644 --- a/workflow/rules/star-freebayes.smk +++ b/workflow/rules/star-freebayes.smk @@ -51,10 +51,6 @@ rule STARalign: rm results/alignments/{wildcards.sample}.Aligned.out.bam """ - - - - chunks = np.arange(1, config["VariantAnalysis"]["chunks"]) rule GenerateFreebayesParams: diff --git a/workflow/rules/utilities.smk b/workflow/rules/utilities.smk index 3cf3676..5a166ac 100644 --- a/workflow/rules/utilities.smk +++ b/workflow/rules/utilities.smk @@ -44,9 +44,9 @@ rule IndexBams: Index bams with samtools """ input: - bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", + bam="results/alignments/{sample}.star.bam", #if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", output: - idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", + idx="results/alignments/{sample}.star.bam.bai", #if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam", log: "logs/IndexBams/{sample}.log", conda: diff --git a/workflow/rules/variantsOfInterest.smk b/workflow/rules/variantsOfInterest.smk index 36c3ea6..f40b5b5 100644 --- a/workflow/rules/variantsOfInterest.smk +++ b/workflow/rules/variantsOfInterest.smk @@ -3,8 +3,8 @@ rule mpileupVariantsOfInterest: Get allele count tables of variants of choice (specified in config file ("IRmutations.tsv")) """ input: - bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam", - idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai", + bam="results/alignments/{sample}.star.bam", #if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam", + idx="results/alignments/{sample}.star.bam.bai", #if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai", output: "results/variantAnalysis/variantsOfInterest/counts/{sample}_{mut}_allele_counts.tsv", conda: