Skip to content

Commit

Permalink
add extra params to all applicable rules
Browse files Browse the repository at this point in the history
  • Loading branch information
sterrettJD committed May 6, 2024
1 parent 7ad704c commit d1231e5
Showing 1 changed file with 51 additions and 25 deletions.
76 changes: 51 additions & 25 deletions snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,8 @@ rule host_filter:
threads: get_threads(16, config, "host_filter")
params:
trim_trunc_path=trim_trunc_path,
hostile_db_path=HOSTILE_DB_PATH
hostile_db_path=HOSTILE_DB_PATH,
extra=get_rule_extra_args(config, "host_filter")
shell:
"""
hostile clean \
Expand All @@ -407,7 +408,8 @@ rule host_filter:
--threads {threads} \
--index {params.hostile_db_path} \
--debug \
--aligner bowtie2
--aligner bowtie2 \
{params.extra}

# cleanup filepaths
mv {params.trim_trunc_path}.nonhost/{wildcards.sample}.R1.clean_1.fastq.gz {output.FWD}
Expand All @@ -431,10 +433,12 @@ rule setup_metaphlan:
slurm=get_slurm_extra(config, "setup_metaphlan")
threads: get_threads(8, config, "setup_metaphlan")
conda: "conda_envs/humann.yaml"
params:
extra=get_rule_extra_args(config, "setup_metaphlan")
shell:
"""
mkdir -p {output}
metaphlan --install --nproc {threads} --bowtie2db {output}
metaphlan --install --nproc {threads} --bowtie2db {output} {params.extra}
# Option to do it manually if --install doesn't seem to work
# cd {output}
# wget http://cmprod1.cibio.unitn.it/biobakery4/metaphlan_databases/bowtie2_indexes/mpa_vOct22_CHOCOPhlAnSGB_202212_bt2.tar
Expand All @@ -456,10 +460,12 @@ rule get_biobakery_chocophlan_db:
slurm=get_slurm_extra(config, "get_biobakery_chocophlan_db")
threads: get_threads(1, config, "get_biobakery_chocophlan_db")
conda: "conda_envs/humann.yaml"
params:
extra=get_rule_extra_args(config, "get_biobakery_chocophlan_db")
shell:
"""
mkdir -p {output}
humann_databases --download chocophlan full {output} --update-config yes
humann_databases --download chocophlan full {output} --update-config yes {params.extra}
"""


Expand All @@ -476,10 +482,12 @@ rule get_biobakery_uniref_db:
slurm=get_slurm_extra(config, "get_biobakery_uniref_db")
threads: get_threads(1, config, "get_biobakery_uniref_db")
conda: "conda_envs/humann.yaml"
params:
extra=get_rule_extra_args(config, "get_biobakery_uniref_db")
shell:
"""
mkdir -p {output}
humann_databases --download uniref uniref90_diamond {output} --update-config yes
humann_databases --download uniref uniref90_diamond {output} --update-config yes {params.extra}
"""


Expand All @@ -498,10 +506,12 @@ rule get_utility_mapping_db:
slurm=get_slurm_extra(config, "get_utility_mapping_db")
threads: get_threads(1, config, "get_utility_mapping_db")
conda: "conda_envs/humann.yaml"
params:
extra=get_rule_extra_args(config, "get_utility_mapping_db")
shell:
"""
mkdir -p {output}
humann_databases --download utility_mapping full {output} --update-config yes
humann_databases --download utility_mapping full {output} --update-config yes {params.extra}
"""


Expand Down Expand Up @@ -561,13 +571,15 @@ rule run_humann_nonhost:
conda: "conda_envs/humann.yaml"
params:
dirpath=f"{trim_trunc_path}.nonhost.humann",
metaphlan_bowtie_db=pj(config['metaphlan_bowtie_db'],"mpa_vOct22_CHOCOPhlAnSGB_202212_bt2")
metaphlan_bowtie_db=pj(config['metaphlan_bowtie_db'],"mpa_vOct22_CHOCOPhlAnSGB_202212_bt2"),
extra=get_rule_extra_args(config, "run_humann_nonhost")
shell:
"""
mkdir -p {params.dirpath}
humann -i {input.NONHUMAN_READS} -o {params.dirpath}/{wildcards.sample} \
--threads {threads} --search-mode uniref90 \
--metaphlan-options="--bowtie2db {params.metaphlan_bowtie_db}"
--metaphlan-options="--bowtie2db {params.metaphlan_bowtie_db}" \
{params.extra}

"""

Expand Down Expand Up @@ -798,10 +810,11 @@ rule get_kraken_db:
threads: get_threads(64, config, "get_kraken_db")
conda: "conda_envs/kraken.yaml"
params:
database_dir=kraken_db_loc
database_dir=kraken_db_loc,
extra=get_rule_extra_args(config, "get_kraken_db")
shell:
"""
kraken2-build --standard --db {params.database_dir} --threads {threads}
kraken2-build --standard --db {params.database_dir} --threads {threads} {params.extra}
"""


Expand Down Expand Up @@ -830,12 +843,14 @@ rule run_kraken:
conda: "conda_envs/kraken.yaml"
params:
out_dir=f"{trim_trunc_path}.nonhost.kraken",
database=kraken_db_loc
database=kraken_db_loc,
extra=get_rule_extra_args(config, "run_kraken")
shell:
"""
mkdir -p {params.out_dir}

kraken2 --gzip-compressed --paired --db {params.database} --threads {threads} --output {output.OUTFILE} --report {output.REPORT} --classified-out {params.out_dir}/{wildcards.sample}_classified#.fq --unclassified-out {params.out_dir}/{wildcards.sample}_unclassified#.fq {input.FWD} {input.REV}
# This all has to be on one line due to the way kraken2 parses the command...
kraken2 --gzip-compressed --paired --db {params.database} --threads {threads} --output {output.OUTFILE} --report {output.REPORT} --classified-out {params.out_dir}/{wildcards.sample}_classified#.fq --unclassified-out {params.out_dir}/{wildcards.sample}_unclassified#.fq {params.extra} {input.FWD} {input.REV}

"""

Expand All @@ -856,10 +871,11 @@ rule build_bracken:
threads: get_threads(32, config, "build_bracken")
conda: "conda_envs/kraken.yaml"
params:
database=kraken_db_loc
database=kraken_db_loc,
extra=get_rule_extra_args(config, "build_bracken")
shell:
"""
bracken-build -d {params.database} -t {threads} -l 150
bracken-build -d {params.database} -t {threads} -l 150 {params.extra}
"""


Expand All @@ -884,10 +900,11 @@ rule run_bracken:
threads: get_threads(1, config, "run_bracken")
conda: "conda_envs/kraken.yaml"
params:
database=kraken_db_loc
database=kraken_db_loc,
extra=get_rule_extra_args(config, "run_bracken")
shell:
"""
bracken -d {params.database} -i {input.REPORT} -o {output.REPORT} -r 150 -l S -t 10
bracken -d {params.database} -i {input.REPORT} -o {output.REPORT} -r 150 -l S -t 10 {params.extra}
"""


Expand Down Expand Up @@ -952,7 +969,8 @@ rule nonpareil:
threads: get_threads(16, config, "nonpareil")
conda: "conda_envs/nonpareil.yaml"
params:
dirpath=f"{trim_trunc_path}.nonhost.nonpareil"
dirpath=f"{trim_trunc_path}.nonhost.nonpareil",
extra=get_rule_extra_args(config, "nonpareil")
shell:
"""
mkdir -p {params.dirpath}
Expand All @@ -963,7 +981,9 @@ rule nonpareil:
# fastq is recommended for the kmer algorithm, so defaulting to fastq input
nonpareil -s {params.dirpath}/{wildcards.sample}_temp_unzipped_input.fq \
-b {params.dirpath}/{wildcards.sample} \
-T kmer -f fastq -t {threads}
-T kmer -f fastq -t {threads} \
{params.extra}


# remove the temp file
rm {params.dirpath}/{wildcards.sample}_temp_unzipped_input.fq
Expand Down Expand Up @@ -1053,15 +1073,16 @@ rule build_host_genome_index:
threads: get_threads(8, config, "build_host_genome_index")
params:
ref_dir=f"{trim_trunc_path}.{get_host_map_method(config)}",
method=get_host_map_method(config)
method=get_host_map_method(config),
extra=get_rule_extra_args(config, "build_host_genome_index")
shell:
"""
mkdir -p {params.ref_dir}
if [ "{params.method}" == "BBMap" ]; then
bbmap.sh ref={input} path={params.ref_dir} threads={threads} -Xmx{resources.mem_mb}m
bbmap.sh ref={input} path={params.ref_dir} threads={threads} -Xmx{resources.mem_mb}m {params.extra}
elif [ "{params.method}" == "HISAT2" ]; then
mkdir -p {output}/
hisat2-build {input} {output}/ -p {threads}
hisat2-build {input} {output}/ -p {threads} {params.extra}
fi
"""

Expand Down Expand Up @@ -1090,7 +1111,8 @@ rule map_host:
params:
out_dir=f"{trim_trunc_path}.{get_host_map_method(config)}",
sam2bam_path=get_sam2bam_path(),
method=get_host_map_method(config)
method=get_host_map_method(config),
extra=get_rule_extra_args(config, "map_host")
shell:
"""
cd {params.out_dir}
Expand All @@ -1100,13 +1122,15 @@ rule map_host:
out={wildcards.sample}.sam \
trimreaddescriptions=t \
threads={threads} \
-Xmx{resources.mem_mb}m
-Xmx{resources.mem_mb}m \
{params.extra}

elif [ "{params.method}" == "HISAT2" ]; then
hisat2 -1 ../{input.FWD} -2 ../{input.REV} \
-S {wildcards.sample}.sam \
-x ref/ \
-p {threads}
-p {threads} \
{params.extra}
fi

bash {params.sam2bam_path} {wildcards.sample}.sam
Expand Down Expand Up @@ -1155,6 +1179,8 @@ rule generate_feature_counts:
runtime=get_runtime(int(2*60), config, "generate_feature_counts"), # min, or 2 hrs
slurm=get_slurm_extra(config, "generate_feature_counts")
threads: get_threads(16, config, "generate_feature_counts")
params:
extra=get_rule_extra_args(config, "generate_feature_counts")
shell:
"""
if [ -z {input.BAM} ]
Expand All @@ -1164,7 +1190,7 @@ rule generate_feature_counts:
touch {output.SUMMARY}
else
featureCounts -T {threads} -p --countReadPairs \
-t exon -g gene_id -a {input.ANNOTATION} -o {output.COUNTS} {input.BAM}
-t exon -g gene_id -a {input.ANNOTATION} -o {output.COUNTS} {params.extra} {input.BAM}
fi
"""

Expand Down

0 comments on commit d1231e5

Please sign in to comment.