diff --git a/snakefile b/snakefile
index ee42ede..c42a199 100644
--- a/snakefile
+++ b/snakefile
@@ -398,7 +398,8 @@ rule host_filter:
     threads: get_threads(16, config, "host_filter")
     params:
         trim_trunc_path=trim_trunc_path,
-        hostile_db_path=HOSTILE_DB_PATH
+        hostile_db_path=HOSTILE_DB_PATH,
+        extra=get_rule_extra_args(config, "host_filter")
     shell:
         """
         hostile clean \
@@ -407,7 +408,8 @@
         --threads {threads} \
         --index {params.hostile_db_path} \
         --debug \
-        --aligner bowtie2
+        --aligner bowtie2 \
+        {params.extra}
 
         # cleanup filepaths
         mv {params.trim_trunc_path}.nonhost/{wildcards.sample}.R1.clean_1.fastq.gz {output.FWD}
@@ -431,10 +433,12 @@
         slurm=get_slurm_extra(config, "setup_metaphlan")
     threads: get_threads(8, config, "setup_metaphlan")
     conda: "conda_envs/humann.yaml"
+    params:
+        extra=get_rule_extra_args(config, "setup_metaphlan")
     shell:
         """
         mkdir -p {output}
-        metaphlan --install --nproc {threads} --bowtie2db {output}
+        metaphlan --install --nproc {threads} --bowtie2db {output} {params.extra}
         # Option to do it manually if --install doesn't seem to work
         # cd {output}
         # wget http://cmprod1.cibio.unitn.it/biobakery4/metaphlan_databases/bowtie2_indexes/mpa_vOct22_CHOCOPhlAnSGB_202212_bt2.tar
@@ -456,10 +460,12 @@
         slurm=get_slurm_extra(config, "get_biobakery_chocophlan_db")
     threads: get_threads(1, config, "get_biobakery_chocophlan_db")
     conda: "conda_envs/humann.yaml"
+    params:
+        extra=get_rule_extra_args(config, "get_biobakery_chocophlan_db")
     shell:
         """
         mkdir -p {output}
-        humann_databases --download chocophlan full {output} --update-config yes
+        humann_databases --download chocophlan full {output} --update-config yes {params.extra}
         """
 
 
@@ -476,10 +482,12 @@
         slurm=get_slurm_extra(config, "get_biobakery_uniref_db")
     threads: get_threads(1, config, "get_biobakery_uniref_db")
     conda: "conda_envs/humann.yaml"
+    params:
+        extra=get_rule_extra_args(config, "get_biobakery_uniref_db")
     shell:
         """
         mkdir -p {output}
-        humann_databases --download uniref uniref90_diamond {output} --update-config yes
+        humann_databases --download uniref uniref90_diamond {output} --update-config yes {params.extra}
         """
 
 
@@ -498,10 +506,12 @@
         slurm=get_slurm_extra(config, "get_utility_mapping_db")
     threads: get_threads(1, config, "get_utility_mapping_db")
     conda: "conda_envs/humann.yaml"
+    params:
+        extra=get_rule_extra_args(config, "get_utility_mapping_db")
     shell:
         """
         mkdir -p {output}
-        humann_databases --download utility_mapping full {output} --update-config yes
+        humann_databases --download utility_mapping full {output} --update-config yes {params.extra}
         """
 
 
@@ -561,13 +571,15 @@
     conda: "conda_envs/humann.yaml"
     params:
         dirpath=f"{trim_trunc_path}.nonhost.humann",
-        metaphlan_bowtie_db=pj(config['metaphlan_bowtie_db'],"mpa_vOct22_CHOCOPhlAnSGB_202212_bt2")
+        metaphlan_bowtie_db=pj(config['metaphlan_bowtie_db'],"mpa_vOct22_CHOCOPhlAnSGB_202212_bt2"),
+        extra=get_rule_extra_args(config, "run_humann_nonhost")
     shell:
         """
         mkdir -p {params.dirpath}
         humann -i {input.NONHUMAN_READS} -o {params.dirpath}/{wildcards.sample} \
         --threads {threads} --search-mode uniref90 \
-        --metaphlan-options="--bowtie2db {params.metaphlan_bowtie_db}"
+        --metaphlan-options="--bowtie2db {params.metaphlan_bowtie_db}" \
+        {params.extra}
         """
 
 
@@ -798,10 +810,11 @@
     threads: get_threads(64, config, "get_kraken_db")
     conda: "conda_envs/kraken.yaml"
     params:
-        database_dir=kraken_db_loc
+        database_dir=kraken_db_loc,
+        extra=get_rule_extra_args(config, "get_kraken_db")
     shell:
         """
-        kraken2-build --standard --db {params.database_dir} --threads {threads}
+        kraken2-build --standard --db {params.database_dir} --threads {threads} {params.extra}
         """
 
 
@@ -830,12 +843,14 @@
     conda: "conda_envs/kraken.yaml"
     params:
         out_dir=f"{trim_trunc_path}.nonhost.kraken",
-        database=kraken_db_loc
+        database=kraken_db_loc,
+        extra=get_rule_extra_args(config, "run_kraken")
     shell:
         """
         mkdir -p {params.out_dir}
 
-        kraken2 --gzip-compressed --paired --db {params.database} --threads {threads} --output {output.OUTFILE} --report {output.REPORT} --classified-out {params.out_dir}/{wildcards.sample}_classified#.fq --unclassified-out {params.out_dir}/{wildcards.sample}_unclassified#.fq {input.FWD} {input.REV}
+        # This has to all be on one line due to the way kraken2 parses it...
+        kraken2 --gzip-compressed --paired --db {params.database} --threads {threads} --output {output.OUTFILE} --report {output.REPORT} --classified-out {params.out_dir}/{wildcards.sample}_classified#.fq --unclassified-out {params.out_dir}/{wildcards.sample}_unclassified#.fq {params.extra} {input.FWD} {input.REV}
         """
 
 
@@ -856,10 +871,11 @@
     threads: get_threads(32, config, "build_bracken")
     conda: "conda_envs/kraken.yaml"
    params:
-        database=kraken_db_loc
+        database=kraken_db_loc,
+        extra=get_rule_extra_args(config, "build_bracken")
     shell:
         """
-        bracken-build -d {params.database} -t {threads} -l 150
+        bracken-build -d {params.database} -t {threads} -l 150 {params.extra}
         """
 
 
@@ -884,10 +900,11 @@
     threads: get_threads(1, config, "run_bracken")
     conda: "conda_envs/kraken.yaml"
     params:
-        database=kraken_db_loc
+        database=kraken_db_loc,
+        extra=get_rule_extra_args(config, "run_bracken")
     shell:
         """
-        bracken -d {params.database} -i {input.REPORT} -o {output.REPORT} -r 150 -l S -t 10
+        bracken -d {params.database} -i {input.REPORT} -o {output.REPORT} -r 150 -l S -t 10 {params.extra}
         """
 
 
@@ -952,7 +969,8 @@
     threads: get_threads(16, config, "nonpareil")
     conda: "conda_envs/nonpareil.yaml"
     params:
-        dirpath=f"{trim_trunc_path}.nonhost.nonpareil"
+        dirpath=f"{trim_trunc_path}.nonhost.nonpareil",
+        extra=get_rule_extra_args(config, "nonpareil")
     shell:
         """
         mkdir -p {params.dirpath}
@@ -963,7 +981,9 @@
         # fastq is recommended for kmer algorithm, so defaulting to those
         nonpareil -s {params.dirpath}/{wildcards.sample}_temp_unzipped_input.fq \
         -b {params.dirpath}/{wildcards.sample} \
-        -T kmer -f fastq -t {threads}
+        -T kmer -f fastq -t {threads} \
+        {params.extra}
+
 
         # remove the temp file
         rm {params.dirpath}/{wildcards.sample}_temp_unzipped_input.fq
@@ -1053,15 +1073,16 @@
     threads: get_threads(8, config, "build_host_genome_index")
     params:
         ref_dir=f"{trim_trunc_path}.{get_host_map_method(config)}",
-        method=get_host_map_method(config)
+        method=get_host_map_method(config),
+        extra=get_rule_extra_args(config, "build_host_genome_index")
     shell:
         """
         mkdir -p {params.ref_dir}
 
         if [ "{params.method}" == "BBMap" ]; then
-            bbmap.sh ref={input} path={params.ref_dir} threads={threads} -Xmx{resources.mem_mb}m
+            bbmap.sh ref={input} path={params.ref_dir} threads={threads} -Xmx{resources.mem_mb}m {params.extra}
         elif [ "{params.method}" == "HISAT2" ]; then
             mkdir -p {output}/
-            hisat2-build {input} {output}/ -p {threads}
+            hisat2-build {input} {output}/ -p {threads} {params.extra}
         fi
         """
@@ -1090,7 +1111,8 @@
     params:
         out_dir=f"{trim_trunc_path}.{get_host_map_method(config)}",
         sam2bam_path=get_sam2bam_path(),
-        method=get_host_map_method(config)
+        method=get_host_map_method(config),
+        extra=get_rule_extra_args(config, "map_host")
     shell:
         """
         cd {params.out_dir}
@@ -1100,13 +1122,15 @@
             out={wildcards.sample}.sam \
             trimreaddescriptions=t \
             threads={threads} \
-            -Xmx{resources.mem_mb}m
+            -Xmx{resources.mem_mb}m \
+            {params.extra}
 
         elif [ "{params.method}" == "HISAT2" ]; then
             hisat2 -1 ../{input.FWD} -2 ../{input.REV} \
             -S {wildcards.sample}.sam \
             -x ref/ \
-            -p {threads}
+            -p {threads} \
+            {params.extra}
 
         fi
         bash {params.sam2bam_path} {wildcards.sample}.sam
@@ -1155,6 +1179,8 @@
         runtime=get_runtime(int(2*60), config, "generate_feature_counts"), # min, or 2 hrs
         slurm=get_slurm_extra(config, "generate_feature_counts")
     threads: get_threads(16, config, "generate_feature_counts")
+    params:
+        extra=get_rule_extra_args(config, "generate_feature_counts")
     shell:
         """
         if [ -z {input.BAM} ]
@@ -1164,7 +1190,7 @@
             touch {output.SUMMARY}
         else
             featureCounts -T {threads} -p --countReadPairs \
-            -t exon -g gene_id -a {input.ANNOTATION} -o {output.COUNTS} {input.BAM}
+            -t exon -g gene_id -a {input.ANNOTATION} -o {output.COUNTS} {params.extra} {input.BAM}
         fi
         """
 