diff --git a/CHANGELOG.md b/CHANGELOG.md index d2968ae..d35c400 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ - Major updates to convert CHARLIE from a biowulf-specific to a platform-agnostic pipeline (#102, @kelly-sovacool): - All rules now use containers instead of envmodules. - Default config and cluster config files are provided for use on biowulf and FRCE. + - New entry `tempdir` in the config file sets the temporary directory location for rules that require transient storage. # CHARLIE 0.10.1 diff --git a/config/biowulf/config.yaml b/config/biowulf/config.yaml index 4635749..da1eb19 100644 --- a/config/biowulf/config.yaml +++ b/config/biowulf/config.yaml @@ -2,30 +2,32 @@ # # The working dir... output will be in the results subfolder of the workdir workdir: "WORKDIR" -# + +# temporary directory for intermediate files that are not saved +tempdir: '/lscratch/$SLURM_JOB_ID' + # tab delimited samples file ... should have the following 3 columns # sampleName path_to_R1_fastq path_to_R2_fastq -# samples: "WORKDIR/samples.tsv" -# + # Should the CLEAR pipeline be run? True or False WITHOUT quotes run_clear: True -# + # Should the DCC pipeline be run? True or False WITHOUT quote run_dcc: True -# + # Should the MapSplice pipeline be run? True or False WITHOUT quotes run_mapsplice: False mapsplice_min_map_len: 50 mapsplice_filtering: 2 # 1=less stringent 2=default -# + # Should the circRNA_finder be run? True or False WITHOUT quotes run_circRNAFinder: True # Should the NCLscan pipeline be run? True or False WITHOUT quotes # This can only be run for PE data run_nclscan: False nclscan_config: "WORKDIR/nclscan.config" -# + # Should we also run find_circ? 
True or False WITHOUT quotes run_findcirc: False # findcirc_params: "--noncanonical --allhits" # this gives way too many circRNAs diff --git a/config/fnlcr/config.yaml b/config/fnlcr/config.yaml index ecea5a1..5f89a8f 100644 --- a/config/fnlcr/config.yaml +++ b/config/fnlcr/config.yaml @@ -2,30 +2,32 @@ # # The working dir... output will be in the results subfolder of the workdir workdir: "WORKDIR" -# + +# temporary directory for intermediate files that are not saved +tempdir: '/scratch/local' + # tab delimited samples file ... should have the following 3 columns # sampleName path_to_R1_fastq path_to_R2_fastq -# samples: "WORKDIR/samples.tsv" -# + # Should the CLEAR pipeline be run? True or False WITHOUT quotes run_clear: True -# + # Should the DCC pipeline be run? True or False WITHOUT quote run_dcc: True -# + # Should the MapSplice pipeline be run? True or False WITHOUT quotes run_mapsplice: False mapsplice_min_map_len: 50 mapsplice_filtering: 2 # 1=less stringent 2=default -# + # Should the circRNA_finder be run? True or False WITHOUT quotes run_circRNAFinder: True # Should the NCLscan pipeline be run? True or False WITHOUT quotes # This can only be run for PE data run_nclscan: False nclscan_config: "WORKDIR/nclscan.config" -# + # Should we also run find_circ? True or False WITHOUT quotes run_findcirc: False # findcirc_params: "--noncanonical --allhits" # this gives way too many circRNAs diff --git a/docs/tutorial.md b/docs/tutorial.md index 2bd761b..158c6fb 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -174,6 +174,7 @@ The above command creates `` folder and creates 2 subfolders This file is used to fine tune the execution of the pipeline by setting: * sample sheet location ... aka `samples.tsv` +* the temporary directory (`tempdir`) -- make sure this path exists and is writable in your computing environment. 
* which circRNA finding tools to use by editing these: * run_clear: True * run_dcc: True diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index bbda26c..a690c06 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -53,19 +53,12 @@ rule star1p: outdir=join(WORKDIR, "results", "{sample}", "STAR1p"), starindexdir=STAR_INDEX_DIR, alignTranscriptsPerReadNmax=config["alignTranscriptsPerReadNmax"], - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']["star"] threads: getthreads("star1p") shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi - -if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi if [ "{params.peorse}" == "PE" ];then # paired-end overhang=$(zcat {input.R1} {input.R2} | awk -v maxlen=100 'NR%4==2 {{if (length($1) > maxlen+0) maxlen=length($1)}}; END {{print maxlen-1}}') @@ -99,7 +92,7 @@ if [ "{params.peorse}" == "PE" ];then --alignEndsProtrude 10 ConcordantPair \\ --outFilterIntronMotifs None \\ --sjdbGTFfile {input.gtf} \\ - --outTmpDir ${{TMPDIR}} \\ + --outTmpDir {params.tmpdir} \\ --sjdbOverhang $overhang rm -rf {params.sample}_p1._STARgenome @@ -135,7 +128,7 @@ if [ "{params.peorse}" == "PE" ];then --alignEndsProtrude 10 ConcordantPair \\ --outFilterIntronMotifs None \\ --sjdbGTFfile {input.gtf} \\ - --outTmpDir ${{TMPDIR}} \\ + --outTmpDir {params.tmpdir} \\ --sjdbOverhang $overhang rm -rf {params.sample}_mate1._STARgenome @@ -171,7 +164,7 @@ if [ "{params.peorse}" == "PE" ];then --alignEndsProtrude 10 ConcordantPair \\ --outFilterIntronMotifs None \\ --sjdbGTFfile {input.gtf} \\ - --outTmpDir ${{TMPDIR}} \\ + --outTmpDir {params.tmpdir} \\ --sjdbOverhang $overhang rm -rf {params.sample}_mate2._STARgenome @@ -211,7 +204,7 @@ else --alignEndsProtrude 10 ConcordantPair \\ --outFilterIntronMotifs None \\ --sjdbGTFfile {input.gtf} \\ - 
--outTmpDir ${{TMPDIR}} \\ + --outTmpDir {params.tmpdir} \\ --sjdbOverhang $overhang mkdir -p $(dirname {output.mate1_chimeric_junctions}) touch {output.mate1_chimeric_junctions} @@ -304,19 +297,13 @@ rule star2p: outdir=join(WORKDIR, "results", "{sample}", "STAR2p"), starindexdir=STAR_INDEX_DIR, alignTranscriptsPerReadNmax=config["alignTranscriptsPerReadNmax"], - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']['star_ucsc_cufflinks'] threads: getthreads("star2p") shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi limitSjdbInsertNsj=$(wc -l {input.pass1sjtab}|awk '{{print $1+1}}') if [ "$limitSjdbInsertNsj" -lt "400000" ];then limitSjdbInsertNsj="400000";fi @@ -359,7 +346,7 @@ if [ "{params.peorse}" == "PE" ];then --outFilterIntronMotifs None \\ --sjdbGTFfile {input.gtf} \\ --quantMode GeneCounts \\ - --outTmpDir ${{TMPDIR}} \\ + --outTmpDir {params.tmpdir} \\ --sjdbOverhang $overhang \\ --outBAMcompression 0 \\ --outSAMattributes All @@ -404,7 +391,7 @@ else --outFilterIntronMotifs None \\ --sjdbGTFfile {input.gtf} \\ --quantMode GeneCounts \\ - --outTmpDir ${{TMPDIR}} \\ + --outTmpDir {params.tmpdir} \\ --sjdbOverhang $overhang \\ --outBAMcompression 0 \\ --outSAMattributes All @@ -412,35 +399,35 @@ else rm -rf ${{output_prefix}}_STARgenome fi sleep 120 -if [ ! 
-d $TMPDIR ];then mkdir -p $TMPDIR;fi -samtools view -H {output.unsortedbam} > ${{TMPDIR}}/{params.sample}_p2.non_chimeric.sam -cp ${{TMPDIR}}/{params.sample}_p2.non_chimeric.sam ${{TMPDIR}}/{params.sample}_p2.chimeric.sam +mkdir -p {params.tmpdir} +samtools view -H {output.unsortedbam} > {params.tmpdir}/{params.sample}_p2.non_chimeric.sam +cp {params.tmpdir}/{params.sample}_p2.non_chimeric.sam {params.tmpdir}/{params.sample}_p2.chimeric.sam # ref https://github.com/alexdobin/STAR/issues/678 -samtools view -@ {threads} {output.unsortedbam} | grep "ch:A:1" >> ${{TMPDIR}}/{params.sample}_p2.chimeric.sam -samtools view -@ {threads} {output.unsortedbam} | grep -v "ch:A:1" >> ${{TMPDIR}}/{params.sample}_p2.non_chimeric.sam +samtools view -@ {threads} {output.unsortedbam} | grep "ch:A:1" >> {params.tmpdir}/{params.sample}_p2.chimeric.sam +samtools view -@ {threads} {output.unsortedbam} | grep -v "ch:A:1" >> {params.tmpdir}/{params.sample}_p2.non_chimeric.sam ls -alrth for i in 1 2 3;do - if [ ! 
-d ${{TMPDIR}}/{params.randomstr}_${{i}} ];then mkdir -p ${{TMPDIR}}/{params.randomstr}_${{i}};fi + mkdir -p {params.tmpdir}_${{i}} done -samtools view -@ {threads} -b -S ${{TMPDIR}}/{params.sample}_p2.chimeric.sam | \\ +samtools view -@ {threads} -b -S {params.tmpdir}/{params.sample}_p2.chimeric.sam | \\ samtools sort \\ -l 9 \\ - -T ${{TMPDIR}}/{params.randomstr}_1 \\ + -T {params.tmpdir}_1 \\ --write-index \\ -@ {threads} \\ --output-fmt BAM \\ -o {output.chimeric_bam} - -samtools view -@ {threads} -b -S ${{TMPDIR}}/{params.sample}_p2.non_chimeric.sam | \\ +samtools view -@ {threads} -b -S {params.tmpdir}/{params.sample}_p2.non_chimeric.sam | \\ samtools sort \\ -l 9 \\ - -T ${{TMPDIR}}/{params.randomstr}_2 \\ + -T {params.tmpdir}_2 \\ --write-index \\ -@ {threads} \\ --output-fmt BAM \\ -o {output.non_chimeric_bam} - samtools sort \\ -l 9 \\ - -T ${{TMPDIR}}/{params.randomstr}_3 \\ + -T {params.tmpdir}_3 \\ --write-index \\ -@ {threads} \\ --output-fmt BAM \\ @@ -479,17 +466,12 @@ rule star_circrnafinder: flanksize=FLANKSIZE, starindexdir=STAR_INDEX_DIR, alignTranscriptsPerReadNmax=config["alignTranscriptsPerReadNmax"], - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']['star_ucsc_cufflinks'] threads: getthreads("star_circrnafinder") shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi outdir=$(dirname {output.chimericsam}) if [ ! -d $outdir ];then mkdir -p $outdir;fi @@ -514,7 +496,7 @@ if [ "{params.peorse}" == "PE" ];then --outFilterMultimapNmax 2 \\ --outFileNamePrefix {params.sample}. \\ --outBAMcompression 0 \\ - --outTmpDir $TMPDIR \\ + --outTmpDir {params.tmpdir} \\ --sjdbGTFfile {input.gtf} else @@ -536,7 +518,7 @@ else --outFilterMultimapNmax 2 \\ --outFileNamePrefix {params.sample}. 
\\ --outBAMcompression 0 \\ - --outTmpDir $TMPDIR \\ + --outTmpDir {params.tmpdir} \\ --sjdbGTFfile {input.gtf} fi @@ -571,18 +553,13 @@ rule find_circ_align: sample="{sample}", reffa=REF_FA, peorse=get_peorse, - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']['star_ucsc_cufflinks'] threads: getthreads("find_circ_align") shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} refdir=$(dirname {input.bt2}) outdir=$(dirname {output.anchorsfq}) @@ -598,7 +575,7 @@ bowtie2 \\ -q \\ -1 {input.R1} \\ -2 {input.R2} \\ - > ${{TMPDIR}}/{params.sample}.sam + > {params.tmpdir}/{params.sample}.sam else bowtie2 \\ -p {threads} \\ @@ -608,35 +585,35 @@ bowtie2 \\ -x ${{refdir}}/ref \\ -q \\ -U {input.R1} \\ - > ${{TMPDIR}}/{params.sample}.sam + > {params.tmpdir}/{params.sample}.sam fi -samtools view -@{threads} -hbuS -o ${{TMPDIR}}/{params.sample}.unsorted.bam ${{TMPDIR}}/{params.sample}.sam +samtools view -@{threads} -hbuS -o {params.tmpdir}/{params.sample}.unsorted.bam {params.tmpdir}/{params.sample}.sam samtools sort -@{threads} \\ -u \\ --write-index \\ --output-fmt BAM \\ - -T ${{TMPDIR}}/{params.sample}.samtoolssort \\ - -o ${{TMPDIR}}/{params.sample}.sorted.bam ${{TMPDIR}}/{params.sample}.unsorted.bam + -T {params.tmpdir}/{params.sample}.samtoolssort \\ + -o {params.tmpdir}/{params.sample}.sorted.bam {params.tmpdir}/{params.sample}.unsorted.bam samtools view -@{threads} \\ --output-fmt BAM \\ --write-index \\ - -o ${{TMPDIR}}/{params.sample}.unmapped.bam \\ + -o {params.tmpdir}/{params.sample}.unmapped.bam \\ -f4 \\ - ${{TMPDIR}}/{params.sample}.sorted.bam + {params.tmpdir}/{params.sample}.sorted.bam unmapped2anchors.py \\ - ${{TMPDIR}}/{params.sample}.unmapped.bam | \\ - gzip -c - > 
${{TMPDIR}}/{params.sample}.anchors.fastq.gz + {params.tmpdir}/{params.sample}.unmapped.bam | \\ + gzip -c - > {params.tmpdir}/{params.sample}.anchors.fastq.gz -mv ${{TMPDIR}}/{params.sample}.anchors.fastq.gz {output.anchorsfq} -mv ${{TMPDIR}}/{params.sample}.unmapped.b* ${{outdir}}/ +mv {params.tmpdir}/{params.sample}.anchors.fastq.gz {output.anchorsfq} +mv {params.tmpdir}/{params.sample}.unmapped.b* ${{outdir}}/ sleep 300 -rm -rf $TMPDIR +rm -rf {params.tmpdir} """ diff --git a/workflow/rules/create_index.smk b/workflow/rules/create_index.smk index 8cc4db4..419c036 100644 --- a/workflow/rules/create_index.smk +++ b/workflow/rules/create_index.smk @@ -20,7 +20,6 @@ rule create_index: script1=join(SCRIPTS_DIR, "_add_geneid2genepred.py"), script2=join(SCRIPTS_DIR, "_multifasta2separatefastas.sh"), script3=join(SCRIPTS_DIR, "fix_gtfs.py"), - randomstr=str(uuid.uuid4()), nclscan_config=config["nclscan_config"], container: config['containers']['star_ucsc_cufflinks'] threads: getthreads("create_index") diff --git a/workflow/rules/findcircrna.smk b/workflow/rules/findcircrna.smk index a9687dd..16d0c10 100644 --- a/workflow/rules/findcircrna.smk +++ b/workflow/rules/findcircrna.smk @@ -197,24 +197,18 @@ rule circExplorer: minsize_virus=config["minsize_virus"], maxsize_virus=config["maxsize_virus"], bash_script=join(SCRIPTS_DIR,"_run_circExplorer_star.sh"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", # script=join(SCRIPTS_DIR, "circExplorer_get_annotated_counts_per_sample.py"), # this produces an annotated counts table to which counts found in BAMs need to be appended threads: getthreads("circExplorer") container: config['containers']["circexplorer"] shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi -if [ ! 
-d {params.outdir} ];then mkdir {params.outdir};fi +mkdir -p {params.outdir} {params.tmpdir} cd {params.outdir} bash {params.bash_script} \\ --junctionfile {input.junctionfile} \\ - --tmpdir $TMPDIR \\ + --tmpdir {params.tmpdir} \\ --outdir {params.outdir} \\ --samplename {params.sample} \\ --genepred {params.genepred} \\ @@ -282,18 +276,12 @@ rule ciri: minsize_virus=config["minsize_virus"], maxsize_virus=config["maxsize_virus"], script=join(SCRIPTS_DIR, "filter_ciriout.py"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", threads: getthreads("ciri") container: config['containers']['ciri'] shell: """ -set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.outdir} {params.tmpdir} cd {params.outdir} if [ "{params.peorse}" == "PE" ];then ## paired-end @@ -314,8 +302,8 @@ perl {params.ciripl} \\ -F {params.reffa} \\ -A {input.gtf} \\ -G {output.cirilog} -T {threads} -# samtools view -@{threads} -T {params.reffa} -CS {params.sample}.bwa.sam | samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O CRAM -o {output.ciribam} - -samtools view -@{threads} -bS {params.sample}.bwa.sam | samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O BAM -o {output.ciribam} - +# samtools view -@{threads} -T {params.reffa} -CS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O CRAM -o {output.ciribam} - +samtools view -@{threads} -bS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.ciribam} - rm -rf {params.sample}.bwa.sam python {params.script} \\ --ciriout {output.ciriout} \\ @@ -371,24 +359,19 @@ rule circExplorer_bwa: minsize_virus=config["minsize_virus"], maxsize_virus=config["maxsize_virus"], bash_script=join(SCRIPTS_DIR,"_run_circExplorer_bwa.sh"), - 
randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", # script=join(SCRIPTS_DIR, "circExplorer_get_annotated_counts_per_sample.py"), # this produces an annotated counts table to which counts found in BAMs need to be appended threads: getthreads("circExplorer") container: config['containers']["circexplorer"] shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi -if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi +mkdir -p {params.tmpdir} {params.outdir} + cd {params.outdir} bash {params.bash_script} \\ --bwabam {input.ciribam} \\ - --tmpdir $TMPDIR \\ + --tmpdir {params.tmpdir} \\ --outdir {params.outdir} \\ --samplename {params.sample} \\ --genepred {params.genepred} \\ @@ -690,7 +673,7 @@ rule dcc: dcc_strandedness=config["dcc_strandedness"], rep=REPEATS_GTF, fa=REF_FA, - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", script=join(SCRIPTS_DIR, "create_dcc_per_sample_counts_table.py"), bsj_min_nreads=config["minreadcount"], refregions=REF_REGIONS, @@ -705,17 +688,12 @@ rule dcc: shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! 
-d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} cd $(dirname {output.cr}) if [ "{params.peorse}" == "PE" ];then DCC @{input.ss} \\ - --temp ${{TMPDIR}}/DCC \\ + --temp {params.tmpdir}/DCC \\ --threads {threads} \\ --detect --gene \\ --bam {input.bam} \\ @@ -729,7 +707,7 @@ DCC @{input.ss} \\ -mt2 @{input.m2} else DCC @{input.ss} \\ - --temp ${{TMPDIR}}/DCC \\ + --temp {params.tmpdir}/DCC \\ --threads {threads} \\ --detect --gene \\ --bam {input.bam} \\ @@ -740,12 +718,12 @@ DCC @{input.ss} \\ --refseq {params.fa} fi -ls -alrth ${{TMPDIR}} +ls -alrth {params.tmpdir} -paste {output.cr} {output.linear} | cut -f1-5,9 > ${{TMPDIR}}/CircRNALinearCount +paste {output.cr} {output.linear} | cut -f1-5,9 > {params.tmpdir}/CircRNALinearCount python {params.script} \\ - --CircCoordinates {output.cc} --CircRNALinearCount ${{TMPDIR}}/CircRNALinearCount -o {output.ct} + --CircCoordinates {output.cc} --CircRNALinearCount {params.tmpdir}/CircRNALinearCount -o {output.ct} python {params.script2} \\ --in_dcc_counts_table {output.ct} \\ @@ -846,18 +824,13 @@ rule mapsplice: separate_fastas=join(REF_DIR, "separate_fastas"), ebwt=join(REF_DIR, "separate_fastas_index"), outdir=join(WORKDIR, "results", "{sample}", "MapSplice"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", threads: getthreads("mapsplice") container: config['containers']['mapsplice'] shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! 
-d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} MSHOME="/opt/MapSplice2" # singularity exec -B /data/Ziegelbauer_lab,/data/kopardevn \ @@ -867,12 +840,12 @@ if [ "{params.peorse}" == "PE" ];then R1fn=$(basename {input.R1}) R2fn=$(basename {input.R2}) -zcat {input.R1} > ${{TMPDIR}}/${{R1fn%.*}} -zcat {input.R2} > ${{TMPDIR}}/${{R2fn%.*}} +zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}} +zcat {input.R2} > {params.tmpdir}/${{R2fn%.*}} python $MSHOME/mapsplice.py \\ - -1 ${{TMPDIR}}/${{R1fn%.*}} \\ - -2 ${{TMPDIR}}/${{R2fn%.*}} \\ + -1 {params.tmpdir}/${{R1fn%.*}} \\ + -2 {params.tmpdir}/${{R2fn%.*}} \\ -c {params.separate_fastas} \\ -p {threads} \\ --min-map-len {params.minmaplen} \\ @@ -887,10 +860,10 @@ python $MSHOME/mapsplice.py \\ else R1fn=$(basename {input.R1}) -zcat {input.R1} > ${{TMPDIR}}/${{R1fn%.*}} +zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}} python $MSHOME/mapsplice.py \ - -1 ${{TMPDIR}}/${{R1fn%.*}} \ + -1 {params.tmpdir}/${{R1fn%.*}} \ -c {params.separate_fastas} \ -p {threads} \ -x {params.ebwt} \ @@ -945,7 +918,7 @@ rule mapsplice_postprocess: params: script=join(SCRIPTS_DIR, "create_mapsplice_per_sample_counts_table.py"), memG=getmemG("mapsplice_postprocess"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", bsj_min_nreads=config["minreadcount"], refregions=REF_REGIONS, reffa=REF_FA, @@ -960,12 +933,7 @@ rule mapsplice_postprocess: shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! 
-d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} python {params.script} \\ --circularRNAstxt {input.circRNAs} \\ -o {output.ct} \\ @@ -979,8 +947,8 @@ python {params.script} \\ --host_filter_max {params.maxsize_host} \\ --virus_filter_min {params.minsize_virus} \\ --virus_filter_max {params.maxsize_virus} -cd $TMPDIR -samtools view -@{threads} -T {params.reffa} -CS {input.sam} | samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O CRAM -o alignments.cram - +cd {params.tmpdir} +samtools view -@{threads} -T {params.reffa} -CS {input.sam} | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O CRAM -o alignments.cram - rsync -az --progress alignments.cram {output.bam} rsync -az --progress alignments.cram.crai {output.bai} """ @@ -1039,7 +1007,7 @@ rule nclscan: peorse=get_peorse, nclscan_config=config["nclscan_config"], script=join(SCRIPTS_DIR, "create_nclscan_per_sample_counts_table.py"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", bsj_min_nreads=config["minreadcount"], refregions=REF_REGIONS, host=HOST, @@ -1052,18 +1020,13 @@ rule nclscan: shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! 
-d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} outdir=$(dirname {output.result}) results_bn=$(basename {output.result}) if [ "{params.peorse}" == "PE" ];then -NCLscan.py -c {params.nclscan_config} -pj {params.sample} -o $TMPDIR --fq1 {input.R1} --fq2 {input.R2} -rsync -az --progress ${{TMPDIR}}/${{results_bn}} {output.result} +NCLscan.py -c {params.nclscan_config} -pj {params.sample} -o {params.tmpdir} --fq1 {input.R1} --fq2 {input.R2} +rsync -az --progress {params.tmpdir}/${{results_bn}} {output.result} python {params.script} \\ --result {output.result} \\ -o {output.ct} \\ @@ -1124,17 +1087,12 @@ rule circrnafinder: ), params: bsj_min_nreads=config["minreadcount"], - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']['circRNA_finder'] shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} starDir=$(dirname {input.chimericsam}) outDir=$(dirname {output.bed}) @@ -1199,7 +1157,7 @@ rule find_circ: find_circ_params=config['findcirc_params'], min_reads=config['circexplorer_bsj_circRNA_min_reads'], collapse_script=join(SCRIPTS_DIR,"_collapse_find_circ.py"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']['star_ucsc_cufflinks'] threads: getthreads("find_circ") shell: @@ -1207,14 +1165,8 @@ rule find_circ: set -exo pipefail python --version which python -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi - -if [ ! 
-d $TMPDIR ]; then mkdir -p $TMPDIR;fi -cd $TMPDIR +mkdir -p {params.tmpdir} +cd {params.tmpdir} refdir=$(dirname {input.bt2}) outdir=$(dirname {output.find_circ_bsj_bed}) @@ -1232,41 +1184,41 @@ outdir=$(dirname {output.find_circ_bsj_bed}) # These _A and _B pairs should be retained in the fastq splits # find number of lines in fastq file -cp {input.anchorsfq} ${{TMPDIR}} +cp {input.anchorsfq} {params.tmpdir} fname=$(basename {input.anchorsfq}) fname_wo_gz=$(echo $fname|sed "s/.gz//g") pigz -d $fname total_lines=$(wc -l ${{fname_wo_gz}} | awk '{{print $1}}') split_nlines=$(echo $total_lines| awk '{{print sprintf("%d", $1/10)}}' | awk '{{print sprintf("%d",($1+7)/8+1)}}' | awk '{{print sprintf("%d",$1*8)}}') -split -d -l $split_nlines --suffix-length 1 $fname_wo_gz ${{TMPDIR}}/{params.sample}.samsplit. +split -d -l $split_nlines --suffix-length 1 $fname_wo_gz {params.tmpdir}/{params.sample}.samsplit. -if [ -f ${{TMPDIR}}/do_find_circ ];then rm -f ${{TMPDIR}}/do_find_circ;fi +if [ -f {params.tmpdir}/do_find_circ ];then rm -f {params.tmpdir}/do_find_circ;fi for i in $(seq 0 9);do bowtie2 -p {threads} \\ --score-min=C,-15.0 \\ --reorder --mm \\ - -q -U ${{TMPDIR}}/{params.sample}.samsplit.${{i}} \\ - -x ${{refdir}}/ref > ${{TMPDIR}}/{params.sample}.samsplit.${{i}}.sam + -q -U {params.tmpdir}/{params.sample}.samsplit.${{i}} \\ + -x ${{refdir}}/ref > {params.tmpdir}/{params.sample}.samsplit.${{i}}.sam -cat <>${{TMPDIR}}/do_find_circ -cat ${{TMPDIR}}/{params.sample}.samsplit.${{i}}.sam | \\ +cat <>{params.tmpdir}/do_find_circ +cat {params.tmpdir}/{params.sample}.samsplit.${{i}}.sam | \\ find_circ.py \\ --genome={params.reffa} \\ --prefix={params.sample}.find_circ \\ --name={params.sample} \\ {params.find_circ_params} \\ --stats=${{outdir}}/{params.sample}.bowtie2_stats.${{i}}.txt \\ - --reads=${{TMPDIR}}/{params.sample}.bowtie2_spliced_reads.${{i}}.fa \\ - > ${{TMPDIR}}/{params.sample}.splice_sites.${{i}}.bed + 
--reads={params.tmpdir}/{params.sample}.bowtie2_spliced_reads.${{i}}.fa \\ + > {params.tmpdir}/{params.sample}.splice_sites.${{i}}.bed EOF done -parallel -j 10 < ${{TMPDIR}}/do_find_circ +parallel -j 10 < {params.tmpdir}/do_find_circ -cat ${{TMPDIR}}/{params.sample}.splice_sites.*.bed > ${{TMPDIR}}/{params.sample}.splice_sites.bed +cat {params.tmpdir}/{params.sample}.splice_sites.*.bed > {params.tmpdir}/{params.sample}.splice_sites.bed -grep CIRCULAR ${{TMPDIR}}/{params.sample}.splice_sites.bed | \\ +grep CIRCULAR {params.tmpdir}/{params.sample}.splice_sites.bed | \\ grep ANCHOR_UNIQUE \\ > {output.find_circ_bsj_bed} diff --git a/workflow/rules/init.smk b/workflow/rules/init.smk index 8429588..9aea06c 100644 --- a/workflow/rules/init.smk +++ b/workflow/rules/init.smk @@ -86,6 +86,12 @@ def _convert_to_int(variable): # resource absolute path WORKDIR = config["workdir"] +TEMPDIR = config["tempdir"] +# A tempdir containing shell variables (e.g. '/lscratch/$SLURM_JOB_ID') is only expanded by the job's shell when a rule runs, +# so it cannot be validated here; literal paths are still checked up front so a bad path fails before any job starts. +if "$" not in TEMPDIR and not os.access(TEMPDIR, os.W_OK): + raise PermissionError(f"TEMPDIR {TEMPDIR} cannot be written to.\n\tHint: does the path exist and do you have write permissions?") + SCRIPTS_DIR = config["scriptsdir"] RESOURCES_DIR = config["resourcesdir"] FASTAS_GTFS_DIR = config["fastas_gtfs_dir"] @@ -106,6 +112,7 @@ MAPSPLICE_FILTERING = config["mapsplice_filtering"] FLANKSIZE = config['flanksize'] HQCC=config["high_confidence_core_callers"].replace(" ","") CALLERS=["circExplorer","ciri","circExplorer_bwa"] + # if RUN_CLEAR: CALLERS.append("clear") if RUN_DCC: CALLERS.append("dcc") if RUN_MAPSPLICE: CALLERS.append("mapsplice") diff --git a/workflow/rules/post_findcircrna_processing.smk b/workflow/rules/post_findcircrna_processing.smk index 7eeebb0..8368811 100644 --- a/workflow/rules/post_findcircrna_processing.smk +++ b/workflow/rules/post_findcircrna_processing.smk @@ -61,18 +61,13 @@ rule create_circExplorer_BSJ_bam: scriptse=join(SCRIPTS_DIR, "_create_circExplorer_BSJ_bam_se.py"), flankscript=join(SCRIPTS_DIR, "_append_splice_site_flanks_to_BSJs.py"), 
bam2bwscript=join(SCRIPTS_DIR, "bam_to_bigwig.sh"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']["star_ucsc_cufflinks"] threads: getthreads("create_circExplorer_BSJ_bam") shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi +mkdir -p {params.tmpdir} outdir=$(dirname {output.BSJbam}) BSJbedbn=$(basename {output.BSJbed}) @@ -82,59 +77,59 @@ if [ "{params.peorse}" == "PE" ];then python3 {params.scriptpe} \\ --inbam {input.chimericbam} \\ --sample_counts_table {input.countstable} \\ - --plusbam ${{TMPDIR}}/{params.sample}.BSJ.plus.bam \\ - --minusbam ${{TMPDIR}}/{params.sample}.BSJ.minus.bam \\ - --outbam ${{TMPDIR}}/{params.sample}.BSJ.bam \\ - --bed ${{TMPDIR}}/${{BSJbedbn}} \\ + --plusbam {params.tmpdir}/{params.sample}.BSJ.plus.bam \\ + --minusbam {params.tmpdir}/{params.sample}.BSJ.minus.bam \\ + --outbam {params.tmpdir}/{params.sample}.BSJ.bam \\ + --bed {params.tmpdir}/${{BSJbedbn}} \\ --sample_name {params.sample} \\ --junctionsfound {output.BSJfoundcounts} \\ --regions {params.refregions} \\ --host "{params.host}" \\ --additives "{params.additives}" \\ --viruses "{params.viruses}" \\ - --outputhostbams --outputvirusbams --outdir $TMPDIR + --outputhostbams --outputvirusbams --outdir {params.tmpdir} else python3 {params.scriptse} \\ --inbam {input.chimericbam} \\ --sample_counts_table {input.countstable} \\ - --plusbam ${{TMPDIR}}/{params.sample}.BSJ.plus.bam \\ - --minusbam ${{TMPDIR}}/{params.sample}.BSJ.minus.bam \\ - --outbam ${{TMPDIR}}/{params.sample}.BSJ.bam \\ - --bed ${{TMPDIR}}/${{BSJbedbn}} \\ + --plusbam {params.tmpdir}/{params.sample}.BSJ.plus.bam \\ + --minusbam {params.tmpdir}/{params.sample}.BSJ.minus.bam \\ + --outbam {params.tmpdir}/{params.sample}.BSJ.bam \\ + --bed {params.tmpdir}/${{BSJbedbn}} \\ --sample_name 
{params.sample} \\ --junctionsfound {output.BSJfoundcounts} \\ --regions {params.refregions} \\ --host "{params.host}" \\ --additives "{params.additives}" \\ --viruses "{params.viruses}" \\ - --outputhostbams --outputvirusbams --outdir $TMPDIR + --outputhostbams --outputvirusbams --outdir {params.tmpdir} fi -samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O BAM -o {output.plusBSJbam} ${{TMPDIR}}/{params.sample}.BSJ.plus.bam -samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O BAM -o {output.minusBSJbam} ${{TMPDIR}}/{params.sample}.BSJ.minus.bam -samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O BAM -o {output.BSJbam} ${{TMPDIR}}/{params.sample}.BSJ.bam +samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.plusBSJbam} {params.tmpdir}/{params.sample}.BSJ.plus.bam +samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.minusBSJbam} {params.tmpdir}/{params.sample}.BSJ.minus.bam +samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.BSJbam} {params.tmpdir}/{params.sample}.BSJ.bam for b in {output.plusBSJbam} {output.minusBSJbam} {output.BSJbam} # for b in {output.plusBSJbam} {output.minusBSJbam} do - bash {params.bam2bwscript} $b $TMPDIR + bash {params.bam2bwscript} $b {params.tmpdir} done for i in $(echo {params.host}|tr ',' ' ');do - samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O BAM -o ${{outdir}}/{params.sample}.${{i}}.BSJ.bam ${{TMPDIR}}/{params.sample}.${{i}}.BSJ.bam - bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam $TMPDIR + samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir}/{params.sample}.${{i}}.BSJ.bam + bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir} done for i in $(echo {params.viruses}|tr ',' ' ');do - samtools sort -l 9 -T $TMPDIR --write-index -@{threads} -O BAM -o 
${{outdir}}/{params.sample}.${{i}}.BSJ.bam ${{TMPDIR}}/{params.sample}.${{i}}.BSJ.bam - bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam $TMPDIR + samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir}/{params.sample}.${{i}}.BSJ.bam + bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir} done -python3 {params.flankscript} --reffa {params.reffa} --inbsjbedgz ${{TMPDIR}}/${{BSJbedbn}} --outbsjbedgz {output.BSJbed} +python3 {params.flankscript} --reffa {params.reffa} --inbsjbedgz {params.tmpdir}/${{BSJbedbn}} --outbsjbedgz {output.BSJbed} -rm -rf $TMPDIR +rm -rf {params.tmpdir} """ @@ -209,19 +204,14 @@ rule create_circExplorer_linear_spliced_bams: peorse=get_peorse, bashscript=join(SCRIPTS_DIR, "_create_circExplorer_linear_bam.v2.sh"), outdir=join(WORKDIR, "results", "{sample}", "circExplorer"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']["star_ucsc_cufflinks"] threads: getthreads("create_circExplorer_linear_spliced_bams") shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" -else - TMPDIR="/dev/shm/{params.randomstr}" -fi -if [ -d $TMPDIR ];then rm -rf $TMPDIR;fi -mkdir -p $TMPDIR +if [ -d {params.tmpdir} ];then rm -rf {params.tmpdir};fi +mkdir -p {params.tmpdir} cd {params.outdir} @@ -232,7 +222,7 @@ bash {params.bashscript} \\ --samplename {params.sample} \\ --peorse {params.peorse} \\ --bsjbed {input.bsjbedgz} \\ - --tmpdir $TMPDIR \\ + --tmpdir {params.tmpdir} \\ --rid2jid {output.rid2jid} \\ --filteredbam {output.filtered_bam} \\ --linearbsjlist {output.linear_readids} \\ @@ -247,7 +237,7 @@ bash {params.bashscript} \\ --linearbam {output.linear_bam} \\ --splicedbam {output.spliced_bam} \\ --threads {threads} -rm -rf $TMPDIR +rm -rf {params.tmpdir} """ @@ -290,16 +280,11 @@ rule 
create_circExplorer_merged_found_counts_table: SCRIPTS_DIR, "create_circExplorer_per_sample_counts_table.py" ), outdir=join(WORKDIR, "results", "{sample}", "circExplorer"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -else - TMPDIR="/dev/shm/{params.randomstr}" - mkdir -p $TMPDIR -fi +mkdir -p {params.tmpdir} python3 {params.pythonscript} \\ -b {input.bsj_found_counts} \\ -l {input.linear_spliced_counts} \\ @@ -312,32 +297,6 @@ python3 {params.pythonscript2} \\ """ -# localrules: create_circExplorer_per_sample_counts_table -# rule create_circExplorer_per_sample_counts_table: -# input: -# annotation_counts=rules.circExplorer.output.annotation_counts_table, -# found_counts=rules.create_circExplorer_merged_found_counts_table.output.found_counts_table -# output: -# count_counts_table=join(WORKDIR,"results","{sample}","circExplorer","{sample}.circExplorer.counts_table.tsv") -# params: -# sample="{sample}", -# pythonscript=join(SCRIPTS_DIR,"create_circExplorer_per_sample_counts_table.py"), -# outdir=join(WORKDIR,"results","{sample}","circExplorer"), -# randomstr=str(uuid.uuid4()) -# shell:""" -# set -exo pipefail -# if [ -d /lscratch/${{SLURM_JOB_ID}} ];then -# TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -# else -# TMPDIR="/dev/shm/{params.randomstr}" -# mkdir -p $TMPDIR -# fi -# python3 {params.pythonscript} \\ -# --annotationcounts {input.annotation_counts} \\ -# --allfoundcounts {input.found_counts} \\ -# --countstable {output.count_counts_table} -# """ - if RUN_MAPSPLICE: rule alignment_stats: @@ -357,32 +316,27 @@ if RUN_MAPSPLICE: peorse=get_peorse, run_mapsplice=N_RUN_MAPSPLICE, bash2nreads_pyscript=join(SCRIPTS_DIR, "_bam_get_alignment_stats.py"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", threads: getthreads("alignment_stats") container: config['containers']["base"] shell: """ set -exo pipefail - if 
[ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}" - else - TMPDIR="/dev/shm/{params.randomstr}" - mkdir -p $TMPDIR - fi + mkdir -p {params.tmpdir} for bamfile in {input};do bamfile_bn=$(basename $bamfile) if [ "{params.peorse}" == "PE" ];then - echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > ${{TMPDIR}}/${{bamfile_bn}}.counts" + echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > {params.tmpdir}/${{bamfile_bn}}.counts" else - echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > ${{TMPDIR}}/${{bamfile_bn}}.counts" + echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > {params.tmpdir}/${{bamfile_bn}}.counts" fi - done > ${{TMPDIR}}/do_bamstats - parallel -j 2 < ${{TMPDIR}}/do_bamstats + done > {params.tmpdir}/do_bamstats + parallel -j 2 < {params.tmpdir}/do_bamstats print_bam_results () {{ bamfile=$1 bamfile_bn=$(basename $bamfile) - stats_file=${{TMPDIR}}/${{bamfile_bn}}.counts + stats_file={params.tmpdir}/${{bamfile_bn}}.counts prefix=$2 while read b a;do echo -ne "${{prefix}}_${{a}}\\t${{b}}\\n";done < $stats_file }} @@ -412,32 +366,27 @@ else: peorse=get_peorse, run_mapsplice=N_RUN_MAPSPLICE, bash2nreads_pyscript=join(SCRIPTS_DIR, "_bam_get_alignment_stats.py"), - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", threads: getthreads("alignment_stats") container: config['containers']["base"] shell: """ set -exo pipefail - if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}" - else - TMPDIR="/dev/shm/{params.randomstr}" - mkdir -p $TMPDIR - fi + mkdir -p {params.tmpdir} for bamfile in {input};do bamfile_bn=$(basename $bamfile) if [ "{params.peorse}" == "PE" ];then - echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > ${{TMPDIR}}/${{bamfile_bn}}.counts" + echo "python3 
{params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > {params.tmpdir}/${{bamfile_bn}}.counts" else - echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > ${{TMPDIR}}/${{bamfile_bn}}.counts" + echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > {params.tmpdir}/${{bamfile_bn}}.counts" fi - done > ${{TMPDIR}}/do_bamstats - parallel -j 2 < ${{TMPDIR}}/do_bamstats + done > {params.tmpdir}/do_bamstats + parallel -j 2 < {params.tmpdir}/do_bamstats print_bam_results () {{ bamfile=$1 bamfile_bn=$(basename $bamfile) - stats_file=${{TMPDIR}}/${{bamfile_bn}}.counts + stats_file={params.tmpdir}/${{bamfile_bn}}.counts prefix=$2 while read b a;do echo -ne "${{prefix}}_${{a}}\\t${{b}}\\n";done < $stats_file }} @@ -467,25 +416,21 @@ rule merge_alignment_stats: output: join(WORKDIR, "results", "alignmentstats.txt"), params: - randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", shell: """ set -exo pipefail -if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -else - TMPDIR="/dev/shm/{params.randomstr}" - mkdir -p $TMPDIR -fi +mkdir -p {params.tmpdir} + count=0 for f in {input};do count=$((count+1)) if [ "$count" == "1" ];then cp $f {output} else - cut -f2 $f > ${{TMPDIR}}/${{count}} - paste {output} ${{TMPDIR}}/${{count}} > ${{TMPDIR}}/${{count}}.tmp - mv ${{TMPDIR}}/${{count}}.tmp {output} + cut -f2 $f > {params.tmpdir}/${{count}} + paste {output} {params.tmpdir}/${{count}} > {params.tmpdir}/${{count}}.tmp + mv {params.tmpdir}/${{count}}.tmp {output} fi done """ diff --git a/workflow/rules/preprocessing.smk b/workflow/rules/preprocessing.smk index e3a83f7..1abeaed 100644 --- a/workflow/rules/preprocessing.smk +++ b/workflow/rules/preprocessing.smk @@ -30,23 +30,14 @@ rule cutadapt: cutadapt_O=config["cutadapt_O"], cutadapt_q=config["cutadapt_q"], adapters=join(RESOURCES_DIR, "TruSeq_and_nextera_adapters.consolidated.fa"), - 
randomstr=str(uuid.uuid4()), + tmpdir=f"{TEMPDIR}/{str(uuid.uuid4())}", container: config['containers']['cutadapt'] threads: getthreads("cutadapt") shell: """ set -exo pipefail - # set TMPDIR - if [ -d /lscratch/${{SLURM_JOB_ID}} ];then - TMPDIR="/lscratch/${{SLURM_JOB_ID}}/{params.randomstr}" - else - TMPDIR="/dev/shm/{params.randomstr}" - fi - if [ ! -d $TMPDIR ];then mkdir -p $TMPDIR;fi - - if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi - + mkdir -p {params.tmpdir} of1bn=$(basename {output.of1}) of2bn=$(basename {output.of2}) @@ -61,14 +52,14 @@ rule cutadapt: -b file:{params.adapters} \\ -B file:{params.adapters} \\ -j {threads} \\ - -o ${{TMPDIR}}/${{of1bn}} -p ${{TMPDIR}}/${{of2bn}} \\ + -o {params.tmpdir}/${{of1bn}} -p {params.tmpdir}/${{of2bn}} \\ {input.R1} {input.R2} # filter for average read quality fastq-filter \\ -q {params.cutadapt_q} \\ -o {output.of1} -o {output.of2} \\ - ${{TMPDIR}}/${{of1bn}} ${{TMPDIR}}/${{of2bn}} + {params.tmpdir}/${{of1bn}} {params.tmpdir}/${{of2bn}} else ## Single-end @@ -80,7 +71,7 @@ rule cutadapt: -q {params.cutadapt_q},{params.cutadapt_q} -m {params.cutadapt_min_length} \\ -b file:{params.adapters} \\ -j {threads} \\ - -o ${{TMPDIR}}/${{of1bn}} \\ + -o {params.tmpdir}/${{of1bn}} \\ {input.R1} touch {output.of2} @@ -89,7 +80,7 @@ rule cutadapt: fastq-filter \\ -q {params.cutadapt_q} \\ -o {output.of1} \\ - ${{TMPDIR}}/${{of1bn}} + {params.tmpdir}/${{of1bn}} fi """