diff --git a/bio/bowtie2/align/environment.linux-64.pin.txt b/bio/bowtie2/align/environment.linux-64.pin.txt index f60ee3221ab..208d5ee4af3 100644 --- a/bio/bowtie2/align/environment.linux-64.pin.txt +++ b/bio/bowtie2/align/environment.linux-64.pin.txt @@ -1,46 +1,88 @@ # This file may be used to create an environment using: # $ conda create --name --file # platform: linux-64 +# created-by: conda 24.9.2 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda#c27d1c142233b5bc9ca570c6e2e0c244 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda#b80f2f396ca2c28b8c14c437a4ed1e74 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h8827d51_1.conda#8bfdead4e0fff0383ae4c9c50d0531bd -https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda#23c255b008c4f2ae008f81edcabaca89 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.12.14-hbcca054_0.conda#720523eb0d6a9b0f6120c16b2aa4e7de +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_2.conda#048b02e3962f066da18efe3a21b77672 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.13-5_cp313.conda#381bbd2a92c863f640a55b6ff3c35161 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024b-hc8b5060_0.conda#8ac3367aafb1cc0a068483c580af8015 
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h77fa898_1.conda#cc3573974587f12dda90d96e3e55a702 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d -https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.1.0-h77fa898_1.conda#002ef4463dd1e2b44a94a4ace468f5d2 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.3-h5888daf_0.conda#59f4c43bb1b5ef1c71946ff2cbf59524 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h69a702a_1.conda#1efc0ad219877a73ef977af7dbb51f17 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.1.0-hc0a3c3a_1.conda#9dbb9699ea467983ba8a4ba89b08b066 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda#4d638782050ab6faa27275bed57e9b4e +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h77fa898_1.conda#3cb76c3f10d3bc7f1105b2fc9db984df +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.13-hb9d3cd8_0.conda#ae1370588aa6a5157c34c73e9bbb36a0 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.4-hb9d3cd8_0.conda#e2775acf57efd5af15b8e3d1d74d72d3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.22-hb9d3cd8_0.conda#b422943d5d772b7cc858b36ad2a92db5 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.4-h5888daf_0.conda#db833e03127376d461e1e13e76f09b6c +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda#e39480b9ca41323497b05492a63bc35b +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.6.3-hb9d3cd8_1.conda#2ecf2f1c7e4e21fcfe6423a51a992d84 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-hc0a3c3a_1.conda#234a5554c53625688d51062645337328 
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.4.0-hb9d3cd8_0.conda#23cc74f77eb99315c0360ec3533147a9 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.33.1-heb4867d_0.conda#0d3c60291342c0c025db231353376dfb +https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-h4852527_1.conda#bd2598399a70bb86d8218e95548d735e +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 
+https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda#f4cc49d7aa68316213e4b12be35308d1 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.47.2-hee588c1_0.conda#b58da17db24b6e08bcbf8fed2fb8c915 +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hf672d98_0.conda#be2de152d8073ef1c01b7728475f2fe7 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_1.conda#8371ac6457591af2cf6159439c1fd051 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda#70caf8bb6cf39a0b6b7efc885f51c0fe -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.44.2-h29eaf8c_0.conda#5e2a7acfa2c24188af39e7944e1b3604 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c 
+https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda#36f79405ab16bf271edb55b213836dac -https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.64.0-h161d5f1_0.conda#19e57602824042dfd0446292ef90488b +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda#df359c09c41cd186fffb93a2d87aa6f5 https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda#f2cfec9406850991f4e3d960cc9e3321 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.5-he73a12e_0.conda#4c3e9fab69804ec6077697922d70c6e2 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.10-h4f16b4b_1.conda#125f34a17d7b4bea418a83904ea82ea6 https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 -https://conda.anaconda.org/conda-forge/linux-64/python-3.12.5-h2ad013b_0_cpython.conda#9c56c4df45f6571b13111d8df2448692 
-https://conda.anaconda.org/bioconda/linux-64/bowtie2-2.5.4-h7071971_4.tar.bz2#69822858766e6c8b12ae90d78d54d8ea -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.10.0-hbbe4b11_0.conda#657ea309ad90675ef144e7d27a271ab9 -https://conda.anaconda.org/conda-forge/noarch/setuptools-73.0.1-pyhd8ed1ab_0.conda#f0b618d7673d1b2464f600b34d912f6f +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.82.2-h2ff4ddf_0.conda#13e8e54035ddd2b91875ba399f0f7c04 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hc4654cb_2.conda#be54fb40ea32e8fe9dbaa94d4528b57e +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.1-ha99a958_102_cp313.conda#6e7535f1d1faf524e9210d2689b3149b +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxt-1.3.1-hb9d3cd8_0.conda#279b0de5f6ba95457190a1c459a64e31 +https://conda.anaconda.org/bioconda/linux-64/bowtie2-2.5.4-he96a11b_5.tar.bz2#16af1b2c107e07dde4a6d98e44e72cd3 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.2-h3394656_1.conda#b34c2833a1f56db610aeb27f206d800d +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.11.1-h332b0f4_0.conda#2b3e0081006dc21e8bf53a91c83a055c +https://conda.anaconda.org/conda-forge/noarch/pip-24.3.1-pyh145f28c_2.conda#76601b0ccfe1fe13a21a5f8813cb38de https://conda.anaconda.org/bioconda/noarch/snakemake-wrapper-utils-0.6.2-pyhdfd78af_0.tar.bz2#fd8759bbd04116eace828c4fab906096 
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584 -https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h5efdd21_0.tar.bz2#06b995dc2244c024b45bbb3e53ae2f27 -https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyh8b19718_1.conda#6c78fbb8ddfd64bcb55b5cbafd2d2c43 -https://conda.anaconda.org/bioconda/linux-64/samtools-1.21-h50ea8bc_0.tar.bz2#4a7fe11223f61cb2d950ed54e20c12ce +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda#17dcc85db3c7886650b8908b183d6876 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.4-hb9d3cd8_0.conda#2de7f99d6581a4a7adbff607b5c278ca +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-10.1.0-h0b3b770_0.conda#ab1d7d56034814f4c3ed9f69f8c68806 +https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h566b1c6_1.tar.bz2#944598fba531a668e8fafea92ca39bb4 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda#7bbe9a0cc0df0ac5f5a8ad6d6a11af2f +https://conda.anaconda.org/conda-forge/linux-64/openjdk-23.0.1-h68779a4_1.conda#eae06cb5a47244d3f4659f366015a85b +https://conda.anaconda.org/bioconda/linux-64/samtools-1.21-h96c455f_1.tar.bz2#0ff9d5d48561198378ad3cb34ce830bf +https://conda.anaconda.org/bioconda/noarch/picard-slim-3.3.0-hdfd78af_0.tar.bz2#fab4a4639fd22cd155b740fce3064944 diff --git a/bio/bowtie2/align/environment.yaml b/bio/bowtie2/align/environment.yaml index d961b36c609..7dc5dc02174 100644 --- a/bio/bowtie2/align/environment.yaml +++ b/bio/bowtie2/align/environment.yaml @@ -4,5 +4,6 @@ channels: - nodefaults dependencies: - bowtie2 =2.5.4 + - picard-slim =3.3.0 - samtools =1.21 - snakemake-wrapper-utils =0.6.2 diff --git a/bio/bowtie2/align/meta.yaml b/bio/bowtie2/align/meta.yaml index 8e7ad69b9ad..225c7a809da 100644 --- a/bio/bowtie2/align/meta.yaml +++ b/bio/bowtie2/align/meta.yaml @@ -5,6 +5,7 @@ authors: - Johannes Köster - Filipe G. 
Vieira - Thibault Dayris + - Jorge Langa input: - sample: FASTQ file(s) - idx: Bowtie2 indexed reference index @@ -13,13 +14,17 @@ input: output: - SAM/BAM/CRAM file. This must be the first output file in the output file list. - idx: Optional path to bam index. - - metrics: Optional path to metrics file. - - unaligned: Optional path to unaligned unpaired reads. - - unpaired: Optional path to unpaired reads that aligned at least once. - - unconcordant: Optional path to pairs that didn't align concordantly. - - concordant: Optional path to pairs that aligned concordantly at least once. + # - metrics: Optional path to metrics file. + # - unaligned: Optional path to unaligned unpaired reads. + # - unpaired: Optional path to unpaired reads that aligned at least once. + # - unconcordant: Optional path to pairs that didn't align concordantly. + # - concordant: Optional path to pairs that aligned concordantly at least once. params: - extra: additional program arguments (except for `-x`, `-U`, `-1`, `-2`, `--interleaved`, `-b`, `--met-file`, `--un`, `--al`, `--un-conc`, `--al-conc`, `-f`, `--tab6`, `--tab5`, `-q`, or `-p/--threads`) - interleaved: Input `sample` contains interleaved paired-end FASTQ/FASTA reads. `False`(default) or `True`. + - sort_program: program to sort the output. `none`(default), `samtools`, or `picard`. + - sort_extra: additional arguments for samtools or picard when sorting. + - sort_order: choose between `coordinate`(default) or `queryname`. notes: | + * The `extra` param allows for additional arguments for bowtie2. * This wrapper uses an inner pipe. Make sure to use at least two threads in your Snakefile. 
diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index d820dd8bfb0..e75638c4aa1 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -1,4 +1,4 @@ -rule test_bowtie2: +rule test_bowtie2_sam: input: sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], idx=multiext( @@ -10,25 +10,16 @@ rule test_bowtie2: ".rev.1.bt2", ".rev.2.bt2", ), - # ref="genome.fasta", #Required for CRAM output output: - "mapped/{sample}.bam", - # idx="", - # metrics="", - # unaligned="", - # unpaired="", - # unconcordant="", - # concordant="", + "mapped_sam/{sample}.sam", log: - "logs/bowtie2/{sample}.log", - params: - extra="", # optional parameters + "logs/bowtie2/mapped_sam_{sample}.log", threads: 8 # Use at least two threads wrapper: "master/bio/bowtie2/align" -use rule test_bowtie2 as test_bowtie2_se_gz with: +use rule test_bowtie2_sam as test_bowtie2_se_gz with: input: sample=["reads/{sample}.1.fastq.gz"], idx=multiext( @@ -42,38 +33,26 @@ use rule test_bowtie2 as test_bowtie2_se_gz with: ), output: "mapped_se_gz/{sample}.bam", + log: + "logs/bowtie2/mapped_se_gz_{sample}.log", -rule test_bowtie2_index: - input: - sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - idx=multiext( - "index/genome", - ".1.bt2", - ".2.bt2", - ".3.bt2", - ".4.bt2", - ".rev.1.bt2", - ".rev.2.bt2", - ), +use rule test_bowtie2_sam as test_bowtie2_extra_outputs with: output: - "mapped_idx/{sample}.bam", - idx="mapped_idx/{sample}.bam.bai", - metrics="mapped_idx/{sample}.metrics.txt", - unaligned="mapped_idx/{sample}.unaligned.sam", - unpaired="mapped_idx/{sample}.unpaired.sam", - # unconcordant="", - # concordant="", + "mapped_multi/{sample}.bam", + idx="mapped_multi/{sample}.bam.bai", + # metrics="mapped_multi/{sample}.metrics.txt", + # unaligned=["mapped_multi/{sample}.unaligned.1.fq"], + # unpaired="mapped_multi/{sample}.unpaired.fq"], + # unconcordant="mapped_multi/{sample}.unconcordant.sam", + # 
concordant="mapped_multi/{sample}.concordant.sam", log: - "logs/bowtie2/{sample}.log", + "logs/bowtie2/mapped_multi_{sample}.log", params: - extra="", # optional parameters - threads: 8 # Use at least two threads - wrapper: - "master/bio/bowtie2/align" + sort_program="samtools", -rule test_bowtie2_cram: +use rule test_bowtie2_sam as test_bowtie2_cram with: input: sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], idx=multiext( @@ -86,18 +65,26 @@ rule test_bowtie2_cram: ".rev.2.bt2", ), ref="genome.fasta", + ref_fai="genome.fasta.fai", output: - "mapped_idx/{sample}.cram", - # idx="", - # metrics="", - # unaligned="", - # unpaired="", - # unconcordant="", - # concordant="", + "mapped_cram/{sample}.cram", log: - "logs/bowtie2/{sample}.log", + "logs/bowtie2/samtools_cram_{sample}.log", + + +use rule test_bowtie2_sam as test_bowtie2_sort_samtools with: + output: + "mapped_samtools/{sample}.bam", + log: + "logs/bowtie2/mapped_samtools_{sample}.log", params: - extra="", # optional parameters - threads: 8 # Use at least two threads - wrapper: - "master/bio/bowtie2/align" + sort_program="samtools" + + +use rule test_bowtie2_sam as test_bowtie2_sort_picard with: + output: + "mapped_picard/{sample}.bam", + log: + "logs/bowtie2/mapped_picard_{sample}.log", + params: + sort_program="picard" diff --git a/bio/bowtie2/align/test/genome.fasta.fai b/bio/bowtie2/align/test/genome.fasta.fai new file mode 100644 index 00000000000..f3cdedb5518 --- /dev/null +++ b/bio/bowtie2/align/test/genome.fasta.fai @@ -0,0 +1 @@ +Sheila 20 8 20 21 diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index cd8b4d64f02..e49fd621326 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -1,101 +1,258 @@ -__author__ = "Johannes Köster" -__copyright__ = "Copyright 2016, Johannes Köster" +__author__ = "Johannes Köster, Jorge Langa" +__copyright__ = "Copyright 2024, Johannes Köster, Jorge Langa" __email__ = "koester@jimmy.harvard.edu" __license__ = 
"MIT" -import os +import tempfile +from os import path + from snakemake.shell import shell +from snakemake_wrapper_utils.java import get_java_opts from snakemake_wrapper_utils.samtools import get_samtools_opts -def get_format(path: str) -> str: +# helpers +def get_extension(filename: str) -> str: """ Return file format since Bowtie2 reads files that could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). """ - if path.endswith((".gz", ".bz2")): - return path.split(".")[-2].lower() - return path.split(".")[-1].lower() + filename = filename.lower() + base, ext = path.splitext(filename) + if ext in (".gz", ".bz2"): + return path.splitext(base)[1][1:] # Remove leading dot + return ext[1:] # Remove leading dot + + +# input +SAMPLE = snakemake.input.sample +INDEX = snakemake.input.idx +REF = snakemake.input.get("ref", None) +REF_FAI = snakemake.input.get("ref_fai", None) + +# output +BAM = str(snakemake.output[0]) +# TODO: These outputs are temporarily disabled due to complexity with SE/PE handling +# They can be re-enabled once we implement proper SE/PE output handling +# METRICS = snakemake.output.get("metrics", None) +# UNALIGNED = snakemake.output.get("unaligned", None) +# UNPAIRED = snakemake.output.get("unpaired", None) +# UNCONCORDANT = snakemake.output.get("unconcordant", None) +# CONCORDANT = snakemake.output.get("concordant", None) +BAI = snakemake.output.get("idx", None) + + +# log +LOG = snakemake.log_fmt_shell(stdout=False, stderr=True) + +# threads +THREADS = snakemake.threads + + +# params +EXTRA = snakemake.params.get("extra", "") +IS_INTERLEAVED = snakemake.params.get("interleaved", False) +SORT_PROGRAM = snakemake.params.get("sort_program", "none") +SORT_ORDER = snakemake.params.get("sort_order", "coordinate") +SORT_EXTRA = snakemake.params.get("sort_extra", "") +SAMTOOLS_OPTS = ( + get_samtools_opts(snakemake, parse_threads=False, param_name="sort_extra") + " " +) +JAVA_OPTS = get_java_opts(snakemake) + + +# checks + +# check inputs +if not 
isinstance(SAMPLE, str) and len(SAMPLE) not in [1, 2]: + raise ValueError( + "Input must have 1 (single-end) or 2 (paired-end) elements, " + f"got {len(SAMPLE)} elements: {SAMPLE}" + ) + +REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} + +index_prefix = path.commonprefix(snakemake.input.idx).rstrip(".") + + +if len(index_prefix) == 0: + raise ValueError("Could not determine common prefix of inputs.idx files.") + +index_extensions = [idx[len(index_prefix) :] for idx in snakemake.input.idx] +missing_idx = REQUIRED_IDX - set(index_extensions) +if len(missing_idx) > 0: + raise ValueError( + f"Missing required indices: {missing_idx} declared as input.idx.\n" + f"Identified reference file is {index_prefix} with extensions {index_extensions}" + ) + + +# check outputs +bam_extension = get_extension(BAM) +bai_extension = get_extension(BAI) if BAI else None +if bam_extension.lower() not in {"sam", "bam", "cram"}: + raise ValueError( + f"Unrecognized extension for output file: {bam_extension}. " + "Valid extensions are: 'sam', 'bam' or 'cram'" + ) + +if bai_extension not in {None, "bai", "crai"}: + raise ValueError( + f"Unrecognized extension for index file: {bai_extension}. " + "Valid extensions are: 'bai' or 'crai'" + ) + + +# check params +if not isinstance(IS_INTERLEAVED, bool): + raise ValueError("params.interleaved must be a boolean") + +if SORT_ORDER not in {"coordinate", "queryname"}: + raise ValueError( + f"Unexpected value for sort_order ({SORT_ORDER}). " + "Valid values are 'coordinate' or 'queryname'" + ) + +if SORT_PROGRAM not in {"none", "samtools", "picard"}: + raise ValueError( + f"Invalid sort_program '{SORT_PROGRAM}'. " + "Valid values are: 'none', 'samtools' or 'picard'" + ) -bowtie2_threads = snakemake.threads - 1 -if bowtie2_threads < 1: +if SORT_PROGRAM != "none" and THREADS < 2: raise ValueError( - f"This wrapper expected at least two threads, got {snakemake.threads}" + "Not enough threads requested. 
This wrapper requires at least two threads: " + "one for bowtie2 and one for samtools/picard." ) -# Setting parse_threads to false since samtools performs only -# bam compression. Thus the wrapper would use *twice* the amount -# of threads reserved by user otherwise. -samtools_opts = get_samtools_opts(snakemake, parse_threads=False) -extra = snakemake.params.get("extra", "") -log = snakemake.log_fmt_shell(stdout=True, stderr=True) +# check input - output compatibility + +if bam_extension == "cram" and (REF is None or REF_FAI is None): + raise ValueError( + "Reference file and index are required for CRAM output. " + "Please specify them as input.ref and input.ref_fai\n" + f"input.ref: {REF}\n" + f"input.ref_fai: {REF_FAI}" + ) + +if BAI is not None and SORT_PROGRAM == "none": + raise ValueError( + "Index file is requested but no sort program is specified. " + "Please specify a sort program to generate the index file." + ) -n = len(snakemake.input.sample) -assert ( - n == 1 or n == 2 -), "input->sample must have 1 (single-end) or 2 (paired-end) elements." 
+# compose shell command -reads = "" -if n == 1: - if get_format(snakemake.input.sample[0]) in ("bam", "sam"): - reads = f"-b {snakemake.input.sample}" +# input part +cmd_input = "" +if len(SAMPLE) == 1: + if get_extension(SAMPLE[0]) in ("bam", "sam"): + cmd_input = f"-b {SAMPLE}" else: - if snakemake.params.get("interleaved", False): - reads = f"--interleaved {snakemake.input.sample}" - else: - reads = f"-U {snakemake.input.sample}" + cmd_input = f"--interleaved {SAMPLE}" if IS_INTERLEAVED else f"-U {SAMPLE}" else: - reads = "-1 {} -2 {}".format(*snakemake.input.sample) + cmd_input = f"-1 {SAMPLE[0]} -2 {SAMPLE[1]}" +cmd_index = index_prefix +cmd_threads = THREADS +sort_threads = snakemake.threads - 1 -if all(get_format(sample) in ("fastq", "fq") for sample in snakemake.input.sample): - extra += " -q " -elif all(get_format(sample) == "tab5" for sample in snakemake.input.sample): - extra += " --tab5 " -elif all(get_format(sample) == "tab6" for sample in snakemake.input.sample): - extra += " --tab6 " -elif all( - get_format(sample) in ("fa", "mfa", "fasta") for sample in snakemake.input.sample -): - extra += " -f " +# extra part +cmd_extra = EXTRA +if all(get_extension(sample) in ("fastq", "fq") for sample in SAMPLE): + cmd_extra += " -q " +elif all(get_extension(sample) == "tab5" for sample in SAMPLE): + cmd_extra += " --tab5 " +elif all(get_extension(sample) == "tab6" for sample in SAMPLE): + cmd_extra += " --tab6 " +elif all(get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE): + cmd_extra += " -f " -metrics = snakemake.output.get("metrics") -if metrics: - extra += f" --met-file {metrics} " +# if METRICS: +# cmd_extra += f" --met-file {METRICS} " +# if UNALIGNED: +# cmd_extra += f" --un {UNALIGNED} " +# if UNPAIRED: +# cmd_extra += f" --al {UNPAIRED} " +# if UNCONCORDANT: +# cmd_extra += f" --un-conc {UNCONCORDANT} " +# if CONCORDANT: +# cmd_extra += f" --al-conc {CONCORDANT} " -unaligned = snakemake.output.get("unaligned") -if unaligned: - extra 
+= f" --un {unaligned} " -unpaired = snakemake.output.get("unpaired") -if unpaired: - extra += f" --al {unpaired} " +# sort or not part -unconcordant = snakemake.output.get("unconcordant") -if unconcordant: - extra += f" --un-conc {unconcordant} " +# Determine which pipe command to use for converting to bam or sorting. +match SORT_PROGRAM: -concordant = snakemake.output.get("concordant") -if concordant: - extra += f" --al-conc {concordant} " + case "samtools": + SAMTOOLS_OPTS += f"--threads {sort_threads} " + if BAI: + bam = f"{BAM}##idx##{BAI}" + SAMTOOLS_OPTS += "--write-index " + else: + bam = BAM + if SORT_ORDER == "queryname": + SORT_EXTRA += " -n " + if bam_extension == "cram": + SAMTOOLS_OPTS += f"--reference {REF} --output-fmt CRAM " + cmd_output = ( + "| samtools sort " + "{SAMTOOLS_OPTS} " + "{SORT_EXTRA} " + "-T {TMPDIR} " + "-o {bam} " + ) + case "picard": + PICARD_OPTS = "" + if bam_extension == "cram": + PICARD_OPTS += f"--REFERENCE_SEQUENCE {REF} " + if BAI: + PICARD_OPTS += "--CREATE_INDEX true " + cmd_output = ( + "| picard SortSam {JAVA_OPTS} {PICARD_OPTS} {SORT_EXTRA} " + "--INPUT /dev/stdin " + "--TMP_DIR {TMPDIR} " + "--SORT_ORDER {SORT_ORDER} " + "--OUTPUT {BAM} " + ) -index = os.path.commonprefix(snakemake.input.idx).rstrip(".") + case _: + if sort_threads >= 1: + SAMTOOLS_OPTS += f"--threads {sort_threads} " + if bam_extension == "bam": + cmd_output = ( + f"| samtools view " + f"--with-header " + f"{SAMTOOLS_OPTS} " + f"--output-fmt BAM " + f"--output {BAM}" + ) + elif bam_extension == "cram": + cmd_output = ( + f"| samtools view " + f"--with-header " + f"{SAMTOOLS_OPTS} " + f"--output {BAM} " + f"--output-fmt CRAM " + f"--reference {REF}" + ) + else: + cmd_output = "> {BAM} " -shell( - "(bowtie2" - " --threads {bowtie2_threads}" - " {reads} " - " -x {index}" - " {extra}" - "| samtools view --with-header " - " {samtools_opts}" - " -" - ") {log}" -) +# let's rock! 
+with tempfile.TemporaryDirectory() as TMPDIR: + shell( + "( bowtie2 " + "--threads {THREADS} " + "{cmd_input} " + "-x {cmd_index} " + "{cmd_extra} " + cmd_output + " ) {LOG}" + ) diff --git a/test_wrappers.py b/test_wrappers.py index 0a7a3e8798e..253f839ba03 100644 --- a/test_wrappers.py +++ b/test_wrappers.py @@ -196,16 +196,15 @@ def test_nonpareil(run): ) - def test_ngsbits_samplesimilarity(run): run( "bio/ngsbits/samplesimilarity", [ - "snakemake", - "--cores", - "1", - "--use-conda", - "-F", + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", "similarity.tsv", ], ) @@ -2043,22 +2042,32 @@ def test_blast_blastn(run): def test_bowtie2_align(run): run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped_idx/a.cram", "--use-conda", "-F"], + ["snakemake", "--cores", "1", "mapped_sam/a.sam", "--use-conda", "-F"], ) run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped_idx/a.bam", "--use-conda", "-F"], + ["snakemake", "--cores", "2", "mapped_se_gz/a.bam", "--use-conda", "-F"], ) run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped/a.bam", "--use-conda", "-F"], + ["snakemake", "--cores", "2", "mapped_multi/a.bam", "--use-conda", "-F"], ) run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped_se_gz/a.bam", "--use-conda", "-F"], + ["snakemake", "--cores", "2", "mapped_cram/a.cram", "--use-conda", "-F"], + ) + + run( + "bio/bowtie2/align", + ["snakemake", "--cores", "2", "mapped_samtools/a.bam", "--use-conda", "-F"], + ) + + run( + "bio/bowtie2/align", + ["snakemake", "--cores", "2", "mapped_picard/a.bam", "--use-conda", "-F"], ) @@ -4675,6 +4684,7 @@ def test_sexdeterrmine(run): ["snakemake", "--cores", "1", "results.tsv", "-F", "--use-conda"], ) + def test_sourmash_compute(run): run( "bio/sourmash/compute/", @@ -6027,12 +6037,14 @@ def test_vg_construct(run): ["snakemake", "--cores", "1", "graph/c.vg", "--use-conda", "-F"], ) + def test_vg_giraffe(run): run( "bio/vg/giraffe", ["snakemake", "--cores", "1", 
"mapped/a.bam", "--use-conda", "-F"], ) + def test_vg_merge(run): run( "bio/vg/merge",