Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ribotricer/detectorfs #5112

Merged
merged 24 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ab8184b
Add ribotricer/detectorfs
pinin4fjords Mar 13, 2024
49f5fd3
Merge branch 'master' into ribotricer/detectorfs
pinin4fjords Mar 13, 2024
57091f1
Don't snapshot PDFs
pinin4fjords Mar 13, 2024
d9eb3ca
Complete meta.yml
pinin4fjords Mar 13, 2024
bcbdbc9
Add test for --report_all, address linting things
pinin4fjords Mar 13, 2024
f93450b
Fix tag, add stub tests
pinin4fjords Mar 13, 2024
aadbccd
Appease eclint
pinin4fjords Mar 13, 2024
1fcb999
Merge branch 'master' into ribotricer/detectorfs
pinin4fjords Mar 13, 2024
ae7c5b4
Add missing configs
pinin4fjords Mar 13, 2024
ad9dcee
Make ribotricer versioning more reliable in conda
pinin4fjords Mar 13, 2024
fd25d09
Conda differs in floating point precision
pinin4fjords Mar 13, 2024
6254042
Update nf-test.config
pinin4fjords Mar 14, 2024
649e5c3
Apply suggestions from code review
pinin4fjords Mar 14, 2024
c068c11
Update modules/nf-core/ribotricer/detectorfs/main.nf
pinin4fjords Mar 14, 2024
d74c37c
Clarify language around strandedness inference
pinin4fjords Mar 14, 2024
2f7fbea
default -> filtered
pinin4fjords Mar 14, 2024
94d3e4a
Fix typo
pinin4fjords Mar 14, 2024
2964fcd
ifelse -> switch to appease @adamrtalbot
pinin4fjords Mar 14, 2024
de50acb
Merge branch 'ribotricer/detectorfs' of github.com:nf-core/modules in…
pinin4fjords Mar 14, 2024
6c7c120
variables need to be defined outsite switch
pinin4fjords Mar 14, 2024
9db37ff
unstranded is broken
pinin4fjords Mar 14, 2024
ee96988
More tests
pinin4fjords Mar 14, 2024
8b79666
Appease eclint
pinin4fjords Mar 14, 2024
9244a25
Fix configs
pinin4fjords Mar 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/ribotricer/detectorfs/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the RIBOTRICER_DETECTORFS process (referenced from
# main.nf via `conda "${moduleDir}/environment.yml"`).
name: "ribotricer_detectorfs"
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Pinned to the same ribotricer release (1.3.3) as the container images named in main.nf.
- "bioconda::ribotricer=1.3.3"
76 changes: 76 additions & 0 deletions modules/nf-core/ribotricer/detectorfs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Run `ribotricer detect-orfs` on a riboseq BAM against a set of candidate ORFs.
//
// Inputs:
//   meta, bam, bai        - sample meta map plus the BAM and its index; meta.id is
//                           used as the default output prefix and meta.strandedness
//                           (forward / reverse / unstranded) selects --stranded.
//   meta2, candidate_orfs - reference meta map plus the candidate ORF file that is
//                           passed to --ribotricer_index.
//
// Outputs are the files ribotricer writes under `<prefix>_*`, plus versions.yml.
// `protocol` and `psite_offsets` are declared optional.
process RIBOTRICER_DETECTORFS {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ribotricer:1.3.3--pyhdfd78af_0':
        'biocontainers/ribotricer:1.3.3--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(bam), path(bai)
    tuple val(meta2), path(candidate_orfs)

    output:
    tuple val(meta), path('*_protocol.txt')            , emit: protocol, optional: true
    tuple val(meta), path('*_bam_summary.txt')         , emit: bam_summary
    tuple val(meta), path('*_read_length_dist.pdf')    , emit: read_length_dist
    tuple val(meta), path('*_metagene_profiles_5p.tsv'), emit: metagene_profile_5p
    tuple val(meta), path('*_metagene_profiles_3p.tsv'), emit: metagene_profile_3p
    tuple val(meta), path('*_metagene_plots.pdf')      , emit: metagene_plots
    tuple val(meta), path('*_psite_offsets.txt')       , emit: psite_offsets, optional: true
    tuple val(meta), path('*_pos.wig')                 , emit: pos_wig
    tuple val(meta), path('*_neg.wig')                 , emit: neg_wig
    tuple val(meta), path('*_translating_ORFs.tsv')    , emit: orfs
    path "versions.yml"                                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Map the sample's strandedness onto ribotricer's --stranded option
    // (accepted values: yes / no / reverse). Any other or missing value leaves
    // the option off, so no --stranded flag is passed to ribotricer.
    def strandedness_cmd = ''
    if (meta.strandedness == 'forward') {
        strandedness_cmd = '--stranded yes'
    } else if (meta.strandedness == 'reverse') {
        strandedness_cmd = '--stranded reverse'
    } else if (meta.strandedness == 'unstranded') {
        strandedness_cmd = '--stranded no'
    }
    """
    ribotricer detect-orfs \\
        --bam $bam \\
        --ribotricer_index $candidate_orfs \\
        --prefix $prefix \\
        $strandedness_cmd \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ribotricer: \$(ribotricer --version 2>&1 | grep ribotricer | sed '1!d ; s/ribotricer, version //')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Create an empty placeholder for every declared output, including the
    // optional ones, so all channels emit in stub runs.
    """
    touch ${prefix}_protocol.txt
    touch ${prefix}_bam_summary.txt
    touch ${prefix}_read_length_dist.pdf
    touch ${prefix}_metagene_profiles_5p.tsv
    touch ${prefix}_metagene_profiles_3p.tsv
    touch ${prefix}_metagene_plots.pdf
    touch ${prefix}_psite_offsets.txt
    touch ${prefix}_pos.wig
    touch ${prefix}_neg.wig
    touch ${prefix}_translating_ORFs.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ribotricer: \$(ribotricer --version 2>&1 | grep ribotricer | sed '1!d ; s/ribotricer, version //')
    END_VERSIONS
    """
}
99 changes: 99 additions & 0 deletions modules/nf-core/ribotricer/detectorfs/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "ribotricer_detectorfs"
description: "Accurate detection of short and long active ORFs using Ribo-seq data"
keywords:
- riboseq
- orf
- genomics
tools:
- "ribotricer":
description: "Python package to detect translating ORF from Ribo-seq data"
homepage: "https://github.com/smithlabcode/ribotricer"
documentation: "https://github.com/smithlabcode/ribotricer"
tool_dev_url: "https://github.com/smithlabcode/ribotricer"
doi: "10.1093/bioinformatics/btz878"
licence: ["GNU General Public v3 (GPL v3)"]

input:
- meta:
type: map
description: |
Groovy Map containing riboseq sample information
pinin4fjords marked this conversation as resolved.
Show resolved Hide resolved
e.g. `[ id:'sample1', single_end:false ]`
pinin4fjords marked this conversation as resolved.
Show resolved Hide resolved
- bam:
type: file
description: Sorted riboseq BAM file(s)
pattern: "*.{bam}"
- bai:
type: file
description: Index for sorted riboseq bam file(s)
pattern: "*.{bai}"
- meta2:
type: map
description: |
Groovy Map containing reference information for the candidate ORFs
pinin4fjords marked this conversation as resolved.
Show resolved Hide resolved
e.g. `[ id:'Ensembl human v.111' ]`
- candidate_orfs:
type: file
description: "TSV file with candidate ORFs from 'ribotricer prepareorfs'"
pattern: "*.tsv"

output:
- meta:
type: map
description: |
Groovy Map containing riboseq sample information
e.g. `[ id:'sample1', single_end:false ]`
- protocol:
type: file
description: "txt file containing inferred protocol (if not specified, optional)"
pinin4fjords marked this conversation as resolved.
Show resolved Hide resolved
pinin4fjords marked this conversation as resolved.
Show resolved Hide resolved
pattern: "*_protocol.txt"
- bam_summary:
type: file
description: "Text summary of reads found in the BAM"
pattern: "*_bam_summary.txt"
- read_length_dist:
type: file
description: "PDF-format read length distribution as quality control"
pattern: "*_read_length_dist.pdf"
- metagene_profile_5p:
type: file
description: "Metagene profile aligning with the start codon"
pattern: "*_metagene_profiles_5p.tsv"
- metagene_profile_3p:
type: file
description: "Metagene profile aligning with the stop codon"
pattern: "*_metagene_profiles_3p.tsv"
- metagene_plots:
type: file
description: "Metagene plots for quality control"
pattern: "*_metagene_plots.pdf"
- psite_offsets:
type: file
description: |
If the P-site offsets are not provided, txt file containing the
derived relative offsets
pattern: "*_psite_offsets.txt"
- pos_wig:
type: file
description: "Positive strand WIG file for visualization in Genome Browser"
pattern: "*_pos.wig"
- neg_wig:
type: file
description: "Negative strand WIG file for visualization in Genome Browser"
pattern: "*_neg.wig"
- orfs:
type: file
description: |
TSV with ORFs assessed as translating in this BAM file. You can output
all ORFs regardless of the translation status with option --report_all
pattern: "*_translating_ORFs.tsv"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "@pinin4fjords"
maintainers:
- "@pinin4fjords"
190 changes: 190 additions & 0 deletions modules/nf-core/ribotricer/detectorfs/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
nextflow_process {

name "Test Process RIBOTRICER_DETECTORFS"
script "../main.nf"
process "RIBOTRICER_DETECTORFS"

tag "modules"
tag "modules_nfcore"
tag "ribotricer"
tag "ribotricer/prepareorfs"
tag "ribotricer/detectorfs"
tag "gunzip"

// Shared fixtures: build the candidate ORF file that every test below passes
// to the process as input[1].
setup {
// Step 1: decompress the chr20 genome FASTA from the test-data repository.
run("GUNZIP") {
script "modules/nf-core/gunzip/main.nf"
process {
"""
input[0] = [
[ ],
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz", checkIfExists: true)
]
"""
}
}
// Step 2: run RIBOTRICER_PREPAREORFS on the decompressed FASTA plus the
// chr20 GTF; its `candidate_orfs` output is consumed by the tests.
run("RIBOTRICER_PREPAREORFS") {
script "modules/nf-core/ribotricer/prepareorfs/main.nf"
process {
"""
input[0] = GUNZIP.out.gunzip.map{[
[id:'homo_sapiens_chr20'],
it[1],
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true)
]}
"""
}
}

}

// Real (non-stub) run with the settings from nextflow.default.config on a
// forward-stranded, single-end chr20 riboseq BAM.
test("human chr20 - bam - default") {

config './nextflow.default.config'

when {
process {
"""
input[0] = [
[ id:'test', single_end:true, strandedness:'forward' ], // meta map
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true)
]
input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs
"""
}
}

// Most outputs are snapshotted in full. The two PDFs are snapshotted by file
// name only, and metagene_profile_3p / orfs are checked for a known substring
// instead of a full snapshot.
// NOTE(review): the substring checks presumably avoid floating-point
// differences between environments — confirm.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.protocol).match('protocol') },
{ assert snapshot(process.out.bam_summary).match('bam_summary') },
{ assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist") },
{ assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p') },
{ assert path(process.out.metagene_profile_3p[0][1]).getText().contains("26\t0\t[0.0, 0.0, 0.0, 0.0, 2.6132404181184667") },
{ assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots") },
{ assert snapshot(process.out.psite_offsets).match('psite_offsets') },
{ assert snapshot(process.out.pos_wig).match('pos_wig') },
{ assert snapshot(process.out.neg_wig).match('neg_wig') },
{ assert path(process.out.orfs[0][1]).getText().contains("ENST00000370861_62136860_62140830_534\tannotated\ttranslating\t0.515078753637712") },
{ assert snapshot(process.out.versions).match('versions') }
)
}
}

// Real (non-stub) run with the settings from nextflow.all.config on the same
// forward-stranded, single-end chr20 riboseq BAM as the default test.
// NOTE(review): nextflow.all.config presumably adds `--report_all` via
// ext.args — confirm against the config file.
test("human chr20 - bam - all") {

    config './nextflow.all.config'

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:true, strandedness:'forward' ], // meta map
                file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true),
                file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true)
            ]
            input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs
            """
        }
    }

    // PDFs are snapshotted by file name only; metagene_profile_3p and orfs are
    // checked for a known substring rather than snapshotted in full.
    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.protocol).match('protocol_all') },
            { assert snapshot(process.out.bam_summary).match('bam_summary_all') },
            { assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_all") },
            { assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_all') },
            { assert path(process.out.metagene_profile_3p[0][1]).getText().contains("26\t0\t[0.0, 0.0, 0.0, 0.0, 2.6132404181184667") },
            { assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_all") },
            { assert snapshot(process.out.psite_offsets).match('psite_offsets_all') },
            { assert snapshot(process.out.pos_wig).match('pos_wig_all') },
            { assert snapshot(process.out.neg_wig).match('neg_wig_all') },
            { assert path(process.out.orfs[0][1]).getText().contains("ENST00000370861_62136860_62140830_534\tannotated\ttranslating\t0.515078753637712") },
            { assert snapshot(process.out.versions).match('versions_all') }
        )
    }

}

// Stub-mode counterpart of the default test: `-stub` makes Nextflow run the
// process's stub block, so every output is a placeholder file.
test("human chr20 - bam - default - stub") {

options '-stub'

config './nextflow.default.config'

when {
process {
"""
input[0] = [
[ id:'test', single_end:true, strandedness:'forward' ], // meta map
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true)
]
input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs
"""
}
}

// Unlike the real-run tests, metagene_profile_3p and orfs are snapshotted in
// full here; the PDFs are still snapshotted by file name only.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.protocol).match('protocol_stub') },
{ assert snapshot(process.out.bam_summary).match('bam_summary_stub') },
{ assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_stub") },
{ assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_stub') },
{ assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_stub') },
{ assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_stub") },
{ assert snapshot(process.out.psite_offsets).match('psite_offsets_stub') },
{ assert snapshot(process.out.pos_wig).match('pos_wig_stub') },
{ assert snapshot(process.out.neg_wig).match('neg_wig_stub') },
{ assert snapshot(process.out.orfs).match('orfs_stub') },
{ assert snapshot(process.out.versions).match('versions_stub') }
)
}
}

// Stub-mode counterpart of the "all" test: same inputs and nextflow.all.config,
// run with `-stub` so only the stub block's placeholder outputs are produced.
test("human chr20 - bam - all - stub") {

options '-stub'

config './nextflow.all.config'

when {
process {
"""
input[0] = [
[ id:'test', single_end:true, strandedness:'forward' ], // meta map
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780888_chr20.bam.bai", checkIfExists: true)
]
input[1] = RIBOTRICER_PREPAREORFS.out.candidate_orfs
"""
}
}

// All outputs are snapshotted in full except the PDFs, which are snapshotted
// by file name only.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.protocol).match('protocol_all_stub') },
{ assert snapshot(process.out.bam_summary).match('bam_summary_all_stub') },
{ assert snapshot(file(process.out.read_length_dist[0][1]).name).match("read_length_dist_all_stub") },
{ assert snapshot(process.out.metagene_profile_5p).match('metagene_profile_5p_all_stub') },
{ assert snapshot(process.out.metagene_profile_3p).match('metagene_profile_3p_all_stub') },
{ assert snapshot(file(process.out.metagene_plots[0][1]).name).match("metagene_plots_all_stub") },
{ assert snapshot(process.out.psite_offsets).match('psite_offsets_all_stub') },
{ assert snapshot(process.out.pos_wig).match('pos_wig_all_stub') },
{ assert snapshot(process.out.neg_wig).match('neg_wig_all_stub') },
{ assert snapshot(process.out.orfs).match('orfs_all_stub') },
{ assert snapshot(process.out.versions).match('versions_all_stub') }
)
}

}
}




Loading
Loading