From 678370d23635cb003dbade11c352ee7379ad9993 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 30 Aug 2022 13:35:05 +0000 Subject: [PATCH 01/56] Template update for nf-core/tools version 2.5 --- .editorconfig | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 3 +- .github/workflows/ci.yml | 23 ++------ .github/workflows/linting.yml | 38 +++++++++++-- CITATION.cff | 56 +++++++++++++++++++ README.md | 21 +++---- assets/email_template.txt | 1 - bin/check_samplesheet.py | 41 +++++++------- conf/base.config | 5 ++ docs/usage.md | 12 ++-- lib/WorkflowMain.groovy | 9 ++- lib/WorkflowRnadnavar.groovy | 5 +- main.nf | 2 +- modules.json | 22 +++++--- .../templates/dumpsoftwareversions.py | 14 +++-- nextflow.config | 21 ++++++- 16 files changed, 184 insertions(+), 91 deletions(-) create mode 100644 CITATION.cff diff --git a/.editorconfig b/.editorconfig index b6b3190..b78de6e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 62b5c4c..419b08a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/rnad - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnadnavar/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/rnadnavar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnadnavar/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/rnadnavar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4fd81b..5e8ce54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,6 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none jobs: test: @@ -20,27 +19,17 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "21.10.3" + - "latest-everything" steps: - name: Check out pipeline code uses: actions/checkout@v2 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358de..8a5ce69 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,6 +35,36 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: @@ -42,15 +72,11 @@ jobs: uses: actions/checkout@v2 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v3 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..4533e2f --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." 
+version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +prefered-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." + issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/README.md b/README.md index 291a13f..80cebb0 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,14 @@ # ![nf-core/rnadnavar](docs/images/nf-core-rnadnavar_logo_light.png#gh-light-mode-only) ![nf-core/rnadnavar](docs/images/nf-core-rnadnavar_logo_dark.png#gh-dark-mode-only) -[![GitHub Actions CI Status](https://github.com/nf-core/rnadnavar/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/rnadnavar/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/rnadnavar/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/rnadnavar/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/rnadnavar/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnadnavar/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/rnadnavar) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnadnavar-4A154B?logo=slack)](https://nfcore.slack.com/channels/rnadnavar) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23rnadnavar-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnadnavar)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -25,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnadnavar/results). +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnadnavar/results). ## Pipeline summary @@ -42,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. Download the pipeline and test it on a minimal dataset with a single command: - ```console + ```bash nextflow run nf-core/rnadnavar -profile test,YOURPROFILE --outdir ``` @@ -57,7 +52,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - ```console + ```bash nextflow run nf-core/rnadnavar --input samplesheet.csv --outdir --genome GRCh37 -profile ``` diff --git a/assets/email_template.txt b/assets/email_template.txt index 24890ad..e83e595 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/rnadnavar v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3652c63..9a8b896 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -11,7 +11,6 @@ from collections import Counter from pathlib import Path - logger = logging.getLogger() @@ -79,13 +78,15 @@ def validate_and_transform(self, row): def _validate_sample(self, row): """Assert that the sample name exists and convert spaces to underscores.""" - assert len(row[self._sample_col]) > 0, "Sample input is required." + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") # Sanitize samples slightly. row[self._sample_col] = row[self._sample_col].replace(" ", "_") def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." + if len(row[self._first_col]) <= 0: + raise AssertionError("At least the first FASTQ file is required.") self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -97,36 +98,34 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. 
Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] - ), "FASTQ pairs must have the same file extensions." + if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) + if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): + raise AssertionError( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" + ) def validate_unique_samples(self): """ Assert that the combination of sample name and FASTQ filename is unique. - In addition to the validation, also rename the sample if more than one sample, - FASTQ file combination exists. + In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." - if len({pair[0] for pair in self._seen}) < len(self._seen): - counts = Counter(pair[0] for pair in self._seen) - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - if counts[sample] > 1: - row[self._sample_col] = f"{sample}_T{seen[sample]}" + if len(self._seen) != len(self.modified): + raise AssertionError("The pair of sample name and FASTQ must be unique.") + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + row[self._sample_col] = f"{sample}_T{seen[sample]}" def read_head(handle, num_lines=10): diff --git a/conf/base.config b/conf/base.config index 4cbf9ab..ede794b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,11 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/docs/usage.md b/docs/usage.md index 3d8ffad..219b1d9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,7 +12,7 @@ You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. 
-```console +```bash --input '[path to samplesheet file]' ``` @@ -56,7 +56,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: -```console +```bash nextflow run nf-core/rnadnavar --input samplesheet.csv --outdir --genome GRCh37 -profile docker ``` @@ -64,9 +64,9 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: -```console +```bash work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) + # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -75,7 +75,7 @@ work # Directory containing the nextflow working files When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/rnadnavar ``` @@ -251,6 +251,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 964654f..0ee1e14 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -59,6 +59,7 @@ class WorkflowMain { } // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) // Check that a -profile or Nextflow config has been provided to run the pipeline @@ -78,17 +79,15 @@ class WorkflowMain { System.exit(1) } } - // // Get attribute from genome config file e.g. fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } } diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index a31dbbf..324eb1f 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -10,6 +10,7 @@ class WorkflowRnadnavar { public static void initialise(params, log) { genomeExistsError(params, log) + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
System.exit(1) @@ -41,9 +42,7 @@ class WorkflowRnadnavar { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text - } - - // + }// // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { diff --git a/main.nf b/main.nf index fb6e1e4..4cea479 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,7 @@ nf-core/rnadnavar ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/rnadnavar - Website: https://nf-co.re/rnadnavar +Website: https://nf-co.re/rnadnavar Slack : https://nfcore.slack.com/channels/rnadnavar ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index 12311e4..8b5feee 100644 --- a/modules.json +++ b/modules.json @@ -3,14 +3,20 @@ "homePage": "https://github.com/nf-core/rnadnavar", "repos": { "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "custom/dumpsoftwareversions": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "fastqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "multiqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index d139039..787bdb7 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): html = [ @@ -58,11 +59,12 @@ def _make_versions_html(versions): for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. 
" + ) except KeyError: versions_by_module[module] = process_versions diff --git a/nextflow.config b/nextflow.config index 5c820e2..051f286 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,11 +13,11 @@ params { // Input options input = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - // MultiQC options multiqc_config = null multiqc_title = null @@ -37,6 +37,7 @@ params { schema_ignore_params = 'genomes' enable_conda = false + // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -45,6 +46,7 @@ params { config_profile_url = null config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -72,6 +74,7 @@ try { // } + profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -82,6 +85,15 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } docker { docker.enabled = true docker.userEmulation = true @@ -119,10 +131,16 @@ profiles { podman.enabled = false shifter.enabled = false } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -130,6 +148,7 @@ if (!params.igenomes_ignore) { params.genomes = [:] } + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. From fd51fc178995eaad2eebc8dbabb644f0fd728728 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 1 Sep 2022 13:31:17 +0000 Subject: [PATCH 02/56] Template update for nf-core/tools version 2.5.1 --- bin/check_samplesheet.py | 9 ++++++--- pyproject.toml | 10 ++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9a8b896..11b1557 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -98,7 +98,9 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. 
Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + first_col_suffix = Path(row[self._first_col]).suffixes[-2:] + second_col_suffix = Path(row[self._second_col]).suffixes[-2:] + if first_col_suffix != second_col_suffix: raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True @@ -157,7 +159,7 @@ def sniff_format(handle): handle.seek(0) sniffer = csv.Sniffer() if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") + logger.critical("The given sample sheet does not appear to contain a header.") sys.exit(1) dialect = sniffer.sniff(peek) return dialect @@ -195,7 +197,8 @@ def check_samplesheet(file_in, file_out): reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) # Validate each row. checker = RowChecker() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0d62beb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. +[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 From bc5d3666d1ed22de8b5336a4a7d039f016e6d8a9 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 4 Oct 2022 22:02:29 +0000 Subject: [PATCH 03/56] Template update for nf-core/tools version 2.6 --- .github/workflows/awsfulltest.yml | 4 ++ .github/workflows/awstest.yml | 4 ++ .prettierignore | 1 + CITATION.cff | 8 +-- assets/adaptivecard.json | 67 +++++++++++++++++++ assets/methods_description_template.yml | 25 +++++++ assets/multiqc_config.yml | 6 +- docs/usage.md | 8 +++ lib/NfcoreTemplate.groovy | 55 +++++++++++++++ lib/Utils.groovy | 21 ++++-- lib/WorkflowRnadnavar.groovy | 19 ++++++ main.nf | 3 +- modules.json | 27 ++++---- .../custom/dumpsoftwareversions/main.nf | 8 +-- .../custom/dumpsoftwareversions/meta.yml | 0 .../templates/dumpsoftwareversions.py | 0 modules/nf-core/{modules => }/fastqc/main.nf | 12 ++++ modules/nf-core/{modules => }/fastqc/meta.yml | 0 modules/nf-core/modules/multiqc/main.nf | 31 --------- modules/nf-core/multiqc/main.nf | 53 +++++++++++++++ .../nf-core/{modules => }/multiqc/meta.yml | 15 +++++ nextflow.config | 5 +- nextflow_schema.json | 18 +++++ workflows/rnadnavar.nf | 26 ++++--- 24 files changed, 345 insertions(+), 71 deletions(-) create mode 100644 assets/adaptivecard.json create mode 100644 assets/methods_description_template.yml mode change 100755 => 100644 lib/Utils.groovy rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/main.nf (79%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/meta.yml (100%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py (100%) rename modules/nf-core/{modules => }/fastqc/main.nf (85%) rename modules/nf-core/{modules => 
}/fastqc/meta.yml (100%) delete mode 100644 modules/nf-core/modules/multiqc/main.nf create mode 100644 modules/nf-core/multiqc/main.nf rename modules/nf-core/{modules => }/multiqc/meta.yml (73%) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 40381b6..dbe2c65 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,3 +28,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnadnavar/results-${{ github.sha }}" } profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index e7fffec..e0b93e2 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,3 +23,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnadnavar/results-test-${{ github.sha }}" } profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.prettierignore b/.prettierignore index d0e7ae5..eb74a57 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ email_template.html +adaptivecard.json .nextflow* work/ data/ diff --git a/CITATION.cff b/CITATION.cff index 4533e2f..017666c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,8 +13,8 @@ authors: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen @@ -39,8 +39,8 @@ prefered-citation: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..f95e198 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/rnadnavar v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..a1032c9 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-rnadnavar-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/rnadnavar Methods Description" +section_href: "https://github.com/nf-core/rnadnavar" +plot_type: "html" +## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/rnadnavar v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 399d270..79fc3e1 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,11 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - software_versions: + "nf-core-rnadnavar-methods-description": order: -1000 - "nf-core-rnadnavar-summary": + software_versions: order: -1001 + "nf-core-rnadnavar-summary": + order: -1002 export_plots: true diff --git a/docs/usage.md b/docs/usage.md index 219b1d9..edbf390 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -237,6 +237,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b..27feb00 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -145,6 +145,61 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = 
json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd..8d030f4 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index 324eb1f..0a66f15 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the workflow/rnadnavar.nf in the nf-core/rnadnavar pipeline // +import groovy.text.SimpleTemplateEngine + class WorkflowRnadnavar { // @@ -42,6 +44,23 @@ class WorkflowRnadnavar { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html }// // Exit pipeline if incorrect --genome key provided // diff --git a/main.nf b/main.nf index 4cea479..5f5c90c 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,8 @@ nf-core/rnadnavar ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/rnadnavar -Website: https://nf-co.re/rnadnavar + + Website: https://nf-co.re/rnadnavar Slack : https://nfcore.slack.com/channels/rnadnavar ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index 8b5feee..206ad68 100644 --- a/modules.json +++ b/modules.json @@ -2,20 +2,21 @@ "name": "nf-core/rnadnavar", "homePage": "https://github.com/nf-core/rnadnavar", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d510..cebb6e0 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf similarity index 85% rename from modules/nf-core/modules/fastqc/main.nf rename to modules/nf-core/fastqc/main.nf index ed6b8c5..0573036 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1264aac..0000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 0000000..a8159a5 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,53 @@ +process MULTIQC { + label 'process_single' + + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 73% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891e..ebc29b2 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -12,11 +12,25 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file @@ -38,3 +52,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 051f286..452a963 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,7 +21,9 @@ params { // MultiQC options multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -31,6 +33,7 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false validate_params = true show_hidden_params = false @@ -74,7 +77,6 @@ try { // } - profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -189,6 +191,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' version = '1.0dev' + doi = '' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index c5e08ff..dee5fcb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -213,12 +213,30 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. 
Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 582784b..43f631d 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -23,8 +23,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,9 +48,9 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,15 +91,20 @@ workflow RNADNAVAR { workflow_summary = WorkflowRnadnavar.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) + methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( 
- ch_multiqc_files.collect() + ch_multiqc_files.collect(), + ch_multiqc_config.collect().ifEmpty([]), + ch_multiqc_custom_config.collect().ifEmpty([]), + ch_multiqc_logo.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() ch_versions = ch_versions.mix(MULTIQC.out.versions) @@ -114,6 +121,9 @@ workflow.onComplete { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } } /* From defce219a347efca30eb3abf111a57190024beec Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 8 Dec 2022 13:13:40 +0000 Subject: [PATCH 04/56] Template update for nf-core/tools version 2.7.1 --- .devcontainer/devcontainer.json | 27 +++++++++++++ .gitattributes | 1 + .github/CONTRIBUTING.md | 16 ++++++++ .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/workflows/ci.yml | 8 +++- .github/workflows/fix-linting.yml | 6 +-- .github/workflows/linting.yml | 18 +++++---- .github/workflows/linting_comment.yml | 2 +- .prettierignore | 2 + CITATION.cff | 56 --------------------------- README.md | 4 +- assets/slackreport.json | 34 ++++++++++++++++ docs/usage.md | 24 +++++++----- lib/NfcoreSchema.groovy | 1 - lib/NfcoreTemplate.groovy | 41 +++++++++++++++----- lib/WorkflowMain.groovy | 18 ++++++--- modules.json | 9 +++-- modules/local/samplesheet_check.nf | 4 ++ nextflow.config | 12 ++++-- nextflow_schema.json | 8 +++- workflows/rnadnavar.nf | 11 +++--- 21 files changed, 193 insertions(+), 111 deletions(-) create mode 100644 .devcontainer/devcontainer.json delete mode 100644 CITATION.cff create mode 100644 assets/slackreport.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..ea27a58 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.gitattributes b/.gitattributes index 050bb12..7a2dabc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 19446fd..4b8b024 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -101,3 +101,19 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/rnadnavar/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) +- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index b5af267..289d8c8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 22.10.1)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5e8ce54..354e966 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,10 @@ on: env: NXF_ANSI_LOG: false +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: name: Run pipeline with test data @@ -20,11 +24,11 @@ jobs: strategy: matrix: NXF_VER: - - "21.10.3" + - "22.10.1" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 2a12fe4..cac0662 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "name=result::pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "name=result::fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a5ce69..858d622 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. 
on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -38,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check code lints with Black uses: psf/black@stable @@ -69,12 +71,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: python-version: "3.7" architecture: "x64" @@ -97,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f6..3963518 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "name=pr_number::$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.prettierignore b/.prettierignore index eb74a57..437d763 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ email_template.html adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -8,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index 017666c..0000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -cff-version: 1.2.0 -message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" -authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven -title: "The nf-core framework for community-curated bioinformatics pipelines." 
-version: 2.4.1 -doi: 10.1038/s41587-020-0439-x -date-released: 2022-05-16 -url: https://github.com/nf-core/tools -prefered-citation: - type: article - authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven - doi: 10.1038/s41587-020-0439-x - journal: nature biotechnology - start: 276 - end: 278 - title: "The nf-core framework for community-curated bioinformatics pipelines." - issue: 3 - volume: 38 - year: 2020 - url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/README.md b/README.md index 80cebb0..66c2fdc 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnadnavar/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -31,7 +31,7 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Quick Start -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. 
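As an illustrative sketch only (this is not something the template patches add, and it simply mirrors the Quick Start steps shown in the README context above), a manual Nextflow installation and version check against the new `>=22.10.1` minimum might look like:

```bash
# Illustrative sketch: install the Nextflow launcher with the standard
# installer from nextflow.io, move it onto the PATH, and confirm that the
# reported version satisfies the new minimum (>=22.10.1).
curl -s https://get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
nextflow -version
```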
diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..043d02f --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/docs/usage.md b/docs/usage.md index edbf390..2e9c566 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -83,9 +83,9 @@ nextflow pull nf-core/rnadnavar It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/rnadnavar releases page](https://github.com/nf-core/rnadnavar/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/rnadnavar releases page](https://github.com/nf-core/rnadnavar/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. ## Core Nextflow arguments @@ -95,7 +95,7 @@ This version number will be logged in reports when you run the pipeline, so that Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. 
When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. @@ -104,8 +104,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -118,9 +121,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters ### `-resume` @@ -169,8 +169,14 @@ Work dir: Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` ``` +#### For beginners + +A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. + +#### Advanced option on process level + To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. 
The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. @@ -189,7 +195,7 @@ process { > > If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. -### Updating containers +### Updating containers (advanced users) The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index b3d092f..33cd4f6 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -46,7 +46,6 @@ class NfcoreSchema { 'quiet', 'syslog', 'v', - 'version', // Options for `nextflow run` command 'ansi', diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 27feb00..25a0a74 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -32,6 +32,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +80,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -146,10 +165,10 @@ class NfcoreTemplate { } // - // Construct and send adaptive card - // https://adaptivecards.io + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack // - public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + public static void IM_notification(workflow, params, summary_params, projectDir, log) { def hook_url = params.hook_url def summary = [:] @@ -170,7 +189,7 @@ class NfcoreTemplate { misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] - msg_fields['version'] = workflow.manifest.version + msg_fields['version'] = NfcoreTemplate.version(workflow) msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success msg_fields['dateComplete'] = workflow.complete @@ -178,13 +197,16 @@ class NfcoreTemplate { msg_fields['exitStatus'] = workflow.exitStatus msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") msg_fields['projectDir'] = workflow.projectDir msg_fields['summary'] = summary << misc_fields // Render the JSON template def engine = new groovy.text.GStringTemplateEngine() - def hf = new File("$projectDir/assets/adaptivecard.json") + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") def json_template = engine.createTemplate(hf).make(msg_fields) def json_message = json_template.toString() @@ -209,7 +231,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -297,6 +319,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -305,7 +328,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 0ee1e14..b3dbcb9 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -19,7 +19,7 @@ class WorkflowMain { } // - // Print help to screen if required + // Generate help string // public static String help(workflow, params, log) { def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" @@ -32,7 +32,7 @@ class WorkflowMain { } // - // Print parameter summary log to screen + // Generate parameter summary log string // public static String paramsSummaryLog(workflow, params, log) { def summary_log = '' @@ -53,15 +53,21 @@ class WorkflowMain { System.exit(0) } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/modules.json b/modules.json index 206ad68..d7a351b 100644 --- a/modules.json +++ b/modules.json @@ -7,15 +7,18 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] }, "multiqc": { "branch": "master", - 
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] } } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index af32d3c..85b6baa 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,5 +1,6 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_single' conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -13,6 +14,9 @@ process SAMPLESHEET_CHECK { path '*.csv' , emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/rnadnavar/bin/ """ check_samplesheet.py \\ diff --git a/nextflow.config b/nextflow.config index 452a963..dbac55c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,7 @@ params { monochrome_logs = false hook_url = null help = false + version = false validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' @@ -81,6 +82,7 @@ profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false @@ -89,6 +91,7 @@ profiles { } mamba { params.enable_conda = true + conda.enabled = true conda.useMamba = true docker.enabled = false singularity.enabled = false @@ -104,6 +107,9 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } singularity { singularity.enabled = true singularity.autoMounts = true @@ -185,11 +191,11 @@ dag { manifest { name = 'nf-core/rnadnavar' - author = 'Raquel Manzano-Garcia, Praveen Raj, Maxime U Garcia' + author = """Raquel Manzano-Garcia, Praveen Raj, Maxime U Garcia""" homePage = 'https://github.com/nf-core/rnadnavar' - description = 'Pipeline for RNA and DNA integrated analysis for somatic mutation detection' + description = """Pipeline for RNA and DNA integrated analysis for somatic mutation detection""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' + nextflowVersion = '!>=22.10.1' version = '1.0dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index dee5fcb..abc8bce 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -176,6 +176,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -217,7 +223,7 @@ "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "help_text": "Incoming hook URL for messaging service. 
Currently, MS Teams and Slack are supported.", "hidden": true }, "multiqc_config": { diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 43f631d..b41cbaa 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -82,7 +82,7 @@ workflow RNADNAVAR { ch_versions = ch_versions.mix(FASTQC.out.versions.first()) CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') + ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') ) // @@ -102,12 +102,11 @@ workflow RNADNAVAR { MULTIQC ( ch_multiqc_files.collect(), - ch_multiqc_config.collect().ifEmpty([]), - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_multiqc_logo.collect().ifEmpty([]) + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() ) multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) } /* @@ -122,7 +121,7 @@ workflow.onComplete { } NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } } From ba5ec3f3640731aba15eff359e38775c4fe94e5c Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Mon, 19 Dec 2022 12:07:47 +0000 Subject: [PATCH 05/56] Template update for nf-core/tools version 2.7.2 --- .github/workflows/fix-linting.yml | 4 +- .github/workflows/linting_comment.yml | 2 +- lib/WorkflowMain.groovy | 2 +- modules.json | 6 +- modules/local/samplesheet_check.nf | 2 +- .../custom/dumpsoftwareversions/main.nf | 2 +- .../templates/dumpsoftwareversions.py | 99 ++++++++++--------- modules/nf-core/fastqc/main.nf | 40 +++----- modules/nf-core/multiqc/main.nf | 2 +- nextflow.config | 3 - nextflow_schema.json | 6 -- workflows/rnadnavar.nf | 2 +- 12 files changed, 82 insertions(+), 88 deletions(-) mode change 100644 => 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index cac0662..16c90f3 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "name=result::pass" >> $GITHUB_OUTPUT + echo "result=pass" >> $GITHUB_OUTPUT else - echo "name=result::fail" >> $GITHUB_OUTPUT + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 3963518..0bbcd30 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "name=pr_number::$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index b3dbcb9..fe5c2a2 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -72,7 +72,7 @@ class WorkflowMain { NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } diff --git a/modules.json b/modules.json index d7a351b..7e93635 100644 --- a/modules.json +++ 
b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 85b6baa..bcb3b85 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,7 +2,7 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_single' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'quay.io/biocontainers/python:3.8.3' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index cebb6e0..3df2176 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,7 +2,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.13" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100644 new mode 100755 index 787bdb7..e55b8d4 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,5 +1,9 @@ #!/usr/bin/env python + +"""Provide functions to merge multiple versions.yml files.""" + + import platform from textwrap import dedent @@ -7,6 +11,7 @@ def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" html = [ dedent( """\\ @@ -45,47 +50,53 @@ def _make_versions_html(versions): return "\\n".join(html) -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. 
" - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 0573036..9ae5838 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : 'quay.io/biocontainers/fastqc:0.11.9--0' }" @@ -20,30 +20,22 @@ process FASTQC { script: def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc $args --threads $task.cpus $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ stub: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index a8159a5..68f66be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,7 +1,7 @@ process MULTIQC { label 'process_single' - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.13" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" diff --git a/nextflow.config b/nextflow.config index dbac55c..42970a6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,7 +39,6 @@ params { validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' - enable_conda = false // Config options @@ -81,7 +80,6 @@ try { profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { - params.enable_conda = true conda.enabled = true docker.enabled = false singularity.enabled = false @@ -90,7 +88,6 @@ profiles { charliecloud.enabled = false } mamba { - params.enable_conda = true conda.enabled = true conda.useMamba = true docker.enabled = false diff --git a/nextflow_schema.json b/nextflow_schema.json index abc8bce..2ed8507 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -263,12 +263,6 @@ "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "enable_conda": { - "type": "boolean", - "description": "Run this workflow with Conda. 
You can also use '-profile conda' instead of providing this parameter.", - "hidden": true, - "fa_icon": "fas fa-bacon" } } } diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index b41cbaa..d43b66b 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -82,7 +82,7 @@ workflow RNADNAVAR { ch_versions = ch_versions.mix(FASTQC.out.versions.first()) CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') + ch_versions.unique().collectFile(name: 'collated_versions.yml') ) // From f6c964be4e86a2cbe07444b734974f1b81f15cbf Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 28 Apr 2023 14:25:51 +0000 Subject: [PATCH 06/56] Template update for nf-core/tools version 2.8 --- .editorconfig | 2 +- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 3 +- .github/workflows/awsfulltest.yml | 2 +- .github/workflows/awstest.yml | 2 +- .github/workflows/branch.yml | 2 +- .github/workflows/clean-up.yml | 24 ++++ .github/workflows/linting.yml | 2 +- .pre-commit-config.yaml | 5 + README.md | 74 ++++++---- bin/check_samplesheet.py | 3 - conf/base.config | 2 +- conf/igenomes.config | 8 ++ conf/test_full.config | 2 + docs/usage.md | 130 +++++------------- lib/NfcoreSchema.groovy | 4 +- lib/WorkflowMain.groovy | 13 +- lib/WorkflowRnadnavar.groovy | 12 +- main.nf | 1 - modules.json | 4 +- modules/local/samplesheet_check.nf | 2 +- .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 2 + modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 3 +- nextflow.config | 29 +++- tower.yml | 5 + 27 files changed, 193 insertions(+), 157 deletions(-) create mode 100644 .github/workflows/clean-up.yml create mode 100644 .pre-commit-config.yaml create mode 100644 tower.yml diff --git a/.editorconfig b/.editorconfig index b78de6e..b6b3190 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 289d8c8..b271a62 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -45,6 +45,6 @@ body: * Nextflow version _(eg. 22.10.1)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/rnadnavar _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 419b08a..189cebf 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,7 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/rnad - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! 
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnadnavar/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/rnadnavar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnadnavar/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/rnadnavar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index dbe2c65..11e9fb8 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v1 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index e0b93e2..cdeb29a 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,7 +12,7 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v1 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index ea3c12b..647a5d2 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/rnadnavar' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/rnadnavar ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/rnadnavar ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 0000000..694e90e --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 
+ close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 858d622..888cb4b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.8" architecture: "x64" - name: Install dependencies diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..0c31cdb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/README.md b/README.md index 66c2fdc..9674d40 100644 --- a/README.md +++ b/README.md @@ -8,57 +8,71 @@ [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/rnadnavar) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnadnavar-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnadnavar)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnadnavar-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnadnavar)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction - +**nf-core/rnadnavar** is a bioinformatics pipeline that ... -**nf-core/rnadnavar** is a bioinformatics best-practice analysis pipeline for Pipeline for RNA and DNA integrated analysis for somatic mutation detection. - -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - - - -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnadnavar/results). - -## Pipeline summary + + 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) -## Quick Start +## Usage + +> **Note** +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +> with `-profile test` before running the workflow on actual data. + + - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Now, you can run the pipeline using: - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. + -4. Start running your own analysis! +```bash +nextflow run nf-core/rnadnavar \ + -profile \ + --input samplesheet.csv \ + --outdir +``` - +> **Warning:** +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). - ```bash - nextflow run nf-core/rnadnavar --input samplesheet.csv --outdir --genome GRCh37 -profile - ``` +For more details, please refer to the [usage documentation](https://nf-co.re/rnadnavar/usage) and the [parameter documentation](https://nf-co.re/rnadnavar/parameters). -## Documentation +## Pipeline output -The nf-core/rnadnavar pipeline comes with documentation about the pipeline [usage](https://nf-co.re/rnadnavar/usage), [parameters](https://nf-co.re/rnadnavar/parameters) and [output](https://nf-co.re/rnadnavar/output). 
+To see the results of a test run with a full size dataset, refer to the [results](https://nf-co.re/rnadnavar/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/rnadnavar/output). ## Credits diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 11b1557..4a758fe 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -158,9 +158,6 @@ def sniff_format(handle): peek = read_head(handle) handle.seek(0) sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) dialect = sniffer.sniff(peek) return dialect diff --git a/conf/base.config b/conf/base.config index ede794b..483d09b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -15,7 +15,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/conf/igenomes.config b/conf/igenomes.config index 7a1b3ac..3f11437 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -36,6 +36,14 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" diff --git a/conf/test_full.config b/conf/test_full.config index 580b89c..f897552 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,6 +10,8 @@ ---------------------------------------------------------------------------------------- */ +cleanup = true + params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' diff --git a/docs/usage.md b/docs/usage.md index 2e9c566..f5c572d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -71,6 +71,29 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+> The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/rnadnavar -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +input: 'data' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -87,6 +110,10 @@ First, go to the [nf-core/rnadnavar releases page](https://github.com/nf-core/rn This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +> 💡 If you wish to share such a profile (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. + ## Core Nextflow arguments > **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). @@ -95,7 +122,7 @@ Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. @@ -119,8 +146,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -138,102 +167,19 @@ Specify the path to a specific config file (this is a core Nextflow command).
Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -#### For beginners - -A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. - -#### Advanced option on process level - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). 
-The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers (advanced users) - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: - - - For Docker: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. 
- ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom Containers - - For Singularity: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - - For Conda: +### Custom Tool Arguments - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 33cd4f6..9b34804 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -2,6 +2,7 @@ // This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
// +import nextflow.Nextflow import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader import org.everit.json.schema.ValidationException @@ -83,6 +84,7 @@ class NfcoreSchema { 'stub-run', 'test', 'w', + 'with-apptainer', 'with-charliecloud', 'with-conda', 'with-dag', @@ -177,7 +179,7 @@ class NfcoreSchema { } if (has_error) { - System.exit(1) + Nextflow.error('Exiting!') } } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index fe5c2a2..f819664 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/rnadnavar pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -21,7 +23,7 @@ class WorkflowMain { // // Generate help string // - public static String help(workflow, params, log) { + public static String help(workflow, params) { def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" def help_string = '' help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) @@ -34,7 +36,7 @@ class WorkflowMain { // // Generate parameter summary log string // - public static String paramsSummaryLog(workflow, params, log) { + public static String paramsSummaryLog(workflow, params) { def summary_log = '' summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) @@ -49,7 +51,7 @@ class WorkflowMain { public static void initialise(workflow, params, log) { // Print help to screen if required if (params.help) { - log.info help(workflow, params, log) + log.info help(workflow, params) System.exit(0) } @@ -61,7 +63,7 @@ class WorkflowMain { } // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) + log.info paramsSummaryLog(workflow, params) // Validate workflow parameters via the JSON schema if (params.validate_params) { @@ -81,8 +83,7 @@ class WorkflowMain { // Check input has been provided if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) + Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") } } // diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index 0a66f15..78c5233 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -2,6 +2,7 @@ // This file holds several functions specific to the workflow/rnadnavar.nf in the nf-core/rnadnavar pipeline // +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine class WorkflowRnadnavar { @@ -14,8 +15,7 @@ class WorkflowRnadnavar { if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) + Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
} } @@ -61,17 +61,19 @@ class WorkflowRnadnavar { def description_html = engine.createTemplate(methods_text).make(meta) return description_html - }// + } + + // // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + Nextflow.error(error_string) } } } diff --git a/main.nf b/main.nf index 5f5c90c..fb6e1e4 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,6 @@ nf-core/rnadnavar ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/rnadnavar - Website: https://nf-co.re/rnadnavar Slack : https://nfcore.slack.com/channels/rnadnavar ---------------------------------------------------------------------------------------- diff --git a/modules.json b/modules.json index 7e93635..7435cc8 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", "installed_by": ["modules"] }, "fastqc": { @@ -17,7 +17,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", "installed_by": ["modules"] } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index bcb3b85..402b5c6 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -5,7 +5,7 @@ process SAMPLESHEET_CHECK { conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 3df2176..800a609 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a..c32657d 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 68f66be..4b60474 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ebc29b2..f93b5ee 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -37,7 +38,7 @@ output: description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: diff --git a/nextflow.config b/nextflow.config index 42970a6..5ea0131 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,7 +78,11 @@ try { profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } conda { conda.enabled = true docker.enabled = false @@ -86,6 +90,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } mamba { conda.enabled = true @@ -95,14 +100,18 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true + docker.registry = 'quay.io' docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } arm { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' @@ -110,31 +119,49 @@ profiles { singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + podman.registry = 'quay.io' + conda.enabled = false 
docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } gitpod { executor.name = 'local' diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..787aedf --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" From 5cd65a2da455f22025ba236de5e54b791e460420 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 30 Jun 2023 16:15:40 +0000 Subject: [PATCH 07/56] Template update for nf-core/tools version 2.9 --- .github/CONTRIBUTING.md | 1 - .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/workflows/awsfulltest.yml | 11 +- .github/workflows/awstest.yml | 10 +- .github/workflows/ci.yml | 2 +- .gitpod.yml | 5 + CITATIONS.md | 6 + README.md | 6 +- assets/methods_description_template.yml | 12 +- assets/multiqc_config.yml | 4 +- assets/nf-core-rnadnavar_logo_light.png | Bin 11474 -> 70448 bytes assets/slackreport.json | 2 +- conf/test_full.config | 2 - docs/usage.md | 6 +- lib/NfcoreSchema.groovy | 530 ------------------------ lib/NfcoreTemplate.groovy | 2 +- lib/WorkflowMain.groovy | 37 -- lib/WorkflowRnadnavar.groovy | 45 +- main.nf | 16 + nextflow.config | 52 ++- nextflow_schema.json | 36 +- workflows/rnadnavar.nf | 25 +- 22 files changed, 175 insertions(+), 637 deletions(-) delete mode 100755 lib/NfcoreSchema.groovy diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4b8b024..a7899f7 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -116,4 +116,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index b271a62..05fc28a 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. 
Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 11e9fb8..ff2f598 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -22,13 +22,18 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnadnavar/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnadnavar/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index cdeb29a..51f83f8 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnadnavar/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnadnavar/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 354e966..39b4108 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ec..25488dc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/CITATIONS.md b/CITATIONS.md index 725cb57..cfc58cc 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,7 +12,10 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. 
PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools @@ -31,5 +34,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index 9674d40..41d5bd8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnadnavar/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -66,11 +66,11 @@ nextflow run nf-core/rnadnavar \ > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/rnadnavar/usage) and the [parameter documentation](https://nf-co.re/rnadnavar/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnadnavar/usage) and the [parameter documentation](https://nf-co.re/rnadnavar/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/rnadnavar/results) tab on the nf-core website pipeline page. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnadnavar/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/rnadnavar/output). diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index a1032c9..954e820 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/rnadnavar Methods Description" section_href: "https://github.com/nf-core/rnadnavar" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

    Methods
-   Data was processed using nf-core/rnadnavar v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).
+   Data was processed using nf-core/rnadnavar v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.
    The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:
    ${workflow.commandLine}
+   ${tool_citations}
    References
-   • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
-   • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
+   • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
+   • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
+   • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
+   • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
+   ${tool_bibliography}
    Notes:
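For context on how the `${...}` placeholders in this template are filled in: the methods-description helper in `lib/WorkflowRnadnavar.groovy` (see the hunk above, which calls `engine.createTemplate(methods_text).make(meta)`) binds a metadata map to the template text with Groovy's `SimpleTemplateEngine`. A minimal sketch, with a made-up template string and map purely for illustration:

```groovy
import groovy.text.SimpleTemplateEngine

// Hypothetical stand-ins for the real template text and metadata map
def methods_text = 'Data was processed using nf-core/rnadnavar v${version}. ${tool_citations}'
def meta         = [version: '1.0dev', tool_citations: 'FastQC (Andrews 2010) and MultiQC (Ewels et al. 2016).']

// Mirrors the engine.createTemplate(methods_text).make(meta) call in the pipeline code
def description_html = new SimpleTemplateEngine().createTemplate(methods_text).make(meta)
println description_html.toString()
```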
    diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 79fc3e1..9d498ef 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/rnadnavar + This report has been generated by the nf-core/rnadnavar analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-rnadnavar-methods-description": order: -1000 diff --git a/assets/nf-core-rnadnavar_logo_light.png b/assets/nf-core-rnadnavar_logo_light.png index b5c61660dc02a2c4fb1d2aae7604fb48d7096f20..89daddbf9ada26ed8404849bbf27e0e61043ef60 100644 GIT binary patch literal 70448 zcmeEt`9IX_`~RRQS?ZKSWhn*~p=96c5GGq9OZHNfecuPCQz(&w!1jG0qM))u18{N;szxKLnntC7*Z0~7*=;B1!jv^4p5Gb_^hQ29NgTYTSd@O|5 zS3HI44fR<@BwC_WweNAg^K`t?ay|Ua^`zuS;o*5X;p5j0nLR_3TdTw-*C$<<{Vk$; z9`%au>-b1%=CCl=x~!Jp!Br{RFpzjKp!3X+Tb;*QRKss@Kb){h^c+@seV?p-3zMBT zv9)Zlu({<`v3Pc z_~QTk@G~L)&kz6ShyTBGp!b^mFYH1%8g&}PE+NMRdy{Rgwkaa9QvrRQY2HJz)6`6H z9;J$!8p?T$p0J;N*Ye!J#ykH8M)iUCxVX5E!@pK|Rzc1t45Gxe-2E^GvsRWhY(8G+ zqQw!LH!;zIl^)J$8$X^IcCItbD!;xEnF(K*M&+X@JSfW~(%%?AjAD}I{FvT)!b;+< zT`3RVvHyDV#tr{F?pFSzX|tN{P8k1QHN6RI-9sVD@-lUEm%l0Eg`Uqb{CpIznVgoC zqUmmd=@Irb{U+;BnnF@S4JpEd=f8=bxA|}L4A?vsm9JMY?xEj%PSrz{(B9T6zCrD{ z5aNCa{cB^cli-wq*o{Dpv7Lu_ua|VKlQa68K&C3~Q72#9XybNMzba}b4=Acza~8q2n+%iDoFDn0jDk39X?^7A)!^mJ;E z5ekGVYdquWg)k>J@LX5^<&$Ub>jptvS20#izP!}h(}bdq;~{4o<`Z~-?Z6?eBvmOx zsE#!^me;!Al9p_BB9-oh+Bc@3zYqDCn3hx{MhJ+VI+>dJOaT*E;koA-_dUK}Uzf&# zH;{fF7_10)<{MQM8t=)+Bc#9Hzz?%a`@_R0){SISt$Kn@K8L}>h6mZ|Sq!BZKB@H20kftU}^PiE` z)c*Xdd@3S@t0+sw_uO~aLtzgUG2d;xQ1Q*1H#0qHdV%)wP1#8svyWz%C}A74L_x?B3pf9H&Y@2X=|G$}7iYO?E5Lr+QZ zunjfr@njOx!!AI9VRd9th^kl#?3g$t5Dxfn?H4g>K($Nt+fHaOY#hv@QlJIXl)td!4Cw33#odkl6Y zV>S|OhL=y33;S(CMLA9S@}2)++OhBFrXf0zRg_T_+T~HTPwd7xJV6cPBJX{fB~&hK zs$Fc?B(tfBkrDJu$X3Q1{1zTNRk(@T;z!+JtsYJ#VQFEI95Bp+1d)p+`Gk3TG-5Wg zkhB!>_0%li8!7wS)(5l@KDF!}dm%NoRf{a39g|I_D;7#><0*1`M%3kp01AB_Dq!Zg z8ht}kcgMfVhs)|`f(tl+ixNr3KYnoDKRVH}!H24qCWtT&%xd}zW+opB3MoDNJ0-8f zNvx7d#yy3T+j3B!o%L;!;b>EGDQXB~+h}0EX^k<%)ZBpGVwTz%Bc=Z{6LNVVmQ)Zs z#qHX&f?Rw4S8Pz4H6Vlw2CL`ph1rxV>T3%^&1h1dBkPo8>RjJw|7HE<#P4E!4_OE` zO$@0HI!7pPZx!b@3)8f7f(6Vl`(n8hAxh@*>=H@8QQ)g9oK9SqBFr%3t$}fQ3U0|& zMTUI5{BLzyt1e{`H?CqHGJTzP#T38;zV<;^=nNbG6N-_k!KrUQDx)Z|AC(bG|5a8Z zB*H@M#uON%NKm+sWqkHO`)aB@we3grs9;DMV?Q{%PqLj~`hASTUIF*q`ZO5WR)wVFI`G?Zxevi{$Td5LndKR;aC(U=|9wR~L8w;+zr-%IHsbY> zUgGTk{6DWrVb zYX7qj`>+ae$t5+}$|T_!B3=Erhn`P}k1ai*^PzUqmU{4eDXuat%oMLHRxej$e~5m@ z@ADVp?D3O)y6!#xyXd$s{yrf~zYM$Yrd~^{xM%^*VgG&MleV6Y&|SUNwG!INi~rl; z<-XXdqpn!99)UghSN}nCVm|NOx&~&TmiGceJ?{6R>laTmSZ>pxJbelcMsk4R0F=Ar(?q*%!}BhZw%+9K`8y{Yh!MT%%c;Bib&k(wxLRjmW=N{ro zoje;XgQ^~##P@&C)S#ViS*=Lu%Jg6vf7wA7B1zehn!53h9Ut=hiFVdZ2A1)BWO+Or zT}sR*gJqqhOx-8b1SCR0`&Ue?BhO8gDxoY*R=fY z+Cyn|_k)xr7Y`wB{C-T)JdQ-^IL_#4Kt|xti;{O2Uif`>)vlM+z~WAes&vp2#~e;> zaP#^zhn)Ghwj{nES?XIu)mFnEPiGi7&MHYgMRFdBqLYyRcM0|3NrSwRzt{zDC$Q16 z*lJ*$9KIG@s!K*lv(_p8gm-n5bjuuJKPNIbLluNw9-=Anc+g>>{ftA1)Liqyomg7G z0lZGlRAqUVOzOE5hF~nSdqkDH#ahTn%b<|fSG~?U$lf?xD}R^!j=>M6H8HyWF6y2} zPGPZ%iKNdTp7uW4JWgAQE8vm;X_WJc)Enn#$({*pabQ-s4krlc*`UTUP?m@IrR(4uk6XT&bDN%A5aA~}3fQZ}+Rd6c3 z*IAG-N{$P(j4Q>Srfr2tpV8=0h{!#~3-AoOv!u9tWom_0YBxR+7|^?x3!H1(U)HeMcJvM;GiZDK%TC8~?<`}ApK9*l&Oz?(AV;afU?!7R7^1E3 zn(zjAZ>L6+)k_BZ;z(Js8zvb4U#rVK@}KTN_B?4j^DOxi6XO26e;wx5>Meq@OeH16 zPKhP&D9lsS_dDnqJvA_TPayL?T-&Eo4MaN$Vsh~LOFAw$sP98vj^)e3erB(Ix)0Ed zcRcmT-^mAK97kIoOzJos^3BBIn=oowuyWRsVNp-Q8QI%4?47^vYmBj55kB(7-5G-Jw=*jed)*MV}zlKa?!7quxNI9Dqv5~0*qxF{ 
zK;A#;?i9PAW6HR61R`H}zH5_UD70sXx_o)1Rg3(obui<>4_ zz~+ZlZ(amClFRw<-vx0SVK8{jz2h(Z zmMIyE3M3W7#zQ`w$l?iSu6<9EpsaB9e)>a9xdXonVwlUtOJjU$|E);SB#^FRc6njdsVrAQlf+Z?t>)N`rJZ`bz3 zT<-oir^BgSOK|~wT>6_5|5Vueq<*^8oW~4d2E9Oqe^lQyCG3YxWB;2ppn%1WIB{LOpsFh-vfQsk zYa~ihF*^G5D!7TkrE*_&JM?I$3VbtRg40+oN$m5ZE$|}{i0D1s8t}i3KPQiD@&D&* zt8+V7{`r~`D!9n?>zlv>vXk8Z{6Ovoo%f%wZ9Y*r=WYC{eGnx6&)5Gizd&#aN4k^z z*tsayPSh-awP2xp@P;g&$1Ep{8~om)I1iA!h_&M{1-yTcQlZJ>%np?P@U*ON-8m`2$~6@r;1J1V+7VRmXQr zUK?EZAlhqL`6!i?>!r6w(DwDU74u(h-P6F&`0hW^7~w$vvb;F7K_Dl z)bZ}>0F0|0(Lk!$J=CS|jIUoC{3f9W=JI0gc>nu|^<{e@O7pa{;Uaw|(y2Gxez(Eb zk73`xA5pKEkQF1~W>)r|JDWW}MAc&~bUXy@3=1M=NGrwT)q^+X7)z>nMJ%j4)30#tyWPQwZ3(`P$dH{3c z%d4x2&%Acl(V`fy#{2su*;k2iRf7EQUL~7j6C&#traC*1nXSbfQTNa0il1IsOi)S2 zT$mOo^$;0M*{TG;9QqJ>WK7LWw?a8i={mo9{n%IcO{_2T$>)h0UH&UBS(;3nQmcUL z#1NwI=Xv%@?Wp3HBXNln{g^f9<&~V(9v+kbO54w$B_8k|is7MwKSi@;Q7$+5}^1x3}kY`&Ah6+x6k7h`41|d z=xaz6u)(|B3{#I|_+k8|sZOG!!x~%sqvg;ui9<0eC%u{IMmcWh4=p|DOZXV%XcT{r z<2Ik0^k8fn8BbD^p<343yvK!WUlx1En;Vn7T8yk5N-S*7E$1C&dojk(oe0%>)bEa0 z3`wj^U~JjjKlXXPK%?ROv_hwu8hZr!y5N57Yae2n2Gz+%&dpgHFSh@7>5dTzOoap$ z-=Dv9djaWXK(y4Et|&L_k)Ql1G8D6rkQihA3fHUTZG#9uUq@52J#jq4l=aYES9v?P z#<8w~PCrM*d(&Pf6Z))vcS|BclAR-4RY$wtW>b1`ss$;4%MQt@<^zCue~2_p6$Bl@9cke0n$tc-#)rKKZUG($}p}HOd8|+=<8rG z?RA~#)Tf@*6L(vYb>7vSXct~!_m#qLn1bt7LCZy88p`CnncSDLS+yn0RT1H`>W!j* z1!G(9b?$V`Xk4xmEc6JD{zHL)Ycb>d+r4J54edW-pDFwyx#b-`7@||{tBV$bbu53+ zgK@Mz4f(2~G*s(ESo=JAm?;h4}D)DuHR>|Cwll6CU67jf$IyPx_+{<4v z?88nRljnJv@+Uo|~8g9*U29FZGKF`*Y zp5uD8v2%FL+p+q_p}W?hS)+XKKaxiXxomcc;(`Rb**dhe^zMawv8ZjfC#vg827o$*Ky}9YkNeVrh|Rv%j;L&U716gRa;9FpaF)c}!nTj+$54fXl!CW2)yFV+px_ zGt@;Bzn|~R{7{PkZl-Ef#blwrUfSzmT>p~*^*F=LEW7%TpX_B~%ap03xt*e)*OqP* zl|X@$bQ!_L>(6Fu^OA8ETaj{0I|)GF=e z`QiuB-b13aj%z1_DPF!rF3wb1l*O0p zft`C{@12^@{vz0KSE@Nij4c(kWt}%EjE78+5gsHi>EPo#tebboJL$keetc|1m}~_Z z_31Zz1EC^iPHM>c@#AX7)w@2V6(gc}{sUQa>|81mUS%AU*eDXb>W(KWNqLp--5ixg zetTh>pQKi-vVHVA9hWp5W31bt#_i42YL=V0gvAmy^j*&pqR#cHRuxsCKE*m1JXmZn z8FZgKS|vC!#^>^_rANyUZ%io}V3-u%bl?Yb%#)!Se-35=}>jBtSJZa^s$enuD zfyGW%`_k>`-aH(d73AS9F8sRZ`J9}~OtCk+@CA>gY z$v{SiH;w3iYVzYvk;cgid8%}sOGg}bU;Vg#)*zCti?I-_&X{I9>P_F5t&$<|nxv0Y zxt>X}gOCPJ;U8Hbiyp@Rt3H|C&9X0-j9X69HOk^hRY`ar$d!eY|^($+gxJ3FH?4+a8~$ zo@VJyw=k7ONE}!#C-;h}l{ZDfCSEaE#Y)=$ZaEWJ`nTWR^(S8cVQ$f)IQ) zg{-PU4H&9#q&pKeN>dY0*9}~1={b<&sNxf1S@^fM+@$7?YU2}Chj-whurVO}D^Mj| z)Q(gS_9C4`tXGT(k0rWUPnOa#YaYbB*#48~q?d9CO=HjQC5k#bZVxSZS$Nus@_kGvg9=wHz7n8l`=g5FuOf67|XdBHnA0 zE$eTSLbFh7QC;Ed47I(Axi=QbGcdJ}mDQ_M0(+zdj zgV!XM?2S=e$k)h1?I0~oA+toSeKSy7BeKXAB!<+F2RC1Rq%@>J9X$<&AQ{IL9Eyp! 
zh+UT*^{4sv3-*`O4z;}^uW1rzK*q%o>(B*4u<|9yt*7e}wvI-Hyo+|%?KsM$t$km7B#tMv)3Pl3FS4o(72cuwE#aPt*FmZQ z4*!UPrd{{EicN@O;PMU}4wROi+>4e&y@X5;{!B{rN+++KBZ`kg!e5fDf`W9FL@xQA zD(*IT+qp)tU$;aveyhT>{#2%7u4D1-Jd47=`>)f3x%0{gCxvrk!ZX2rLQVG9lS`DT z>3Jqs)iPE1MyE~_C&aB+oM>m5Z^>(w%2%HD6HFCD9xA!lV#Z3R(h#l($ z80?-280?|*isJHH@`((xrS7yg#$ulqY1R;GKyN6K^C9CHipGMlVD%mxlFk^6cd9$SXQwlZ%jL=}(8yRD`g^ zqY&P;12irns9g@UL-RqgH4m@))zNsMnIs-qyxwQjs)d~!#jKsAzMNY940)ah>#nC* zn)^0{MGzK{I4qrL<_-q~eWZUr|g~_{Ndke~ySJFFstQWOtUI#Jc5u(G9 zg$a@`foK?az_)qV2;W(@@vQ_A!Jn}Y~>Tigd2`)w)R3|;(=G!^U*W& zZHeOJVDy#ogWGv09U1@EQWD_tJO$ofjRC1yv zR{Z>QR_-0fO1w2WF|KaYed~Z1?J5}mLUI{f!>T)pP#MF>WgW}*@V2oz+px1ao0;e} z&b>{atzw0Bzf61v`EfE*wo0m0ld?Ipn3;GIR47#Zrdqa}RqO9ZTt5O;swnICaq`H4 zZmp0$%#DRTcoz)OvcsZ>YbidK(N4Av6EGnpll66{Wk_n3s2v$Gi7_!vav$Y0@^Nbe zNP>It?b#*=nTGdX{zPE!=m$;p{|+eF`QovXDDjbvnkM?gm0M?19N zF|2T=M2=coqDlFb`Iadr19#~HcQ7bu&e4d40YEJi#ZUQA1_Az>wfi)m1RLjZaXblH zr8v^r6Jzz8C}S{C3AJ^|S&|q*tleQfZKHwqPA2RhERkJrg{?(cd|YzbRk5YGw&Pri zJ69(ECXbXM*CnZ={XSdh&sVNuJ6-r!_0LbwUll9bHH!zre=*uN2b;Ol3V_&x@-wr0O zoW~|~if;Zw#jJT@(J!Gb!nVFZLfjHr#}LOJfeTqXpl4cXT(K$xgH~l3sMB}*1g(48 zL^s*kI4CT<0S8he28kzrDU|TKOxPOu6o%-0+Mz%s^#I1doUA|0cgnC`((5{5D`Gze zyd)dCYlx2v zi8=9r=d<+{Y?i?^nI!^b7)LBU^h~UirrR}QO^N2QTHRcgd}}?&1J-4gsLiz1= zFuCv|UJcq7N(WLswXg>)I#bBrmxxO4g9xjAv$ZEh?S^CeVi^@+4vTJ4tLtxv_`Xqw z)t!H|UB+{D903&cB--E+4?dcho<0)K+HGizHPD;hF(t+wNCU0EJ5~W&?@3??a9RT! ztob+58{1%}KsJom=U6XDdYKZoUVZSn-3&2b|Jf4}{mS@CD|jgaLp1#Hg^@xglSnJ) ziH4%#MAYmEuKw>_|gzhry`(L@2G z)*9##J4}gc`KZl^^c39vJ0`vjYE_G8iG35G+Naq3f-_I=PUf-@yL#$(%(wrr1C3a7 zG&1^w?0tr?m3|1)q+yBX1wCx}Vd_(w3gYTd2RyK$f|42f`FhO!by7h5gtgVk!NyMXuJqjaCFtSZ*uFdL{zD! zysK%}Uc6UZ;fXxvAr)mEyEnxMwV-^3D8xx6ONZ)H7rycvwH}}yl4J}LAU`n1c-O_nU(ZH#jmZRaBx7laCEaD;GhAH{=P7 zD~7;_=zuKYmhl4TuS?{G#GoP#S?L7U-JiIl6hGGCZXNoOa}4JyoH-J4(yt+%yaQ~* z@1=)Dr=iZiCaWq@`3ZxFz#~I3*RSoTL5)x+F_d8PD9I&f92Y6gPhjDdIi7CC!;wZ= zRZj&``*9SoW_}d~D#0W8wueK{*BybwjXx!+c!fHhl1D0f`Xi^b46ag+gXta|BuJsf z3?UW_=y-Inu@M%KX|d@NvTBQHvL@C%mca(I&}IKe253g2px1)zAqa&CQ$GQoW+(RO zuDPjO9S?o#z#>>*1#{En@0@E`RHSWOqf69EfIJe9%o?ue zLij-Wy<_6`Z>W5c*3iBLz@66}aC6g{-6C!q%ADCyrq}!V8+%Uf5-@Hhm1vGp`s$dU zHY1|Bf5l09DYv{gb^Ds1!KWEgd?US|R*IjGGU9YTc`ZY27R2!FpCrBKO@R-DSMm*9$&y}ga zz6H^|ks~-47sR@o6W5DTx}^xF^8(jXU=6``v6Ai$PbyD7PlD#SUOYIjIKSy`1gWs$wU$%+KrxMr4{C_m>*cZ6%S7K(R4L-h`+0Vi zY}M-W>b-(>4+CA->qJf#QRF^@uzP{C;6=~)9xK49e=sGkShcay^Rr~{D#3nh+8Ifv zR_98ONeImH!KIqcRxLg3tD1E@R=PR&R%cEV0o_dqCS30;^b6HS^Em~qb9Za_D94$O zp?mu;igg|nXDi=YR(4;WEMa=mk^q~7$w6=gMOtYepFwx}PV^=PS4N1hB&tp8^}t;1 zyzG00xTBz9EDPt(OaGe|$A}v5FF>x}TJ|+1mJ)qELR}Ve{F}znB@JJgHzVY7RfhWk z-IJiQyVtKs&G(sL-C^cwV;$tB`{WMGDXRyQR{V+3_L+9>Tu^hK)yUNV{77&5{dm6= zDcG@Lz6aU&gyA*iw4++HK~3D7$!?aDsUi9>kyOF{0`gL3#^~8+N6epoqBI;S&p=W5 zMC)ya!T`s4c56!E85(+;qja7v+5{(J!%h5|g>W@rpL?i02e-n8g-}_R!`H>FSoh!g z1sa=4YIl}BC;Qjc`7N*dPrBGBYDK#jX;Aer>WK{gLh%6GP?zpAzBy#o<8gbpB*rrA zNNQ5lV&19zD+~p6lXXz-#m7#wV{A`i0EtnBREm&w)Oa9MP`>C&AR3_C*8AEX z03z#7%)+?35NG%f*K9{0jq|@QOC}Rn@>wEs^0_KiYwg6c3C|Oc#Gc4jz0NyuxyaGS zCumTD-G-E@dqvnR_6nD%0j>a{Z0&-cu`W^_6Z?BhLQ_)yb&UQrGnkapH^3R-L|Tq2ZWkmD5_mZSLEW=8BsCV2mkCVPGK)ey)klhG|bbJu+9%=e5&wH^Qdrh<-7S~erV z$9a)^VMd1{_EoTdM~NIjM7)bU-PHj1ol_Lh*HEZhB-h*XU+Z2N*$Wm2v=dGBDJ*46 zrE^$@n?db1c`0|x+0d2?muR}ZAxkUBEa32^@qPN!YqC{gbzb^kJa~WTl6;L7HAXv2 zBlKQMvYwHbXvl`KdRcDq3)3G#ZV%p0eyqDAv0cIot5}-RHg>dHP<(o~-gTAUwVc1qv(O@RV9`VjJ+m|vvkbtCT$0lzKsh>3TwI5~tpxV? 
zb7iaaXopm>tp_@N`7;LGD%!(j{mH1a0ty7BmLBVZj@Z(Ih5WYT5p_@6j$bzxapw_H z(!&2oJ+e+Xfn%Qn*(x7qiF2fVc!ikt%02sVk%T$EC!(I$2!T(FRpR;$D_ujUK3j*v zt;(Fz1TGP8hE4&dolf$-L$tgweHx#dhI&bka!X>TmvCR`Na$?ol`$R3HXuR{5Vl@% zBg$Z=$6a(qn0gIm7T!!6hjmE$(genMSFYC&t|LEfak%){=z;lSC2BX|B#&d$nd*_) z7Bf$I!1*GB!~?ySoQVL%_l4c9fv?n2 z_$fQXWE0U?sbOP483#hcYDXaxn1C3r61i6Sg+h5Y*F6tZBsB?yKK;~q71U+ zK1pQ1Dd^i_XZ}F_2=yiHP>E5i3I--c>)^M;eA&*ntp1Dmg`4{n1gyqkUO9QkIy8>t zdy7bL$HhEe4(+Wytb^p*9_}; zu4d-TCn9P)AYaN>S?ar*6QUmGn6iN$nz%5H)h%zWKGSLOZ8;WM*_UwZM*T3Kb210+ zl3~@$9x9adniuR(p`Ae_K1gJ{496IRCbE@>W5nCXe3;ggHJCPURR*3s(!<)V*XD-G zQsB=Q-K?;noK_rRrw13#=UiCWIy9YFmXE6b+xI#lFlNb~xUN7Ih>(2?n0N;2n@E@} zP)(zxve&4ZzmI+Y^Xm>0^!PUpJ5rf9lT1tRB1CqeX9mIY;^-OmZ*m&1B;e+!s+8mB z_^u~DVUSt7ORr!DrHZpr)!{uGoym?DOcuAof^6n1=BA7oXqs(qg-(qiC*VeV3|M?8 z9aas`55#W5HliqX?>)76$zjsIn6r(6-d!p;$>D8I=KoVxPt~PO)2(vXc@7t@NDFVwB7*`*gF(%%)_7Y@9& z7!E@$pDY(N577_1Nxn{ssF(h}p)Qt$=MtS3^yMh~Eyy<3J5DeE!_z^gs`5FidUH=A zL%2%Nu6%VNiYNgF5Ra}7j z6q;7dt5fIqH@4L*F#psfH)YIz!g4he!Bd$Na4^pD2b;svCYwJ*gWN`&b zJ5MF)ab7|Hy!f~h0edcEZP``km^CP_oz^=ShFEjvm^R;awuyyK8d*xp z>lz{a9FUX_@KSf51Cc9!ra?^K&*F2APiz)_-pL;(>o~#3<$Q6r z)|)vLBc;`#)}6>M3ga{_;Vcoo`gHQ3N3gP$%qU;>5r7$d;6@kmQfKSTEg>Zc!#I2F zy#d(M%@#5#yimAom6B6mnVhK8SLUB|A%GY9XWaqe1hT?%g}Y6&p-kVKIou?d)eviN zu9%FXoaszFywC(g+4Q_@)wKY}aUcQBVVnuMMduBa@PytN=6aLSxB`Qlcz!ojjpXf6 z7h~cm%*HA$qI6~RCei1O!qtNOZo!C-phB&NGDUCZwq`gV5Cn{R6L4TJvF_f42i(&u zNSs@6N2CIOCVGzo;R8&rO`2n)|jRA=M5TDCwPV^oT7qRXEI4rrP#|y@(t4NM| zK4-b}-p(!g90FyvT5y3dNFRb%(8;X|3Bs2>&Sf_H`4^!O6reba|7=j>_hw?fjfp70 z5lng<%710hoFjIkSA71oWvKz8aqKCiqNpF`7}h9XBPCAp)z~0RhSB|*FMuICP3k5= zaoHLu3ed*vP>eAUqROM?nY931|KPNcS{U&09PUCXF9Km}2QNp{fYStOb+A&|&V=wS zwkn466)eOURj!0vzSuGJ`v-vy#0Uvpido!7y3$1r4048GM#*@#P;El&4$6-rIv0iL z`;IdJC+WC45gv%f^&_ZSV0g913>?go@_X>VZ+EJtL6hu0+2ef400}? 
zNWG2DuBg-C!7dPy*~s-0I4yg8SQ^Uv`~Gxt9?ZN6Derf6-`gLk0Eo-qaM~zDfu6pe zs~6#c-!u^^1*3Zq@Gl&XUOodSJ={3SBi6RlvhA~`CWrV^;VXtZ&{y(h21o|C7Eqi`!XxE0jV z_TgBNqoRatv#Op-uGg9F0e^OG7VN(ZNJml{nCub9WK2047o6ipK!lIzg2@(FWlwgw z`u(#DP_>~a4k86Z%J4XbHO|*~ll6MVEQ$eeljzmai$XO}iLyCp!`0YsxI;vC!47GB zw~G52Lg1FlUQvYACJtWV&D{Yz!Hb{5LbfFLW(YO$jryF+e1qwVL<^p*YG!;M{kqEM?6!kMr#e+U>9h#qb z`tx6$D}`-biN!)t22Gl^K6H-7cUinqe+ktS$^-l!ppByd2C#Smy*@_02O`nTY1G}T zE$8hI4~t_lAjXZ17qUE7=4*YNLyEIdh=^fna2a3c(C;?{0s#Wjfv(a{G@}TVI?CrF zV$mjQ#p?)waEs_BzQ>#cL`L`!h~-*gMM;=BsLZoZ4fBRShWmoktsN0tkSGoabt#e> zugJyQ#@0h)LnoASBQE@%7(;~&^da7xY3^wZJh2K8^x+9l>ZCQC#`toLIm90ax}#hR z{&q#X&YTCT#CMJ-$uAIit;{*7>!5^+&+=lj{fU-Dmrwn(_9vc;%nU!m&Ye~iz_N5) zm*?5x$N=9@S$;76j&57S@)5N@-&y(r@ThtlnCb9r?bwqf~zjNWUvmwoX>Q_1XQ+YNwxaV|X*SdbeUnRitTGoV zWvds^Uhb_+tc;m6&(b(b4Ix7F^hL}ZIsx9d35|$0a$2#}O})N;yQR`i8BJZirx6G> zpqZqpF!dj$=8@Fo;><4@dbo;Je8Za=PR-x7;tLTEnFeun zJ6dim3Hl`LTq06txW0*=AM>7K04o-CCqENK^{3@%hZcPy9Zh<|T@jpNnWUycRg&~_ zRIC=3c~9*$+wI+UxAG!3>FY;$=Lu#hjVq1$b7O@N&DfUROJZg@fQ+lAwK#28UPX^R_|Ha77!!299+ozkOxHiJ|&j>tJHp zAJp|GQN}hij*D{obq`p&@Tc`h2;zYt%!B^N{j~|(9Rwuyq}+HC-oR(#dW5; zb4s0Z$RjQG9<7;Fad%!sU@&dH>YD8On2(+n^BEWr>5 zUNAWfAm2%M*uQF)ONuU@trhK!V)U|h@2bxv54r~{KYHfQ+rb40TQo34wycMj@Lc=r zAs1%f#?EDsm+I{G7Z&T0uNAdp;kUXM8>|44mjx^()}8IKaA3y6269oM2izmANiJzO z*aRHFiAVt6*A=<}7}A4ti$6CJX{0&-zVY~2pFbhjdj7^S$6;*iXAqd^`#W_<%7n_L zr*+|ESkRKfy1N745r5|q6XMuGY6YSjoCRTJ3&WFsLzxnOIHojXF7>Hyy6vM0!^EV4 z$^h?WkB(68R3;0hKycz1q4^>L5po$jc%>Cs({iMt`hR@uf$BqJ*4~iUw6NyauuEUP zOqO^?wM+4kGXR6Wa4*qI3f2#vZKSV`gp@N9UbhGjs_7mU@w*iWQXuvf-S^yx{Vx>P z&M0ahu1rNWJc6~gED5#&cFw!{_XeB6mSGN79L~`_W`i6(T9FKLe&ge|c)aTe7&@nOoEP z<9ylHg#^&Eqc;-++X=?)xCG5ngG@3G`n3-sxho6$!0=o>tU&k=N7VOV!IP#~_cxsV z5(25nU_l;O7roB0)Ft6=(MY>f;2rA@P~uASPzX+8Gax3$q;&O$&*Pxi;&f_zpv7^f zI2uf|Si5zTnfgRdApzVPVUtF#$4tLKY6zn1eoKg9R1;TTBL@|5-)D~dJsU~!E8raX zP0k<@4lGureg{`OkxrgpILcYC6>u9=wsJaPEx4sa%9h38R2JeZZbWnf*;}8ealzFE z|65{*2}U0vA5DDm)h>T7zv{IK&}g=uH`An1ZpI7N-2khB)&*b{XatZl)Lf^}@APyX#KW1W;{E(=H3%XPRieT1F?;AaxUOPuj!m-;+3;$7Wlui z)da9@fU$3DF_YB)CtuhbakzB@q7h+#bg{&Qk>4?Zrx1bW&;_`L$~x+}8iEuJZ-5^6 znTBnsbK7P!oHIB5Bz!I6<4Bk9)k)v=Ze2B5XfNhS3#iV_l z>1evED$CC#YBg~fCQAUPp~i&KoFTm)2jIN+1jdqGN07r9qnPgTSWSoIuYY zVKC6+^!vMD5`I~_mJBo)F@&({P8dqGzyC#9-}Z!D3Ukx3cu0X(x#W>K%$f~>@9J|< zA&|)k1g}=Z&U6l#B5ADPTUV1e=1wY%khOonc_QMZg07<(SKxtkvVfGAA%Rc%9KKTp zx_iX_(+#y3>R#d!LjmQ&s6&Xom%z?F5e=w&HoiDI9^36DDy8r&29TJI{Bu$TJ2#75 zKjyv9DxiqML-_$jm3y1&o18pDfAYv*NCDQ!sKO$J11RstLvlnIzEpu*za;DHO{Zh@ z$Llv$Uq5kI0#7ZrjE9*v8P+n2^XHP62bPe)2HkePg&iOH`||7MNv{?&oBy$Si&@;w zc5RQ3T^SAt8;8P^+*a(_+U!$6>S5l(atpl{Dbw57UgE25`TC;+rk5sq;}wLPRW7H9 zkuG6j+h>|3;=E}ib#ZG*aRPQV8E3=AY?3o*%r~)=NgAUUSYX{R5oh?9M&(O9Lciw| zLR-xCQ%1!$x()>CA{Hmfe#jEZ{*Rq0@(suJ#X$r|>(K7Jdc$k5JAo6QY%lr|s1fiw zl7TDHtc4a2Q5(du)gNy$uHJ;L%4dG5oQY|u{ZB2ZoNNc}y|Oeloo~_3%!86t**wR= zrZj6G$JYGkA5Xh&jBU`HhT$xmpXqw&mIJp2yhnwzTwwAT$_5EGM-Nf;V3aLedamxm zA=jQ8>>hbnbtq?VGBfue4Dug1>ERl-6@%&RN#MR>11IQ1=h-$}OG(La3G9z2d#`c% zu_XHHM@djl3Bl2O6$B8`d%-tP7ak?4m7_L~k&4ISOR|8(RDobEEsZnA)5O)avc0)g-?R6+-)b?7f>C&*$e)fIzn5Q0gIB-p5W)@h4-we^ z61PPRRxh0tH8t#JlF3~Yy=!a}*77}A7fwNaiswX34<}+&&0}DrXlHiPFY5)6AyVT5 zkEY;kOgQ$V2^l!1+51bfUOyY38H^(qhRL>hl#D(sIc0mICiz#qJZ}~*;ijC=bf6E` zg6*84MW+lNuNE(H)j`q4iK|vh4<0jD?`rtt4|Ixs+ ziE%!}QRd&k`tqXXoaNi4ipx=F@21ZW&zR*HB;g*x9D7oe6R9W|EdmqL$#|k#7K&FS z5LADyo6>Lq1_Sz4Fncz~>`GOh2Z4(Qv%e$%ulBz5t*LX}JBUgv3Rb%v3W$_Iv`_@o ziV&uX6(d8e8ZgWXB-J1wkwm}6-oAOR(;T_^l%efO-;G zoI4ly_TMJmIxmx@Z~jp6u`s$78G)gBtz8pQxC8K-vFpr!k9D`l{BstI1+5KZ77s_y z;9KKTv(0|Yy7mb(Z2WMc1*obJn_Sz4aR?O_5=NQB^{ERC&uOn><3fi=r39}=r+q*} 
zpW-4uD=xL|rhbtM79{J_wT8a(<>b6X9Pj!|=nQwPI|#YL__4x@W~ki$hR;?TwSlcU z?2gAEh87ie5^wP1^zn?db_NXA{x>t~HR^ZeK3HUgaF&3q>2(RoR-AeLbVKEj!8o?g z(qso{Ai|=$9Oe+EXq4CRtwC!Nn~G>0(_A^U%*u7K0K?0-vBm+xl26h0Zr8_owc2y^ zvP<=_g99b_cxZDkDmVv-PaYmpzc!=B1tWkjz(_cEX_je&HQNWeKSk(NMs62BBDETd zdYrK?j3t_hjlI?h%Hy$nh61@M*xm`qO<^ZA|Dt|r z%qM$lAFPev#7X<|?vFXAh0*!V&pV+{PMA1=aPTnax}bMb6ZBn#4|@em85!^e`Ymv)6QL-x)X(Ao-k$` z6at}Cn^QBdH``lIWa5+9XNJ=I4t(|ICh)r~tO|w;YVu4fJ|tgJfOK`{L>4G-u~d+t zk=VPa6BD|%a16i{maCmuDjp*YAfo(qmlaEHH#5^9l;+>$5aJoo0NchgB|i;Xf#qpG zti{#ZaJii1Q$RU0 zVTXg32Zdx_J7oT2ZjCn>!HyrV>p`D~g&0Aq*4Dh*wX^QhwWwKlu9IrL3M=*wHci-n zu6TSRqpz`cank6nYahMWhibli%c(t+jaZ+i64Ue9JcmQabD?pw8pBrAiuu1 z-DK#*+zeyjD-IaeXogb^njXA!srPtFWwk<|=kFS<*jn2*Abvk&_ox@THtDfUC?EP@ z@&RIcxeA~HIuuA>2OwoyLO=-)x_tqPK>%OtxFy-_U!-D%quj_ubENrE}vHJ^hDJ$p)*~WtKV|F)#nU~p| zWRX?S714&&poeKM{3|hYR=W%%a12|>eujq)n2k7x{W!~(#4uQyhSdT(F4AuvZRUv>Wj%}@TC z#!a(%YLp0(YjG)2ID6*CZ5mZAQOScnV|@^fb6wJmVZpq$m$$@vYZmGfS+rS6EuUW& zk>}VPXRjDAxs>8p-co%P>hbjkBw{I*J!bH%uF{{`_@3{@Wd)EPB>tou0jTOBA0B;jXDImtdfO<;OX zeE#ybxt`X?s;1u7>=CFp5A*qDkp)wzO4ehxDFV_t)@4jSukPLOk?%x`67(*eex8?` za0y;uvq#{38>l(>4T;MQ3K0v@TfsfA#~jw+7EeWLa0iGT8QTpl9A1z%5;S`S{7!6*>7h)z?Myi_ zJ?a#pu8Kr%nVR7okd>LR1r33@4#cc$wk#dG&nrmj@68eGdJWW!8Q=>3Kw$_|!%6eW z<10+W5b%B(kk)1vp?VozJ*4^vdA-v+Vk!PrCGg6e;m)%y;&-7VA zj`airc6}xQGmq^%o6ers73)5epS0gLNQo`woJ_OXGTfM$ic4`Gaihvmk zW94_N9=3zA&+8@PKFbg}eCL4q@yj=&m#vvTpKrQH5Yz=qIL% z6c>BU=nufS=|5qTKYAsF_S-Mf-}F2uxiMg0WFx5GIu8?993wABD6mZ$bl1fn0!^fnNf1|2U zBz;^~x4JV+aTkbq{J)4Ofk+o`vCw?dOM^2~Tp+v1{YOTcwsXTYaHt(5ef=*{@ar~_ z%ee~z%x8r!=dQ!q9>3PTd7`C9=PkXB00~(!J~5%MZDp3_#JJc!GTVOC0B6g>*lz>& zQB@uOHhi9Jaw@ikZTJ8C>wwxMba&SJ<;Q;NZX(s5h-Wa>tpUtFtwB=&Q$0Zr?j`Bf zO51ryRtL3{aD=!3s`Crd11}zA^VZA$jjJs$LZ6gHU3Vf$EX3~59=@gcO#1^9Z=3Kn zw{5Dd+o?k%=C$QzMB&OS8Yv5+sYhb!%VW8NLT_o{5-^x-2m45!p-=SH zu1rO!W}(sBp;vM4leW@$h7V)X#|S?b9g^sYQOU$LTQve zG0Wn=p8upN>S|z?lj08v>Z{N>qA z9vM3{3yqtp{o9sw*8e}G=71NH2$C8#AobH%Ck6-!dTlY9&BGr3-%2AzE$mw@E=WYp zrrJGzZiba$o7wz$p0bqKxZ$&Z96a$Uis+YCf}VL54dKdJ(p-q@wijHn;L$2-u~CE7 z=5~^B1#dF(lu?75uWUA#2}NU50+Idg7P^*2WvbthX95sY75^HM@B!!C#>;Aud9CrZ zIT(!^p?jk>3-v>lHd#FV+DNJD(GR%N=U8CMNdNww*0p~BPr$)9Gm=8oF;{V-$6@YJ z6Fz^-FhL+PjS}!%NzdAU6^KhkV^-wD_L9?yeeVS5*N2l=3`iRy7}rmulZTKsqq4f~ zt9E(cmj>AomnOG=g6Uy54UQjpUg4)?&w5-4@SG#B(S0;*nxMB#$`U$$Go5t1rFgGE zd|gz2g>`**l_>ArMhC%z&7@s( z*0OZ6bAC2BfD28aiU^Wz=~rhm@uvMIe`I7?F8||w>Cf~b#f!T08foc}lV_|YC4`Bw_Y?pA)ldui&fvGpsul{{cnoC* zc}#2Y)1o&;lI#ll;06qh`c44)UjtpfeH$bYStX{veR&oGt<3imcQ4p%9!|r5TrJ7J z-7=HNRA>1!dt3bzBJ?ZXDznE-b>2Tmhy*oAT zSMI=Mnl>)VH_3$(yi`!K#(TI5-xw0IG;-?W{fUX!Jvzxh3yGaP#y&b}BzM4T*j?;y z-6evKCgR7Uo5^KDNOoG^RuZh)#O)!`nQ~! 
z&TAI&b(4;wkM|Cw&N}Dxj0D|nHl0NDDn_f@{42kgFcTQGKh4R-py4CZKtS*l$_v4` z+Vd{?;bWBf{G)z^iIkQO3vo(hR!fH!9OeVG7o|U#>Y?SC)1F!-2MMzxe7r9O685<9~(x`3mk1(`S+xJQ^;v_u$-o_yMt$=!th+|BO_-ZsOGXH>nX7S2$Q;O{X*RvML6p z!mU&fB(R%QKTBPV1|+2Jv?Y$O;Z=<6hSD_|qgvDmay?Rwm(=?+lV12I`Z4!;Xxs{} zfYXS^l-S|)C)S!%g@mWWo3>E7W|ERIna<)#L5~h@H^XHKX~any%gEy~|Jre~Fi1|S zy~nF)3|Gohc>1L{EPc6(g@$yk?B-=ST$hD>C~4`S+-}M1B|RIu)gs#`5EqNeiIRNF zi55`p-793zOT+B!#ZOW?8aQ3LjVF~PWGZRyHt|g8ilXl&N zP@r$S!wSOj*&Nsg2IL<23-5YS;Ss=u)rAATl-zEy%Wm#Af#`HbRSb8UkJX9H-C2k3 zdL-(#!AgG;Q+*vDJ;zuaSnrA7ogV?}c$N6BmFDNM%=ZZ5RnaUtZ%I76A2 z%_CW2nwjs4yOc_Kc=r5u?XXr`f*6CU%_Lkn>|bQsW$Il#E5nfz^;VwZB|1vG_5)(*f6CX2(ZOE@U|{iL;e2Hk zzV}XbTH%l5PL7k{XXV?e&RzafF}@USow1rc+zx!^g)>~j*vhBt`uXv6Hr%`bsT(Jn z+TBf>O&_G$vg~s(E%rft!dYh;rAnVJP0-7(rPQB$CT;adF!nHvS;VF*>0gpY0t2(c zL$JS&(BPbo^BTJTL(^!PmE={AS*U(@E2)-Ugr<;fG<@Cix@vsQGt%r@yMf>Mym;QU z%xP(VcaopRm^ii(>4y-#n*C535e#|K91QC3{)sx1qXys)A1^K1`xT|eSH57V!(7?7 z6y2KyxgCYUpwpKKiH(nCcptZ}6_t=mX%>F4AZGFGplh`O@Z;^fUf?4@G1jAF$)*`= z8V?OnhBXPPxb-?DBJ1WYe*8hIcI$_VeW~WO3x#b?bQ{*IMElwe0Gqh(9da%?svkaI z8$D)VdksynmKcu8bOIEFPoWQ2AI6|>S|Wn)Ul8j`pD~J5!1F;%V%GAxUwf)>xJ5_F z8-LpIUsBWs33|!G1GW+Y6pjM4*fVy!#yBRk`gcViM$jBKebep7{CLYK@-Wn$#48&` z?b5(a4Di^K;IYrBR9Lca7PD$;?+;K-1|TAF>YpY(io;71i&UF6D{%xMXKrQ{(Qu#G zDhOQv%s}yXy^Z$SyoULhUjBgGKJutl+2lj3GjA7*bw^>L6(1jlxQm6g%ili)D}z;& z=1>Mb{B$>u@z)hH$~noqgK}^_r+5vIF`fK^=>+R01vZoPa*uS=O!n98q{c-f;~U6_8X+&rxL>%kifSc0Q#hZr@LY{Py zonW$c8`YFDJOZq(?#NesPkpmGE_5SEBb;TTaIFBVrtt;&FO;2%kD=|wJ~5BjC(R^@_>~13zcJ0QNl^9QxBLVZ)Ygit^^-G zf}!i#U#QSKt$-26&!&H(x{$p7rd=E!O^)@@SjWur+adRcOg?u3iI{Se*m(Nh6JBKj zIyh)@v^h+$C=E`ut6 zO%A@G$m^l4lq{e*U(mOOFt#q2A79PhH1z5~P^FEh-6~?%0?fj)F`b-4RgAJpi|->5 zIB3&JdhkUH<)_0eq%kh-UK#fm+T%|>yn2ol;qw-VfTwm8O+~s|NcC=M_zD!r$>TMg z=)gF8j&bHa*1dnj7dcmus)*?)-}(GRHF>Pk2HR_8_rB$pqCL$31G5LDk&8$pYX=5Q zLRa4QmERY;mqthf*PY_&V==0#58zN*8A_)+uKCbKW;k%)ZAy815O2qxSga_DAUmj*gQ zPV@dFCK|IHa&t^GGd;AO=fPB_5>m6_MD-d;0}|Lv(ig_M`$m}BcO8KosreK?{s^Fr zFKJmldzC~4Byr(YfGG#(VI(bsB(;d}=O8fCPC^v*sA{<9C*_~$n4i28fo&rVf;zFf(sTShP*eYp<)^Vg)DA_Z(X28m1>8K836lgSh9_Jnr1!@QBWRT7X3 zBSDW>shv`WEV1`+AaX3?C6gmxBwuOc@sjzx9r7SpYvPBwuXg|b7_VVA_8L#@H9i9P z?l3+zv2T~}iY>znsBeCqJhemaPSySf6F;A+S1*iV7nB_Q3$>EO&}oBWx%?cTcV%_C zQ?oNQjshLGdTE3aczW*dU<9_XrV!)nL2zvX!NhQpEUKI`qg;f1+ul2D-MhNW=iCOxhguDKr@=pFpj8Um9^5d*P>i?G_3Lay^l*7bN}r9T^I5 zs3#cf?bk+5dpL;w4!IGkdfu}Ow+Vn%zm4Z`9dq!G9*1?07t^MZ zyyI(G?X#d^Q{aWM9ZXdhq4pN>Oaz>8*D?>}&<_$n>II|yMpw}&pJ6J^Bz17ltBB4V z#WantVzD%~ICCM9bz(r8`&&`jWT06E^cpx{Os-`)xMMH%hL>ia)lS{0`3|yn#;(_J zQxc3+^U!Up+TEye2f4vNA%t z7z1=>ICYT>yMbsif<*of1+0Clbho> z=!%X&!%nu7+Kq&BG{*eZ&ier|Dd5hf7MSnQU?g6aVDT(szk9?U1Ir3aiC;2RU#-NdrVnGd&7Rf^9jSb(ewih}YcIo1k`p>}tWX6H0YgMI*c ze2+CorNLB_LNtihy>+4(KR)LDU|l7g5wOP4 z-t{=+bbdUfs*y!#N2h>A_@Vy+FWuxDCPTBppPtcqDyLcLvtXS|UJKP+()kbgy8#8@?0mGaM0=vfmnd zoaYooa#Z1Wn!9~>;G!`6?b|IqISP$Myr=J*{L=|a|Kt=htf2Ihql!^y_*Chf0jc{< zVlyN<-Tol{)-l=I%h0v6HKh-31$enTWtB9vH0e3qx>);Yk3j4s$wA1Gqy_L&Gc1YBz24F>f>b$5ipm_#6vxT3 zci-%_p31La5w)kt{mZ;2;fI8WG2Ob2P8Qk#s(sgQR90vXhub z!2jk;p=y7v4~+3|q4>B=Xr~eUnw=qyDjw6wSfXN%z0Ju55>lMqb!<)9%CIuHWbBQ! 
zD-7#{SYV&n*W_QhzR2jiMhaZpmY0J~_^c(zmyhfzGAHLj4u{ku1GJTQw#^u(m1UJ{lP3LOsPnYpRdQ*in-#k@W_I;Fw^b8!jgc@j1&#m3o4!5p- z1I8px2efz76Np%4-_0@RI+G+JK}l7iTz;0E{4FK8uegMT3>@sJH}C&AVeS!>n(@M} z*qv;!BFA@egmRc`IvqEyxNxI7Gyky36IK7Hq@$&yVi%p@S~}`C%ufjnB_AEL;El%-;xr`1zTO;qK>07fYr8ev*%1X7cw_ z>jn__`3YSKb@b=QcHuv-$NeK5%o_f&91y}kZUsX42NXaE|9}Dr;r|AO8Pk7R@4f8j SvHmRfYviAOHWqIPJ^DW{RTn7$ literal 11474 zcmb_?byQSe^e+q`B}jLJbVv!p&>$&DcL+mwGc+ht1I*BkfRxgHDXF0wl#qs@MFfVB zkb2kO`~R)?)_QB*nL9J*%s%_mG9~#j z`M(rdV_18_{F)Rf<4U+SlKu^YT8Ubb_w!@OM0rffn75Rp`pDI!)H}UG2%VQ7kWsif;On4;32U2G?tRDeJjM z0s6)~D%|Fgm-XjsnZZ(RKVSk~gY5_#^A#-|Q`;OMHI&P*XBjVHjsXlk-(;|1+MIE6 z-NF1M4+zy|B=&Gh+DyEK9q)gCM4}jTvQQ46J7lrh<7?xgn_g5xU+eOwYG2Euv+RkXm?KqA zI0*@M9_13t@#9#u$_-k$sTY@bNDqi{Q&R?Y?k9N-n?OctmgadS2bE%OqFw%e4vU03 zV&c7A58~!C1unJ&2Ak*K>>q?={BIrxRgdqBXd8;bS0_7`Q|^EN99zk8UEl2M^Gz_` z<*fZqLzW`L!d7KM4z3($83gS0qP09PK&XJ;Lyo^ueSg1(nAxt^a8DrLAKi0Jv)hRT zU_^*W=DUuDr0-8~Mz&zABE6ApquEPRTE+0c1II6KEkjg0Zsi@_{v$b#Sj}HSx;kbM zyeb^FLb)RU0P85tjjrPvL}@9OOSIDjB~J_QXS~KK;igopxNQN+@%j#nzwK*_a7qWO zxP2IhA~TTF&-UW-LS;L{bi!TMC$Ap5oMqW(q|!3Fn~I2#ivY#EUW;}_WkI-)tZL9s zF}DWJ@1w0ok=VzW>rOB+J4o=CFHy%>4~l9#n6J@f@y7S9W9x0VL?=8UxY2?fSs=Ma z!bP=G0tg5YMks35VXTk9<-n=bDo2Zeih8Z8~Mg!0l>qp4cA?9n4 zVAR-;C}wqoD28!$%;YepAbKwvG~LZvr2gG+ zL$8Cr;N(0`>ZOZa9lW1BL4Usa8gk!Ic*?EOj?ljLMpdL+9eRQ?9~1A;jj$>`C6-HSJ%JUZ_ksm6R`!f zR0?x8&rZxa=sZMIVRP$0*+RDl`LOuq3wNr9+n+<^q)GHAF3hT%XL#f=`IYl)XlXQN2q^}zl(qT-N?qrM@_Txs|MdC*Qp$#vbCZ1L-(_8HlweOj zh#BrXX45A&DcYZ?#R-mbjOI-PJVIa?B33BzNOFL-`P(K0hH-h}fBD}>_Eqg@O$2;5 z4dMNXVoVm~rde@I9m&jw5EC*MH7(N*C#wn%eNL9g+~044@uNcjeoIYn;k^`Z3$|YJ zmZh5J3_G{AW%KZ)-)O{Ro}4d&aiJHv6`*6K!9jn@&KCuz%os;>43pwgGuI*-<}#;Z z6?>{SGq0tXrXB{)tFCwS^-oM-SH4(%*RkO@8Y0FtH5DoqdN}CvZzNn_4QYJgb3^gN z&EA4dqQy11vV`EDOR?-Td3Pe{Dmu#uHUZPo{@PO=T-mp7IiEY_=%YN@_{It_x6z2o zY(8#lr>P8(-uFLrHtqrx@dd~fC5_u64SxqX=$njWzWI*VswCMGRQy?Yg6nQnNEBc6 zN?!d({d+AGb-GdY6IQ#|p1Q;=z+z7^Irqpb%6$2(MMY(0zI)3PkJW{Hz7%}eNxl*a z_spl-(YI~5EtyU4z2&hA*8mS5pT=C3ui%-7%WbA;-)9g39*$U!nf1p7;&T8i@uw#& zxm{NC=8rvbRgXu?Oudh(|ovqHdDYKIwrWuT|M&n1pM zV*W{F$TZKxY0riQH?xt{j*e>j^3z$teg5RP4*~_Jdu;7hW-F!GXi`+V-{^x4*54#+ z`TpSW1>5-A_NCrx<)Nf``>#ye{kHg!mYjj3ack)T&9%Q=5$SRfXAfh?qsM$NrY4Fu zlR}+-ocG4hOvg_q2cjp zqkQ69KvFVYbE9k$gs!zx%&jqlYoAB9B^h&A4xXwq~@-PE>aF~ z-0`9L6>V6Z!X}$xCPE*yz>C0j3H7jfSWs!$8*&@mMTVoi`(v9agjJ{_BtRBl`MQtk zBhLSRPFT0>bUpKLVe$X6i#2{u7EgbFRlgEr-aK@f#MBz1Gqe@cT_|JsF=X&_Dr3V! zf59CuYP1%I&sy+awOk?ut8wfW8?H8Bi*V!7)yZ4E7IRdT0=2S_?my`EXfH)1X6NLV zgZYo;0}x4YJ0A4$emXNm`G8=&s2OaIIKcarlhYzlbChX+U3gUWIT)i`_-?DuVVU6Xy%VGkX&5nl3d zE9Y-U5FykZoDYR^`MWx=nQ;1r{06JNN3lKtRuKr0vsbf+9X=g5dsSiC7c>Q0TXkbK zi%dkQH&jh2NbO8vcD>qPQu||riDo>|_bTS$#6FPz>5lv(9TlNoar-RhLBp1sCoAg( zMA|DtppG_D2Dt(7H?JiY0dVG%G&_jHejOYwuFbE0DH}g-0k8)6 z*z42s&_k&xla9qR*Wmke<8I!-u-*FY^eY8UzR0X7p%-YJM5J6TmR^6hXQ_8I}$Wb;yP6E)%> zyF*?u=*wL8<-4zPd-kR(p7Dk+Ihq4)$3a`mOxs6`q=#qh`b1v~3qD=YzViOKF)vD< zhIV^58v^&ToJX$*7Cn(i?Rr!l63ZxlyGJ~?Ach62WHTj8nk8qo&+4q6$lfP zeA{FeitmEMXLFcEw4)4AwJhx^+~hbojcw7V82@}jAMj-nbkT5af6TnmQ`3TBu%CiA zrPK?o?v%Ww?zIR{DXGnV;38-IM#_~LN3QAjEI00p(Dxpv5-c)C3bGTKWjXXz24@es z7qeWX5QcGk=1P6q0@m~8Hp2Lab*oLZT~+>u)wVESY~thg8J=$XE1?y(tV~v2#ixfG zuTXDW564f3C++lhbRk}ns!}`FG1>jRJNe@(r_Ddw@T6!1yk84a_y`;t!o`Ny^+tK8 zeUufVBxeAdmzc@xq`OMqGIpWU)l-^OK4l~>e>RFv?6%#`dZ5RR8?ft7#kbWbI~C3O zV5fl_|50@)9Xp-}KYO#mGXZy`pB^FgdPkByGQS06{9Ya1xV{+A{Xvg`8I?Z&Xpt4b zLjmRb_kZHZzmj}h6A;1CUt3taqOGCnE z^8|)#Kc!BuCm1+65t<1T9C&jsB#x2baGgLnNEV52Vac4ah4)Y&$iWduX(>Jn=|LQq zT0i+h(TOtnJL0FBUl3z_>pTK^phnS=tjxNIU|HPoaW^JkCH^?BlHEN;IlpUZ4nIm! 
zXu}P(p3PIr@3x}2V-pOOM#%)vp0s|Ov#!ZGpB9OAQw&Hy>?a|kvWRzd6fmJxj}4+R z)Bb0V=j>_bA-Xj7PODSG-3tN_$qM|Wu@GEg9a-N#7 zM1O}whmByH_)^6u9N*}zP!m0gy>+$VdB64*tw%Y(7->E*$89L>p9H7Jly!Y&rQjvp zG%kOuT6Sx^!<{ABnl*TNTPgj8sv0uCdtkw^)m`2GjOe2wrP|Hwmr#m8TH$~DizXIQ zh@EGZCaQa@DAMzj!f^-lC=HVQw=TVWqwtIFNS%K2Ljc(0rPL0WD+$zq(BiFJs#<4T zFo|kmmOKFv0jb%bTxG58_%d?8CTl0=qw+>N-0GD+fA`_6ZnhQKAx%+GOn-Di-jYz- zmsWtQuFDhyi4M{W;7)EeVq!$POIr~s_^5nB4EVR0$%@oA%zoZAd>wsm&Etb#MqLg? zm7_P6bFn=Xq;*X`%sw_`nXo0>QE=0O_Wzy=YP9P<<>pQ74eC~HWLGoJGJe67H#~DMgnd{9k13Rh1z;<1&lTzd#n8=O_cIiuRNVR-Lo2xN{}-s%yF$P%j-?m z+~(J?i^Gb-izSwZin;7e3*V?y<)ID|$`|>8kY=T=r~fitp0_i#t(<#IM#emJFkZM) z;`$4w`4N-4cfq9FD{~z0zJ|BRKPxlRiM|RgY19N~oU(3C{JQ?mZNa|RdzHbK2>gZ6 zIM#<@BoU9imsUBGbj7G`E?qR@?Ur{)y!JaS__IGx)dnMEy!0xc!>)jw&<*$3^x-o_ zhQpwIS0*D(AaVP~rsi~{uJ$9MbC~t%kfjmCvwBmi7C(BK&A_Zr{G0|A2SnH~|Fr%j zcAoW$yJn51uVy*GX^^;;>55(N1z6nXVQLZ+WkvBc&2wzYi@RnPE6e)b$Hp_c_C5M!``;uAQ6jH#o!*$k($%GQ0JD7f0bmd5k2{At@apYUp~#x}b9gC(H)=T}Jx9DM zlzLxvW7LxLv2Z8uVtlK}6X%)R2|y^d`BP5$(bn-RpNlXfVngUrYu|&K-bD4#$F08{JtA2*Z3R#|fSMZ9`SlTdx1k?* z0{_{FQzP*e9l8p>e(9YeRn@PLbXlRdjFM#KinxjP?L4F>Q|ZDBjr7uxYu^Ujo~c3nX{D%nLKea`hF= zLR5U?T7Hk8+6rInUOvSoz=<#*_N_`M8Ck_ucGRpELjaq{Kr5gi9E4wuD9F$A+0+5> z18C|VU-FUn`FtuXiV#`Ul6uN*JG85S@Zs;U5uNQd`!;Qf&bWJR_u6G$u6y!@0<2-DJg!??U0 zX&xnqrTsWp>$zlp2qsV?dqh@jI1zNLT$1yzU{zaPQfVzynv>f>b(QrpKm-XRT zwbxw^uLFoDEzPr0&;2f^MfO~F3Ym3R#bq@6qqzt5b;y5gBoD>V=BIh-hbv5V5?sC) zLaZ7Yy(Q`l1qHDcoK~=HJlZS4yAfI1gRv8{D z(QD#2*dzFhwsJzOmo8zECs2tUr37>TOH8sm3XbdlbVKXjk;{>_2F@4}Lq{`4oM!ul zwclfx42MFdnHq}}33Y4@b#2{z;%GUZY#gV_Tz2b!bsGvm`9D8z-2tk})a)AK8g962 z-{x0Eb0Rp&fEu|sCjt4HB<<ic@X*>5ZbL+oiD6xzj6K3w}s`nGL z73x6uB8)6Wb#|+?RPBkmhRu($>xJSF+k~!p*zlo2=r9zIomN-E0cae9YI%x25_+*D z8dWv?ieEcA1l}8-7g>a%gK2*72MsNp$LGgN#?E~9Tu_vjK5rB2$1&83xt=tL&O~2l zad0FXI~hkCI~hd9YG)RDMHm;od;nd0iK0clL0t!r8ope^7J08(Q(cEEf^<_p(%vo@ z#==5+e2}btz^@h^{9TpihG98;CkqN`k6Yh+{Xd zmVKbr>HxaD=K97V=fR({iEQ9s+dG-syK1m_a%+uJw;Q*bKZqGfzOmS5l%mUt-DN)x zif*`+-h*0sA3^DYuooC`%BImiM;E&ekzXQbNCv*sZW_=>KB2R-^6T(mXnVmI*}|&f zDtja}sZ4-OL^2{*2eXUedpx7KBBn6rEN8>fi&uQ+5#o)n`p-IYkoHKNxN!y36s^%c zNrraDAa@`Eqg>GidM3id+u%HO`7%R2T`RtcWlePR)V2t!6WNZy=lFeQl79Fcb;6R! 
zH+7cDo+v_nfuRW#ZbPvhbu%HKAi6ur*pYcKVQJoc(d3o&OHc)i9c@4q7#kb~+aYhV zh%+5%W00gr_>%S%4S~@Z=x9TL}r@T7?~azX>gQgDG3-S_-)p7Hy{P;A6aVU0iZ6jSd9!U14o<)m|6S2>g~8C`?pm z**&pzrX!{?E5XMf9e5V;+&ka6PQaeTji|reO*ydy!yyfwkM;;&ty0D{#XCmv!4J&@ zatxw^3MH_vu)Rndmz}Y3l<{xvzTr-sbyOf2Rb`;GlFKw-gvYZE!#c2nB3_k!f~A1~ zl0057yqGou{(G}PnEwU_6&cO~PaS~5^_daGC|Q(ZR?OQyD6^8$ieNRS_G9Vd;KIvG zWK(i1vnR~deAnVE()t}42TdR-3)}J}q%cb%{rWnNw$VsgF=|$vVC<};4e2|a+5Ka* zl))IIs0>JLA^dWQ++*C0G*?fi&OmkHzmHC&#jK#%8`8rBb-KWK;8|UO3Zu#-R+roV*+v~+~ zG`g3`EtxRn&55qU80sd(A9OY^`TMO!WF}Ha!aPodU)89*HXn3rSO9URHd@dzxP~?L z7;jRonBx{5w|>bPZfl*84v7i@Dh_KF_OG@HKFR+Yfc|hKF%)c&PHbE#7=88AcT;hJ z63U)U5L!8{Fqr*HjU$WD7cngC_*-zo|HQ|$rWDt;-><)@m?R1e_x}B;d9esf&BPWn zrz16v$eWIgeO1N1)Ro|3iT-ek!B(><)~)UB`VcN7>|SgJqvIJroska;%!3iSlP}zAffGWaFIS-*u7bgPGNib z9n2LIT294gN9X5%M?7`5DUNr|FPlgY|Jl`f@leht+sTfqr-x_T_0K1tm5=6kdTa`;3n-&vmMv7JnEo)_STGPOZ#4NB#ogD%JDz2(~_okr=dr?TZdtVBX?1`udbjV>e*rR9dtVsQ>0K14rJ8 z)ZunfqgI+A8N$1=cH;`8SQle0L??D(G=W9(`6%hP3~2w4)5w#-Z1W^xdpr-B++)*`3`#&9)u_j{hfVItPKT_D?W6`)V9jfd1E;>JY$qAvB z{Y^6$2=b7=lKp6)O2VW>Wtp`+VDmv}x#9M)9&1mGl|HXv~ zr_9-ax+pIx&(aCHBl5PO1jspEJ~d;dBLc5#S5&_TB!fXRF$^3q{`QI>f82X8yFl$x zE8&^9=tFC6od;oS;}Mp&=%5=By5Md7e1{Cu*u_$~P(f~djZA@Mzu$=~&Hc)+m4<&Z(law*hPfD@p$V@JS)>kQiaw_e`>kxWxIk8o z1=O!5EAjb23)5WdEk!flMIF+l_P*qed~I)+3`U5^-fYtKR>AttI`sc_H+FqisC~a6 zZu6}^#-Z#n1?l~!+nFz4e<$SP+q2btXy4B0z^f)?S-x@wI)D;r!{{BFiI)`XeaI<< z#)bxlL6jKn)LF88E+tO9Qqf~()2KGi`;0uTXbzsa)sOQYL4|?BBF={9qVB}#me%f} zD2lj0*KL0}sMu7fxO_N0dT`z-2A+VA_2Wc{X0jZ>zER!yUL8mBuW_ag+)R&Sz2l}l zpD7e3nmyHLBm;=?HrlySo}C=Fu5XRx?+fmJULE$}B57VaBocFE(#MJBg_pl62(vD3 zus<5oKf=K)p%BaePU>3GowUo7^6Xm0oWwej&@o9!%!yF4*6V6*H4Xc1`heA*gCDUg zuhi9q9jk|cWu5{&%akweYg$>bzNV;`zXx@}zRG0rDFZpjSs0j_UA#Cq=1AVDz}-*U zhpA^Uf?comXee$gIIaP<@K~=BE&1_Qt%7d0g@#@5V!__Hn^eAi4lo~kR{X+z(&*sS0bTu8TzsL@o>>|21)@kU+ zNFn^~z%B6G0;iuq3NL*3(!a>PIJv3i%_w$Z=L%dw;{WXVqA1Q<~EBwvCRtRJPDz;SKD??qT zP%0f<$qV1Yy}8B?f9cKO?lWvcTRg?4w8GD#VRRXHB3P}LHbrpyo;(zIN zyt=X3`dF!Kq`#?&M=h}B7{`Du7FV*F(W_vX4Eq?g$o+$TqR^%1uKCK5#^RN;`Y(yh zuUHC{YWnrd0mjUIDqD63jFpBq{lOfz0~zFzML~s^q~>A%df=(NedyXcR*@I@81L|; z@zK6P&;Z0&;_e0re+KYdZuAgf9(R>{pGWizpbvr^(Cg6u2Y z>zty|4i9Mfc9XabDic9;?DLOB?`cW2XwSflekpiiG z>|Lt?V~||^%vlGY`wO5&Oukh$Ta~VkZ^TL-B#vJ{#WEFP(#)o7B}n+0zkd0w!*#6b z6^%q@?SFkG=d#w{(R7sh@W$XQaoo*a_50PZnk(n%2J z+=H$aAlHq(b6@+{aSo$7JR(9EOyVIU>oKq$tDA>{II`T7|55`CS=cXphf4`QLc!5D z2^7k@p`gGSou|AnO=<1^%1pIK+gM(^j9_>BEuygw@_jHs5dG1B)>-+$u2u$;*Tvst76Jga+}=Ow|qqIBhyhk z^QBaMz82RNe=V@Y2>iNl(0Z9f--sr7G-n5dmP8v6O_IOAS?-9>75yZiJ0?dei1-0~-=w&!t zC4fA06T)eJ63xyr{(9~ku@xL)A1!p{- z^Xsr8f)z~cVO6GBB6i~rD6y41E5k955A6x)t!5=kwKUknPYZ30Nz!EB*- z-Zpf0r-Rv_qwFeE_CG{stK84hoar1^f)i-$tT66jfAhW+V%(3@=%3x#AM1RxR1;8) z`hA;oE_8a->me$H_NL0JwfTYnzt)8;^URy6HE{9;=%$5(PU^ax?^=?mS~w{-vr{= z_?U-m(cb=5a$1v#ku81dhbudd&(}D{5n4DeERM(2+Cn4MCkc=I$9&%0Q-vfgs=YX| zi!4*iKJweXzBa}wnSFGl$MNEI6tb22167(=O+8`k z2*vR0^lg~PMPZgood<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/rnadnavar v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/test_full.config b/conf/test_full.config index f897552..580b89c 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,8 +10,6 @@ ---------------------------------------------------------------------------------------- */ -cleanup = true - params { config_profile_name = 'Full test profile' 
     config_profile_description = 'Full test dataset to check pipeline function'
diff --git a/docs/usage.md b/docs/usage.md
index f5c572d..f72864e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -57,7 +57,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run nf-core/rnadnavar --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
+nextflow run nf-core/rnadnavar --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
 ```
 
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@@ -76,7 +76,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than
 Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
 
 > ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
-> The above pipeline run specified with a params file in yaml format:
+
+The above pipeline run specified with a params file in yaml format:
 
 ```bash
 nextflow run nf-core/rnadnavar -profile docker -params-file params.yaml
 ```
 
 with `params.yaml` containing:
 
 ```
 input: './samplesheet.csv'
 outdir: './results/'
 genome: 'GRCh37'
-input: 'data'
 <...>
 ```
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
deleted file mode 100755
index 9b34804..0000000
--- a/lib/NfcoreSchema.groovy
+++ /dev/null
@@ -1,530 +0,0 @@
-//
-// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
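The 530-line `lib/NfcoreSchema.groovy` helper whose deletion starts here is superseded by the nf-validation Nextflow plugin that this patch wires up further on (the `plugins { id 'nf-validation' }` block added to `nextflow.config` and the new `include { validateParameters; paramsHelp } from 'plugin/nf-validation'` in `main.nf`). A minimal sketch of the replacement wiring, using only the two plugin functions this patch itself imports; the example command string is a placeholder:

```groovy
// Sketch of main.nf after this patch: schema-driven help and validation
// come from the nf-validation plugin instead of lib/NfcoreSchema.groovy.
include { validateParameters; paramsHelp } from 'plugin/nf-validation'

// Placeholder example command shown in the --help output.
def String command = "nextflow run nf-core/rnadnavar --input samplesheet.csv --genome GRCh37 -profile docker"

if (params.help) {
    // Renders parameter help from nextflow_schema.json
    log.info paramsHelp(command)
    System.exit(0)
}

if (params.validate_params) {
    // Validates params against nextflow_schema.json, honouring the
    // validation* options defined at the end of this patch
    validateParameters()
}
```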
-// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def 
defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && 
params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74..408951a 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index f819664..9c6c835 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -20,40 +20,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -62,14 +33,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index 78c5233..cbd3c30 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -11,6 +11,7 @@ class WorkflowRnadnavar { // Check and validate parameters // public static void initialise(params, log) { + genomeExistsError(params, log) @@ -46,15 +47,57 @@ class WorkflowRnadnavar { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. 
params["run_xyz"] ? "

  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi:
    ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/main.nf b/main.nf index fb6e1e4..35e61be 100644 --- a/main.nf +++ b/main.nf @@ -25,6 +25,22 @@ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/nextflow.config b/nextflow.config index 5ea0131..4e8ee29 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,12 +12,12 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - - // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + + // MultiQC options multiqc_config = null multiqc_title = null @@ -27,7 +27,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -36,19 +35,15 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -56,6 +51,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -75,13 +77,11 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/rnadnavar profiles: ${params.custom_config_base}/pipeline/rnadnavar.config") // } - - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' - cleanup = false + cleanup = false } conda { conda.enabled = true @@ -104,7 +104,6 @@ profiles { } docker { docker.enabled = true - docker.registry = 'quay.io' docker.userEmulation = true conda.enabled = false singularity.enabled = false @@ -128,7 +127,6 @@ profiles { } podman { podman.enabled = true - podman.registry = 'quay.io' conda.enabled = false docker.enabled = false singularity.enabled = false @@ -172,6 
+170,18 @@ profiles { test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -179,8 +189,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -198,19 +206,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -219,7 +227,7 @@ manifest { homePage = 'https://github.com/nf-core/rnadnavar' description = """Pipeline for RNA and DNA integrated analysis for somatic mutation detection""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' + nextflowVersion = '!>=23.04.0' version = '1.0dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 2ed8507..51e46af 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,9 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/rnadnavar/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -57,6 +57,7 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", @@ -157,7 +158,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -174,12 +175,14 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "publish_dir_mode": { @@ -203,6 +206,7 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", + "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -217,6 +221,7 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", + "default": false, "hidden": true }, "hook_url": { @@ -228,6 +233,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -243,13 +249,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -257,12 +256,29 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "default": false, + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "default": false, + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
} } } diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index d43b66b..a6d97eb 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -1,21 +1,19 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' -// Validate input parameters -WorkflowRnadnavar.initialise(params, log) +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) -// TODO nf-core: Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +WorkflowRnadnavar.initialise(params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -69,9 +67,12 @@ workflow RNADNAVAR { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - ch_input + file(params.input) ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! There is currently no tooling to help you write a sample sheet schema // // MODULE: Run FastQC @@ -91,7 +92,7 @@ workflow RNADNAVAR { workflow_summary = WorkflowRnadnavar.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() From 6f3d36b48e9edf8c2a4d8fffb78523eae9f0b975 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Thu, 17 Aug 2023 09:56:11 +0100 Subject: [PATCH 08/56] Removed snpeff bits, we only annotate with VEP. Adapt parameters according to skip_tools as well. 
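The skip_tools adaptation mentioned above replaces the individual skip_* flags with a single comma-separated --skip_tools parameter that each step queries before running. A minimal sketch of that guard, assuming a value such as 'baserecalibrator,multiqc' (the MULTIQC selector and the example value are illustrative, not the pipeline's exact configuration):

    // Sketch only: gate a step on a comma-separated --skip_tools value
    params.skip_tools = null   // e.g. --skip_tools 'baserecalibrator,multiqc'

    process {
        withName: 'MULTIQC' {
            // run the step only when it is not listed in --skip_tools
            ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('multiqc')) }
        }
    }

The same guard pattern appears in the conf/modules.config hunks below, e.g. for markduplicates and baserecalibrator.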
--- assets/multiqc_config.yml | 2 +- conf/modules.config | 26 ++--- nextflow.config | 16 +--- nextflow_schema.json | 94 +++++++++++-------- .../nf-core/annotation/snpeff/main.nf | 28 ------ .../nf-core/annotation/snpeff/meta.yml | 29 ------ subworkflows/nf-core/snpeff_annotate.nf | 34 ------- tests/test_annotation.yml | 23 +---- tests/test_skipbasecalib.yml | 2 +- workflows/rnadnavar.nf | 1 - 10 files changed, 65 insertions(+), 190 deletions(-) delete mode 100644 subworkflows/nf-core/annotation/snpeff/main.nf delete mode 100644 subworkflows/nf-core/annotation/snpeff/meta.yml delete mode 100644 subworkflows/nf-core/snpeff_annotate.nf diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index c430403..093ce79 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -42,7 +42,7 @@ module_order: name: "Read Alignment (STAR)" - picard: name: "GATK4 MarkDuplicates" - info: " metrics generated either by GATK4 MarkDuplicates or EstimateLibraryComplexity (with --use_gatk_spark)." + info: " metrics generated either by GATK4 MarkDuplicates." - samtools: name: "Samtools Flagstat" - mosdepth: diff --git a/conf/modules.config b/conf/modules.config index d157316..4b688b4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -325,10 +325,8 @@ process { } withName: ".*:GATK4_MAPPING:(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { - // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof // However if it's skipped, reads need to be coordinate-sorted - // Only name sort if Spark for Markduplicates + duplicate marking is not skipped - ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } + ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" } publishDir = [ mode: params.publish_dir_mode, @@ -650,21 +648,9 @@ process { // ] // } - if (params.use_gatk_spark && params.use_gatk_spark.split(',').contains('markduplicates')) { - withName: '.*:GATK_PREPROCESSING:MARKDUPLICATES_SPARK:SAMTOOLS_CRAMTOBAM'{ - ext.prefix = { "${meta.id}.md" } - ext.when = { params.save_output_as_bam} - publishDir = [ - enabled: params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/markduplicates/${meta.patient}/${meta.id}/" }, - pattern: "*{md.bam,md.bam.bai}" - ] - } - } // PREPARE_RECALIBRATION - withName: 'BASERECALIBRATOR|BASERECALIBRATOR_SPARK' { + withName: 'BASERECALIBRATOR' { ext.args = { meta.status == 2 ? "--lenient" : "" } ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } publishDir = [ @@ -687,7 +673,7 @@ process { // RECALIBRATE - withName: 'APPLYBQSR|APPLYBQSR_SPARK' { + withName: 'APPLYBQSR' { ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } publishDir = [ enabled: !params.save_output_as_bam, @@ -700,7 +686,7 @@ process { if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { - withName: '.*:GATK_PREPROCESSING:(RECALIBRATE|RECALIBRATE_SPARK):MERGE_INDEX_CRAM:MERGE_CRAM' { + withName: '.*:GATK_PREPROCESSING:RECALIBRATE:MERGE_INDEX_CRAM:MERGE_CRAM' { ext.prefix = { "${meta.id}.recal" } ext.when = { meta.num_intervals > 1 } publishDir = [ @@ -711,7 +697,7 @@ process { ] } - withName: '.*:GATK_PREPROCESSING:(RECALIBRATE|RECALIBRATE_SPARK):MERGE_INDEX_CRAM:INDEX_CRAM' { + withName: '.*:GATK_PREPROCESSING:RECALIBRATE:MERGE_INDEX_CRAM:INDEX_CRAM' { publishDir = [ enabled: !params.save_output_as_bam, mode: params.publish_dir_mode, @@ -1326,7 +1312,7 @@ process { path: { "${params.outdir}/reports"}, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: !params.skip_multiqc + enabled: !(params.tools && (params.skip_tools.split(',').contains('multiqc'))) ] errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} } diff --git a/nextflow.config b/nextflow.config index b14d378..eda1e07 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,7 +32,6 @@ params { // Alignment aligner = 'bwa-mem' // Only STAR is currently supported. - use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default star_twopass = true star_ignore_sjdbgtf = false // Ignore GTF file while creating index or alignment by STAR star_max_memory_bamsort = 0 // STAR parameter limitBAMsortRAM to specify maximum RAM for sorting BAM @@ -48,12 +47,6 @@ params { dragmap = null hisat2_build_memory = null - // Skip steps - skip_baserecalibration = false - skip_intervallisttools = false - skip_variantfiltration = false - skip_variantannotation = false - skip_multiqc = false // Preprocessing of alignment remove_duplicates = false @@ -95,16 +88,9 @@ params { ignore_soft_clipped_bases = true // Variant annotation tools = null // No default Variant_Calling or Annotation tools - annotate_tools = null // List of annotation tools to run - snpeff or vep or merge + annotate_tools = null // List of annotation tools to run - only vep available annotation_cache = false // Annotation cache disabled - cadd_cache = null // CADD cache disabled - cadd_indels = null // No CADD InDels file - cadd_indels_tbi = null // No CADD InDels index - cadd_wg_snvs = null // No CADD SNVs file - cadd_wg_snvs_tbi = null // No CADD SNVs index genesplicer = null // genesplicer disabled within VEP - snpeff_cache = null // No directory for snpEff cache - snpeff_db = null // No default db for snpeff vep_cache = null // No directory for VEP cache vep_genome = null // No default genome for VEP // vep_cache_version = '106' // No default cache version for VEP diff --git a/nextflow_schema.json b/nextflow_schema.json index 968ae1b..19e2582 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -333,8 +333,8 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for variant calling and/or for annotation.", - "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, HaplotypeCaller, mpileup, Strelka\n- 
Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((freebayes|manta|merge|sage|mutect2|snpeff|strelka|vep|consensus|filtering|normalise|normalize|rna_filtering|vcf_qc|vcf2maf|preprocessing|second_run)*,?)*$" + "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2, SAGE\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- VEP (only).\n\n> **NB** As RNADNAVAR will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", + "pattern": "^((freebayes|manta|merge|sage|mutect2|strelka|vep|consensus|filtering|normalise|normalize|rna_filtering|vcf_qc|vcf2maf|preprocessing|second_run)*,?)*$" }, "skip_tools": { "type": "string", @@ -504,79 +504,93 @@ "annotate_tools": { "type": "string", "fa_icon": "fas fa-hammer", - "description": "Specify which tools RNADNAvar should use for annotating variants. Values can be 'snpeff', 'vep' or 'merge'. If you specify 'merge', the pipeline runs both snpeff and VEP annotation.", + "description": "Specify which tools RNADNAvar should use for annotating variants. Only VEP implemented.", "help_text": "List of tools to be used for variant annotation.", - "pattern": "^((snpeff|vep|merge)*(,)*)*$", + "pattern": "^((vep)*(,)*)*$", "hidden": true }, "annotation_cache": { "type": "boolean", "fa_icon": "fas fa-database", "description": "Enable the use of cache for annotation", - "help_text": "And disable usage of snpeff and vep specific containers for annotation\n\nTo be used with `--snpeff_cache` and/or `--vep_cache`", + "help_text": "And disable usage of vep specific containers for annotation\n\nTo be used with `--vep_cache`", "hidden": true }, - "cadd_cache": { + "genesplicer": { "type": "boolean", - "fa_icon": "fas fa-database", - "description": "Enable CADD cache.", + "fa_icon": "fas fa-gavel", + "description": "Enable the use of the VEP GeneSplicer plugin.", "hidden": true }, - "cadd_indels": { - "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to CADD InDels file.", - "hidden": true + "vep_loftee": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP LOFTEE plugin.", + "hidden": true, + "help_text": "For details, see [here](https://github.com/konradjk/loftee)." 
}, - "cadd_indels_tbi": { + "vep_cache": { "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to CADD InDels index.", + "fa_icon": "fas fa-database", + "description": "Path to VEP cache", + "help_text": "To be used with `--annotation_cache`", "hidden": true }, - "cadd_wg_snvs": { + "vep_dbnsfp": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP dbNSFP plugin.", + "hidden": true, + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)." + }, + "vep_spliceai": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP SpliceAI plugin.", + "hidden": true, + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceai)." + }, + "spliceai_snv": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to CADD SNVs file.", + "description": "Path to spliceai raw scores snv file.", + "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "cadd_wg_snvs_tbi": { + "spliceai_snv_tbi": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to CADD SNVs index.", - "hidden": true - }, - "genesplicer": { - "type": "boolean", - "fa_icon": "fas fa-gavel", - "description": "Enable the use of the VEP GeneSplicer plugin.", + "description": "Path to spliceai raw scores snv tabix indexed file.", + "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "snpeff_cache": { + "spliceai_indel": { "type": "string", - "fa_icon": "fas fa-database", - "description": "Path to snpEff cache", - "help_text": "To be used with `--annotation_cache`", + "fa_icon": "fas fa-file", + "description": "Path to spliceai raw scores indel file.", + "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "vep_cache": { + "spliceai_indel_tbi": { "type": "string", - "fa_icon": "fas fa-database", - "description": "Path to VEP cache", - "help_text": "To be used with `--annotation_cache`", + "fa_icon": "fas fa-file", + "description": "Path to spliceai raw scores indel tabix indexed file.", + "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "vep_dbnsfp": { + "vep_spliceregion": { "type": "boolean", "fa_icon": "fas fa-database", - "description": "Enable the use of the VEP dbNSFP plugin.", + "description": "Enable the use of the VEP SpliceRegion plugin.", "hidden": true, - "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)." + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/)." }, - "snpeff_db": { + "vep_custom_args": { "type": "string", - "fa_icon": "fas fa-database", - "description": "snpEff DB version" + "fa_icon": "fas fa-toolbox", + "description": "Add an extra custom argument to VEP.", + "hidden": true, + "help_text": "Using this params you can add custom args to VEP." 
}, "vep_genome": { "type": "string", diff --git a/subworkflows/nf-core/annotation/snpeff/main.nf b/subworkflows/nf-core/annotation/snpeff/main.nf deleted file mode 100644 index 54bfb9c..0000000 --- a/subworkflows/nf-core/annotation/snpeff/main.nf +++ /dev/null @@ -1,28 +0,0 @@ -// -// Run SNPEFF to annotate VCF files -// - -include { SNPEFF } from '../../../../modules/nf-core/modules/snpeff/main' -include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' - -workflow ANNOTATION_SNPEFF { - take: - vcf // channel: [ val(meta), vcf ] - snpeff_db // value: db version to use - snpeff_cache // path: /path/to/snpeff/cache (optionnal) - - main: - ch_versions = Channel.empty() - - SNPEFF(vcf, snpeff_db, snpeff_cache) - TABIX_BGZIPTABIX(SNPEFF.out.vcf) - - // Gather versions of all tools used - ch_versions = ch_versions.mix(SNPEFF.out.versions.first()) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) - - emit: - vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = SNPEFF.out.report // path: *.html - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/annotation/snpeff/meta.yml b/subworkflows/nf-core/annotation/snpeff/meta.yml deleted file mode 100644 index e077362..0000000 --- a/subworkflows/nf-core/annotation/snpeff/meta.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: annotation_snpeff -description: | - Perform annotation with snpeff and bgzip + tabix index the resulting VCF file -keywords: - - snpeff -modules: - - snpeff - - tabix/bgziptabix -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - input: - type: vcf - description: list containing one vcf file - pattern: "[ *.{vcf,vcf.gz} ]" -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - vcf_tbi: - type: file - description: Compressed vcf file + tabix index - pattern: "[ *{.vcf.gz,vcf.gz.tbi} ]" -authors: - - "@maxulysse" diff --git a/subworkflows/nf-core/snpeff_annotate.nf b/subworkflows/nf-core/snpeff_annotate.nf deleted file mode 100644 index 9a8b65b..0000000 --- a/subworkflows/nf-core/snpeff_annotate.nf +++ /dev/null @@ -1,34 +0,0 @@ -// -// Run snpEff to annotate VCF files -// - -include { SNPEFF } from '../../modules/nf-core/modules/snpeff/main' -include { TABIX_BGZIPTABIX } from '../../modules/nf-core/modules/tabix/bgziptabix/main' - -workflow SNPEFF_ANNOTATE { - take: - vcf // channel: [ val(meta), vcf, tbi ] - snpeff_db // value: version of db to use - snpeff_cache // path: path_to_snpeff_cache (optionnal) - - main: - - ch_versions = Channel.empty() - - SNPEFF ( - vcf, - snpeff_db, - snpeff_cache - ) - ch_versions = ch_versions.mix(SNPEFF.out.versions.first()) - - TABIX_BGZIPTABIX ( - SNPEFF.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) - - emit: - vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf, tbi ] - reports = SNPEFF.out.report // path: *.html - versions = ch_versions // channel: [versions.yml] -} diff --git a/tests/test_annotation.yml b/tests/test_annotation.yml index b6fb529..14ad4f1 100644 --- a/tests/test_annotation.yml +++ b/tests/test_annotation.yml @@ -1,27 +1,8 @@ -- name: Run snpEff - command: nextflow run main.nf -profile test,docker --annotate_tools snpeff - tags: - - annotation - - snpeff - files: - - path: results/variant_annotation/GM12878/GM12878_snpEff.ann.vcf.gz - - path: 
results/variant_annotation/GM12878/GM12878_snpEff.ann.vcf.gz.tbi - - path: results/reports/multiqc_report.html - name: Run VEP - command: nextflow run main.nf -profile test,docker --annotate_tools vep --skip_multiqc + command: nextflow run main.nf -profile test,docker --tools vep --skip_tools 'multiqc' tags: - annotation - vep files: - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz - - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz.tbi -- name: Run snpEff followed by VEP - command: nextflow run main.nf -profile test,docker --annotate_tools merge --skip_multiqc - tags: - - annotation - - merge - - snpeff - - vep - files: - - path: results/variant_annotation/GM12878/GM12878_snpEff_VEP.ann.vcf.gz - - path: results/variant_annotation/GM12878/GM12878_snpEff_VEP.ann.vcf.gz.tbi + - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz.tbi \ No newline at end of file diff --git a/tests/test_skipbasecalib.yml b/tests/test_skipbasecalib.yml index 7b5b213..6c2ccc0 100644 --- a/tests/test_skipbasecalib.yml +++ b/tests/test_skipbasecalib.yml @@ -1,5 +1,5 @@ - name: Run pipeline without base calibration step - command: nextflow run main.nf -profile test,docker --skip_baserecalibration true + command: nextflow run main.nf -profile test,docker --skip_tools 'baserecalibrator' tags: - skipbasecalib - preprocessing diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 8624491..955383c 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -31,7 +31,6 @@ def checkPathParamList = [ params.known_indels, params.known_indels_tbi, params.multiqc_config, - params.snpeff_cache, params.vep_cache, params.star_index, params.hisat2_index, From 4e1779dd7eb259277683e6026a7e24132e1cdfd5 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Thu, 17 Aug 2023 19:27:00 +0100 Subject: [PATCH 09/56] Huge effort to finally update modules done :) --- .nf-core.yml | 14 + conf/{ => modules}/modules.config | 0 conf/slurm.config | 63 --- conf/tcga.config | 64 ---- conf/tcga2.config | 64 ---- conf/tcga_train_set.config | 95 ----- main.nf | 1 - modules.json | 358 +++++++++++++----- modules/nf-core/bcftools/sort/main.nf | 39 +- modules/nf-core/bcftools/stats/main.nf | 42 +- modules/nf-core/bcftools/stats/meta.yml | 30 ++ modules/nf-core/bwa/index/main.nf | 30 +- modules/nf-core/bwa/index/meta.yml | 10 + modules/nf-core/bwa/mem/main.nf | 22 +- modules/nf-core/bwa/mem/meta.yml | 5 + modules/nf-core/bwamem2/index/main.nf | 12 +- modules/nf-core/bwamem2/index/meta.yml | 14 +- modules/nf-core/bwamem2/mem/main.nf | 10 +- modules/nf-core/bwamem2/mem/meta.yml | 10 + modules/nf-core/cat/cat/main.nf | 4 +- modules/nf-core/cat/cat/meta.yml | 4 +- modules/nf-core/cat/fastq/main.nf | 41 +- modules/nf-core/cat/fastq/meta.yml | 3 +- .../custom/dumpsoftwareversions/main.nf | 2 +- .../templates/dumpsoftwareversions.py | 3 +- modules/nf-core/fastp/main.nf | 43 ++- modules/nf-core/fastp/meta.yml | 11 +- modules/nf-core/fastqc/main.nf | 8 +- modules/nf-core/fgbio/fastqtobam/main.nf | 41 -- modules/nf-core/fgbio/fastqtobam/meta.yml | 47 --- modules/nf-core/freebayes/main.nf | 64 +--- modules/nf-core/freebayes/meta.yml | 2 +- modules/nf-core/gatk4/applybqsr/main.nf | 12 +- modules/nf-core/gatk4/applybqsr/meta.yml | 17 +- modules/nf-core/gatk4/applybqsrspark/main.nf | 12 +- modules/nf-core/gatk4/applybqsrspark/meta.yml | 17 +- modules/nf-core/gatk4/applyvqsr/main.nf | 24 +- .../nf-core/gatk4/baserecalibrator/main.nf | 12 +- .../nf-core/gatk4/baserecalibrator/meta.yml | 16 +- 
.../gatk4/baserecalibratorspark/main.nf | 10 +- .../gatk4/baserecalibratorspark/meta.yml | 18 +- .../nf-core/gatk4/bedtointervallist/main.nf | 16 +- .../nf-core/gatk4/bedtointervallist/meta.yml | 7 + .../gatk4/calculatecontamination/main.nf | 12 +- .../gatk4/createsequencedictionary/main.nf | 20 +- .../gatk4/createsequencedictionary/meta.yml | 41 +- .../gatk4/estimatelibrarycomplexity/main.nf | 12 +- .../nf-core/gatk4/filtermutectcalls/main.nf | 39 +- .../nf-core/gatk4/filtermutectcalls/meta.yml | 24 +- modules/nf-core/gatk4/filtersamreads/main.nf | 39 -- modules/nf-core/gatk4/filtersamreads/meta.yml | 50 --- .../gatk4/filtervarianttranches/main.nf | 14 +- .../gatk4/filtervarianttranches/meta.yml | 17 +- .../nf-core/gatk4/gatherbqsrreports/main.nf | 12 +- .../gatk4/gatherpileupsummaries/main.nf | 16 +- .../gatk4/gatherpileupsummaries/meta.yml | 2 +- .../nf-core/gatk4/genomicsdbimport/main.nf | 50 ++- modules/nf-core/gatk4/genotypegvcfs/main.nf | 25 +- .../nf-core/gatk4/getpileupsummaries/main.nf | 18 +- .../nf-core/gatk4/getpileupsummaries/meta.yml | 15 + .../nf-core/gatk4/indexfeaturefile/main.nf | 12 +- .../nf-core/gatk4/intervallisttobed/main.nf | 12 +- .../nf-core/gatk4/intervallisttobed/meta.yml | 2 + .../nf-core/gatk4/intervallisttools/main.nf | 12 +- .../gatk4/learnreadorientationmodel/main.nf | 12 +- modules/nf-core/gatk4/markduplicates/main.nf | 2 +- modules/nf-core/gatk4/markduplicates/meta.yml | 2 +- .../nf-core/gatk4/markduplicatesspark/main.nf | 12 +- .../gatk4/markduplicatesspark/meta.yml | 10 +- .../nf-core/gatk4/mergemutectstats/main.nf | 12 +- modules/nf-core/gatk4/mergevcfs/main.nf | 78 ++-- modules/nf-core/gatk4/mergevcfs/meta.yml | 9 +- modules/nf-core/gatk4/mutect2/main.nf | 108 +++--- modules/nf-core/gatk4/mutect2/meta.yml | 20 +- .../nf-core/gatk4/splitncigarreads/main.nf | 65 ++-- .../nf-core/gatk4/variantfiltration/main.nf | 29 +- .../nf-core/gatk4/variantfiltration/meta.yml | 17 + .../nf-core/gatk4/variantrecalibrator/main.nf | 26 +- modules/nf-core/gffread/main.nf | 4 +- modules/nf-core/gunzip/main.nf | 26 +- modules/nf-core/gunzip/meta.yml | 35 +- modules/nf-core/hisat2/align/main.nf | 35 +- modules/nf-core/hisat2/align/meta.yml | 11 + modules/nf-core/hisat2/build/main.nf | 25 +- modules/nf-core/hisat2/build/meta.yml | 28 +- .../nf-core/hisat2/extractsplicesites/main.nf | 14 +- .../hisat2/extractsplicesites/meta.yml | 11 + modules/nf-core/manta/somatic/main.nf | 12 +- modules/nf-core/modules/fastqc/main.nf | 59 --- modules/nf-core/modules/multiqc/main.nf | 49 --- modules/nf-core/mosdepth/main.nf | 6 +- modules/nf-core/mosdepth/meta.yml | 2 +- modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 14 +- modules/nf-core/sage/main.nf | 92 ----- modules/nf-core/sage/meta.yml | 77 ---- modules/nf-core/samblaster/main.nf | 6 +- modules/nf-core/samblaster/meta.yml | 4 +- modules/nf-core/samtools/bam2fq/main.nf | 2 +- modules/nf-core/samtools/bam2fq/meta.yml | 2 +- modules/nf-core/samtools/collatefastq/main.nf | 2 +- .../nf-core/samtools/collatefastq/meta.yml | 2 +- modules/nf-core/samtools/convert/main.nf | 2 +- modules/nf-core/samtools/convert/meta.yml | 2 +- modules/nf-core/samtools/faidx/main.nf | 2 +- modules/nf-core/samtools/faidx/meta.yml | 2 +- modules/nf-core/samtools/flagstat/main.nf | 24 +- modules/nf-core/samtools/flagstat/meta.yml | 2 +- modules/nf-core/samtools/idxstats/main.nf | 25 +- modules/nf-core/samtools/idxstats/meta.yml | 2 +- modules/nf-core/samtools/index/main.nf | 2 +- modules/nf-core/samtools/index/meta.yml | 2 +- 
modules/nf-core/samtools/merge/main.nf | 2 +- modules/nf-core/samtools/merge/meta.yml | 2 +- modules/nf-core/samtools/mpileup/main.nf | 9 +- modules/nf-core/samtools/mpileup/meta.yml | 2 +- modules/nf-core/samtools/sort/main.nf | 15 +- modules/nf-core/samtools/sort/meta.yml | 6 +- modules/nf-core/samtools/stats/main.nf | 2 +- modules/nf-core/samtools/stats/meta.yml | 2 +- modules/nf-core/samtools/view/main.nf | 2 +- modules/nf-core/samtools/view/meta.yml | 2 +- modules/nf-core/snpeff/meta.yml | 46 --- modules/nf-core/star/align/main.nf | 64 +++- modules/nf-core/star/align/meta.yml | 8 + modules/nf-core/star/genomegenerate/main.nf | 41 +- modules/nf-core/strelka/somatic/main.nf | 80 ++-- modules/nf-core/tabix/bgziptabix/main.nf | 2 +- modules/nf-core/tabix/bgziptabix/meta.yml | 2 +- modules/nf-core/tabix/tabix/main.nf | 6 +- modules/nf-core/untar/main.nf | 42 +- modules/nf-core/untar/meta.yml | 9 +- modules/nf-core/unzip/main.nf | 20 +- modules/nf-core/unzip/meta.yml | 2 + modules/nf-core/vcflib/filter/main.nf | 34 -- modules/nf-core/vcflib/filter/meta.yml | 45 --- modules/nf-core/vcftools/main.nf | 6 +- modules/nf-core/vcftools/meta.yml | 5 +- nextflow.config | 11 - nextflow_schema.json | 343 ++++++++++++----- .../fgbio_create_umi_consensus/main.nf | 67 ---- 141 files changed, 1804 insertions(+), 1825 deletions(-) rename conf/{ => modules}/modules.config (100%) delete mode 100644 conf/slurm.config delete mode 100644 conf/tcga.config delete mode 100644 conf/tcga2.config delete mode 100644 conf/tcga_train_set.config delete mode 100644 modules/nf-core/fgbio/fastqtobam/main.nf delete mode 100644 modules/nf-core/fgbio/fastqtobam/meta.yml delete mode 100644 modules/nf-core/gatk4/filtersamreads/main.nf delete mode 100644 modules/nf-core/gatk4/filtersamreads/meta.yml delete mode 100644 modules/nf-core/modules/fastqc/main.nf delete mode 100644 modules/nf-core/modules/multiqc/main.nf delete mode 100644 modules/nf-core/sage/main.nf delete mode 100644 modules/nf-core/sage/meta.yml delete mode 100644 modules/nf-core/snpeff/meta.yml delete mode 100644 modules/nf-core/vcflib/filter/main.nf delete mode 100644 modules/nf-core/vcflib/filter/meta.yml delete mode 100644 subworkflows/nf-core/fgbio_create_umi_consensus/main.nf diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..d0c71e0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,15 @@ repository_type: pipeline +lint: + files_exist: + - conf/modules.config + - conf/test.config + - conf/test_full.config + files_unchanged: + - assets/multiqc_config.yml + - assets/nf-core-sarek_logo_light.png + - docs/images/nf-core-sarek_logo_dark.png + - docs/images/nf-core-sarek_logo_light.png + - lib/NfcoreTemplate.groovy + - lib/NfcoreSchema.groovy + schema_params: False + template_strings: False \ No newline at end of file diff --git a/conf/modules.config b/conf/modules/modules.config similarity index 100% rename from conf/modules.config rename to conf/modules/modules.config diff --git a/conf/slurm.config b/conf/slurm.config deleted file mode 100644 index 56327d9..0000000 --- a/conf/slurm.config +++ /dev/null @@ -1,63 +0,0 @@ -singularity { - enabled = true - autoMounts = true -} -process { - executor = 'slurm' - clusterOptions = "--account caldas-sl2-cpu --partition cclake" - pollInterval = '1 min' - queueStatInterval = '5 min' - queueSize = 2000 -// cache = 'lenient' -} - - -params { - config_profile_name = 'Slurm test profile' - config_profile_description = 'Minimal real dataset to check pipeline function' - max_time = 32.h - max_cpus = 24 - max_memory = 
600.GB - - // Input data - step = "mapping" - input = '/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/assets/samplesheet_mytest.csv' - outdir = 'results' - genome = 'GRCh38' - wes = true - fasta = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/genome/hg38/chr3.fa' - gtf = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens/NCBI/GRCh38Decoy/Annotation/Genes.gencode/gencode.v33.annotation.chr3.gtf' - known_indels = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz' - known_indels_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi' - pon = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz' - pon_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz.tbi' - germline_resource = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz' - germline_resource_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz.tbi' - nucleotides_per_second = 1000 - star_index = null // set it to null so it generates one - tools = 'sage,manta,mutect2,strelka,vep' - vep_include_fasta = true - vep_cache = '/rds/project/rds-upenYb9rdtk/Work/rm889/software/vep/ensembl-vep/' - vep_cache_version = '107' - vep_genome = 'GRCh38' - vep_species = "homo_sapiens" - vep_output_format = "vcf" - dbnsfp = null - dbnsfp_consequence = null - vep_loftee = null - vep_spliceai = null - spliceai_snv = null - spliceai_indel = null - vep_spliceregion = null - vep_dbnsfp = null - ignore_soft_clipped_bases = true - whitelist = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/catalog_of_validated_oncogenic_mutations_final.hg38.bed" - darned = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/rna_edits/Darned.chrv2.hg38.bed" - radar = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/rna_edits/Radar.chrv2.hg38.bed" - nat = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/rna_edits/NatComms2022.hg38.bed" - redi = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/rna_edits/REDIportal.chrv2.hg38.bed" - knownhot = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/KnownHotspots.somatic.38.vcf.gz" - actionablepanel = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/ActionableCodingPanel.38.bed.gz" - highconfidence = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/HighConfidence.38.bed" - ensbl_sage = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/common/ensembl_data" -} diff --git a/conf/tcga.config b/conf/tcga.config deleted file mode 100644 index 223912d..0000000 --- a/conf/tcga.config +++ /dev/null @@ -1,64 +0,0 @@ -singularity { - enabled = true - autoMounts = true -} -process { - executor = 'slurm' - clusterOptions = "--account caldas-sl2-cpu --partition cclake" - pollInterval = '1 min' - queueStatInterval = '5 min' - queueSize = 5000 - cache = 'lenient' -} - - -params { - config_profile_name = 'RNADNAVAR profile for TCGA samples' - config_profile_description = 'Pipeline to analyse a cohort of TCGA samples' - max_time = 36.h - max_cpus = 32 - max_memory = 250.GB - - // Input data - input = '/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/assets/TargetsFileTCGAOne.csv' - outdir = 'results' - genome = 'GRCh38' - wes = true - fasta = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens/NCBI/GRCh38Decoy/Annotation/Genes.gencode/GRCh38.p13.genome.fa' - gtf = 
'/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens/NCBI/GRCh38Decoy/Annotation/Genes.gencode/gencode.v33.annotation.gtf' - known_indels = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz' - known_indels_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi' - pon = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz' - pon_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz.tbi' - germline_resource = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz' - germline_resource_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz.tbi' - nucleotides_per_second = 1000 - star_index = null // set it to null so it generates one - tools = 'sage,manta,mutect2,strelka,vep' - vep_include_fasta = true - vep_cache = '/rds/project/rds-upenYb9rdtk/Work/rm889/software/vep/ensembl-vep/' - vep_cache_version = '107' - vep_genome = 'GRCh38' - vep_species = "homo_sapiens" - vep_output_format = "vcf" - dbnsfp = null - dbnsfp_consequence = null - vep_loftee = null - vep_spliceai = null - spliceai_snv = null - spliceai_indel = null - vep_spliceregion = null - vep_dbnsfp = null - ignore_soft_clipped_bases = true - whitelist = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/catalog_of_validated_oncogenic_mutations_final.hg38.bed" - darned = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/Darned.chrv2.hg38.bed" - radar = "./rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/Radar.chrv2.hg38.bed" - nat = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/NatComms2022.hg38.bed" - redi = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/REDIportal.chrv2.hg38.bed" - intervals = "intervals_with_long_alts.bed" - knownhot = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/KnownHotspots.somatic.38.vcf.gz" - actionablepanel = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/ActionableCodingPanel.38.bed.gz" - highconfidence = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/HighConfidence.38.bed" - ensbl_sage = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/common/ensembl_data" - -} diff --git a/conf/tcga2.config b/conf/tcga2.config deleted file mode 100644 index 09842df..0000000 --- a/conf/tcga2.config +++ /dev/null @@ -1,64 +0,0 @@ -singularity { - enabled = true - autoMounts = true -} -process { - executor = 'slurm' - clusterOptions = "--account caldas-sl2-cpu --partition cclake" - pollInterval = '1 min' - queueStatInterval = '5 min' - queueSize = 5000 - cache = 'lenient' -} - - -params { - config_profile_name = 'RNADNAVAR profile for TCGA samples' - config_profile_description = 'Pipeline to analyse a cohort of TCGA samples' - max_time = 36.h - max_cpus = 32 - max_memory = 250.GB - - // Input data - input = '/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/assets/TargetsFileTCGA.csv' - outdir = 'results' - genome = 'GRCh38' - wes = true - fasta = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens/NCBI/GRCh38Decoy/Annotation/Genes.gencode/GRCh38.p13.genome.fa' - gtf = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens/NCBI/GRCh38Decoy/Annotation/Genes.gencode/gencode.v33.annotation.gtf' - known_indels = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz' - known_indels_tbi = 
'/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi' - pon = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz' - pon_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz.tbi' - germline_resource = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz' - germline_resource_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz.tbi' - nucleotides_per_second = 1000 - star_index = null // set it to null so it generates one - tools = 'sage,manta,mutect2,strelka,vep' - vep_include_fasta = true - vep_cache = '/rds/project/rds-upenYb9rdtk/Work/rm889/software/vep/ensembl-vep/' - vep_cache_version = '107' - vep_genome = 'GRCh38' - vep_species = "homo_sapiens" - vep_output_format = "vcf" - dbnsfp = null - dbnsfp_consequence = null - vep_loftee = null - vep_spliceai = null - spliceai_snv = null - spliceai_indel = null - vep_spliceregion = null - vep_dbnsfp = null - ignore_soft_clipped_bases = true - whitelist = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/catalog_of_validated_oncogenic_mutations_final.hg38.bed" - darned = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/Darned.chrv2.hg38.bed" - radar = "./rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/Radar.chrv2.hg38.bed" - nat = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/NatComms2022.hg38.bed" - redi = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/REDIportal.chrv2.hg38.bed" - intervals = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/tcga/intervals_with_long_alts.bed" - knownhot = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/KnownHotspots.somatic.38.vcf.gz" - actionablepanel = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/ActionableCodingPanel.38.bed.gz" - highconfidence = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/HighConfidence.38.bed" - ensbl_sage = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/common/ensembl_data" - -} diff --git a/conf/tcga_train_set.config b/conf/tcga_train_set.config deleted file mode 100644 index e71ce0b..0000000 --- a/conf/tcga_train_set.config +++ /dev/null @@ -1,95 +0,0 @@ -singularity { - enabled = true - autoMounts = true -} -process { - executor = 'slurm' - clusterOptions = "--account caldas-sl2-cpu" - queue = "cclake" - cache = 'lenient' - errorStrategy = "retry" // retry if error - maxRetries = 2 -} - -executor { - queueSize = 2000 - pollInterval = '3 min' - queueStatInterval = '5 min' - submitRateLimit = '50sec' - exitReadTimeout = "5 min" - -} - - -params { - config_profile_name = 'RNADNAVAR profile for TCGA samples' - config_profile_description = 'Pipeline to analyse a cohort of TCGA samples' - max_time = 36.h - max_cpus = 32 - max_memory = 250.GB - - // Input data - publish_dir_mode = "copy" - step = "splitncigar" - input = '/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/assets/TargetsFileTCGAAll.csv' -// input = '/rds/project/rds-nRVsLqKmyyw/work/tcga/small_test/results/csv/recalibrated.csv' - outdir = 'results' - genome = 'GRCh38' - wes = true - fasta = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/GRCh38.d1.vd1.fa' - fasta19 = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/genome/hg19/hs37d5.fa' - fasta_fai = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/GRCh38.d1.vd1.fa.fai' - dict = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/GRCh38.d1.vd1.dict' - gtf = 
'/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/gencode.v36.annotation.gtf' - gtf = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/gencode.v36.annotation.gtf' - exon_bed = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/exome.bed' -// fasta = '/rds/project/rds-nRVsLqKmyyw/work/genome/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set.fna' -// fasta_fai = '/rds/project/rds-nRVsLqKmyyw/work/genome/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set.fna.fai' -// dict = '/rds/project/rds-nRVsLqKmyyw/work/genome/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set.dict' -// gtf = '/rds/project/rds-nRVsLqKmyyw/work/genome/GCA_000001405.15_GRCh38_full_analysis_set.refseq_annotation.gtf' -// exon_bed = 'exome.bed' // set it to null so it generates one - known_indels = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz' - known_indels_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi' - rna_pon = '/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/gtex/GTEX_HG38_PoN.bin' - rna_pon19 = '/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/gtex/PoN_GTEx' - pon = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz' - pon_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/1000g_pon.hg38.vcf.gz.tbi' - germline_resource = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz' - germline_resource_tbi = '/rds/project/rds-upenYb9rdtk/Work/rm889/resources/af-only-gnomad.hg38.vcf.gz.tbi' - nucleotides_per_second = 1000 - star_index = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/STARindex' // set it to null so it generates one - bwa = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/BWAindex/' // set it to null so it generates one - hisat2_index = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/HISAT2index/' // set it to null so it generates one - splicesites = '/rds/project/rds-nRVsLqKmyyw/work/genome/gdc/gencode.v36.annotation.splicesites.txt' // set it to null so it generates one - tools = 'sage,mutect2,strelka,vep' - vep_include_fasta = true - vep_cache = '/rds/project/rds-upenYb9rdtk/Work/rm889/software/vep/ensembl-vep/' - vep_cache_version = '107' - vep_genome = 'GRCh38' - vep_species = "homo_sapiens" - vep_output_format = "vcf" - dbnsfp = null - dbnsfp_consequence = null - vep_loftee = null - vep_spliceai = null - spliceai_snv = null - spliceai_indel = null - vep_spliceregion = null - vep_dbnsfp = null - ignore_soft_clipped_bases = true - chain = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hg38ToHg19.over.chain.gz" - whitelist = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/catalog_of_validated_oncogenic_mutations_final.hg38.bed" - blacklist = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/ENCODE-hg38-blacklist.v2.bed.gz" - rnaedits = "/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/Darned.chrv2.hg38.bed,/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/Radar.chrv2.hg38.bed,/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/REDIportal.chrv2.hg38.bed,/rds/project/rds-upenYb9rdtk/Work/rm889/Raquel/NatComms2022.hg38.2.bed" - no_intervals = false - intervals = "/rds/project/rds-upenYb9rdtk/Work/rm889/rna_mutect/nextflow/rnadnavar/tcga/intervals_with_long_alts.bed" - knownhot = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/KnownHotspots.somatic.38.vcf.gz" - actionablepanel = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/ActionableCodingPanel.38.bed.gz" - highconfidence = 
"/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/variants/HighConfidence.38.bed" - ensbl_sage = "/rds/project/rds-upenYb9rdtk/Work/rm889/resources/hmftools/common/ensembl_data" - skip_tools = "baserecalibrator,baserecalibrator_report"// "baserecalibrator,baserecalibrator_report,bcftools,documentation,fastqc,markduplicates,markduplicates_report,mosdepth,multiqc,samtools,vcftools,versions" - trim_fastq= true - split_fastq = 100 - save_bam_mapped = true - read_length = 76 -} diff --git a/main.nf b/main.nf index 9093a4d..621b548 100644 --- a/main.nf +++ b/main.nf @@ -28,7 +28,6 @@ params.dbsnp = WorkflowMain.getGenomeAttribute(params, 'dbsnp') params.dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, 'dbsnp_tbi') params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_indels') params.known_indels_tbi = WorkflowMain.getGenomeAttribute(params, 'known_indels_tbi') -params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome') params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species') diff --git a/modules.json b/modules.json index 60fe2a0..f698e63 100644 --- a/modules.json +++ b/modules.json @@ -2,219 +2,379 @@ "name": "nf-core/rnadnavar", "homePage": "https://github.com/nf-core/rnadnavar", "repos": { - "https://github.com/nf-core/modules.git": {, + "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "custom/dumpsoftwareversions": { + "bcftools/sort": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, - "fastqc": { + "bcftools/stats": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "e2693a7e2d773b92e0649b25880ee22fe82bb79d", "installed_by": ["modules"] }, - "multiqc": { + "bwa/index": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, - "bcftools/sort": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/stats": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bwa/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, "bwa/mem": { - "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9" + "branch": "master", + "git_sha": "3dc300ddcaa563c1e3503477557c0e0def6df2ce", + "installed_by": ["modules"] }, "bwamem2/index": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "bwamem2/mem": { - "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9" + "branch": "master", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": ["modules"] }, "cat/cat": { - "git_sha": "eeda4136c096688d04cc40bb3c70d948213ed641" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "cat/fastq": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "deepvariant": { - "git_sha": "fd5f6f5f4ffef4ab5a4e809bd3211bbc71c38d30" - }, - "dragmap/align": { - "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9" + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] }, - "dragmap/hashtable": { - "git_sha": 
"e745e167c1020928ef20ea1397b6b4d230681b4d" + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "ensemblvep": { - "git_sha": "973151e9eab9bac400aa99f099075a10cdd8e84c" - }, - "fastp": { - "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74" + "branch": "master", + "git_sha": "29984d70aea47d06f0062a1785d76c357dd40ea9", + "installed_by": ["modules"] }, - "fgbio/callmolecularconsensusreads": { - "git_sha": "6720d88f4e46e67b9a225f06bfb10c8e8ae04a84" + "ensemblvep/vep": { + "branch": "master", + "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", + "installed_by": ["vcf_annotate_ensemblvep"] }, - "fgbio/fastqtobam": { - "git_sha": "6720d88f4e46e67b9a225f06bfb10c8e8ae04a84" + "fastp": { + "branch": "master", + "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", + "installed_by": ["modules"] }, - "fgbio/groupreadsbyumi": { - "git_sha": "6720d88f4e46e67b9a225f06bfb10c8e8ae04a84" + "fastqc": { + "branch": "master", + "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + "installed_by": ["modules"] }, "freebayes": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/applybqsr": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "240937a2a9c30298110753292be041188891f2cb", + "installed_by": ["modules"] }, "gatk4/applybqsrspark": { - "git_sha": "6a4732ef3b76b54d75533e6be9ba57e3008d4853" + "branch": "master", + "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + "installed_by": ["modules"] }, "gatk4/applyvqsr": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "359dcb06bda60c43955752e356e25c91cfd38ae0", + "installed_by": ["modules"] }, "gatk4/baserecalibrator": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/baserecalibratorspark": { - "git_sha": "6a4732ef3b76b54d75533e6be9ba57e3008d4853" + "branch": "master", + "git_sha": "4b7d4863a5883b76e6bff13b6e52468fab090c5b", + "installed_by": ["modules"] }, - "gatk4/calculatecontamination": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] }, - "gatk4/cnnscorevariants": { - "git_sha": "5d72500d601432f5396e9022c3a709854197db1a" + "gatk4/calculatecontamination": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "installed_by": ["modules"] }, "gatk4/estimatelibrarycomplexity": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/filtermutectcalls": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] }, "gatk4/filtervarianttranches": { - "git_sha": "175ea9b7f95a0e2fd3679f7a052c6dcb60b61a6e" + "branch": "master", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "installed_by": ["modules"] }, 
"gatk4/gatherbqsrreports": { - "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/gatherpileupsummaries": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/genotypegvcfs": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/getpileupsummaries": { - "git_sha": "1ac223ad436c1410e9c16a5966274b7ca1f8d855" + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] }, - "gatk4/haplotypecaller": { - "git_sha": "e53d091a6de1ae9fd681351c085d8abe076ba1ec" + "gatk4/indexfeaturefile": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/intervallisttobed": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/intervallisttools": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/learnreadorientationmodel": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/markduplicates": { - "git_sha": "df2620cfc7e4c21b14ed03c1c928f09fbabf83c4" + "branch": "master", + "git_sha": "0a261469640941da2488e1a5aa023b64db837c70", + "installed_by": ["modules"] }, "gatk4/markduplicatesspark": { - "git_sha": "8e8f4c9c51a7b229dd45e9b287d48115c238baf3" + "branch": "master", + "git_sha": "0a261469640941da2488e1a5aa023b64db837c70", + "installed_by": ["modules"] }, "gatk4/mergemutectstats": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/mergevcfs": { - "git_sha": "4199a05aeb0ec277d40cb112949bb85893310873" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gatk4/mutect2": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] + }, + "gatk4/splitncigarreads": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/variantfiltration": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] }, "gatk4/variantrecalibrator": { - "git_sha": "edfe28a5e0088b66ee92e7c58186059f9b5e62d5" + "branch": "master", + "git_sha": "359dcb06bda60c43955752e356e25c91cfd38ae0", + "installed_by": ["modules"] }, - "manta/germline": { - "git_sha": "ffedf09b6e84b479c9c901274f74bb33f3777243" + "gffread": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, - "manta/somatic": { - "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + "gunzip": { + "branch": "master", + "git_sha": 
"e06548bfa36ee31869b81041879dd6b3a83b1d57", + "installed_by": ["modules"] + }, + "hisat2/align": { + "branch": "master", + "git_sha": "a1881f6374506f9e031b7af814768cdb44a6a7d3", + "installed_by": ["modules"] }, - "manta/tumoronly": { - "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + "hisat2/build": { + "branch": "master", + "git_sha": "f2f48836bf5c59434966a6c3b2211b29363f31ab", + "installed_by": ["modules"] + }, + "hisat2/extractsplicesites": { + "branch": "master", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["modules"] + }, + "manta/somatic": { + "branch": "master", + "git_sha": "b178a8cc4b8d38b9dc2d0a1e6a9b63e6786ce263", + "installed_by": ["modules"] }, "mosdepth": { - "git_sha": "72a31b76eb1b58879e0d91fb1d992e0118693098" + "branch": "master", + "git_sha": "ebb27711cd5f4de921244bfa81c676504072d31c", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "installed_by": ["modules"] + }, + "picard/filtersamreads": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "samblaster": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["modules"] }, "samtools/bam2fq": { - "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "samtools/collatefastq": { - "git_sha": "705f8c9ac4dfdf07666e71abde28f267e2dfd5eb" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "samtools/convert": { - "git_sha": "da79396f066a96450d9cc9f115c17c9d738595fd" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "samtools/faidx": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "branch": "master", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c", + "installed_by": ["modules"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639", + "installed_by": ["modules"] }, "samtools/index": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "samtools/merge": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "branch": "master", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": ["modules"] }, "samtools/mpileup": { - "git_sha": "24e05f6097a5dde57dd80d33295ed120f1b81aef" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "installed_by": ["modules"] }, "samtools/stats": { - "git_sha": "f48a24770e24358e58de66e9b805a70d77cd154b" + "branch": "master", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["modules"] }, "samtools/view": { - "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" + "branch": "master", + "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", + "installed_by": ["modules"] }, - "snpeff": { - "git_sha": "ffe037504897df081a7497fa5f495d8e23e93e24" + "star/align": { + 
"branch": "master", + "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914", + "installed_by": ["modules"] }, - "strelka/germline": { - "git_sha": "e5b44499efcf6f7fb24874886bac60591c5d94dd" + "star/genomegenerate": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["modules"] }, "strelka/somatic": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "branch": "master", + "git_sha": "80dbd95c558a0ebb2123d95f50c093a7f714a0d7", + "installed_by": ["modules"] }, "tabix/bgziptabix": { - "git_sha": "9dbaffff88013bc21460a989cde7a5aa172c2e0b" + "branch": "master", + "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", + "installed_by": ["modules"] }, "tabix/tabix": { - "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484" - }, - "tiddit/sv": { - "git_sha": "b689b8ed88a9f89eb2f7c75d3eb0bace77ade109" + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules", "vcf_annotate_ensemblvep"] }, "untar": { - "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" + "branch": "master", + "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", + "installed_by": ["modules"] }, "unzip": { - "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" + "branch": "master", + "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", + "installed_by": ["modules"] }, "vcftools": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - } + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "vcf_annotate_ensemblvep": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf index 9552b57..ef41fd2 100644 --- a/modules/nf-core/bcftools/sort/main.nf +++ b/modules/nf-core/bcftools/sort/main.nf @@ -2,28 +2,34 @@ process BCFTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf) output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '--output-type z' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + """ bcftools \\ sort \\ - --output ${prefix}.vcf.gz \\ + --output ${prefix}.${extension} \\ $args \\ $vcf @@ -32,4 +38,23 @@ process BCFTOOLS_SORT { bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf index 1e0f3a4..7ccb9bf 100644 --- a/modules/nf-core/bcftools/stats/main.nf +++ b/modules/nf-core/bcftools/stats/main.nf @@ -1,14 +1,19 @@ process BCFTOOLS_STATS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: - tuple val(meta), path(vcf) + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) output: tuple val(meta), path("*stats.txt"), emit: stats @@ -20,8 +25,33 @@ process BCFTOOLS_STATS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" """ - bcftools stats $args $vcf > ${prefix}.bcftools_stats.txt + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml index 304b88e..5850d25 100644 --- a/modules/nf-core/bcftools/stats/meta.yml +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -23,6 +23,34 @@ input: type: file description: VCF input file pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. 
+ pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited) + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 'reference.fa' output: - meta: type: map @@ -40,3 +68,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf index 9e5dd4c..8d2e56d 100644 --- a/modules/nf-core/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -1,18 +1,18 @@ process BWA_INDEX { tag "$fasta" - label 'process_high' + label 'process_single' - conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) + conda "bioconda::bwa=0.7.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + 'biocontainers/bwa:0.7.17--hed695b0_7' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "bwa" , emit: index - path "versions.yml", emit: versions + tuple val(meta), path(bwa) , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,7 +20,7 @@ process BWA_INDEX { script: def args = task.ext.args ?: '' """ - mkdir -p bwa + mkdir bwa bwa \\ index \\ $args \\ @@ -32,4 +32,20 @@ process BWA_INDEX { bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') END_VERSIONS """ + + stub: + """ + mkdir bwa + + touch bwa/genome.amb + touch bwa/genome.ann + touch bwa/genome.bwt + touch bwa/genome.pac + touch bwa/genome.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml index 2bbd81d..2c6cfcd 100644 --- a/modules/nf-core/bwa/index/meta.yml +++ b/modules/nf-core/bwa/index/meta.yml @@ -15,10 +15,20 @@ tools: arxiv: arXiv:1303.3997 licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Input genome fasta file output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] - index: type: file description: BWA genome index files diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf index f55af94..8ba99df 100644 --- a/modules/nf-core/bwa/mem/main.nf +++ b/modules/nf-core/bwa/mem/main.nf @@ -2,14 +2,14 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) + conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" input: tuple val(meta), path(reads) - path index + tuple val(meta2), path(index) val sort_bam output: @@ -25,7 +25,7 @@ process BWA_MEM { def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` bwa mem \\ $args \\ @@ -40,4 +40,16 @@ process BWA_MEM { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml index f84c522..62357bf 100644 --- a/modules/nf-core/bwa/mem/meta.yml +++ b/modules/nf-core/bwa/mem/meta.yml @@ -28,6 +28,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] - index: type: file description: BWA genome index files diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf index 900f27d..3094085 100644 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -1,18 +1,18 @@ process BWAMEM2_INDEX { tag "$fasta" - label 'process_high' + label 'process_single' - conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1" : null) + conda "bioconda::bwa-mem2=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : - 'quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" + 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "bwamem2" , emit: index - path "versions.yml" , emit: versions + tuple val(meta), path("bwamem2"), emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml index 1b52448..40c26c3 100644 --- a/modules/nf-core/bwamem2/index/meta.yml +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -6,7 +6,7 @@ keywords: - genome - reference tools: - - bwa: + - bwamem2: description: | BWA-mem2 is a software package for mapping DNA sequences against a large reference genome, such as the human genome. @@ -14,14 +14,24 @@ tools: documentation: https://github.com/bwa-mem2/bwa-mem2#usage licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - fasta: type: file description: Input genome fasta file output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - index: type: file description: BWA genome index files - pattern: "*.{0132,amb,ann,bwt.2bit.64,pac}" + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf index 978c401..d427dea 100644 --- a/modules/nf-core/bwamem2/mem/main.nf +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -2,14 +2,14 @@ process BWAMEM2_MEM { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.15.1" : null) + conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' : - 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" input: tuple val(meta), path(reads) - path index + tuple val(meta2), path(index) val sort_bam output: @@ -25,7 +25,7 @@ process BWAMEM2_MEM { def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` bwa-mem2 \\ mem \\ diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml index 25c97f9..bc3dfcd 100644 --- a/modules/nf-core/bwamem2/mem/meta.yml +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -28,6 +28,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] - index: type: file description: BWA genome index files @@ -37,6 +42,11 @@ input: description: use samtools sort (true) or samtools view (false) pattern: "true or false" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - bam: type: file description: Output BAM file containing read alignments diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 40e53f3..9f06221 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -2,10 +2,10 @@ process CAT_CAT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null) + conda "conda-forge::pigz=2.3.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : - 'quay.io/biocontainers/pigz:2.3.4' }" + 'biocontainers/pigz:2.3.4' }" input: tuple val(meta), path(files_in) diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 5eeff5a..8acc0bf 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -7,9 +7,9 @@ keywords: tools: - cat: description: Just concatenation - homepage: None + documentation: https://man7.org/linux/man-pages/man1/cat.1.html - tool_dev_url: None + licence: ["GPL-3.0-or-later"] input: - meta: diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index b685489..5021e6f 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -1,11 +1,11 @@ process CAT_FASTQ { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") @@ -20,9 +20,9 @@ process CAT_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz @@ -33,7 +33,7 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { def read1 = [] def read2 = [] readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } @@ -48,4 +48,33 @@ process CAT_FASTQ { """ } } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + } diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml index c836598..8a39e30 100644 --- a/modules/nf-core/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -1,6 +1,7 @@ name: cat_fastq description: Concatenates fastq files keywords: + - cat - fastq - concatenate tools: @@ -16,7 +17,7 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: list + type: file description: | List of input FastQ files to be concatenated. 
output: diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a609..ebc8727 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -5,7 +5,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 120392c..831b7f1 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,13 +2,14 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + conda "bioconda::fastp=0.23.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : - 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" input: tuple val(meta), path(reads) + path adapter_fasta val save_trimmed_fail val save_merged @@ -26,28 +27,53 @@ process FASTP { script: def args = task.ext.args ?: '' - // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { """ [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + fastp \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log + cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") END_VERSIONS """ } else { - def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz @@ -59,6 +85,7 @@ process FASTP { --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $merge_fastq \\ --thread $task.cpus \\ diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 2bd2b1a..197ea7c 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -9,19 +9,24 @@ tools: description: | A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. documentation: https://github.com/OpenGene/fastp - doi: https://doi.org/10.1093/bioinformatics/bty560 + doi: 10.1093/bioinformatics/bty560 licence: ["MIT"] input: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. e.g. [ id:'test', single_end:false ] - reads: type: file description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9ae5838..249f906 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -5,7 +5,7 @@ process FASTQC { conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'biocontainers/fastqc:0.11.9--0' }" input: tuple val(meta), path(reads) @@ -29,7 +29,11 @@ process FASTQC { printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fgbio/fastqtobam/main.nf b/modules/nf-core/fgbio/fastqtobam/main.nf deleted file mode 100644 index 76cfc07..0000000 --- a/modules/nf-core/fgbio/fastqtobam/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process FGBIO_FASTQTOBAM { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 
"bioconda::fgbio=2.0.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' : - 'quay.io/biocontainers/fgbio:2.0.2--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - val read_structure - - output: - tuple val(meta), path("*_umi_converted.bam"), emit: umibam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - - fgbio \\ - --tmp-dir=. \\ - FastqToBam \\ - -i $reads \\ - -o "${prefix}_umi_converted.bam" \\ - --read-structures $read_structure \\ - --sample $meta.id \\ - --library $meta.id \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/fgbio/fastqtobam/meta.yml b/modules/nf-core/fgbio/fastqtobam/meta.yml deleted file mode 100644 index 3081caf..0000000 --- a/modules/nf-core/fgbio/fastqtobam/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: fgbio_fastqtobam -description: | - Using the FGBIO tools, converts FASTQ files sequenced with UMIs into BAM files, moving the UMI barcode into the RX field of the BAM file -keywords: - - fastqtobam - - fgbio -tools: - - fgbio: - description: A set of tools for working with genomic and high throughput sequencing data, including UMIs - homepage: http://fulcrumgenomics.github.io/fgbio/ - documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ - tool_dev_url: https://github.com/fulcrumgenomics/fgbio - doi: "" - licence: ["MIT"] - -input: - - reads: - type: file - description: pair of reads to be converted into BAM file - pattern: "*.{fastq.gz}" - - - read_structure: - type: string - description: | - A read structure should always be provided for each of the fastq files. - If single end, the string will contain only one structure (i.e. "2M11S+T"), if paired-end the string - will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T"). - If the read does not contain any UMI, the structure will be +T (i.e. only template of any length). - https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.yml}" - - umibam: - type: file - description: Converted, unsorted BAM file with RX tag reporting UMI sequence (if any) - pattern: "*.{bam}" - -authors: - - "@lescai" diff --git a/modules/nf-core/freebayes/main.nf b/modules/nf-core/freebayes/main.nf index 73b1da9..1466f08 100644 --- a/modules/nf-core/freebayes/main.nf +++ b/modules/nf-core/freebayes/main.nf @@ -1,11 +1,11 @@ process FREEBAYES { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null) + conda "bioconda::freebayes=1.3.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3' : - 'quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3' }" + 'https://depot.galaxyproject.org/singularity/freebayes:1.3.6--hbfe0e7f_2' : + 'biocontainers/freebayes:1.3.6--hbfe0e7f_2' }" input: tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index), path(target_bed) @@ -31,43 +31,21 @@ process FREEBAYES { def populations_file = populations ? "--populations ${populations}" : "" def cnv_file = cnv ? "--cnv-map ${cnv}" : "" - if (task.cpus > 1) { - """ - freebayes-parallel \\ - <(fasta_generate_regions.py $fasta_fai 10000) $task.cpus \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $populations_file \\ - $cnv_file \\ - $args \\ - $input > ${prefix}.vcf - - bgzip ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ - - } else { - """ - freebayes \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $populations_file \\ - $cnv_file \\ - $args \\ - $input > ${prefix}.vcf - - bgzip ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ - } + """ + freebayes \\ + -f $fasta \\ + $targets_file \\ + $samples_file \\ + $populations_file \\ + $cnv_file \\ + $args \\ + $input > ${prefix}.vcf + + bgzip ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/freebayes/meta.yml b/modules/nf-core/freebayes/meta.yml index cbbd297..17d83cb 100644 --- a/modules/nf-core/freebayes/meta.yml +++ b/modules/nf-core/freebayes/meta.yml @@ -15,7 +15,7 @@ tools: homepage: https://github.com/freebayes/freebayes documentation: https://github.com/freebayes/freebayes tool_dev_url: https://github.com/freebayes/freebayes - doi: "arXiv:1207.3907" + doi: "10.48550/arXiv.1207.3907" licence: ["MIT"] input: diff --git a/modules/nf-core/gatk4/applybqsr/main.nf b/modules/nf-core/gatk4/applybqsr/main.nf index a0e2c45..b515f1c 100644 --- a/modules/nf-core/gatk4/applybqsr/main.nf +++ b/modules/nf-core/gatk4/applybqsr/main.nf @@ -2,10 +2,10 @@ process GATK4_APPLYBQSR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) @@ -26,14 +26,14 @@ process GATK4_APPLYBQSR { def prefix = task.ext.prefix ?: "${meta.id}" def interval_command = intervals ? "--intervals $intervals" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\ + gatk --java-options "-Xmx${avail_mem}M" ApplyBQSR \\ --input $input \\ --output ${prefix}.${input.getExtension()} \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/applybqsr/meta.yml b/modules/nf-core/gatk4/applybqsr/meta.yml index 3fc93f1..3002ab6 100644 --- a/modules/nf-core/gatk4/applybqsr/meta.yml +++ b/modules/nf-core/gatk4/applybqsr/meta.yml @@ -3,16 +3,17 @@ description: Apply base quality score recalibration (BQSR) to a bam file keywords: - bqsr - bam + - base quality score recalibration tools: - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: diff --git a/modules/nf-core/gatk4/applybqsrspark/main.nf b/modules/nf-core/gatk4/applybqsrspark/main.nf index 5890278..6451b1b 100644 --- a/modules/nf-core/gatk4/applybqsrspark/main.nf +++ b/modules/nf-core/gatk4/applybqsrspark/main.nf @@ -2,8 +2,8 @@ process GATK4_APPLYBQSR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) - container 'broadinstitute/gatk:4.2.6.1' + conda "bioconda::gatk4=4.3.0.0 conda-forge::openjdk=8.0.312" + container "nf-core/gatk:4.4.0.0" input: tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) @@ -24,14 +24,16 @@ process GATK4_APPLYBQSR_SPARK { def prefix = task.ext.prefix ?: "${meta.id}" def interval_command = intervals ? "--intervals $intervals" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" ApplyBQSRSpark \\ + gatk \\ + --java-options "-Xmx${avail_mem}M" \\ + ApplyBQSRSpark \\ --input $input \\ --output ${prefix}.${input.getExtension()} \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/applybqsrspark/meta.yml b/modules/nf-core/gatk4/applybqsrspark/meta.yml index 070b37a..9acdecc 100644 --- a/modules/nf-core/gatk4/applybqsrspark/meta.yml +++ b/modules/nf-core/gatk4/applybqsrspark/meta.yml @@ -3,16 +3,17 @@ description: Apply base quality score recalibration (BQSR) to a bam file keywords: - bqsr - bam + - gatk tools: - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: diff --git a/modules/nf-core/gatk4/applyvqsr/main.nf b/modules/nf-core/gatk4/applyvqsr/main.nf index d3da833..381af40 100644 --- a/modules/nf-core/gatk4/applyvqsr/main.nf +++ b/modules/nf-core/gatk4/applyvqsr/main.nf @@ -2,10 +2,10 @@ process GATK4_APPLYVQSR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches) @@ -26,14 +26,14 @@ process GATK4_APPLYVQSR { def prefix = task.ext.prefix ?: "${meta.id}" def reference_command = fasta ? "--reference $fasta" : '' - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK ApplyVQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" ApplyVQSR \\ + gatk --java-options "-Xmx${avail_mem}M" ApplyVQSR \\ --variant ${vcf} \\ --output ${prefix}.vcf.gz \\ $reference_command \\ @@ -47,4 +47,16 @@ process GATK4_APPLYVQSR { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf index fb26d3d..318703a 100644 --- a/modules/nf-core/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -2,10 +2,10 @@ process GATK4_BASERECALIBRATOR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -28,14 +28,14 @@ process GATK4_BASERECALIBRATOR { def interval_command = intervals ? "--intervals $intervals" : "" def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \\ + gatk --java-options "-Xmx${avail_mem}M" BaseRecalibrator \\ --input $input \\ --output ${prefix}.table \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/baserecalibrator/meta.yml b/modules/nf-core/gatk4/baserecalibrator/meta.yml index 08c1ebb..a6b06c7 100644 --- a/modules/nf-core/gatk4/baserecalibrator/meta.yml +++ b/modules/nf-core/gatk4/baserecalibrator/meta.yml @@ -4,14 +4,14 @@ keywords: - sort tools: - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: diff --git a/modules/nf-core/gatk4/baserecalibratorspark/main.nf b/modules/nf-core/gatk4/baserecalibratorspark/main.nf index 755122c..d240a10 100644 --- a/modules/nf-core/gatk4/baserecalibratorspark/main.nf +++ b/modules/nf-core/gatk4/baserecalibratorspark/main.nf @@ -2,8 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) - container 'broadinstitute/gatk:4.2.6.1' + conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" + container "nf-core/gatk:4.4.0.0" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -26,14 +26,14 @@ process GATK4_BASERECALIBRATOR_SPARK { def interval_command = intervals ? "--intervals $intervals" : "" def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" BaseRecalibratorSpark \\ + gatk --java-options "-Xmx${avail_mem}M" BaseRecalibratorSpark \\ --input $input \\ --output ${prefix}.table \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/baserecalibratorspark/meta.yml b/modules/nf-core/gatk4/baserecalibratorspark/meta.yml index 581c48e..a4a73ad 100644 --- a/modules/nf-core/gatk4/baserecalibratorspark/meta.yml +++ b/modules/nf-core/gatk4/baserecalibratorspark/meta.yml @@ -2,16 +2,18 @@ name: gatk4_baserecalibrator_spark description: Generate recalibration table for Base Quality Score Recalibration (BQSR) keywords: - sort + - bqsr + - gatk tools: - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index 19a390e..a23abd0 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -1,15 +1,15 @@ process GATK4_BEDTOINTERVALLIST { tag "$meta.id" - label 'process_very_low' + label 'process_medium' - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bed) - path dict + tuple val(meta2), path(dict) output: tuple val(meta), path('*.interval_list'), emit: interval_list @@ -22,14 +22,14 @@ process GATK4_BEDTOINTERVALLIST { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\ + gatk --java-options "-Xmx${avail_mem}M" BedToIntervalList \\ --INPUT $bed \\ --OUTPUT ${prefix}.interval_list \\ --SEQUENCE_DICTIONARY $dict \\ diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 986f159..40daf75 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -3,6 +3,7 @@ description: Creates an interval list from a bed file and a reference dict keywords: - bed - interval list + - bedtointervallist tools: - gatk4: description: | @@ -23,6 +24,11 @@ input: type: file description: Input bed file pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: Sequence dictionary @@ -38,3 +44,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/calculatecontamination/main.nf b/modules/nf-core/gatk4/calculatecontamination/main.nf index c289684..4fccf8b 100644 --- a/modules/nf-core/gatk4/calculatecontamination/main.nf +++ b/modules/nf-core/gatk4/calculatecontamination/main.nf @@ -2,10 +2,10 @@ process GATK4_CALCULATECONTAMINATION { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(pileup), path(matched) @@ -23,14 +23,14 @@ process GATK4_CALCULATECONTAMINATION { def prefix = task.ext.prefix ?: "${meta.id}" def matched_command = matched ? "--matched-normal $matched" : '' - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK CalculateContamination] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" CalculateContamination \\ + gatk --java-options "-Xmx${avail_mem}M" CalculateContamination \\ --input $pileup \\ --output ${prefix}.contamination.table \\ $matched_command \\ diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index 13fa9e8..15a86be 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -2,17 +2,17 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "*.dict" , emit: dict - path "versions.yml" , emit: versions + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,14 +20,14 @@ process GATK4_CREATESEQUENCEDICTIONARY { script: def args = task.ext.args ?: '' - def avail_mem = 6 + def avail_mem = 6144 if (!task.memory) { log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\ + gatk --java-options "-Xmx${avail_mem}M" CreateSequenceDictionary \\ --REFERENCE $fasta \\ --URI $fasta \\ --TMP_DIR . \\ @@ -41,7 +41,7 @@ process GATK4_CREATESEQUENCEDICTIONARY { stub: """ - touch test.dict + touch ${fasta.baseName}.dict cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index bd24788..a421e68 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -3,30 +3,37 @@ description: Creates a sequence dictionary for a reference sequence keywords: - dictionary - fasta + - createsequencedictionary tools: - gatk: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: file - description: Input fasta file - pattern: "*.{fasta,fa}" + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" output: - dict: - type: file - description: gatk dictionary file - pattern: "*.{dict}" + type: file + description: gatk dictionary file + pattern: "*.{dict}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf index 46e09a9..0f33c7c 100644 --- a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf @@ -2,10 +2,10 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input) @@ -25,14 +25,14 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { def prefix = task.ext.prefix ?: "${meta.id}" def input_list = input.collect(){"--INPUT $it"}.join(" ") - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \\ + gatk --java-options "-Xmx${avail_mem}M" EstimateLibraryComplexity \\ $input_list \\ --OUTPUT ${prefix}.metrics \\ --REFERENCE_SEQUENCE ${fasta} \\ diff --git a/modules/nf-core/gatk4/filtermutectcalls/main.nf b/modules/nf-core/gatk4/filtermutectcalls/main.nf index e3edb22..d0cf5b4 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/main.nf +++ b/modules/nf-core/gatk4/filtermutectcalls/main.nf @@ -2,16 +2,16 @@ process GATK4_FILTERMUTECTCALLS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate) - path fasta - path fai - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("*.vcf.gz") , emit: vcf @@ -26,19 +26,19 @@ process GATK4_FILTERMUTECTCALLS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def orientationbias_command = orientationbias.name.endsWith('NO_ARTPRIOR') ? '' : orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') - def segmentation_command = segmentation.name.endsWith('NO_SEG') ? '' : segmentation.collect{"--tumor-segmentation $it"}.join(' ') - def estimate_command = estimate ? " --contamination-estimate ${estimate} " : '' - def table_command = table.name.endsWith('NO_TABLE') ? '' : " --contamination-table ${table} " + def orientationbias_command = orientationbias ? orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') : '' + def segmentation_command = segmentation ? segmentation.collect{"--tumor-segmentation $it"}.join(' ') : '' + def estimate_command = estimate ? " --contamination-estimate ${estimate} " : '' + def table_command = table ? table.collect{"--contamination-table $it"}.join(' ') : '' - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" FilterMutectCalls \\ + gatk --java-options "-Xmx${avail_mem}M" FilterMutectCalls \\ --variant $vcf \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ @@ -54,4 +54,17 @@ process GATK4_FILTERMUTECTCALLS { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.filteringStats.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/filtermutectcalls/meta.yml b/modules/nf-core/gatk4/filtermutectcalls/meta.yml index d1972d7..1a6faec 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/meta.yml +++ b/modules/nf-core/gatk4/filtermutectcalls/meta.yml @@ -35,28 +35,43 @@ input: description: Stats file that pairs with output vcf file pattern: "*vcf.gz.stats" - orientationbias: - type: list + type: file description: files containing artifact priors for input vcf. Optional input. pattern: "*.artifact-prior.tar.gz" - segmentation: - type: list + type: file description: tables containing segmentation information for input vcf. Optional input. pattern: "*.segmentation.table" - table: - type: list + type: file description: table(s) containing contamination data for input vcf. Optional input, takes priority over estimate. pattern: "*.contamination.table" - estimate: - type: val + type: float description: estimation of contamination value as a double. 
Optional input, will only be used if table is not specified. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of reference fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: GATK sequence dictionary @@ -83,3 +98,4 @@ output: authors: - "@GCJMackenzie" - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/filtersamreads/main.nf b/modules/nf-core/gatk4/filtersamreads/main.nf deleted file mode 100644 index ca28c7b..0000000 --- a/modules/nf-core/gatk4/filtersamreads/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process GATK4_FILTERSAMREADS { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" - - input: - tuple val(meta), path(bam), path(bai), path(read_ids) - val fasta // treat it as a string because FilterSamReads is unable to solve softlinking - - output: - tuple val(meta), path("*.bam"), path("*.bai") , emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def fastastr = fasta[0].toString() - - """ - gatk FilterSamReads \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.bam \\ - --TMP_DIR . \\ - -R ${fastastr} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gatk4/filtersamreads/meta.yml b/modules/nf-core/gatk4/filtersamreads/meta.yml deleted file mode 100644 index 92ca390..0000000 --- a/modules/nf-core/gatk4/filtersamreads/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: gatk4_filtersamreads -description: | - Subsets reads from a SAM or BAM file by applying one of several filters. -keywords: - - gatk4 - - reads - - BAM - - SAM -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - bam: - type: file - description: compressed vcf file of mutect2calls - pattern: "*.bam" - - read_ids: - type: file - description: File with read ids to keep - pattern: "*.txt" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - bam: - type: file - description: compressed vcf file of mutect2calls - pattern: "*.bam" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@RaqManzano" diff --git a/modules/nf-core/gatk4/filtervarianttranches/main.nf b/modules/nf-core/gatk4/filtervarianttranches/main.nf index 98e620e..be232ab 100644 --- a/modules/nf-core/gatk4/filtervarianttranches/main.nf +++ b/modules/nf-core/gatk4/filtervarianttranches/main.nf @@ -2,10 +2,10 @@ process GATK4_FILTERVARIANTTRANCHES { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi), path(intervals) @@ -27,16 +27,16 @@ process GATK4_FILTERVARIANTTRANCHES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def resources = resources.collect{"--resource $it"}.join(' ') - def avail_mem = 3 + + def avail_mem = 3072 if (!task.memory) { log.info '[GATK FilterVariantTranches] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" FilterVariantTranches \\ + gatk --java-options "-Xmx${avail_mem}M" FilterVariantTranches \\ --variant $vcf \\ $resources \\ --output ${prefix}.filtered.vcf.gz \\ diff --git a/modules/nf-core/gatk4/filtervarianttranches/meta.yml b/modules/nf-core/gatk4/filtervarianttranches/meta.yml index f89063a..4152a65 100644 --- a/modules/nf-core/gatk4/filtervarianttranches/meta.yml +++ b/modules/nf-core/gatk4/filtervarianttranches/meta.yml @@ -3,15 +3,17 @@ description: Apply tranche filtering keywords: - gatk4 - filtervarianttranches - + - tranche_filtering tools: - "gatk4": - description: Genome Analysis Toolkit (GATK4) + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us - tool_dev_url: https://github.com/broadinstitute/gatk - doi: "10.1158/1538-7445.AM2017-3590" - licence: ["BSD-3-clause"] + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360051308071-FilterVariantTranches + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: @@ -44,6 +46,9 @@ input: description: Index of reference fasta file pattern: "fasta.fai" - dict: + type: file + description: GATK sequence dictionary + pattern: ".dict" output: - meta: diff --git a/modules/nf-core/gatk4/gatherbqsrreports/main.nf b/modules/nf-core/gatk4/gatherbqsrreports/main.nf index 231bd39..8ee92e2 100644 --- a/modules/nf-core/gatk4/gatherbqsrreports/main.nf +++ b/modules/nf-core/gatk4/gatherbqsrreports/main.nf @@ -2,10 +2,10 @@ process GATK4_GATHERBQSRREPORTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(table) @@ -22,14 +22,14 @@ process GATK4_GATHERBQSRREPORTS { def prefix = task.ext.prefix ?: "${meta.id}" def input_list = table.collect{"--input $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK GatherBQSRReports] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" GatherBQSRReports \\ + gatk --java-options "-Xmx${avail_mem}M" GatherBQSRReports \\ $input_list \\ --output ${prefix}.table \\ --tmp-dir . \\ diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/main.nf b/modules/nf-core/gatk4/gatherpileupsummaries/main.nf index 0b763f4..3e92eb0 100644 --- a/modules/nf-core/gatk4/gatherpileupsummaries/main.nf +++ b/modules/nf-core/gatk4/gatherpileupsummaries/main.nf @@ -2,10 +2,10 @@ process GATK4_GATHERPILEUPSUMMARIES { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: @@ -13,7 +13,7 @@ process GATK4_GATHERPILEUPSUMMARIES { path dict output: - tuple val(meta), path("*.pileupsummaries.table"), emit: table + tuple val(meta), path("*.pileups.table"), emit: table path "versions.yml" , emit: versions when: @@ -24,16 +24,16 @@ process GATK4_GATHERPILEUPSUMMARIES { def prefix = task.ext.prefix ?: "${meta.id}" def input_list = pileup.collect{ "--I $it" }.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK GatherPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" GatherPileupSummaries \\ + gatk --java-options "-Xmx${avail_mem}M" GatherPileupSummaries \\ $input_list \\ - --O ${prefix}.pileupsummaries.table \\ + --O ${prefix}.pileups.table \\ --sequence-dictionary $dict \\ --tmp-dir . \\ $args diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml b/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml index 823ea36..695335c 100644 --- a/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml +++ b/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml @@ -31,7 +31,7 @@ output: - table: type: file description: pileup summaries table file - pattern: "*.pileupsummaries.table" + pattern: "*.pileups.table" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf index 810f2b6..dc77345 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/main.nf +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -1,11 +1,11 @@ process GATK4_GENOMICSDBIMPORT { tag "$meta.id" - label 'process_low' + label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) @@ -31,6 +31,7 @@ process GATK4_GENOMICSDBIMPORT { genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" // settings changed for running get intervals list mode if run_intlist is true if (run_intlist) { @@ -45,14 +46,14 @@ process GATK4_GENOMICSDBIMPORT { updated_db = "${wspace}" } - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" GenomicsDBImport \\ + gatk --java-options "-Xmx${avail_mem}M" GenomicsDBImport \\ $input_command \\ $genomicsdb_command \\ $interval_command \\ @@ -64,4 +65,39 @@ process GATK4_GENOMICSDBIMPORT { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. 
input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def stub_genomicsdb = genomicsdb_command == "--genomicsdb-workspace-path ${prefix}" ? "touch ${prefix}" : "" + def stub_interval = interval_command == "--output-interval-list-to-file ${prefix}.interval_list" ? "touch ${prefix}.interval_list" : "" + def stub_update = updated_db != "" ? "touch ${wspace}" : "" + + """ + ${stub_genomicsdb} + ${stub_interval} + ${stub_update} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/genotypegvcfs/main.nf b/modules/nf-core/gatk4/genotypegvcfs/main.nf index 11024b1..fac131f 100644 --- a/modules/nf-core/gatk4/genotypegvcfs/main.nf +++ b/modules/nf-core/gatk4/genotypegvcfs/main.nf @@ -2,10 +2,10 @@ process GATK4_GENOTYPEGVCFS { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(gvcf), path(gvcf_index), path(intervals), path(intervals_index) @@ -30,14 +30,14 @@ process GATK4_GENOTYPEGVCFS { def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" def interval_command = intervals ? "--intervals $intervals" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" GenotypeGVCFs \\ + gatk --java-options "-Xmx${avail_mem}M" GenotypeGVCFs \\ --variant $gvcf_command \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ @@ -51,4 +51,17 @@ process GATK4_GENOTYPEGVCFS { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/getpileupsummaries/main.nf b/modules/nf-core/gatk4/getpileupsummaries/main.nf index 5945a93..cde39fb 100644 --- a/modules/nf-core/gatk4/getpileupsummaries/main.nf +++ b/modules/nf-core/gatk4/getpileupsummaries/main.nf @@ -2,16 +2,16 @@ process GATK4_GETPILEUPSUMMARIES { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(index), path(intervals) - path fasta - path fai - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) path variants path variants_tbi @@ -28,14 +28,14 @@ process GATK4_GETPILEUPSUMMARIES { def interval_command = intervals ? "--intervals $intervals" : "--intervals $variants" def reference_command = fasta ? "--reference $fasta" : '' - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK GetPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" GetPileupSummaries \\ + gatk --java-options "-Xmx${avail_mem}M" GetPileupSummaries \\ --input $input \\ --variant $variants \\ --output ${prefix}.pileups.table \\ diff --git a/modules/nf-core/gatk4/getpileupsummaries/meta.yml b/modules/nf-core/gatk4/getpileupsummaries/meta.yml index 3a940de..0f531c5 100644 --- a/modules/nf-core/gatk4/getpileupsummaries/meta.yml +++ b/modules/nf-core/gatk4/getpileupsummaries/meta.yml @@ -35,14 +35,29 @@ input: type: file description: File containing specified sites to be used for the summary. If this option is not specified, variants file is used instead automatically. pattern: "*.interval_list" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of reference fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: GATK sequence dictionary diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf index 264f71e..d3bb04a 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/main.nf +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -2,10 +2,10 @@ process GATK4_INDEXFEATUREFILE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(feature_file) @@ -20,14 +20,14 @@ process GATK4_INDEXFEATUREFILE { script: def args = task.ext.args ?: '' - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" IndexFeatureFile \\ + gatk --java-options "-Xmx${avail_mem}M" IndexFeatureFile \\ --input $feature_file \\ --tmp-dir . \\ $args diff --git a/modules/nf-core/gatk4/intervallisttobed/main.nf b/modules/nf-core/gatk4/intervallisttobed/main.nf index 84f3c47..afa4423 100644 --- a/modules/nf-core/gatk4/intervallisttobed/main.nf +++ b/modules/nf-core/gatk4/intervallisttobed/main.nf @@ -2,10 +2,10 @@ process GATK4_INTERVALLISTTOBED { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) @@ -21,14 +21,14 @@ process GATK4_INTERVALLISTTOBED { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK IntervalListToBed] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" IntervalListToBed \\ + gatk --java-options "-Xmx${avail_mem}M" IntervalListToBed \\ --INPUT $intervals \\ --OUTPUT ${prefix}.bed \\ --TMP_DIR . \\ diff --git a/modules/nf-core/gatk4/intervallisttobed/meta.yml b/modules/nf-core/gatk4/intervallisttobed/meta.yml index 90b78c0..f09f1ee 100644 --- a/modules/nf-core/gatk4/intervallisttobed/meta.yml +++ b/modules/nf-core/gatk4/intervallisttobed/meta.yml @@ -1,7 +1,9 @@ name: gatk4_intervallisttobed +description: Converts an Picard IntervalList file to a BED file. keywords: - interval - bed + - conversion tools: - gatk4: description: Genome Analysis Toolkit (GATK4) diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf index 7ab26c1..0054659 100644 --- a/modules/nf-core/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -2,10 +2,10 @@ process GATK4_INTERVALLISTTOOLS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) @@ -21,17 +21,17 @@ process GATK4_INTERVALLISTTOOLS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ mkdir ${prefix}_split - gatk --java-options "-Xmx${avail_mem}g" IntervalListTools \\ + gatk --java-options "-Xmx${avail_mem}M" IntervalListTools \\ --INPUT $intervals \\ --OUTPUT ${prefix}_split \\ --TMP_DIR . \\ diff --git a/modules/nf-core/gatk4/learnreadorientationmodel/main.nf b/modules/nf-core/gatk4/learnreadorientationmodel/main.nf index 8c4ac94..b1e8780 100644 --- a/modules/nf-core/gatk4/learnreadorientationmodel/main.nf +++ b/modules/nf-core/gatk4/learnreadorientationmodel/main.nf @@ -2,10 +2,10 @@ process GATK4_LEARNREADORIENTATIONMODEL { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(f1r2) @@ -22,14 +22,14 @@ process GATK4_LEARNREADORIENTATIONMODEL { def prefix = task.ext.prefix ?: "${meta.id}" def input_list = f1r2.collect{"--input $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK LearnReadOrientationModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" LearnReadOrientationModel \\ + gatk --java-options "-Xmx${avail_mem}M" LearnReadOrientationModel \\ $input_list \\ --output ${prefix}.tar.gz \\ --tmp-dir . \\ diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf index 3aa1a88..f4b3f6d 100644 --- a/modules/nf-core/gatk4/markduplicates/main.nf +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -64,4 +64,4 @@ process GATK4_MARKDUPLICATES { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml index ae7443d..ddf98d2 100644 --- a/modules/nf-core/gatk4/markduplicates/meta.yml +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -69,4 +69,4 @@ output: authors: - "@ajodeh-juma" - "@FriederikeHanssen" - - "@maxulysse" \ No newline at end of file + - "@maxulysse" diff --git a/modules/nf-core/gatk4/markduplicatesspark/main.nf b/modules/nf-core/gatk4/markduplicatesspark/main.nf index b8c315f..30f4703 100644 --- a/modules/nf-core/gatk4/markduplicatesspark/main.nf +++ b/modules/nf-core/gatk4/markduplicatesspark/main.nf @@ -2,8 +2,8 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) - container 'broadinstitute/gatk:4.2.6.1' + conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" + container "nf-core/gatk:4.4.0.0" input: tuple val(meta), path(bam) @@ -13,6 +13,7 @@ process GATK4_MARKDUPLICATES_SPARK { output: tuple val(meta), path("${prefix}"), emit: output + tuple val(meta), path("${prefix}.bai"), emit: bam_index, optional:true tuple val(meta), path("*.metrics"), emit: metrics, optional: true path "versions.yml" , emit: versions @@ -24,15 +25,14 @@ process GATK4_MARKDUPLICATES_SPARK { prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') - - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ + gatk --java-options "-Xmx${avail_mem}M" MarkDuplicatesSpark \\ $input_list \\ --output $prefix \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/markduplicatesspark/meta.yml b/modules/nf-core/gatk4/markduplicatesspark/meta.yml index 59be9b6..00d0808 100644 --- a/modules/nf-core/gatk4/markduplicatesspark/meta.yml +++ b/modules/nf-core/gatk4/markduplicatesspark/meta.yml @@ -49,10 +49,14 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - bam: + - output: type: file - description: Marked duplicates BAM file - pattern: "*.{bam}" + description: Marked duplicates BAM/CRAM file + pattern: "*.{bam,cram}" + - bam_index: + type: file + description: Optional BAM index file + pattern: "*.bai" authors: - "@ajodeh-juma" diff --git a/modules/nf-core/gatk4/mergemutectstats/main.nf b/modules/nf-core/gatk4/mergemutectstats/main.nf index 54311df..5133859 100644 --- a/modules/nf-core/gatk4/mergemutectstats/main.nf +++ b/modules/nf-core/gatk4/mergemutectstats/main.nf @@ -2,10 +2,10 @@ process GATK4_MERGEMUTECTSTATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(stats) @@ -22,14 +22,14 @@ process GATK4_MERGEMUTECTSTATS { prefix = task.ext.prefix ?: "${meta.id}" def input_list = stats.collect{ "--stats ${it}"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK MergeMutectStats] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" MergeMutectStats \\ + gatk --java-options "-Xmx${avail_mem}M" MergeMutectStats \\ $input_list \\ --output ${prefix}.vcf.gz.stats \\ --tmp-dir . 
\\ diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf index 54752b7..dfb5b33 100644 --- a/modules/nf-core/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -2,46 +2,58 @@ process GATK4_MERGEVCFS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - tuple val(meta), path(vcf) - path dict + tuple val(meta), path(vcf) + tuple val(meta2), path(dict) output: - tuple val(meta), path('*.vcf.gz'), emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - path "versions.yml" , emit: versions + tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when + task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input_list = vcf.collect{ "--INPUT $it"}.join(' ') - def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\ - $input_list \\ - --OUTPUT ${prefix}.vcf.gz \\ - $reference_command \\ - --TMP_DIR . \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" MergeVcfs \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml index 3ebce0b..db8c4cb 100644 --- a/modules/nf-core/gatk4/mergevcfs/meta.yml +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -23,13 +23,16 @@ input: type: list description: Two or more VCF files pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome'] - ref_dict: type: file description: Optional Sequence Dictionary as input pattern: "*.dict" - - use_ref_dict: - type: boolean - description: Specify whether or not to use a given reference dictionary + output: - vcf: type: file diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index 6c1f572..bddc368 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -2,73 +2,73 @@ process GATK4_MUTECT2 { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path germline_resource - path germline_resource_tbi - path panel_of_normals - path panel_of_normals_tbi + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path(germline_resource) + path(germline_resource_tbi) + path(panel_of_normals) + path(panel_of_normals_tbi) output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - tuple val(meta), path("*.stats") , emit: stats - tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + tuple val(meta), path("*.stats") , emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def inputs = input.collect{ "--input $it"}.join(" ") - def interval_command = intervals ? "--intervals $intervals" : "" - def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" - def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect{ "--input $it"}.join(" ") + def interval_command = intervals ? "--intervals $intervals" : "" + def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" + def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" - def avail_mem = 4 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\ - $inputs \\ - --output ${prefix}.vcf.gz \\ - --reference $fasta \\ - $pon_command \\ - $gr_command \\ - $interval_command \\ - --tmp-dir . \\ - $args + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" Mutect2 \\ + $inputs \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $pon_command \\ + $gr_command \\ + $interval_command \\ + --tmp-dir . \\ + $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - touch ${prefix}.vcf.gz.stats - touch ${prefix}.f1r2.tar.gz + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.stats + touch ${prefix}.f1r2.tar.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index aa0a02a..4842c22 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -31,17 +31,32 @@ input: description: list of BAM file indexes, also able to take CRAM indexes as an input pattern: "*.{bam.bai/cram.crai}" - intervals: - type: File/string + type: file description: Specify region the tools is run on. - pattern: ".{bed,interval_list}/chrM" + pattern: ".{bed,interval_list}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of reference fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: GATK sequence dictionary @@ -87,3 +102,4 @@ output: authors: - "@GCJMackenzie" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/splitncigarreads/main.nf b/modules/nf-core/gatk4/splitncigarreads/main.nf index 1562f7e..0178976 100644 --- a/modules/nf-core/gatk4/splitncigarreads/main.nf +++ b/modules/nf-core/gatk4/splitncigarreads/main.nf @@ -2,46 +2,47 @@ process GATK4_SPLITNCIGARREADS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk=3.8" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk:3.8--hdfd78af_11': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - tuple val(meta), path(bam), path(bai), path(intervals) - path fasta - path fai - path dict + tuple val(meta), path(bam), path(bai), path(intervals) + path fasta + path fai + path dict output: - tuple val(meta), path('*.bam'), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path('*.bam'), emit: bam + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when + task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def interval_command = intervals ? "--intervals $intervals" : "" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" - def avail_mem = 4 - if (!task.memory) { - log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - GenomeAnalysisTK -Xmx${avail_mem}g -Djava.io.tmpdir=. -T SplitNCigarReads \\ - -I $bam \\ - -o ${prefix}.bam \\ - -R $fasta \\ - $interval_command \\ - $args + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" SplitNCigarReads \\ + --input $bam \\ + --output ${prefix}.bam \\ + --reference $fasta \\ + $interval_command \\ + --tmp-dir . \\ + $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk: \$(echo \$(GenomeAnalysisTK --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} \ No newline at end of file + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/variantfiltration/main.nf b/modules/nf-core/gatk4/variantfiltration/main.nf index cda06e1..387ff8c 100644 --- a/modules/nf-core/gatk4/variantfiltration/main.nf +++ b/modules/nf-core/gatk4/variantfiltration/main.nf @@ -2,16 +2,16 @@ process GATK4_VARIANTFILTRATION { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi) - path fasta - path fai - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -25,20 +25,31 @@ process GATK4_VARIANTFILTRATION { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.toGiga() + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\ + gatk --java-options "-Xmx${avail_mem}M" VariantFiltration \\ --variant $vcf \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ --tmp-dir . \\ $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') diff --git a/modules/nf-core/gatk4/variantfiltration/meta.yml b/modules/nf-core/gatk4/variantfiltration/meta.yml index 04b1c08..2260f37 100644 --- a/modules/nf-core/gatk4/variantfiltration/meta.yml +++ b/modules/nf-core/gatk4/variantfiltration/meta.yml @@ -3,6 +3,7 @@ description: Filter variants keywords: - vcf - filter + - variantfiltration tools: - gatk4: description: | @@ -27,14 +28,29 @@ input: type: list description: List of VCF file indexes pattern: "*.{idx,tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Fasta file of reference genome pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: Sequence dictionary of fastea file @@ -54,3 +70,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/variantrecalibrator/main.nf b/modules/nf-core/gatk4/variantrecalibrator/main.nf index 961e60d..adfd106 100644 --- a/modules/nf-core/gatk4/variantrecalibrator/main.nf +++ b/modules/nf-core/gatk4/variantrecalibrator/main.nf @@ -2,10 +2,10 @@ process GATK4_VARIANTRECALIBRATOR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi) // input vcf and tbi of variants to recalibrate @@ -32,14 +32,14 @@ process GATK4_VARIANTRECALIBRATOR { def reference_command = fasta ? "--reference $fasta " : '' def labels_command = labels.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK VariantRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" VariantRecalibrator \\ + gatk --java-options "-Xmx${avail_mem}M" VariantRecalibrator \\ --variant $vcf \\ --output ${prefix}.recal \\ --tranches-file ${prefix}.tranches \\ @@ -53,4 +53,18 @@ process GATK4_VARIANTRECALIBRATOR { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.recal + touch ${prefix}.idx + touch ${prefix}.tranches + touch ${prefix}plots.R + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf index 7c575c9..f4472b0 100644 --- a/modules/nf-core/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -2,10 +2,10 @@ process GFFREAD { tag "$gff" label 'process_low' - conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + conda "bioconda::gffread=0.12.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : - 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + 'biocontainers/gffread:0.12.1--h8b12597_0' }" input: path gff diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 61bf1af..73bf08c 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,11 +1,11 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) @@ -21,14 +21,28 @@ process GUNZIP { def args = task.ext.args ?: '' gunzip = archive.toString() - '.gz' """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4d2ebc8..4cdcdf4 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -3,31 +3,32 @@ description: Compresses and decompresses files. keywords: - gunzip - compression + - decompression tools: - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] input: - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/hisat2/align/main.nf b/modules/nf-core/hisat2/align/main.nf index 4b9d578..db8e8bb 100644 --- a/modules/nf-core/hisat2/align/main.nf +++ b/modules/nf-core/hisat2/align/main.nf @@ -1,18 +1,17 @@ -def VERSION = '2.2.1' // Version information not provided by tool on CLI - process HISAT2_ALIGN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::hisat2=2.2.1 bioconda::samtools=1.15.1" : null) + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "bioconda::hisat2=2.2.1 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' : - 'quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : + 'biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" input: tuple val(meta), path(reads) - path index - path splicesites + tuple val(meta2), path(index) + tuple val(meta3), path(splicesites) output: tuple val(meta), path("*.bam") , emit: bam @@ -26,26 +25,28 @@ process HISAT2_ALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def rg_tag = meta.rg_id ? "--rg-id ID:${meta.rg_id} --rg SM:${meta.rg_sm} --rg PU:${meta.rg_pu} --rg LB:${meta.rg_lb} --rg PL:${meta.rg_pl}" : '' + def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def strandedness = '' if (meta.strandedness == 'forward') { strandedness = meta.single_end ? '--rna-strandness F' : '--rna-strandness FR' } else if (meta.strandedness == 'reverse') { strandedness = meta.single_end ? '--rna-strandness R' : '--rna-strandness RF' } - if (params.single_end) { + ss = "$splicesites" ? "--known-splicesite-infile $splicesites" : '' + def seq_center = params.seq_center ? "--rg-id ${prefix} --rg SM:$prefix --rg CN:${params.seq_center.replaceAll('\\s','_')}" : "--rg-id ${prefix} --rg SM:$prefix" + if (meta.single_end) { def unaligned = params.save_unaligned ? 
"--un-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.1.ht2" | sed 's/.1.ht2//'` + INDEX=`find -L ./ -name "*.1.ht2" | sed 's/\\.1.ht2\$//'` hisat2 \\ -x \$INDEX \\ -U $reads \\ $strandedness \\ - --known-splicesite-infile $splicesites \\ + $ss \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ - $rg_tag \\ + $seq_center \\ $unaligned \\ $args \\ | samtools view -bS -F 4 -F 256 - > ${prefix}.bam @@ -59,17 +60,19 @@ process HISAT2_ALIGN { } else { def unaligned = params.save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' """ - INDEX=`find -L ./ -name "*.1.ht2" | sed 's/.1.ht2//'` + INDEX=`find -L ./ -name "*.1.ht2" | sed 's/\\.1.ht2\$//'` hisat2 \\ -x \$INDEX \\ -1 ${reads[0]} \\ -2 ${reads[1]} \\ $strandedness \\ - --known-splicesite-infile $splicesites \\ + $ss \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ - $rg_tag \\ + $seq_center \\ $unaligned \\ + --no-mixed \\ + --no-discordant \\ $args \\ | samtools view -bS -F 4 -F 8 -F 256 - > ${prefix}.bam diff --git a/modules/nf-core/hisat2/align/meta.yml b/modules/nf-core/hisat2/align/meta.yml index 7550aef..008a961 100644 --- a/modules/nf-core/hisat2/align/meta.yml +++ b/modules/nf-core/hisat2/align/meta.yml @@ -25,10 +25,20 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - index: type: file description: HISAT2 genome index file pattern: "*.ht2" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - splicesites: type: file description: Splices sites in gtf file @@ -55,3 +65,4 @@ output: authors: - "@ntoda03" + - "@ramprasadn" diff --git a/modules/nf-core/hisat2/build/main.nf b/modules/nf-core/hisat2/build/main.nf index 7714acf..90f8efc 100644 --- a/modules/nf-core/hisat2/build/main.nf +++ b/modules/nf-core/hisat2/build/main.nf @@ -1,22 +1,22 @@ -def VERSION = '2.2.1' // Version information not provided by tool on CLI - process HISAT2_BUILD { tag "$fasta" label 'process_high' label 'process_high_memory' - conda (params.enable_conda ? "bioconda::hisat2=2.2.1" : null) + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "bioconda::hisat2=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' : - 'quay.io/biocontainers/hisat2:2.2.1--h1b792b2_3' }" + 'biocontainers/hisat2:2.2.1--h1b792b2_3' }" + input: - path fasta - path gtf - path splicesites + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + tuple val(meta3), path(splicesites) output: - path "hisat2" , emit: index - path "versions.yml" , emit: versions + tuple val(meta), path("hisat2") , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -37,13 +37,14 @@ process HISAT2_BUILD { def hisat2_build_memory = params.hisat2_build_memory ? 
(params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0 if (avail_mem >= hisat2_build_memory) { log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index" - extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" - ss = "--ss $splicesites" - exon = "--exon ${gtf.baseName}.exons.txt" + extract_exons = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : "" + ss = splicesites ? "--ss $splicesites" : "" + exon = gtf ? "--exon ${gtf.baseName}.exons.txt" : "" } else { log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index." log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check." } + def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ mkdir hisat2 $extract_exons diff --git a/modules/nf-core/hisat2/build/meta.yml b/modules/nf-core/hisat2/build/meta.yml index a2e1fd6..e61bf2a 100644 --- a/modules/nf-core/hisat2/build/meta.yml +++ b/modules/nf-core/hisat2/build/meta.yml @@ -15,28 +15,48 @@ tools: licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Reference fasta file pattern: "*.{fa,fasta,fna}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - gtf: type: file description: Reference gtf annotation file pattern: "*.{gtf}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - splicesites: type: file description: Splices sites in gtf file pattern: "*.{txt}" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - index: type: file description: HISAT2 genome index file pattern: "*.ht2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ntoda03" diff --git a/modules/nf-core/hisat2/extractsplicesites/main.nf b/modules/nf-core/hisat2/extractsplicesites/main.nf index 70e4375..a6e59e2 100644 --- a/modules/nf-core/hisat2/extractsplicesites/main.nf +++ b/modules/nf-core/hisat2/extractsplicesites/main.nf @@ -1,26 +1,26 @@ -def VERSION = '2.2.1' // Version information not provided by tool on CLI - process HISAT2_EXTRACTSPLICESITES { tag "$gtf" label 'process_medium' - conda (params.enable_conda ? 'bioconda::hisat2=2.2.1' : null) + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "bioconda::hisat2=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' : - 'quay.io/biocontainers/hisat2:2.2.1--h1b792b2_3' }" + 'biocontainers/hisat2:2.2.1--h1b792b2_3' }" input: - path gtf + tuple val(meta), path(gtf) output: - path "*.splice_sites.txt", emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path("*.splice_sites.txt"), emit: txt + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/hisat2/extractsplicesites/meta.yml b/modules/nf-core/hisat2/extractsplicesites/meta.yml index 7dc1bac..f70de08 100644 --- a/modules/nf-core/hisat2/extractsplicesites/meta.yml +++ b/modules/nf-core/hisat2/extractsplicesites/meta.yml @@ -15,12 +15,22 @@ tools: licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - gtf: type: file description: Reference gtf annotation file pattern: "*.{gtf}" output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - versions: type: file description: File containing software versions @@ -32,3 +42,4 @@ output: authors: - "@ntoda03" + - "@ramprasadn" diff --git a/modules/nf-core/manta/somatic/main.nf b/modules/nf-core/manta/somatic/main.nf index 6313c38..8ff8d90 100644 --- a/modules/nf-core/manta/somatic/main.nf +++ b/modules/nf-core/manta/somatic/main.nf @@ -1,11 +1,12 @@ process MANTA_SOMATIC { tag "$meta.id" - label 'process_high' + label 'process_medium' + label 'error_retry' - conda (params.enable_conda ? "bioconda::manta=1.6.0" : null) + conda "bioconda::manta=1.6.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : - 'quay.io/biocontainers/manta:1.6.0--h9ee0642_1' }" + 'biocontainers/manta:1.6.0--h9ee0642_1' }" input: tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(target_bed), path(target_bed_tbi) @@ -29,15 +30,16 @@ process MANTA_SOMATIC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" + def options_manta = target_bed ? "--callRegions $target_bed" : "" """ configManta.py \ --tumorBam $input_tumor \ --normalBam $input_normal \ --reference $fasta \ + --runDir manta \ $options_manta \ - --runDir manta + $args python manta/runWorkflow.py -m local -j $task.cpus diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index f363176..0000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.bam.gz ] && ln -s $reads ${prefix}.bam.gz - fastqc -f bam $args --threads $task.cpus ${prefix}.bam.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1e7d6af..0000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" - - input: - path multiqc_files, stageAs: "?/*" - tuple path(multiqc_config), path(multiqc_logo) - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def config = multiqc_config ? "--config $multiqc_config" : '' - """ - multiqc \\ - --force \\ - $config \\ - $args \\ - . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ - - stub: - """ - touch multiqc_data - touch multiqc_plots - touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf index 898514a..74db3a2 100644 --- a/modules/nf-core/mosdepth/main.nf +++ b/modules/nf-core/mosdepth/main.nf @@ -35,10 +35,10 @@ process MOSDEPTH { def reference = fasta ? "--fasta ${fasta}" : "" def interval = bed ? "--by ${bed}" : "" if (bed && args.contains("--by")) { - exit 1, "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" } if (!bed && args.contains("--thresholds")) { - exit 1, "'--thresholds' can only be specified in conjunction with '--by'" + error "'--thresholds' can only be specified in conjunction with '--by'" } """ @@ -77,4 +77,4 @@ process MOSDEPTH { mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml index 6926354..adf3893 100644 --- a/modules/nf-core/mosdepth/meta.yml +++ b/modules/nf-core/mosdepth/meta.yml @@ -106,4 +106,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@ramprasadn" - - "@matthdsm" \ No newline at end of file + - "@matthdsm" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b60474..65d7dd0 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index bf3a27f..f93b5ee 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -12,6 +13,7 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file @@ -19,19 +21,24 @@ input: List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - multiqc_config: type: file - description: Config yml for MultiQC + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. pattern: "*.{yml,yaml}" - multiqc_logo: type: file - description: Logo file for MultiQC + description: Optional logo file for MultiQC pattern: "*.{png}" + output: - report: type: file description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: @@ -46,3 +53,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/sage/main.nf b/modules/nf-core/sage/main.nf deleted file mode 100644 index 1523430..0000000 --- a/modules/nf-core/sage/main.nf +++ /dev/null @@ -1,92 +0,0 @@ -def VERSION = '3.1' // Version information not provided by tool on CLI - -process SAGE { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::hmftools-sage=3.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.1--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-sage:3.1--hdfd78af_0' }" - - input: - tuple val(meta), path(normal), path(normal_index), path(tumor), path(tumor_index), path(intervals) - path fasta - path fasta_fai - path dict - path highconfidence - path actionablepanel - path knownhot - path ensbl_sage - - output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = normal ? "-reference ${meta.normal_id} -reference_bam ${normal}" : "" - def HighConfidence = highconfidence ? "-high_confidence_bed ${highconfidence}" : "" - def ActionableCodingPanel = actionablepanel ? "-panel_bed ${actionablepanel}" : "" - def KnownHotspots = knownhot ? "-hotspots ${knownhot}" : "" - def avail_mem = 4 - if (!task.memory) { - log.info '[SAGE] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - if (intervals){ // If no reads the intervals don't work in sage - """ - export _JAVA_OPTIONS="-Xmx${avail_mem}g" - INTER=\$(sed -E 's/\\s+0\\s+/\\t1\\t/g' $intervals | grep -v chrM | sed 's/\t/:/g' | paste -s -d ';') - - SAGE \\ - -out ${prefix}.vcf \\ - -ref_genome $fasta \\ - -threads $task.cpus \\ - -tumor ${meta.tumor_id} \\ - -tumor_bam ${tumor} \\ - $reference \\ - -ensembl_data_dir $ensbl_sage \\ - $HighConfidence \\ - $ActionableCodingPanel \\ - $KnownHotspots \\ - -specific_regions \$INTER \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sage: $VERSION - END_VERSIONS - """ - - } else { - """ - export _JAVA_OPTIONS="-Xmx${avail_mem}g" - SAGE \\ - -out ${prefix}.vcf \\ - -ref_genome $fasta \\ - -threads $task.cpus \\ - -tumor ${meta.tumor_id} \\ - -tumor_bam ${tumor} \\ - $reference \\ - -ensembl_data_dir $ensbl_sage \\ - $HighConfidence \\ - $ActionableCodingPanel \\ - $KnownHotspots \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sage: $VERSION - END_VERSIONS - """ - } - - - -} diff --git a/modules/nf-core/sage/meta.yml b/modules/nf-core/sage/meta.yml deleted file mode 100644 index dea957b..0000000 --- a/modules/nf-core/sage/meta.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: freebayes -description: A haplotype-based variant detector -keywords: - - variant caller - - SNP - - indels - - somatic variant calling - - hmftools - -tools: - - sage: - description: Bayesian haplotype-based polymorphism discovery and genotyping - homepage: https://github.com/freebayes/freebayes - documentation: https://github.com/hartwigmedical/hmftools/tree/master/sage - tool_dev_url: https://github.com/hartwigmedical/hmftools/tree/master/sage - licence: ["GPL-3.0"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - normal: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - normal_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai}" - - tumor: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - tumor_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai}" - - intervals: - type: file - description: BED file for intervals - pattern: "*.bed" - - fasta: - type: file - description: reference fasta file - pattern: ".{fa,fa.gz,fasta,fasta.gz}" - - highconfidence: - type: file - description: Optional. - pattern: "*.bed" - - actionablepanel: - type: file - description: Optional. - pattern: "*.bed" - - knownhot: - type: file - description: Optional. - pattern: "*.bed" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - -authors: - - "@RaqManzano" diff --git a/modules/nf-core/samblaster/main.nf b/modules/nf-core/samblaster/main.nf index 225c715..af1f71d 100644 --- a/modules/nf-core/samblaster/main.nf +++ b/modules/nf-core/samblaster/main.nf @@ -2,10 +2,10 @@ process SAMBLASTER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samblaster=0.1.26 bioconda::samtools=1.15.1" : null) + conda "bioconda::samblaster=0.1.26 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:fff03944e664bbf9a139f7b174b9cb2d4163271a-0' : - 'quay.io/biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:fff03944e664bbf9a139f7b174b9cb2d4163271a-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:cee56b506ceb753d4bbef7e05b81e1bfc25d937f-0' : + 'biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:cee56b506ceb753d4bbef7e05b81e1bfc25d937f-0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/samblaster/meta.yml b/modules/nf-core/samblaster/meta.yml index 776bef2..f090c89 100644 --- a/modules/nf-core/samblaster/meta.yml +++ b/modules/nf-core/samblaster/meta.yml @@ -10,6 +10,8 @@ description: | options.args3 for the output bam file keywords: - sort + - duplicate marking + - bam tools: - samblaster: description: | @@ -17,7 +19,7 @@ tools: It can also optionally output discordant read pairs and/or split read mappings to separate SAM files, and/or unmapped/clipped reads to a separate FASTQ file. By default, samblaster reads SAM input from stdin and writes SAM to stdout. 
- homepage: None + documentation: https://github.com/GregoryFaust/samblaster tool_dev_url: https://github.com/GregoryFaust/samblaster doi: "10.1093/bioinformatics/btu314" diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf index e7b0cf6..858f2ae 100644 --- a/modules/nf-core/samtools/bam2fq/main.nf +++ b/modules/nf-core/samtools/bam2fq/main.nf @@ -53,4 +53,4 @@ process SAMTOOLS_BAM2FQ { END_VERSIONS """ } -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml index 171ff35..c7ad3ba 100644 --- a/modules/nf-core/samtools/bam2fq/meta.yml +++ b/modules/nf-core/samtools/bam2fq/meta.yml @@ -51,4 +51,4 @@ output: pattern: "*.fq.gz" authors: - - "@lescai" \ No newline at end of file + - "@lescai" diff --git a/modules/nf-core/samtools/collatefastq/main.nf b/modules/nf-core/samtools/collatefastq/main.nf index fa2e3d5..4469faf 100644 --- a/modules/nf-core/samtools/collatefastq/main.nf +++ b/modules/nf-core/samtools/collatefastq/main.nf @@ -52,4 +52,4 @@ process SAMTOOLS_COLLATEFASTQ { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/collatefastq/meta.yml b/modules/nf-core/samtools/collatefastq/meta.yml index efc8c19..b647cba 100644 --- a/modules/nf-core/samtools/collatefastq/meta.yml +++ b/modules/nf-core/samtools/collatefastq/meta.yml @@ -73,4 +73,4 @@ output: authors: - "@lescai" - "@maxulysse" - - "@matthdsm" \ No newline at end of file + - "@matthdsm" diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf index b9cfad6..29722ba 100644 --- a/modules/nf-core/samtools/convert/main.nf +++ b/modules/nf-core/samtools/convert/main.nf @@ -39,4 +39,4 @@ process SAMTOOLS_CONVERT { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml index 21cdc9f..866c228 100644 --- a/modules/nf-core/samtools/convert/meta.yml +++ b/modules/nf-core/samtools/convert/meta.yml @@ -49,4 +49,4 @@ output: pattern: "*.{version.txt}" authors: - "@FriederikeHanssen" - - "@maxulysse" \ No newline at end of file + - "@maxulysse" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 9ca1dfd..59ed308 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -47,4 +47,4 @@ process SAMTOOLS_FAIDX { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 6111f19..957b25e 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -54,4 +54,4 @@ output: authors: - "@drpatelh" - "@ewels" - - "@phue" \ No newline at end of file + - "@phue" diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index b87b210..b75707e 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? 
"bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,12 +19,24 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 9526906..954225d 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -14,7 +14,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index a49ff35..83c7c34 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,11 +19,26 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index 3710ab8..dda87e1 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -15,7 +15,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index bc4913a..0b20aa4 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -45,4 +45,4 @@ process SAMTOOLS_INDEX { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 6037b9e..8bd2fa6 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -50,4 +50,4 @@ output: authors: - "@drpatelh" - "@ewels" - - "@maxulysse" \ No newline at end of file + - "@maxulysse" diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index 4d349ad..b73b7cb 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -53,4 +53,4 @@ process SAMTOOLS_MERGE { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 483878d..3a815f7 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -70,4 +70,4 @@ authors: - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" - - "@ramprasadn" \ No newline at end of file + - "@ramprasadn" diff --git a/modules/nf-core/samtools/mpileup/main.nf b/modules/nf-core/samtools/mpileup/main.nf index cfab5c9..d772498 100644 --- a/modules/nf-core/samtools/mpileup/main.nf +++ b/modules/nf-core/samtools/mpileup/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_MPILEUP { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(intervals) path fasta @@ -26,6 +26,7 @@ process SAMTOOLS_MPILEUP { --fasta-ref $fasta \\ --output ${prefix}.mpileup \\ $args \\ + $intervals \\ $input bgzip ${prefix}.mpileup cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/samtools/mpileup/meta.yml b/modules/nf-core/samtools/mpileup/meta.yml index ae499e9..7597ef4 100644 --- a/modules/nf-core/samtools/mpileup/meta.yml +++ b/modules/nf-core/samtools/mpileup/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index b4fc1cb..2b7753f 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,16 +2,17 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -22,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index a820c55..0732843 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -39,6 +39,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index d8ca702..4a2607d 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -46,4 +46,4 @@ process SAMTOOLS_STATS { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 9bd0eb1..90e6345 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -56,4 +56,4 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" - - "@ramprasadn" \ No newline at end of file + - "@ramprasadn" diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 883f21e..cb91fac 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -63,4 +63,4 @@ process SAMTOOLS_VIEW { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3800bcd..3b05450 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -81,4 +81,4 @@ authors: - "@drpatelh" - "@joseespinosa" - "@FriederikeHanssen" - - "@priyanka-surana" \ No newline at end of file + - "@priyanka-surana" diff --git a/modules/nf-core/snpeff/meta.yml b/modules/nf-core/snpeff/meta.yml deleted file mode 100644 index 2f0d866..0000000 --- a/modules/nf-core/snpeff/meta.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: snpEff -description: Genetic variant annotation and functional effect prediction toolbox -keywords: - - annotation -tools: - - snpeff: - description: | - SnpEff is a variant annotation and effect prediction tool. - It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). - homepage: https://pcingola.github.io/SnpEff/ - documentation: https://pcingola.github.io/SnpEff/se_introduction/ - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: | - vcf to annotate - - db: - type: value - description: | - which db to annotate with - - cache: - type: file - description: | - path to snpEff cache (optional) -output: - - vcf: - type: file - description: | - annotated vcf - pattern: "*.ann.vcf" - - report: - type: file - description: snpEff report file - pattern: "*.html" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 1c3dfbf..8cb8e9a 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -2,57 +2,62 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - // Note: 2.7X indices incompatible with AWS iGenomes. 
TODO - // Note2: genomegenerate and align must be on the same STAR version - conda (params.enable_conda ? "bioconda::star=2.7.9a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" : null) + conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' }" - + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" input: - tuple val(meta), path(reads) - path index - path gtf + tuple val(meta), path(reads, stageAs: "input*/*") + path index + path gtf val star_ignore_sjdbgtf val seq_platform val seq_center output: - tuple val(meta), path('*d.out.bam') , emit: bam tuple val(meta), path('*Log.final.out') , emit: log_final tuple val(meta), path('*Log.out') , emit: log_out tuple val(meta), path('*Log.progress.out'), emit: log_progress path "versions.yml" , emit: versions + tuple val(meta), path('*d.out.bam') , optional:true, emit: bam tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab + tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab tuple val(meta), path('*.out.junction') , optional:true, emit: junction tuple val(meta), path('*.out.sam') , optional:true, emit: sam + tuple val(meta), path('*.wig') , optional:true, emit: wig + tuple val(meta), path('*.bg') , optional:true, emit: bedgraph when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" + def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" + def seq_center = seq_center ? "'CN:$seq_center'" : "" + def attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' """ STAR \\ --genomeDir $index \\ - --readFilesIn $reads \\ + --readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\ --runThreadN $task.cpus \\ --outFileNamePrefix $prefix. 
\\ $out_sam_type \\ $ignore_gtf \\ - $args \\ - $args2 + $attrRG \\ + $args $mv_unsorted_bam @@ -68,6 +73,37 @@ process STAR_ALIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}Xd.out.bam + touch ${prefix}.Log.final.out + touch ${prefix}.Log.out + touch ${prefix}.Log.progress.out + touch ${prefix}.sortedByCoord.out.bam + touch ${prefix}.toTranscriptome.out.bam + touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.Aligned.sortedByCoord.out.bam + touch ${prefix}.unmapped_1.fastq.gz + touch ${prefix}.unmapped_2.fastq.gz + touch ${prefix}.tab + touch ${prefix}.SJ.out.tab + touch ${prefix}.ReadsPerGene.out.tab + touch ${prefix}.Chimeric.out.junction + touch ${prefix}.out.sam + touch ${prefix}.Signal.UniqueMultiple.str1.out.wig + touch ${prefix}.Signal.UniqueMultiple.str1.out.bg + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index 7ee10f1..bce16d3 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -74,6 +74,14 @@ output: type: file description: STAR chimeric junction output file (optional) pattern: "*.out.junction" + - wig: + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - bedgraph: + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" authors: - "@kevinmenden" diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index e5568f1..2407d00 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,19 +2,18 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.7.9a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" : null) + conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" input: path fasta path gtf output: - path "star" , emit: index - path "versions.yml" , emit: versions + path "star" , emit: index + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -22,7 +21,7 @@ process STAR_GENOMEGENERATE { script: def args = task.ext.args ?: '' def args_list = args.tokenize() - def memory = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -66,4 +65,32 @@ process STAR_GENOMEGENERATE { END_VERSIONS """ } + + stub: + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/strelka/somatic/main.nf b/modules/nf-core/strelka/somatic/main.nf index 222279c..17d700c 100644 --- a/modules/nf-core/strelka/somatic/main.nf +++ b/modules/nf-core/strelka/somatic/main.nf @@ -1,53 +1,55 @@ process STRELKA_SOMATIC { tag "$meta.id" - label 'process_high' + label 'process_medium' + label 'error_retry' - conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + conda "bioconda::strelka=2.9.10" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/strelka:2.9.10--h9ee0642_1' : - 'quay.io/biocontainers/strelka:2.9.10--h9ee0642_1' }" + 'biocontainers/strelka:2.9.10--h9ee0642_1' }" input: - tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi), path(target_bed), path(target_bed_index) - path fasta - path fai + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi), path(target_bed), path(target_bed_index) + path fasta + path fai output: - tuple val(meta), path("*.somatic_indels.vcf.gz") , emit: vcf_indels - tuple val(meta), path("*.somatic_indels.vcf.gz.tbi"), emit: vcf_indels_tbi - tuple val(meta), path("*.somatic_snvs.vcf.gz") , emit: vcf_snvs - tuple val(meta), path("*.somatic_snvs.vcf.gz.tbi") , emit: vcf_snvs_tbi - path "versions.yml" , emit: versions + tuple val(meta), path("*.somatic_indels.vcf.gz") , emit: vcf_indels + tuple val(meta), path("*.somatic_indels.vcf.gz.tbi"), emit: vcf_indels_tbi + tuple val(meta), path("*.somatic_snvs.vcf.gz") , emit: vcf_snvs + tuple val(meta), path("*.somatic_snvs.vcf.gz.tbi") , emit: vcf_snvs_tbi + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when + task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def options_target_bed = target_bed ? "--callRegions ${target_bed}" : "" - def options_manta = manta_candidate_small_indels ? 
"--indelCandidates ${manta_candidate_small_indels}" : "" - """ - - configureStrelkaSomaticWorkflow.py \\ - --tumor $input_tumor \\ - --normal $input_normal \\ - --referenceFasta $fasta \\ - ${options_target_bed} \\ - ${options_manta} \\ - $args \\ - --runDir strelka - - python strelka/runWorkflow.py -m local -j $task.cpus - - mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}.somatic_indels.vcf.gz - mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}.somatic_indels.vcf.gz.tbi - mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}.somatic_snvs.vcf.gz - mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}.somatic_snvs.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - strelka: \$( configureStrelkaSomaticWorkflow.py --version ) - END_VERSIONS - """ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_target_bed = target_bed ? "--callRegions ${target_bed}" : "" + def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : "" + """ + + configureStrelkaSomaticWorkflow.py \\ + --tumor $input_tumor \\ + --normal $input_normal \\ + --referenceFasta $fasta \\ + ${options_target_bed} \\ + ${options_manta} \\ + $args \\ + --runDir strelka + + sed -i s/"isEmail = isLocalSmtp()"/"isEmail = False"/g strelka/runWorkflow.py + + python strelka/runWorkflow.py -m local -j $task.cpus + mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}.somatic_indels.vcf.gz + mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}.somatic_indels.vcf.gz.tbi + mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}.somatic_snvs.vcf.gz + mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}.somatic_snvs.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strelka: \$( configureStrelkaSomaticWorkflow.py --version ) + END_VERSIONS + """ } diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf index 330f038..d6c5a76 100644 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -44,4 +44,4 @@ process TABIX_BGZIPTABIX { tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml index 1745d97..2761e27 100644 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -47,4 +47,4 @@ output: pattern: "versions.yml" authors: - "@maxulysse" - - "@DLBPointon" \ No newline at end of file + - "@DLBPointon" diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index e155e46..5bf332e 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -1,11 +1,11 @@ process TABIX_TABIX { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) + conda "bioconda::tabix=1.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + 'biocontainers/tabix:1.11--hdfd78af_0' }" input: tuple val(meta), path(tab) diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 058d176..61461c3 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -1,18 +1,18 @@ process UNTAR { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7 conda-forge::grep=3.11 conda-forge::tar=1.34" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) output: - tuple val(meta), path("$untar"), emit: untar - path "versions.yml" , emit: versions + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,13 +20,28 @@ process UNTAR { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + """ - tar \\ - -xzvf \\ - $args \\ - $archive \\ - $args2 \\ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -35,9 +50,10 @@ process UNTAR { """ stub: - untar = archive.toString() - '.tar.gz' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - touch $untar + mkdir $prefix + touch ${prefix}/file.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index d426919..db241a6 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -3,6 +3,7 @@ description: Extract files. keywords: - untar - uncompress + - extract tools: - untar: description: | @@ -26,9 +27,9 @@ output: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - untar: - type: file - description: - pattern: "*.*" + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: type: file description: File containing software versions @@ -36,3 +37,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf index 8476a2e..cf977f1 100644 --- a/modules/nf-core/unzip/main.nf +++ b/modules/nf-core/unzip/main.nf @@ -1,29 +1,31 @@ process UNZIP { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::p7zip=15.09" : null) + conda "conda-forge::p7zip=16.02" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/p7zip:15.09--h2d50403_4' : - 'quay.io/biocontainers/p7zip:15.09--h2d50403_4' }" + 'https://depot.galaxyproject.org/singularity/p7zip:16.02' : + 'biocontainers/p7zip:16.02' }" input: tuple val(meta), path(archive) output: - tuple val(meta), path("${archive.baseName}/"), emit: unzipped_archive - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}/"), emit: unzipped_archive + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - if ( archive instanceof List && archive.name.size > 1 ) { exit 1, "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) """ 7za \\ - e \\ - -o"${archive.baseName}"/ \\ + x \\ + -o"${prefix}"/ \\ $args \\ $archive diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml index f924bfb..2910e0f 100644 --- a/modules/nf-core/unzip/meta.yml +++ b/modules/nf-core/unzip/meta.yml @@ -3,6 +3,8 @@ description: Unzip ZIP archive files keywords: - unzip - decompression + - zip + - archiving tools: - unzip: description: p7zip is a quick port of 7z.exe and 7za.exe (command line version of 7zip, see www.7-zip.org) for Unix. diff --git a/modules/nf-core/vcflib/filter/main.nf b/modules/nf-core/vcflib/filter/main.nf deleted file mode 100644 index 91d6f4e..0000000 --- a/modules/nf-core/vcflib/filter/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process VCFFILTER { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::vcftools=0.1.16" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1' : - 'quay.io/biocontainers/vcflib:1.0.3--hecb563c_1' }" - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - - gunzip -c $vcf | vcffilter \\ - $args > ${prefix}.vcf - gzip ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - vcffilter: \$(echo \$(vcffilter 2>&1) | sed 's/^.*vcflib ( //;s/).*//' | cut -f2 -d ' ') - END_VERSIONS - """ -} diff --git a/modules/nf-core/vcflib/filter/meta.yml b/modules/nf-core/vcflib/filter/meta.yml deleted file mode 100644 index 70acca0..0000000 --- a/modules/nf-core/vcflib/filter/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: vcffilter -description: VCF filter the specified vcf file using the set of filters - -keywords: - - VCF - - filter - - variant calling -tools: - - vcffilter: - description: VCF filter the specified vcf file using the set of filters - homepage: https://github.com/vcflib/vcflib - documentation: https://github.com/vcflib/vcflib - tool_dev_url: https://github.com/vcflib/vcflib - doi: https://doi.org/10.1101/2021.05.21.445151 - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - vcf: - type: file - description: vcf file (optional) - pattern: "*.vcf.gz" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - vcf: - type: file - description: vcf file (optional) - pattern: "*.vcf.gz" - - -authors: - - "@RaqManzano" diff --git a/modules/nf-core/vcftools/main.nf b/modules/nf-core/vcftools/main.nf index 78b95fa..cf6d296 100644 --- a/modules/nf-core/vcftools/main.nf +++ b/modules/nf-core/vcftools/main.nf @@ -1,11 +1,11 @@ process VCFTOOLS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::vcftools=0.1.16" : null) + conda "bioconda::vcftools=0.1.16" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/vcftools:0.1.16--he513fc3_4' : - 'quay.io/biocontainers/vcftools:0.1.16--he513fc3_4' }" + 'biocontainers/vcftools:0.1.16--he513fc3_4' }" input: // Owing to the nature of vcftools we here provide solutions to working with optional bed files and optional diff --git a/modules/nf-core/vcftools/meta.yml b/modules/nf-core/vcftools/meta.yml index 7a85bde..04b786f 100644 --- a/modules/nf-core/vcftools/meta.yml +++ b/modules/nf-core/vcftools/meta.yml @@ -1,14 +1,13 @@ name: vcftools description: A set of tools written in Perl and C++ for working with VCF files -keywords: VCF +keywords: + - VCF - sort tools: - vcftools: description: A set of tools written in Perl and C++ for working with VCF files. This package only contains the C++ libraries whereas the package perl-vcftools-vcf contains the perl libraries homepage: http://vcftools.sourceforge.net/ documentation: http://vcftools.sourceforge.net/man_latest.html - tool_dev_url: None - doi: licence: ["LGPL"] input: diff --git a/nextflow.config b/nextflow.config index eda1e07..abe78c8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,7 +44,6 @@ params { save_align_intermeds = false bwa = null bwamem2 = null - dragmap = null hisat2_build_memory = null @@ -63,11 +62,6 @@ params { save_trimmed_fail = false save_split_fastqs = false - // UMI tagged reads - umi_read_structure = null // no UMI - group_by_umi_strategy = 'Adjacency' // default strategy when running with UMI for GROUPREADSBYUMI - - // Variant calling no_intervals = false intervals = null @@ -88,8 +82,6 @@ params { ignore_soft_clipped_bases = true // Variant annotation tools = null // No default Variant_Calling or Annotation tools - annotate_tools = null // List of annotation tools to run - only vep available - annotation_cache = false // Annotation cache disabled genesplicer = null // genesplicer disabled within VEP vep_cache = null // No directory for VEP cache vep_genome = null // No default genome for VEP @@ -122,9 +114,6 @@ params { hook_url = null help = false version = false - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false // Config options config_profile_name = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 19e2582..2de8d4e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -110,6 +110,44 @@ "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the 
FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs.", "hidden": true }, + "star_index": { + "type": "string", + "description": "Path to STAR index folder or compressed file (tar.gz)", + "help_text": "This parameter can be used if there is an pre-defined STAR index available. You can either give the full path to the index directory or a compressed file in tar.gz format." + }, + "star_twopass": { + "type": "boolean", + "description": "Enable STAR 2-pass mapping mode.", + "help_text": "This parameter enables STAR to perform 2-pass mapping. Default true." + }, + "star_ignore_sjdbgtf": { + "type": "boolean", + "description": "Do not use GTF file during STAR index buidling step", + "help_text": "Do not use parameter --sjdbGTFfile during the STAR genomeGenerate process." + }, + "star_max_memory_bamsort": { + "type": "integer", + "default": 0, + "description": "Option to limit RAM when sorting BAM file. Value to be specified in bytes. If 0, will be set to the genome index size.", + "help_text": "This parameter specifies the maximum available RAM (bytes) for sorting BAM during STAR alignment." + }, + "star_bins_bamsort": { + "type": "integer", + "default": 50, + "description": "Specifies the number of genome bins for coordinate-sorting", + "help_text": "This parameter specifies the number of bins to be used for coordinate sorting during STAR alignment step." + }, + "star_max_collapsed_junc": { + "type": "integer", + "default": 1000000, + "description": "Specifies the maximum number of collapsed junctions" + }, + "read_length": { + "type": "number", + "default": 151.0, + "description": "Read length", + "help_text": "Specify the read length for the STAR aligner." + }, "nucleotides_per_second": { "type": "number", "fa_icon": "fas fa-clock", @@ -117,6 +155,27 @@ "help_text": "Intervals are parts of the chopped up genome used to speed up preprocessing and variant calling. See `--intervals` for more info. \n\nChanging this parameter, changes the number of intervals that are grouped and processed together. Bed files from target sequencing can contain thousands or small intervals. Spinning up a new process for each can be quite resource intensive. Instead it can be desired to process small intervals together on larger nodes. \nIn order to make use of this parameter, no runtime estimate can be present in the bed file (column 5). ", "default": 1000.0 }, + "dbsnp": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to dbsnp file.", + "hidden": true, + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + }, + "dbsnp_tbi": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to dbsnp index.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs.", + "hidden": true + }, + "dict": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to FASTA dictionary file.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. 
Combine with `--save_reference` to save for future runs.", + "hidden": true + }, "fasta": { "type": "string", "format": "file-path", @@ -124,12 +183,14 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nThis parameter is *mandatory* if `--genome` is not specified.", "fa_icon": "far fa-file-code" }, "fasta_fai": { "type": "string", - "default": "None" + "fa_icon": "fas fa-file", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.", + "description": "Path to FASTA reference index." }, "germline_resource": { "type": "string", @@ -145,6 +206,20 @@ "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs.", "hidden": true }, + "known_indels": { + "type": "string", + "fa_icon": "fas fa-copy", + "description": "Path to known indels file.", + "hidden": true, + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + }, + "known_indels_tbi": { + "type": "string", + "fa_icon": "fas fa-copy", + "description": "Path to known indels file index.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.", + "hidden": true + }, "known_snps": { "type": "string", "fa_icon": "fas fa-copy", @@ -156,15 +231,51 @@ "description": "Path to known snps file snps.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs." 
}, + "vep_genome": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "VEP genome.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when using the container with pre-downloaded cache.", + "hidden": true + }, + "vep_species": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "VEP species.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively species listed in Ensembl Genomes caches can be used.", + "hidden": true + }, + "vep_cache_version": { + "type": "number", + "fa_icon": "fas fa-tag", + "description": "VEP cache version.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively cache version can be use to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers", + "hidden": true + }, + "vep_version": { + "type": "string", + "fa_icon": "fas fa-tag", + "description": "VEP version.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the VEP version when using the container with pre-downloaded cache.", + "hidden": true + }, "save_reference": { "type": "boolean", "fa_icon": "fas fa-download", "description": "Save built references.", "help_text": "Set this parameter, if you wish to save all computed reference files. This is useful to avoid re-computation on future runs." }, - "dict": { - "type": "string", - "default": "None" + "build_only_index": { + "type": "boolean", + "fa_icon": "fas fa-download", + "description": "Only built references.", + "help_text": "Set this parameter, if you wish to compute and save all computed reference files. No alignment or any other downstream steps will be performed." + }, + "download_cache": { + "type": "boolean", + "fa_icon": "fas fa-download", + "description": "Download annotation cache.", + "help_text": "Set this parameter, if you wish to download annotation cache." }, "igenomes_base": { "type": "string", @@ -181,6 +292,14 @@ "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." }, + "hisat2_build_memory": { + "type": "string", + "default": "200.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "description": "Minimum memory required to use splice sites and exons in the HiSAT2 index build process.", + "help_text": "HiSAT2 requires a huge amount of RAM to build a genome index for larger genomes, if including splice sites and exons e.g. the human genome might typically require 200GB. If you specify less than this threshold for the `HISAT2_BUILD` process then the splice sites and exons will be ignored, meaning that the process will require a lot less memory. If you are working with a small genome, set this parameter to a lower value to reduce the threshold for skipping this check. If using a larger genome, consider supplying more memory to the `HISAT2_BUILD` process." + }, "gtf": { "type": "string", "description": "Path to GTF annotation file.", @@ -194,45 +313,9 @@ "exon_bed": { "type": "string", "description": "Path to BED file containing exon intervals. This will be created from the GTF file if not specified." 
- }, - "read_length": { - "type": "number", - "default": 151.0, - "description": "Read length", - "help_text": "Specify the read length for the STAR aligner." - }, - "known_indels": { - "type": "string", - "description": "Path to known indels VCF file" - }, - "known_indels_tbi": { - "type": "string", - "description": "Path to known indels index file" - }, - "dbsnp": { - "type": "string", - "description": "Path to dbSNP VCF file" - }, - "dbsnp_tbi": { - "type": "string", - "description": "Path to dbSNP VCF index file" - }, - "dragmap": { - "type": "string", - "fa_icon": "fas fa-copy", - "description": "Path to dragmap indices.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.", - "hidden": true - }, - "hisat2_build_memory": { - "type": "string", - "default": "200.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "description": "Minimum memory required to use splice sites and exons in the HiSAT2 index build process.", - "help_text": "HiSAT2 requires a huge amount of RAM to build a genome index for larger genomes, if including splice sites and exons e.g. the human genome might typically require 200GB. If you specify less than this threshold for the `HISAT2_BUILD` process then the splice sites and exons will be ignored, meaning that the process will require a lot less memory. If you are working with a small genome, set this parameter to a lower value to reduce the threshold for skipping this check. If using a larger genome, consider supplying more memory to the `HISAT2_BUILD` process." } - } + }, + "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/." }, "fastq_preprocessing": { "title": "FASTQ Preprocessing", @@ -491,35 +574,64 @@ "fa_icon": "fas fa-file", "description": "Known hotspots used as input in Sage variant caller", "hidden": true + }, + "ignore_soft_clipped_bases": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "Do not analyze soft clipped bases in the reads for GATK Mutect2.", + "help_text": "use the `--dont-use-soft-clipped-bases` params with GATK Mutect2.", + "hidden": true } } }, - "variant_annotation": { - "title": "Variant Annotation", + "annotation": { + "title": "Annotation", "type": "object", "description": "", "default": "", "fa_icon": "fas fa-toolbox", "properties": { - "annotate_tools": { - "type": "string", - "fa_icon": "fas fa-hammer", - "description": "Specify which tools RNADNAvar should use for annotating variants. Only VEP implemented.", - "help_text": "List of tools to be used for variant annotation.", - "pattern": "^((vep)*(,)*)*$", - "hidden": true + "vep_include_fasta": { + "type": "boolean", + "fa_icon": "fas fa-file", + "description": "Allow usage of fasta file for annotation with VEP", + "hidden": true, + "help_text": "By pointing VEP to a FASTA file, it is possible to retrieve reference sequence locally. 
This enables VEP to retrieve HGVS notations (--hgvs), check the reference sequence given in input data, and construct transcript models from a GFF or GTF file without accessing a database.\n\nFor details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#fasta)." }, - "annotation_cache": { + "vep_dbnsfp": { "type": "boolean", "fa_icon": "fas fa-database", - "description": "Enable the use of cache for annotation", - "help_text": "And disable usage of vep specific containers for annotation\n\nTo be used with `--vep_cache`", + "description": "Enable the use of the VEP dbNSFP plugin.", + "hidden": true, + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)." + }, + "dbnsfp": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to dbNSFP processed file.", + "help_text": "To be used with `--vep_dbnsfp`.\ndbNSFP files and more information are available at https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp and https://sites.google.com/site/jpopgen/dbNSFP/", "hidden": true }, - "genesplicer": { - "type": "boolean", - "fa_icon": "fas fa-gavel", - "description": "Enable the use of the VEP GeneSplicer plugin.", + "dbnsfp_tbi": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to dbNSFP tabix indexed file.", + "help_text": "To be used with `--vep_dbnsfp`.", + "hidden": true + }, + "dbnsfp_consequence": { + "type": "string", + "fa_icon": "fas fa-arrow-alt-circle-right", + "description": "Consequence to annotate with", + "help_text": "To be used with `--vep_dbnsfp`.\nThis params is used to filter/limit outputs to a specific effect of the variant.\nThe set of consequence terms is defined by the Sequence Ontology and an overview of those used in VEP can be found here: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html\nIf one wants to filter using several consequences, then separate those by using '&' (i.e. 'consequence=3_prime_UTR_variant&intron_variant'.", + "hidden": true + }, + "dbnsfp_fields": { + "type": "string", + "fa_icon": "fas fa-border-all", + "description": "Fields to annotate with", + "default": "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF", + "help_text": "To be used with `--vep_dbnsfp`.\nThis params can be used to retrieve individual values from the dbNSFP file. The values correspond to the name of the columns in the dbNSFP file and are separated by comma.\nThe column names might differ between the different dbNSFP versions. Please check the Readme.txt file, which is provided with the dbNSFP file, to obtain the correct column names. The Readme file contains also a short description of the provided values and the version of the tools used to generate them.\n\nDefault value are explained below:\n\nrs_dbSNP - rs number from dbSNP\nHGVSc_VEP - HGVS coding variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_transcriptid\nHGVSp_VEP - HGVS protein variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_proteinid\n1000Gp3_EAS_AF - Alternative allele frequency in the 1000Gp3 East Asian descendent samples\n1000Gp3_AMR_AF - Alternative allele counts in the 1000Gp3 American descendent samples\nLRT_score - Original LRT two-sided p-value (LRTori), ranges from 0 to 1\nGERP++_RS - Conservation score. 
The larger the score, the more conserved the site, ranges from -12.3 to 6.17\ngnomAD_exomes_AF - Alternative allele frequency in the whole gnomAD exome samples.", "hidden": true }, "vep_loftee": { @@ -529,19 +641,11 @@ "hidden": true, "help_text": "For details, see [here](https://github.com/konradjk/loftee)." }, - "vep_cache": { - "type": "string", - "fa_icon": "fas fa-database", - "description": "Path to VEP cache", - "help_text": "To be used with `--annotation_cache`", - "hidden": true - }, - "vep_dbnsfp": { + "genesplicer": { "type": "boolean", "fa_icon": "fas fa-database", - "description": "Enable the use of the VEP dbNSFP plugin.", - "hidden": true, - "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)." + "description": "Enable the use of the VEP genesplicer plugin.", + "help_text": "For details, see [here](https://ccb.jhu.edu/software/genesplicer/)." }, "vep_spliceai": { "type": "boolean", @@ -587,32 +691,38 @@ }, "vep_custom_args": { "type": "string", + "default": "--everything --filter_common --per_gene --total_length --offline --format vcf", "fa_icon": "fas fa-toolbox", "description": "Add an extra custom argument to VEP.", "hidden": true, "help_text": "Using this params you can add custom args to VEP." }, - "vep_genome": { - "type": "string", - "fa_icon": "fas fa-microscope", - "description": "VEP genome", - "help_text": "If you use AWS iGenomes or a local resource with genomes.conf, this has already been set for you appropriately." - }, - "vep_species": { + "vep_cache": { "type": "string", - "fa_icon": "fas fa-microscope", - "description": "VEP species", - "help_text": "If you use AWS iGenomes or a local resource with genomes.conf, this has already been set for you appropriately." + "fa_icon": "fas fa-file", + "description": "Path to VEP cache.", + "help_text": "To be used with `--annotation_cache`.", + "hidden": true }, - "vep_cache_version": { + "outdir_cache": { "type": "string", - "fa_icon": "fas fa-tag", - "description": "VEP cache version" + "format": "directory-path", + "description": "The output directory where the cache will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open", + "hidden": true }, "vep_out_format": { "type": "string", - "fa_icon": "fas fa-tag", - "description": "VEP output format" + "default": "vcf", + "description": "VEP output-file format.", + "enum": [ + "json", + "tab", + "vcf" + ], + "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", + "fa_icon": "fas fa-table", + "hidden": true }, "whitelist": { "type": "string", @@ -623,6 +733,11 @@ "type": "string", "fa_icon": "fas fa-database", "description": "Path to BED file with positions to blacklist during filtering (e.g. regions difficult to map)" + }, + "annotation_cache": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "" } } }, @@ -671,6 +786,27 @@ "description": "Institutional config URL link.", "hidden": true, "fa_icon": "fas fa-users-cog" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/sarek3", + "description": "Base path / URL for data used in the test profiles", + "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. 
Setting this parameter does not alter the contents of that file.", + "hidden": true + }, + "seq_center": { + "type": "string", + "fa_icon": "fas fa-university", + "description": "Sequencing center information to be added to read group (CN field).", + "hidden": true + }, + "seq_platform": { + "type": "string", + "fa_icon": "fas fa-university", + "default": "ILLUMINA", + "description": "Sequencing platform information to be added to read group (PL field).", + "help_text": "Default: ILLUMINA. Will be used to create a proper header for further GATK4 downstream analysis.", + "hidden": true } } }, @@ -687,7 +823,7 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`." }, "max_memory": { "type": "string", @@ -696,7 +832,7 @@ "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`." }, "max_time": { "type": "string", @@ -705,7 +841,7 @@ "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`." } } }, @@ -720,14 +856,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -746,6 +880,13 @@ ], "hidden": true }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails.", @@ -758,7 +899,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -773,15 +913,12 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, - "hook_url": { + "multiqc_title": { "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true + "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" }, "multiqc_config": { "type": "string", @@ -812,7 +949,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -820,7 +956,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -828,9 +963,15 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true } } } @@ -855,7 +996,7 @@ "$ref": "#/definitions/variant_calling" }, { - "$ref": "#/definitions/variant_annotation" + "$ref": "#/definitions/annotation" }, { "$ref": "#/definitions/institutional_config_options" diff --git a/subworkflows/nf-core/fgbio_create_umi_consensus/main.nf b/subworkflows/nf-core/fgbio_create_umi_consensus/main.nf deleted file mode 100644 index 030397a..0000000 --- a/subworkflows/nf-core/fgbio_create_umi_consensus/main.nf +++ /dev/null @@ -1,67 +0,0 @@ -// -// Runs FGBIO tools to remove UMI tags from FASTQ reads -// Convert them to unmapped BAM file, map them to the reference genome, -// use the mapped information to group UMIs and generate consensus reads -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/modules/fgbio/callmolecularconsensusreads/main.nf' -include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/modules/fgbio/fastqtobam/main' -include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/modules/fgbio/groupreadsbyumi/main' -include { GATK4_MAPPING as MAPPING_UMI } from '../gatk4/mapping/main' -include { SAMBLASTER } from '../../../modules/nf-core/modules/samblaster/main' -include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/modules/samtools/bam2fq/main.nf' - -workflow CREATE_UMI_CONSENSUS { - take: - reads // channel: [mandatory] [ val(meta), [ reads ] ] - fasta // channel: [mandatory] /path/to/reference/fasta - map_index // channel: [mandatory] Pre-computed mapping index - read_structure // string: [mandatory] "read_structure" - groupreadsbyumi_strategy // string: [mandatory] grouping strategy - default: "Adjacency" - - main: - ch_versions = Channel.empty() - - // using information in val(read_structure) FASTQ reads are converted into - // a tagged unmapped BAM file (uBAM) - FASTQTOBAM(reads, read_structure) - - // in order to map uBAM using BWA MEM, we need to convert uBAM to FASTQ - // TODO check if DRAGMAP works 
well with BAM inputs - // but keep the appropriate UMI tags in the FASTQ comment field and produce - // an interleaved FASQT file (hence, split = false) - split = false - BAM2FASTQ(FASTQTOBAM.out.umibam, split) - - // appropriately tagged interleaved FASTQ reads are mapped to the reference - // bams will not be sorted (hence, sort = false) - sort = false - MAPPING_UMI(BAM2FASTQ.out.reads, map_index, sort) - - // samblaster is used in order to tag mates information in the BAM file - // this is used in order to group reads by UMI - SAMBLASTER(MAPPING_UMI.out.bam) - - // appropriately tagged reads are now grouped by UMI information - GROUPREADSBYUMI(SAMBLASTER.out.bam, groupreadsbyumi_strategy) - - // Using newly created groups - // To call a consensus across reads in the same group - // And emit a consensus BAM file - CALLUMICONSENSUS(GROUPREADSBYUMI.out.bam) - - ch_versions = ch_versions.mix(BAM2FASTQ.out.versions) - ch_versions = ch_versions.mix(MAPPING_UMI.out.versions) - ch_versions = ch_versions.mix(CALLUMICONSENSUS.out.versions) - ch_versions = ch_versions.mix(FASTQTOBAM.out.versions) - ch_versions = ch_versions.mix(GROUPREADSBYUMI.out.versions) - ch_versions = ch_versions.mix(SAMBLASTER.out.versions) - - emit: - umibam = FASTQTOBAM.out.umibam // channel: [ val(meta), [ bam ] ] - groupbam = GROUPREADSBYUMI.out.bam // channel: [ val(meta), [ bam ] ] - consensusbam = CALLUMICONSENSUS.out.bam // channel: [ val(meta), [ bam ] ] - versions = ch_versions // channel: [ versions.yml ] -} From 6b39dd8b38b33bc34a4aaac1713fb7f6d2eb406e Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Thu, 17 Aug 2023 19:27:19 +0100 Subject: [PATCH 10/56] Huge effort to finally update modules done :) --- modules/local/gatk4/filtersamreads/main.nf | 39 ++++++++ modules/local/gatk4/filtersamreads/meta.yml | 50 ++++++++++ modules/local/sage/main.nf | 92 +++++++++++++++++++ modules/local/sage/meta.yml | 77 ++++++++++++++++ modules/local/vcflib/filter/main.nf | 34 +++++++ modules/local/vcflib/filter/meta.yml | 45 +++++++++ modules/nf-core/ensemblvep/vep/main.nf | 71 ++++++++++++++ modules/nf-core/ensemblvep/vep/meta.yml | 88 ++++++++++++++++++ modules/nf-core/picard/filtersamreads/main.nf | 62 +++++++++++++ .../nf-core/picard/filtersamreads/meta.yml | 51 ++++++++++ .../nf-core/vcf_annotate_ensemblvep/main.nf | 45 +++++++++ .../nf-core/vcf_annotate_ensemblvep/meta.yml | 61 ++++++++++++ 12 files changed, 715 insertions(+) create mode 100644 modules/local/gatk4/filtersamreads/main.nf create mode 100644 modules/local/gatk4/filtersamreads/meta.yml create mode 100644 modules/local/sage/main.nf create mode 100644 modules/local/sage/meta.yml create mode 100644 modules/local/vcflib/filter/main.nf create mode 100644 modules/local/vcflib/filter/meta.yml create mode 100644 modules/nf-core/ensemblvep/vep/main.nf create mode 100644 modules/nf-core/ensemblvep/vep/meta.yml create mode 100644 modules/nf-core/picard/filtersamreads/main.nf create mode 100644 modules/nf-core/picard/filtersamreads/meta.yml create mode 100644 subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf create mode 100644 subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml diff --git a/modules/local/gatk4/filtersamreads/main.nf b/modules/local/gatk4/filtersamreads/main.nf new file mode 100644 index 0000000..ca28c7b --- /dev/null +++ b/modules/local/gatk4/filtersamreads/main.nf @@ -0,0 +1,39 @@ +process GATK4_FILTERSAMREADS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai), path(read_ids) + val fasta // treat it as a string because FilterSamReads is unable to solve softlinking + + output: + tuple val(meta), path("*.bam"), path("*.bai") , emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def fastastr = fasta[0].toString() + + """ + gatk FilterSamReads \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --TMP_DIR . \\ + -R ${fastastr} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/filtersamreads/meta.yml b/modules/local/gatk4/filtersamreads/meta.yml new file mode 100644 index 0000000..92ca390 --- /dev/null +++ b/modules/local/gatk4/filtersamreads/meta.yml @@ -0,0 +1,50 @@ +name: gatk4_filtersamreads +description: | + Subsets reads from a SAM or BAM file by applying one of several filters. +keywords: + - gatk4 + - reads + - BAM + - SAM +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: compressed vcf file of mutect2calls + pattern: "*.bam" + - read_ids: + type: file + description: File with read ids to keep + pattern: "*.txt" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: compressed vcf file of mutect2calls + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@RaqManzano" diff --git a/modules/local/sage/main.nf b/modules/local/sage/main.nf new file mode 100644 index 0000000..1523430 --- /dev/null +++ b/modules/local/sage/main.nf @@ -0,0 +1,92 @@ +def VERSION = '3.1' // Version information not provided by tool on CLI + +process SAGE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::hmftools-sage=3.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.1--hdfd78af_0' : + 'quay.io/biocontainers/hmftools-sage:3.1--hdfd78af_0' }" + + input: + tuple val(meta), path(normal), path(normal_index), path(tumor), path(tumor_index), path(intervals) + path fasta + path fasta_fai + path dict + path highconfidence + path actionablepanel + path knownhot + path ensbl_sage + + output: + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = normal ? "-reference ${meta.normal_id} -reference_bam ${normal}" : "" + def HighConfidence = highconfidence ? "-high_confidence_bed ${highconfidence}" : "" + def ActionableCodingPanel = actionablepanel ? "-panel_bed ${actionablepanel}" : "" + def KnownHotspots = knownhot ? "-hotspots ${knownhot}" : "" + def avail_mem = 4 + if (!task.memory) { + log.info '[SAGE] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + if (intervals){ // If no reads the intervals don't work in sage + """ + export _JAVA_OPTIONS="-Xmx${avail_mem}g" + INTER=\$(sed -E 's/\\s+0\\s+/\\t1\\t/g' $intervals | grep -v chrM | sed 's/\t/:/g' | paste -s -d ';') + + SAGE \\ + -out ${prefix}.vcf \\ + -ref_genome $fasta \\ + -threads $task.cpus \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${tumor} \\ + $reference \\ + -ensembl_data_dir $ensbl_sage \\ + $HighConfidence \\ + $ActionableCodingPanel \\ + $KnownHotspots \\ + -specific_regions \$INTER \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sage: $VERSION + END_VERSIONS + """ + + } else { + """ + export _JAVA_OPTIONS="-Xmx${avail_mem}g" + SAGE \\ + -out ${prefix}.vcf \\ + -ref_genome $fasta \\ + -threads $task.cpus \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${tumor} \\ + $reference \\ + -ensembl_data_dir $ensbl_sage \\ + $HighConfidence \\ + $ActionableCodingPanel \\ + $KnownHotspots \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sage: $VERSION + END_VERSIONS + """ + } + + + +} diff --git a/modules/local/sage/meta.yml b/modules/local/sage/meta.yml new file mode 100644 index 0000000..b54f9dd --- /dev/null +++ b/modules/local/sage/meta.yml @@ -0,0 +1,77 @@ +name: sage +description: SAGE is a precise and highly sensitive somatic SNV, MNV and small INDEL caller. It has been optimised for 100x tumor / 40x normal coverage, but has a flexible set of filters that can be adapted to lower or higher depth coverage. +keywords: + - variant caller + - SNP + - indels + - somatic variant calling + - hmftools + +tools: + - sage: + description: SAGE is a precise and highly sensitive somatic SNV, MNV and small INDEL caller. + homepage: https://github.com/hartwigmedical/hmftools/blob/master/sage/README.md + documentation: https://github.com/hartwigmedical/hmftools/tree/master/sage + tool_dev_url: https://github.com/hartwigmedical/hmftools/tree/master/sage + licence: ["GPL-3.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - normal_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + - tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - tumor_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + - intervals: + type: file + description: BED file for intervals + pattern: "*.bed" + - fasta: + type: file + description: reference fasta file + pattern: ".{fa,fa.gz,fasta,fasta.gz}" + - highconfidence: + type: file + description: Optional. + pattern: "*.bed" + - actionablepanel: + type: file + description: Optional. + pattern: "*.bed" + - knownhot: + type: file + description: Optional. + pattern: "*.bed" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + +authors: + - "@RaqManzano" diff --git a/modules/local/vcflib/filter/main.nf b/modules/local/vcflib/filter/main.nf new file mode 100644 index 0000000..91d6f4e --- /dev/null +++ b/modules/local/vcflib/filter/main.nf @@ -0,0 +1,34 @@ +process VCFFILTER { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::vcftools=0.1.16" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1' : + 'quay.io/biocontainers/vcflib:1.0.3--hecb563c_1' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + + gunzip -c $vcf | vcffilter \\ + $args > ${prefix}.vcf + gzip ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcffilter: \$(echo \$(vcffilter 2>&1) | sed 's/^.*vcflib ( //;s/).*//' | cut -f2 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/local/vcflib/filter/meta.yml b/modules/local/vcflib/filter/meta.yml new file mode 100644 index 0000000..70acca0 --- /dev/null +++ b/modules/local/vcflib/filter/meta.yml @@ -0,0 +1,45 @@ +name: vcffilter +description: VCF filter the specified vcf file using the set of filters + +keywords: + - VCF + - filter + - variant calling +tools: + - vcffilter: + description: VCF filter the specified vcf file using the set of filters + homepage: https://github.com/vcflib/vcflib + documentation: https://github.com/vcflib/vcflib + tool_dev_url: https://github.com/vcflib/vcflib + doi: https://doi.org/10.1101/2021.05.21.445151 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: vcf file (optional) + pattern: "*.vcf.gz" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: vcf file (optional) + pattern: "*.vcf.gz" + + +authors: + - "@RaqManzano" diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf new file mode 100644 index 0000000..da0e364 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -0,0 +1,71 @@ +process ENSEMBLVEP_VEP { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::ensembl-vep=110.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(vcf), path(custom_extra_files) + val genome + val species + val cache_version + path cache + tuple val(meta2), path(fasta) + path extra_files + + output: + tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf + tuple val(meta), path("*.tab.gz") , optional:true, emit: tab + tuple val(meta), path("*.json.gz") , optional:true, emit: json + path "*.summary.html" , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' + def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip' + def prefix = task.ext.prefix ?: "${meta.id}" + def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? "--fasta $fasta" : "" + """ + vep \\ + -i $vcf \\ + -o ${prefix}.${file_extension}.gz \\ + $args \\ + $compress_cmd \\ + $reference \\ + --assembly $genome \\ + --species $species \\ + --cache \\ + --cache_version $cache_version \\ + --dir_cache $dir_cache \\ + --fork $task.cpus \\ + --stats_file ${prefix}.summary.html \\ + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.tab.gz + touch ${prefix}.json.gz + touch ${prefix}.summary.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml new file mode 100644 index 0000000..7783847 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -0,0 +1,88 @@ +name: ENSEMBLVEP_VEP +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. +keywords: + - annotation + - vcf + - json + - tab +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) + - genome: + type: string + description: | + which genome to annotate with + - species: + type: string + description: | + which species to annotate with + - cache_version: + type: integer + description: | + which version of the cache to annotate with + - cache: + type: file + description: | + path to VEP cache (optional) + - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" + - extra_files: + type: file + description: | + path to file(s) needed for plugins (optional) +output: + - vcf: + type: file + description: | + annotated vcf (optional) + pattern: "*.ann.vcf.gz" + - tab: + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab.gz" + - json: + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json.gz" + - report: + type: file + description: VEP report file + pattern: "*.html" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/picard/filtersamreads/main.nf b/modules/nf-core/picard/filtersamreads/main.nf new file mode 100644 index 0000000..f7bd191 --- /dev/null +++ b/modules/nf-core/picard/filtersamreads/main.nf @@ -0,0 +1,62 @@ +process PICARD_FILTERSAMREADS { + tag "$meta.id" + label 'process_low' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(readlist) + val filter + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard FilterSamReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + if ( filter == 'includeAligned' || filter == 'excludeAligned' ) { + """ + picard \\ + FilterSamReads \\ + -Xmx${avail_mem}M \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --FILTER $filter \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard FilterSamReads --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + } else if ( filter == 'includeReadList' || filter == 'excludeReadList' ) { + """ + picard \\ + FilterSamReads \\ + -Xmx${avail_mem}M \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --FILTER $filter \\ + --READ_LIST_FILE $readlist \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard FilterSamReads --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/picard/filtersamreads/meta.yml b/modules/nf-core/picard/filtersamreads/meta.yml new file mode 100644 index 0000000..56bf896 --- /dev/null +++ b/modules/nf-core/picard/filtersamreads/meta.yml @@ -0,0 +1,51 @@ +name: picard_filtersamreads +description: Filters SAM/BAM files to include/exclude either aligned/unaligned reads or based on a read list +keywords: + - bam + - filter +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + tool_dev_url: https://github.com/broadinstitute/picard + + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: List of BAM files. If filtering without read list must be sorted by queryname with picard sortsam + pattern: "*.{bam}" + - filter: + type: value + description: Picard filter type + pattern: "includeAligned|excludeAligned|includeReadList|excludeReadList" + - readlist: + type: file + description: Optional text file containing reads IDs to include or exclude + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Filtered BAM file + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jfy133" diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf new file mode 100644 index 0000000..291eddc --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf @@ -0,0 +1,45 @@ +// +// Run VEP to annotate VCF files +// + +include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' + +workflow VCF_ANNOTATE_ENSEMBLVEP { + take: + ch_vcf // channel: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... (optionnal)]] + ch_fasta // channel: [ val(meta2), path(fasta) ] (optional) + val_genome // value: genome to use + val_species // value: species to use + val_cache_version // value: cache version to use + ch_cache // channel: [ val(meta3), path(cache) ] (optional) + ch_extra_files // channel: [ path(file1), path(file2)... 
] (optional) + + main: + ch_versions = Channel.empty() + + ENSEMBLVEP_VEP( + ch_vcf, + val_genome, + val_species, + val_cache_version, + ch_cache, + ch_fasta, + ch_extra_files + ) + + TABIX_TABIX(ENSEMBLVEP_VEP.out.vcf) + + ch_vcf_tbi = ENSEMBLVEP_VEP.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + json = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json) ] + tab = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab) ] + reports = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml new file mode 100644 index 0000000..7a9fd10 --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: vcf_annotate_ensemblvep +description: Perform annotation with ensemblvep and bgzip + tabix index the resulting VCF file +keywords: + - vcf + - annotation + - ensemblvep +components: + - ensemblvep/vep + - tabix/tabix +input: + - ch_vcf: + description: | + vcf file to annotate + Structure: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... (optionnal)] ] + - ch_fasta: + description: | + Reference genome fasta file (optional) + Structure: [ val(meta2), path(fasta) ] + - val_genome: + type: string + description: genome to use + - val_species: + type: string + description: species to use + - val_cache_version: + type: integer + description: cache version to use + - ch_cache: + description: | + the root cache folder for ensemblvep (optional) + Structure: [ val(meta3), path(cache) ] + - ch_extra_files: + description: | + any extra files needed by plugins for ensemblvep (optional) + Structure: [ path(file1), path(file2)... 
] +output: + - vcf_tbi: + description: | + Compressed vcf file + tabix index + Structure: [ val(meta), path(vcf), path(tbi) ] + - json: + description: | + json file + Structure: [ val(meta), path(json) ] + - tab: + description: | + tab file + Structure: [ val(meta), path(tab) ] + - reports: + type: file + description: html reports + pattern: "*.html" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" From 28fe888353bfe090f4162f75cff60ee1b01968bb Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Mon, 21 Aug 2023 09:43:47 +0100 Subject: [PATCH 11/56] Updating alignment subworkflow and modules --- lib/WorkflowRnadnavar.groovy | 79 +++---- main.nf | 17 +- modules.json | 105 +++++++-- modules/nf-core/ensemblvep/Dockerfile | 31 --- modules/nf-core/ensemblvep/build.sh | 28 --- modules/nf-core/ensemblvep/environment.yml | 10 - modules/nf-core/ensemblvep/main.nf | 56 ----- modules/nf-core/ensemblvep/meta.yml | 73 ------ modules/nf-core/hisat2/align/meta.yml | 2 +- modules/nf-core/hisat2/build/meta.yml | 2 +- .../hisat2/extractsplicesites/meta.yml | 2 +- modules/nf-core/samtools/flagstat/meta.yml | 2 +- modules/nf-core/samtools/idxstats/meta.yml | 2 +- nextflow_schema.json | 42 +++- subworkflows/local/bam_align/main.nf | 207 ++++++++++++++++++ .../bam_convert_samtools/main.nf} | 16 +- .../local/bam_merge_index_samtools/main.nf | 6 +- .../local/channel_align_create_csv/main.nf | 23 ++ .../fastq_align_bwamem_mem2_dragmap/main.nf | 46 ++++ subworkflows/local/mapping.nf | 199 ----------------- subworkflows/local/mapping_csv.nf | 21 -- subworkflows/local/prepare_genome.nf | 189 ---------------- subworkflows/local/prepare_genome/main.nf | 189 ++++++++++++++++ .../main.nf} | 12 +- .../local/prepare_reference_and_intervals.nf | 22 +- subworkflows/nf-core/align_star.nf | 55 ----- subworkflows/nf-core/bam_sort_samtools.nf | 6 +- subworkflows/nf-core/merge_index_bam.nf | 45 ---- 28 files changed, 671 insertions(+), 816 deletions(-) delete mode 100644 modules/nf-core/ensemblvep/Dockerfile delete mode 100644 modules/nf-core/ensemblvep/build.sh delete mode 100644 modules/nf-core/ensemblvep/environment.yml delete mode 100644 modules/nf-core/ensemblvep/main.nf delete mode 100644 modules/nf-core/ensemblvep/meta.yml create mode 100644 subworkflows/local/bam_align/main.nf rename subworkflows/{nf-core/alignment_to_fastq.nf => local/bam_convert_samtools/main.nf} (87%) create mode 100644 subworkflows/local/channel_align_create_csv/main.nf create mode 100644 subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf delete mode 100644 subworkflows/local/mapping.nf delete mode 100644 subworkflows/local/mapping_csv.nf delete mode 100644 subworkflows/local/prepare_genome.nf create mode 100644 subworkflows/local/prepare_genome/main.nf rename subworkflows/local/{prepare_intervals.nf => prepare_intervals/main.nf} (90%) delete mode 100644 subworkflows/nf-core/align_star.nf delete mode 100644 subworkflows/nf-core/merge_index_bam.nf diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index cbd3c30..a80e6ca 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -11,11 +11,9 @@ class WorkflowRnadnavar { // Check and validate parameters // public static void initialise(params, log) { - genomeExistsError(params, log) - - if (!params.fasta) { + if (!params.fasta && params.step == 'annotate') { Nextflow.error "Genome fasta file not specified with e.g. 
'--fasta genome.fa' or via a detectable config file." } } @@ -47,57 +45,15 @@ class WorkflowRnadnavar { return yaml_file_text } - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
-        // Uncomment function in methodsDescriptionText to render in MultiQC report
-        def reference_text = [
-                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
-                "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
-            ].join(' ').trim()
-
-        return reference_text
-    }
-
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
         // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
         def meta = [:]
         meta.workflow = run_workflow.toMap()
         meta["manifest_map"] = run_workflow.manifest.toMap()
-        // Pipeline DOI
         meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
         meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>
  • " - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() @@ -119,4 +75,33 @@ class WorkflowRnadnavar { Nextflow.error(error_string) } } -} + // TODO add consensus and filtering steps here + public static String retrieveInput(params, log){ + def input = '' + if (params.input) input = params.input + else { + switch (params.step) { + case 'mapping': Nextflow.error("Can't start with step $params.step without samplesheet") + break + case 'markduplicates': log.warn("Using file ${params.outdir}/csv/mapped.csv"); + input = params.outdir + "/csv/mapped.csv" + break + case 'prepare_recalibration': log.warn("Using file ${params.outdir}/csv/markduplicates_no_table.csv"); + input = params.outdir + "/csv/markduplicates_no_table.csv" + break + case 'recalibrate': log.warn("Using file ${params.outdir}/csv/markduplicates.csv"); + input = params.outdir + "/csv/markduplicates.csv" + break + case 'variant_calling': log.warn("Using file ${params.outdir}/csv/recalibrated.csv"); + input = params.outdir + "/csv/recalibrated.csv" + break + case 'annotate': log.warn("Using file ${params.outdir}/csv/variantcalled.csv"); + input = params.outdir + "/csv/variantcalled.csv" + break + default: log.warn("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") + Nextflow.error("Unknown step $params.step") + } + } + return input + } +} \ No newline at end of file diff --git a/main.nf b/main.nf index 621b548..fa73e1d 100644 --- a/main.nf +++ b/main.nf @@ -1,4 +1,5 @@ #!/usr/bin/env nextflow + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/rnadnavar @@ -17,7 +18,9 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') +params.bwamem2 = WorkflowMain.getGenomeAttribute(params, 'bwamem2') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.fasta_fai = WorkflowMain.getGenomeAttribute(params, 'fasta_fai') params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') @@ -33,6 +36,15 @@ params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genom params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species') +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ALTERNATIVE INPUT FILE ON RESTART +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params.input_restart = WorkflowRnadnavar.retrieveInput(params, log) + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY @@ -45,7 +57,7 @@ include { validateParameters; paramsHelp } from 'plugin/nf-validation' if (params.help) { def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 
-profile docker" + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GATK.GRCh38 -profile docker --outdir results" log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) System.exit(0) } @@ -81,7 +93,6 @@ workflow NFCORE_RNADNAVAR { // // WORKFLOW: Execute a single named workflow for the pipeline // See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { NFCORE_RNADNAVAR () } diff --git a/modules.json b/modules.json index f698e63..bf5dfad 100644 --- a/modules.json +++ b/modules.json @@ -23,7 +23,7 @@ "bwa/mem": { "branch": "master", "git_sha": "3dc300ddcaa563c1e3503477557c0e0def6df2ce", - "installed_by": ["modules"] + "installed_by": ["modules", "fastq_align_bwa"] }, "bwamem2/index": { "branch": "master", @@ -50,15 +50,30 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "dragmap/align": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["modules"] + }, + "dragmap/hashtable": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "ensemblvep": { "branch": "master", "git_sha": "29984d70aea47d06f0062a1785d76c357dd40ea9", "installed_by": ["modules"] }, + "ensemblvep/download": { + "branch": "master", + "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", + "installed_by": ["modules"] + }, "ensemblvep/vep": { "branch": "master", "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", - "installed_by": ["vcf_annotate_ensemblvep"] + "installed_by": ["vcf_annotate_ensemblvep", "modules"] }, "fastp": { "branch": "master", @@ -108,7 +123,7 @@ "gatk4/calculatecontamination": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["bam_tumor_normal_somatic_variant_calling_gatk", "modules"] }, "gatk4/createsequencedictionary": { "branch": "master", @@ -123,7 +138,7 @@ "gatk4/filtermutectcalls": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": ["bam_tumor_normal_somatic_variant_calling_gatk", "modules"] }, "gatk4/filtervarianttranches": { "branch": "master", @@ -153,7 +168,7 @@ "gatk4/getpileupsummaries": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": ["bam_tumor_normal_somatic_variant_calling_gatk", "modules"] }, "gatk4/indexfeaturefile": { "branch": "master", @@ -173,7 +188,7 @@ "gatk4/learnreadorientationmodel": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["bam_tumor_normal_somatic_variant_calling_gatk", "modules"] }, "gatk4/markduplicates": { "branch": "master", @@ -198,7 +213,7 @@ "gatk4/mutect2": { "branch": "master", "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["modules"] + "installed_by": ["bam_tumor_normal_somatic_variant_calling_gatk", "modules"] }, "gatk4/splitncigarreads": { "branch": "master", @@ -228,7 +243,7 @@ "hisat2/align": { "branch": "master", "git_sha": "a1881f6374506f9e031b7af814768cdb44a6a7d3", - "installed_by": ["modules"] + "installed_by": ["fastq_align_hisat2", "modules"] }, "hisat2/build": { "branch": "master", @@ -255,11 +270,31 @@ "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] }, + "picard/collecthsmetrics": { + "branch": "master", + "git_sha": 
"0ce3ab0ac301f160225b22254fa238478b4389f2", + "installed_by": ["bam_qc_picard"] + }, + "picard/collectmultiplemetrics": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["bam_qc_picard"] + }, + "picard/collectwgsmetrics": { + "branch": "master", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["bam_qc_picard"] + }, "picard/filtersamreads": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["bam_markduplicates_picard"] + }, "samblaster": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", @@ -288,17 +323,17 @@ "samtools/flagstat": { "branch": "master", "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c", - "installed_by": ["modules"] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639", - "installed_by": ["modules"] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["bam_markduplicates_picard", "modules", "bam_sort_stats_samtools"] }, "samtools/merge": { "branch": "master", @@ -313,12 +348,12 @@ "samtools/sort": { "branch": "master", "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", - "installed_by": ["modules"] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/view": { "branch": "master", @@ -328,7 +363,7 @@ "star/align": { "branch": "master", "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914", - "installed_by": ["modules"] + "installed_by": ["fastq_align_star", "modules"] }, "star/genomegenerate": { "branch": "master", @@ -348,7 +383,7 @@ "tabix/tabix": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules", "vcf_annotate_ensemblvep"] + "installed_by": ["vcf_annotate_ensemblvep", "modules"] }, "untar": { "branch": "master", @@ -369,6 +404,46 @@ }, "subworkflows": { "nf-core": { + "bam_markduplicates_picard": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, + "bam_qc_picard": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["fastq_align_bwa", "fastq_align_star", "fastq_align_hisat2"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] + }, + "bam_tumor_normal_somatic_variant_calling_gatk": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, + "fastq_align_bwa": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, + "fastq_align_hisat2": { + "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, + "fastq_align_star": { 
+ "branch": "master", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "installed_by": ["subworkflows"] + }, "vcf_annotate_ensemblvep": { "branch": "master", "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", diff --git a/modules/nf-core/ensemblvep/Dockerfile b/modules/nf-core/ensemblvep/Dockerfile deleted file mode 100644 index 7d2c99c..0000000 --- a/modules/nf-core/ensemblvep/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -FROM nfcore/base:1.14 -LABEL \ - author="Maxime Garcia" \ - description="VEP image for nf-core pipelines" \ - maintainer="maxime.garcia@scilifelab.se" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Setup default ARG variables -ARG GENOME=GRCh38 -ARG SPECIES=homo_sapiens -ARG VEP_CACHE_VERSION=106 -ARG VEP_VERSION=106.1 - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-vep-${VEP_VERSION}/bin:$PATH - -# Download Genome -RUN vep_install \ - -a c \ - -c .vep \ - -s ${SPECIES} \ - -y ${GENOME} \ - --CACHE_VERSION ${VEP_CACHE_VERSION} \ - --CONVERT \ - --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-vep-${VEP_VERSION} > nf-core-vep-${VEP_VERSION}.yml diff --git a/modules/nf-core/ensemblvep/build.sh b/modules/nf-core/ensemblvep/build.sh deleted file mode 100644 index eaa3ed5..0000000 --- a/modules/nf-core/ensemblvep/build.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Build and push all containers - -build_push() { - GENOME=$1 - SPECIES=$2 - VEP_CACHE_VERSION=$3 - VEP_VERSION=$4 - - docker build \ - . \ - -t nfcore/vep:${VEP_VERSION}.${GENOME} \ - --build-arg GENOME=${GENOME} \ - --build-arg SPECIES=${SPECIES} \ - --build-arg VEP_CACHE_VERSION=${VEP_CACHE_VERSION} \ - --build-arg VEP_VERSION=${VEP_VERSION} - - docker push nfcore/vep:${VEP_VERSION}.${GENOME} -} - -build_push "GRCh37" "homo_sapiens" "106" "106.1" -build_push "GRCh38" "homo_sapiens" "106" "106.1" -build_push "GRCm38" "mus_musculus" "102" "106.1" -build_push "GRCm39" "mus_musculus" "106" "106.1" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "106.1" -build_push "WBcel235" "caenorhabditis_elegans" "106" "106.1" diff --git a/modules/nf-core/ensemblvep/environment.yml b/modules/nf-core/ensemblvep/environment.yml deleted file mode 100644 index d378f81..0000000 --- a/modules/nf-core/ensemblvep/environment.yml +++ /dev/null @@ -1,10 +0,0 @@ -# You can use this file to create a conda environment for this module: -# conda env create -f environment.yml -name: nf-core-vep-106.1 -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - bioconda::ensembl-vep=106.1 diff --git a/modules/nf-core/ensemblvep/main.nf b/modules/nf-core/ensemblvep/main.nf deleted file mode 100644 index 6e78546..0000000 --- a/modules/nf-core/ensemblvep/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process ENSEMBLVEP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::ensembl-vep=107.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:107.0--pl5321h4a94de4_0' : - 'quay.io/biocontainers/ensembl-vep:107.0--pl5321h4a94de4_0' }" - - input: - tuple val(meta), path(vcf) - val genome - val species - val cache_version - path cache - path fasta - path extra_files - - output: - tuple val(meta), path("*.ann.vcf") , optional:true, emit: vcf - tuple val(meta), path("*.ann.tab") , optional:true, emit: tab - tuple val(meta), path("*.ann.json") , optional:true, emit: json - path "*.summary.html" , emit: report - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' - def prefix = task.ext.prefix ?: "${meta.id}" - def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" - def reference = fasta ? "--fasta $fasta" : "" - - """ - vep \\ - -i $vcf \\ - -o ${prefix}.ann.${file_extension} \\ - $args \\ - $reference \\ - --assembly $genome \\ - --species $species \\ - --cache \\ - --cache_version $cache_version \\ - --dir_cache $dir_cache \\ - --fork $task.cpus \\ - --stats_file ${prefix}.summary.html \\ - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/ensemblvep/meta.yml b/modules/nf-core/ensemblvep/meta.yml deleted file mode 100644 index a4dde8a..0000000 --- a/modules/nf-core/ensemblvep/meta.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: ENSEMBLVEP -description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. -keywords: - - annotation -tools: - - ensemblvep: - description: | - VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs - or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. - homepage: https://www.ensembl.org/info/docs/tools/vep/index.html - documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html - licence: ["Apache-2.0"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - vcf: - type: file - description: | - vcf to annotate - - genome: - type: value - description: | - which genome to annotate with - - species: - type: value - description: | - which species to annotate with - - cache_version: - type: value - description: | - which version of the cache to annotate with - - cache: - type: file - description: | - path to VEP cache (optional) - - fasta: - type: file - description: | - reference FASTA file (optional) - pattern: "*.{fasta,fa}" - - extra_files: - type: tuple - description: | - path to file(s) needed for plugins (optional) -output: - - vcf: - type: file - description: | - annotated vcf (optional) - pattern: "*.ann.vcf" - - tab: - type: file - description: | - tab file with annotated variants (optional) - pattern: "*.ann.tab" - - json: - type: file - description: | - json file with annotated variants (optional) - pattern: "*.ann.json" - - report: - type: file - description: VEP report file - pattern: "*.html" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/hisat2/align/meta.yml b/modules/nf-core/hisat2/align/meta.yml index 008a961..001e5d8 100644 --- a/modules/nf-core/hisat2/align/meta.yml +++ b/modules/nf-core/hisat2/align/meta.yml @@ -8,7 +8,7 @@ keywords: tools: - hisat2: - description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. + description: HISAT2 is a fast and sensitive alignment program for bam_align next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. homepage: https://daehwankimlab.github.io/hisat2/ documentation: https://daehwankimlab.github.io/hisat2/manual/ doi: "10.1038/s41587-019-0201-4" diff --git a/modules/nf-core/hisat2/build/meta.yml b/modules/nf-core/hisat2/build/meta.yml index e61bf2a..854732f 100644 --- a/modules/nf-core/hisat2/build/meta.yml +++ b/modules/nf-core/hisat2/build/meta.yml @@ -8,7 +8,7 @@ keywords: - reference tools: - hisat2: - description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. + description: HISAT2 is a fast and sensitive alignment program for bam_align next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. homepage: https://daehwankimlab.github.io/hisat2/ documentation: https://daehwankimlab.github.io/hisat2/manual/ doi: "10.1038/s41587-019-0201-4" diff --git a/modules/nf-core/hisat2/extractsplicesites/meta.yml b/modules/nf-core/hisat2/extractsplicesites/meta.yml index f70de08..756e98e 100644 --- a/modules/nf-core/hisat2/extractsplicesites/meta.yml +++ b/modules/nf-core/hisat2/extractsplicesites/meta.yml @@ -8,7 +8,7 @@ keywords: tools: - hisat2: - description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. + description: HISAT2 is a fast and sensitive alignment program for bam_align next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. 
homepage: https://daehwankimlab.github.io/hisat2/ documentation: https://daehwankimlab.github.io/hisat2/manual/ doi: "10.1038/s41587-019-0201-4" diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 954225d..adc7f53 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -2,7 +2,7 @@ name: samtools_flagstat description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type keywords: - stats - - mapping + - bam_align - counts - bam - sam diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index dda87e1..a258c1b 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -2,7 +2,7 @@ name: samtools_idxstats description: Reports alignment summary statistics for a BAM/CRAM/SAM file keywords: - stats - - mapping + - bam_align - counts - chromosome - bam diff --git a/nextflow_schema.json b/nextflow_schema.json index 2de8d4e..4cec090 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,14 +16,32 @@ ], "properties": { "input": { + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/sarek/usage#input).\n\nIf no input file is specified, sarek will attempt to locate one in the `{outdir}` directory. If no input should be supplied, i.e. when --step is supplied or --build_from_index, then set --input false", + "fa_icon": "fas fa-file-csv", + "schema": "assets/schema_input.json", + "anyOf": [ + { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$" + }, + { + "type": "boolean", + "enum": ["false"] + } + ] + }, + "input_restart": { "type": "string", "format": "file-path", "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/rnadnavar/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "hidden": true, + "schema": "assets/schema_input.json" }, "split_fastq": { "type": "integer", @@ -110,6 +128,13 @@ "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs.", "hidden": true }, + "dragmap": { + "type": "string", + "fa_icon": "fas fa-copy", + "description": "Path to dragmap indices.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. 
Combine with `--save_reference` to save for future runs.", + "hidden": true + }, "star_index": { "type": "string", "description": "Path to STAR index folder or compressed file (tar.gz)", @@ -424,7 +449,7 @@ "fa_icon": "fas fa-forward", "description": "Disable specified tools.", "help_text": "Multiple tools can be specified, separated by commas.\n\n> **NB** `--skip_tools baserecalibrator_report` is actually just not saving the reports.\n> **NB** `--skip_tools markduplicates_report` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.", - "pattern": "^((contamination|learnreadorientation|baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions|splitncigar)*(,)*)*$" + "pattern": "^((contamination|learnreadorientation|baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions|splitncigar|second_pass)*(,)*)*$" }, "wes": { "type": "boolean", @@ -442,9 +467,12 @@ "properties": { "aligner": { "type": "string", - "default": "star", - "description": "Specifies the alignment algorithm to use. Currently available option is 'star'", - "help_text": "This parameter define which aligner is to be used for aligning the RNA reads to the reference genome. Currently only STAR aligner is supported. So use 'star' as the value for this option." + "default": "bwa-mem", + "fa_icon": "fas fa-puzzle-piece", + "enum": ["bwa-mem", "bwa-mem2", "dragmap"], + "description": "Specify aligner to be used to map reads to reference genome.", + "help_text": "`Rnadnavar` will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf `DragMap` is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. 
See [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode) for more info.\n", + "hidden": true }, "star_index": { "type": "string", diff --git a/subworkflows/local/bam_align/main.nf b/subworkflows/local/bam_align/main.nf new file mode 100644 index 0000000..9d77841 --- /dev/null +++ b/subworkflows/local/bam_align/main.nf @@ -0,0 +1,207 @@ +// +// DNA and RNA ALIGNMENT +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +// SUBWORKFLOWS +// Convert BAM files to FASTQ files +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../bam_convert_samtools/main' +// Map input reads to reference genome in DNA +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP } from '../fastq_align_bwamem_mem2_dragmap/main' +// Map input reads to reference genome in RNA +include { FASTQ_ALIGN_STAR } from '../../nf-core/fastq_align_star/main' +// Merge and index BAM files (optional) +include { BAM_MERGE_INDEX_SAMTOOLS } from '../bam_merge_index_samtools/main' +// Create samplesheets to restart from mapping +include { CHANNEL_ALIGN_CREATE_CSV } from '../channel_align_create_csv/main' + +// MODULES +// Run FASTQC +include { FASTQC } from '../../../modules/nf-core/fastqc/main' +// TRIM/SPLIT FASTQ Files +include { FASTP } from '../../../modules/nf-core/fastp/main' + + +workflow BAM_ALIGN { + + take: + bwa + bwamem2 + dragmap + star_index + gtf + input_sample + + main: + reports = Channel.empty() + versions = Channel.empty() + + // Gather index for mapping given the chosen aligner for DNA + index_alignement = params.aligner == "bwa-mem" ? bwa : + params.aligner == "bwa-mem2" ? bwamem2 : + dragmap + if (params.step == 'mapping') { + + // Figure out if input is bam or fastq + input_sample_type = input_sample.branch{ + bam: it[0].data_type == "bam" + fastq: it[0].data_type == "fastq" + } + + // convert any bam input to fastq + // fasta are not needed when converting bam to fastq -> [ id:"fasta" ], [] + // No need for fasta.fai -> [] + interleave_input = false // Currently don't allow interleaved input + CONVERT_FASTQ_INPUT( + input_sample_type.bam, + [ [ id:"fasta" ], [] ], // fasta + [ [ id:'null' ], [] ], // fasta_fai + interleave_input) + + // Gather fastq (inputed or converted) + // Theorically this could work on mixed input (fastq for one sample and bam for another) + // But not sure how to handle that with the samplesheet + // Or if we really want users to be able to do that + input_fastq = input_sample_type.fastq.mix(CONVERT_FASTQ_INPUT.out.reads) + + + + // STEP 1.B: QC + if (!(params.skip_tools && params.skip_tools.split(',').contains('fastqc'))) { + FASTQC(input_fastq) + + reports = reports.mix(FASTQC.out.zip.collect{ meta, logs -> logs }) + versions = versions.mix(FASTQC.out.versions.first()) + } + + + + // STEP 1.C: Trimming and/or splitting + if (params.trim_fastq || params.split_fastq > 0) { + + save_trimmed_fail = false + save_merged = false + FASTP( + input_fastq, + [], // we are not using any adapter fastas at the moment + save_trimmed_fail, + save_merged + ) + + reports = reports.mix(FASTP.out.json.collect{ meta, json -> json }) + reports = reports.mix(FASTP.out.html.collect{ meta, html -> html }) + + if (params.split_fastq) { + reads_for_alignment = FASTP.out.reads.map{ meta, reads -> + read_files = reads.sort(false) { a,b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2) + [ meta + [ 
size:read_files.size() ], read_files ]
+                }.transpose()
+            } else reads_for_alignment = FASTP.out.reads
+
+            versions = versions.mix(FASTP.out.versions)
+
+        } else {
+            reads_for_alignment = input_fastq
+        }
+
+        // STEP 1.D: MAPPING READS TO REFERENCE GENOME
+        // Generate mapped reads channel for alignment
+        // reads will be sorted
+        reads_for_alignment = reads_for_alignment.map{ meta, reads ->
+            // Update meta.id to meta.sample when there are no multiple lanes or split fastqs
+            if (meta.size * meta.num_lanes == 1) [ meta + [ id:meta.sample ], reads ]
+            else [ meta, reads ]
+        }
+        // Separate DNA from RNA samples, DNA samples will be aligned with bwa, and RNA samples with star
+        reads_for_alignment_status = reads_for_alignment.branch{
+            dna: it[0].status < 2
+            rna: it[0].status == 2
+        }
+
+        // STEP 1.D.1: DNA mapping with BWA
+        sort_bam = true
+        FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP(reads_for_alignment_status.dna, index_alignement, sort_bam)
+
+        // Grouping the bams from the same samples not to stall the workflow
+        bam_mapped_dna = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP.out.bam.map{ meta, bam ->
+
+            // Update meta.id to be meta.sample, ditching sample-lane that is not needed anymore
+            // Update meta.data_type
+            // Remove no longer necessary fields:
+            // read_group: Now in the BAM header
+            // num_lanes: only needed for mapping
+            // size: only needed for mapping
+
+            // Use groupKey to make sure that the correct group can advance as soon as it is complete
+            // and not stall the workflow until all reads from all channels are mapped
+            [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ]
+        }.groupTuple()
+        bam_mapped_dna.dump(tag:"bam_mapped_dna")
+
+        // RNA will be aligned with STAR
+        // Run STAR
+        FASTQ_ALIGN_STAR (
+            reads_for_alignment_status.rna,
+            star_index,
+            gtf,
+            params.star_ignore_sjdbgtf,
+            params.seq_platform ? params.seq_platform : [],
+            params.seq_center ? params.seq_center : [],
+            [ [ id:"fasta" ], [] ] // fasta
+        )
+        // Grouping the bams from the same samples not to stall the workflow
+        bam_mapped_rna = FASTQ_ALIGN_STAR.out.bam.map{ meta, bam ->
+
+            // Update meta.id to be meta.sample, ditching sample-lane that is not needed anymore
+            // Update meta.data_type
+            // Remove no longer necessary fields:
+            // read_group: Now in the BAM header
+            // num_lanes: only needed for mapping
+            // size: only needed for mapping
+
+            // Use groupKey to make sure that the correct group can advance as soon as it is complete
+            // and not stall the workflow until all reads from all channels are mapped
+            [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ]
+        }.groupTuple()
+        bam_mapped_rna.dump(tag:"bam_mapped_rna")
+        // Gather QC reports
+        reports = reports.mix(FASTQ_ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([]))
+        reports = reports.mix(FASTQ_ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([]))
+        versions = versions.mix(FASTQ_ALIGN_STAR.out.versions)
+
+        // mix dna and rna in one channel
+        bam_mapped = bam_mapped_dna.mix(bam_mapped_rna)
+
+        // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here
+        // Except if and only if skipping markduplicates or saving mapped bams
+        if (params.save_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) {
+
+            // bams are merged (when multiple lanes from the same sample), indexed and then converted to cram
+            BAM_MERGE_INDEX_SAMTOOLS(bam_mapped)
+
+            BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai)
+            // Create CSV to restart from this step
+            params.save_output_as_bam ? CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai) : CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.alignment_index)
+
+            // Gather used software versions
+            versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions)
+            versions = versions.mix(BAM_TO_CRAM_MAPPING.out.versions)
+        }
+
+        // Gather used software versions
+        versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions)
+        versions = versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP.out.versions)
+        versions = versions.mix(FASTQ_ALIGN_STAR.out.versions)
+
+    }
+
+    emit:
+        // TODO: do I need to output RNA and DNA separately or can I directly use bam_mapped but separating them?
+ bam_mapped_rna = bam_mapped_rna //second pass with RG tags + bam_mapped_dna = bam_mapped_dna // second pass with RG tags + bam_mapped = bam_mapped // for preprocessing + reports = reports + versions = versions + +} \ No newline at end of file diff --git a/subworkflows/nf-core/alignment_to_fastq.nf b/subworkflows/local/bam_convert_samtools/main.nf similarity index 87% rename from subworkflows/nf-core/alignment_to_fastq.nf rename to subworkflows/local/bam_convert_samtools/main.nf index b9e4341..ed1f659 100644 --- a/subworkflows/nf-core/alignment_to_fastq.nf +++ b/subworkflows/local/bam_convert_samtools/main.nf @@ -2,14 +2,14 @@ // BAM/CRAM to FASTQ conversion, paired end only // -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAP } from '../../modules/nf-core/modules/samtools/merge/main' -include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_UNMAP } from '../../modules/nf-core/modules/samtools/collatefastq/main' -include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_MAP } from '../../modules/nf-core/modules/samtools/collatefastq/main' -include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from '../../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAP } from '../../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_UNMAP } from '../../../modules/nf-core/samtools/collatefastq/main' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_MAP } from '../../../modules/nf-core/samtools/collatefastq/main' +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' workflow BAM_CONVERT_SAMTOOLS { take: diff --git a/subworkflows/local/bam_merge_index_samtools/main.nf b/subworkflows/local/bam_merge_index_samtools/main.nf index 6aa5d93..9e8735f 100644 --- a/subworkflows/local/bam_merge_index_samtools/main.nf +++ b/subworkflows/local/bam_merge_index_samtools/main.nf @@ -4,8 +4,8 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { SAMTOOLS_INDEX as INDEX_MERGE_BAM } from '../../../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_MERGE as MERGE_BAM } from '../../../modules/nf-core/modules/samtools/merge/main' +include { SAMTOOLS_INDEX as INDEX_MERGE_BAM } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_MERGE as MERGE_BAM } from '../../../modules/nf-core/samtools/merge/main' workflow BAM_MERGE_INDEX_SAMTOOLS { take: @@ -15,14 +15,12 @@ workflow BAM_MERGE_INDEX_SAMTOOLS { versions = Channel.empty() // Figuring out if there is one or more bam(s) from the same sample - bam.dump(tag:"bam") bam_to_merge = bam.branch{ meta, bam -> // bam is a list, so use bam.size() to asses number of intervals 
single: bam.size() <= 1 return [ meta, bam[0] ] multiple: bam.size() > 1 } - bam_to_merge.dump(tag:"bam_to_merge") // Only when using intervals MERGE_BAM(bam_to_merge.multiple, [ [ id:'null' ], []], [ [ id:'null' ], []]) diff --git a/subworkflows/local/channel_align_create_csv/main.nf b/subworkflows/local/channel_align_create_csv/main.nf new file mode 100644 index 0000000..9ab83f5 --- /dev/null +++ b/subworkflows/local/channel_align_create_csv/main.nf @@ -0,0 +1,23 @@ +// +// CHANNEL_ALIGN_CREATE_CSV +// + +workflow CHANNEL_ALIGN_CREATE_CSV { + take: + bam_indexed // channel: [mandatory] meta, bam, bai + + main: + // Creating csv files to restart from this step + bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai -> + patient = meta.patient + sample = meta.sample + status = meta.status + bam = "${params.outdir}/preprocessing/mapped/${sample}/${bam.name}" + bai = "${params.outdir}/preprocessing/mapped/${sample}/${bai.name}" + + type = params.save_output_as_bam ? "bam" : "cram" + type_index = params.save_output_as_bam ? "bai" : "crai" + + ["mapped.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${status},${sample},${bam},${bai}\n"] + } +} \ No newline at end of file diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf new file mode 100644 index 0000000..d7ee78e --- /dev/null +++ b/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf @@ -0,0 +1,46 @@ +// +// MAPPING +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' +include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main' + +workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP { + take: + reads // channel: [mandatory] meta, reads + index // channel: [mandatory] index + sort // boolean: [mandatory] true -> sort, false -> don't sort + + main: + + versions = Channel.empty() + reports = Channel.empty() + + // Only one of the following should be run + BWAMEM1_MEM(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is bwa-mem + BWAMEM2_MEM(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is bwa-mem2 + DRAGMAP_ALIGN(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is dragmap + + // Get the bam files from the aligner + // Only one aligner is run + bam = Channel.empty() + bam = bam.mix(BWAMEM1_MEM.out.bam) + bam = bam.mix(BWAMEM2_MEM.out.bam) + bam = bam.mix(DRAGMAP_ALIGN.out.bam) + + // Gather reports of all tools used + reports = reports.mix(DRAGMAP_ALIGN.out.log) + + // Gather versions of all tools used + versions = versions.mix(BWAMEM1_MEM.out.versions) + versions = versions.mix(BWAMEM2_MEM.out.versions) + versions = versions.mix(DRAGMAP_ALIGN.out.versions) + + emit: + bam // channel: [ [meta], bam ] + reports + versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/mapping.nf b/subworkflows/local/mapping.nf deleted file mode 100644 index d4b0c65..0000000 --- a/subworkflows/local/mapping.nf +++ /dev/null @@ -1,199 +0,0 @@ -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../nf-core/alignment_to_fastq' -include { RUN_FASTQC } from '../nf-core/run_fastqc' -include { FASTP } from '../../modules/nf-core/modules/fastp/main' 
-include { GATK4_MAPPING } from '../nf-core/gatk4/mapping/main' -include { ALIGN_STAR } from '../nf-core/align_star' -include { BAM_MERGE_INDEX_SAMTOOLS } from '../nf-core/merge_index_bam' -include { MAPPING_CSV } from '../local/mapping_csv' - - - -workflow MAPPING { - - take: - bwa - bwamem2 - dragmap - star_index - gtf - ch_input_sample - - main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - - // Gather index for mapping given the chosen aligner - ch_map_index = params.aligner == "bwa-mem" ? bwa : params.aligner == "bwa-mem2" ? bwamem2 : dragmap - - if (params.step in ['mapping']) { - // Separate input in bam or fastq - ch_input_sample.branch{ - bam: it[0].data_type == "bam" - fastq: it[0].data_type == "fastq" - }.set{ch_input_sample_type} - - // STEP 1.A: convert any bam input to fastq - CONVERT_FASTQ_INPUT(ch_input_sample_type.bam, - [ [ id:"fasta" ], [] ], // fasta - [ [ id:'null' ], [] ], // fasta_fai - false - ) - ch_versions = ch_versions.mix(CONVERT_FASTQ_INPUT.out.versions) - // gather fastq (from input or converted with BAM_TO_FASTQ) - // Theorically this could work on mixed input (fastq for one sample and bam for another) - ch_input_fastq = ch_input_sample_type.fastq.mix(CONVERT_FASTQ_INPUT.out.reads) - - - // STEP 1.B: QC - if (!(params.skip_tools && params.skip_tools.split(',').contains('fastqc'))) { - RUN_FASTQC(ch_input_fastq) - ch_reports = ch_reports.mix(RUN_FASTQC.out.fastqc_zip.collect{meta, logs -> logs}) - ch_versions = ch_versions.mix(RUN_FASTQC.out.versions) - } - - - // STEP 1.C: Trimming and/or splitting - if (params.trim_fastq || params.split_fastq > 0) { - // Call FASTP for trimming - FASTP(ch_input_fastq, params.save_trimmed_fail, params.save_merged_fastq) - ch_reports = ch_reports.mix( - FASTP.out.json.collect{meta, json -> json}, - FASTP.out.html.collect{meta, html -> html} - ) - // Map channel by split group - if(params.split_fastq){ - ch_reads_to_map = FASTP.out.reads.map{ key, reads -> - read_files = reads.collate(2) // removed sorting because gace concurrent error - it looks like files are sorted already - [[ - data_type:key.data_type, - id:key.id, - numLanes:key.numLanes, - patient: key.patient, - read_group:key.read_group, - sample:key.sample, - size:read_files.size(), - status:key.status, - ], - read_files] - }.transpose() - } else { - ch_reads_to_map = FASTP.out.reads - } - - ch_versions = ch_versions.mix(FASTP.out.versions) - } else { - ch_reads_to_map = ch_input_fastq - } - - - // STEP 1.D: MAPPING READS TO REFERENCE GENOME - // Generate mapped reads channel for alignment - ch_reads_to_map = ch_reads_to_map.map{ meta, reads -> - // update ID when no multiple lanes or splitted fastqs - new_id = meta.size * meta.numLanes == 1 ? 
meta.sample : meta.id - - [[ - data_type: meta.data_type, - id: new_id, - numLanes: meta.numLanes, - patient: meta.patient, - read_group: meta.read_group, - sample: meta.sample, - size: meta.size, - status: meta.status, - ], - reads] - } - // Separate DNA from RNA samples, DNA samples will be aligned with bwa, and RNA samples with star - ch_reads_to_map.branch{ - dna: it[0].status < 2 - rna: it[0].status == 2 - }.set{ch_reads_to_map_status} - - // STEP 1.D.1: DNA mapping with BWA - sort_bam = true // TODO: set up as parameter - GATK4_MAPPING(ch_reads_to_map_status.dna, ch_map_index, sort_bam) - ch_versions = ch_versions.mix(GATK4_MAPPING.out.versions) - // Grouping the bams from the same samples - bwa_bams = GATK4_MAPPING.out.bam - ch_bam_mapped_dna = GATK4_MAPPING.out.bam.map{ meta, bam -> - [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ] - - }.groupTuple() - - - // RNA will be aligned with STAR - // ch_reads_to_map_rna = ch_reads_to_map_status.rna.map{ meta, reads -> [meta, reads] } - // STAR - ALIGN_STAR ( - ch_reads_to_map_status.rna, - star_index, - gtf, - params.star_ignore_sjdbgtf, - params.seq_platform ? params.seq_platform : [], - params.seq_center ? params.seq_center : [], - [ [ id:"fasta" ], [] ] // fasta - ) - // Grouping the bams from the same samples not to stall the workflow - star_bams = ALIGN_STAR.out.bam.groupTuple(sort: true) - ch_bam_mapped_rna = ALIGN_STAR.out.bam.map{ meta, bam -> - [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ] - }.groupTuple() - // Gather QC reports - ch_reports = ch_reports.mix(ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([])) - ch_reports = ch_reports.mix(ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([])) - ch_versions = ch_versions.mix(ALIGN_STAR.out.versions) - - // mix dna and rna in one channel - ch_bam_mapped = ch_bam_mapped_dna.mix(ch_bam_mapped_rna) - // Grouping the bams from the same samples not to stall the workflow - bam_mapped = ch_bam_mapped.map{ meta, bam -> - - // Update meta.id to be meta.sample, ditching sample-lane that is not needed anymore - // Update meta.data_type - // Remove no longer necessary fields: - // read_group: Now in the BAM header - // num_lanes: only needed for mapping - // size: only needed for mapping - - // Use groupKey to make sure that the correct group can advance as soon as it is complete - // and not stall the workflow until all reads from all channels are mapped - [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), - bam ] - }.groupTuple().map{meta, bam -> [meta, bam.flatten()]} - // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here - // Except if and only if skipping markduplicates or saving mapped bams - if (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) { - // bams are merged (when multiple lanes from the same sample), indexed and then converted to cram - bam_mapped.dump(tag:"bam_mapped") - BAM_MERGE_INDEX_SAMTOOLS(bam_mapped) - // Create CSV to restart from this step - MAPPING_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai.transpose()) - - // Gather used softwares versions - ch_versions = ch_versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) - } - } - else { - ch_input_sample.branch{ - bam: it[0].data_type 
== "bam" - fastq: it[0].data_type == "fastq" - }.set{ch_input_sample_type} - ch_bam_mapped = ch_input_sample_type.bam - - ch_bam_mapped.branch{ - rna: it[0].status >= 2 - dna: it[0].status < 2 - }.set{ch_input_sample_class} - star_bams = ch_input_sample_class.rna - bwa_bams = ch_input_sample_class.dna - } - - emit: - star_bams = star_bams //second pass with RG tags - bwa_bams = bwa_bams // second pass with RG tags - ch_bam_mapped = bam_mapped // for preprocessing - reports = ch_reports - versions = ch_versions - -} \ No newline at end of file diff --git a/subworkflows/local/mapping_csv.nf b/subworkflows/local/mapping_csv.nf deleted file mode 100644 index a9d8c32..0000000 --- a/subworkflows/local/mapping_csv.nf +++ /dev/null @@ -1,21 +0,0 @@ -// -// MAPPING_CSV -// - -workflow MAPPING_CSV { - take: - bam_indexed // channel: [mandatory] meta, bam, bai - - main: - // Creating csv files to restart from this step - bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, bam, bai -> - id = meta.id - patient = meta.patient - sample = meta.sample - status = meta.status - lane = meta.lane - bam = "${params.outdir}/preprocessing/mapped/${patient}/${id}/${bam.name}" - bai = "${params.outdir}/preprocessing/mapped/${patient}/${id}/${bai.name}" - ["mapped.csv", "patient,status,sample,lane,fastq_1,fastq_2,bam,bai,cram,crai,table,vcf\n${patient},${status},${sample},${lane},,,${bam},${bai},,,,\n"] - } -} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf deleted file mode 100644 index 43d9a58..0000000 --- a/subworkflows/local/prepare_genome.nf +++ /dev/null @@ -1,189 +0,0 @@ -// -// PREPARE GENOME -// - -// Initialize channels based on params or indices that were just built -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run -// Condition is based on params.step and params.tools -// If and extra condition exists, it's specified in comments - -include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/modules/bwa/index/main' -include { BWAMEM2_INDEX } from '../../modules/nf-core/modules/bwamem2/index/main' -include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/modules/dragmap/hashtable/main' -include { GTF2BED } from '../../modules/local/gtf2bed' //addParams(options: params.genome_options) -include { GUNZIP as GUNZIP_GENE_BED } from '../../modules/nf-core/modules/gunzip/main' //addParams(options: params.genome_options) -include { STAR_GENOMEGENERATE } from '../../modules/nf-core/modules/star/genomegenerate/main' //addParams(options: params.star_index_options) -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/modules/gatk4/createsequencedictionary/main' -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/modules/samtools/faidx/main' -include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_KNOWN_SNPS } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_PON } from '../../modules/nf-core/modules/tabix/tabix/main' -include { UNZIP as UNZIP_ALLELES } from '../../modules/nf-core/modules/unzip/main' -include { UNZIP as UNZIP_LOCI } from '../../modules/nf-core/modules/unzip/main' -include { UNZIP as UNZIP_GC 
} from '../../modules/nf-core/modules/unzip/main' -include { UNZIP as UNZIP_RT } from '../../modules/nf-core/modules/unzip/main' -include { HISAT2_EXTRACTSPLICESITES } from '../../modules/nf-core/modules/hisat2/extractsplicesites/main' -include { HISAT2_BUILD } from '../../modules/nf-core/modules/hisat2/build/main' - -workflow PREPARE_GENOME { - take: - dbsnp // channel: [optional] dbsnp - fasta // channel: [mandatory] fasta - fasta_fai // channel: [optional] fasta_fai - germline_resource // channel: [optional] germline_resource - known_indels // channel: [optional] known_indels - known_snps - pon // channel: [optional] pon - - - main: - - ch_versions = Channel.empty() - - - // If aligner is bwa-mem - BWAMEM1_INDEX(fasta) // If aligner is bwa-mem - BWAMEM2_INDEX(fasta) // If aligner is bwa-mem2 - DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap - - GATK4_CREATESEQUENCEDICTIONARY(fasta) - SAMTOOLS_FAIDX(fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }, [['id':null], []]) - - // - // Uncompress GTF annotation file or create from GFF3 if required - // - ch_gffread_version = Channel.empty() - if (params.gtf) { - if (params.gtf.endsWith('.gz')) { - GUNZIP_GTF ( - Channel.fromPath(params.gtf).map{ it -> [[id:it[0].baseName], it] } - ) - ch_gtf = GUNZIP_GTF.out.gunzip.map{ meta, gtf -> [gtf] }.collect() - ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) - } else { - ch_gtf = Channel.fromPath(params.gtf).collect() - } - } else if (params.gff) { - if (params.gff.endsWith('.gz')) { - GUNZIP_GFF ( - Channel.fromPath(params.gff).map{ it -> [[id:it[0].baseName], it] } - ) - ch_gff = GUNZIP_GFF.out.gunzip.map{ meta, gff -> [gff] }.collect() - ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) - } else { - ch_gff = Channel.fromPath(params.gff).collect() - } - - GFFREAD ( - ch_gff - ) - .gtf - .set { ch_gtf } - - ch_versions = ch_versions.mix(GFFREAD.out.versions) - } - - // - // Uncompress exon BED annotation file or create from GTF if required - // - if (params.exon_bed) { - if (params.exon_bed.endsWith('.gz')) { - GUNZIP_GENE_BED ( - Channel.fromPath(params.exon_bed).map{ it -> [[id:it[0].baseName], it] } - ) - ch_gene_bed = GUNZIP_GENE_BED.out.gunzip.map{ meta, bed -> [bed] }.collect() - ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) - } else { - ch_exon_bed = Channel.fromPath(params.exon_bed).collect() - } - } else { - ch_exon_bed = GTF2BED ( ch_gtf ).bed.collect() - ch_versions = ch_versions.mix(GTF2BED.out.versions) - } - - // - // Uncompress STAR index or generate from scratch if required - // - ch_star_index = Channel.empty() - if (params.star_index) { - if (params.star_index.endsWith('.tar.gz')) { - UNTAR_STAR_INDEX ( - Channel.fromPath(params.star_index).map{ it -> [[id:it[0].baseName], it] } - ) - ch_star_index = UNTAR_STAR_INDEX.out.untar.map{ meta, star_index -> [star_index] }.collect() - ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) - } else { - ch_star_index = Channel.fromPath(params.star_index).collect() - } - } - else { - STAR_GENOMEGENERATE ( - fasta,ch_gtf - ) - .index - .set { ch_star_index } - ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) - } - // HISAT mandatory for realignment TODO: make opt with arguments - if (params.splicesites) { - ch_splicesites = Channel.fromPath(params.splicesites).collect() - } else{ - ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf ).txt - ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) - - } - - if (params.hisat2_index) { - ch_hisat2_index = 
Channel.fromPath(params.hisat2_index).collect() - } else{ - ch_hisat2_index = HISAT2_BUILD ( fasta, ch_gtf, ch_splicesites ).index - ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions) - } - - - - - // the following are flattened and mapped in case the user supplies more than one value for the param - // [file1,file2] becomes [[meta1,file1],[meta2,file2]] - // outputs are collected to maintain a single channel for relevant TBI files - TABIX_DBSNP(dbsnp.flatten().map{ it -> [[id:it.baseName], it] }) - TABIX_GERMLINE_RESOURCE(germline_resource.flatten().map{ it -> [[id:it.baseName], it] }) - TABIX_KNOWN_SNPS( known_snps.flatten().map{ it -> [[id:it.baseName], it] } ) - TABIX_KNOWN_INDELS( known_indels.flatten().map{ it -> [[id:it.baseName], it] } ) - TABIX_PON(pon.flatten().map{ it -> [[id:it.baseName], it] }) - - - - // Gather versions of all tools used - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - ch_versions = ch_versions.mix(BWAMEM1_INDEX.out.versions) - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) - ch_versions = ch_versions.mix(TABIX_GERMLINE_RESOURCE.out.versions) - ch_versions = ch_versions.mix(TABIX_KNOWN_SNPS.out.versions) - ch_versions = ch_versions.mix(TABIX_KNOWN_INDELS.out.versions) - ch_versions = ch_versions.mix(TABIX_PON.out.versions) - - - emit: - bwa = BWAMEM1_INDEX.out.index // path: bwa/* - bwamem2 = BWAMEM2_INDEX.out.index // path: bwamem2/* - hashtable = DRAGMAP_HASHTABLE.out.hashmap // path: dragmap/* - dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi - dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict - fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai - star_index = ch_star_index // path: star/index/ - gtf = ch_gtf // path: genome.gtf - exon_bed = ch_exon_bed // path: exon.bed - germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi - known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi - hisat2_index = ch_hisat2_index // path: hisat2/index/ - splicesites = ch_splicesites // path: genome.splicesites.txt - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf new file mode 100644 index 0000000..1e8f21b --- /dev/null +++ b/subworkflows/local/prepare_genome/main.nf @@ -0,0 +1,189 @@ +// +// PREPARE GENOME +// + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Condition is based on params.step and params.tools +// If and extra condition exists, it's specified in comments + +include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf-core/bwa/index/main' +include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' +include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable/main' +include { GTF2BED } from '../../../modules/local/gtf2bed' //addParams(options: 
params.genome_options) +include { GUNZIP as GUNZIP_GENE_BED } from '../../../modules/nf-core/gunzip/main' //addParams(options: params.genome_options) +include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main' //addParams(options: params.star_index_options) +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_KNOWN_SNPS } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_PON } from '../../../modules/nf-core/tabix/tabix/main' +include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites/main' +include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build/main' + +workflow PREPARE_GENOME { + take: + dbsnp // channel: [optional] dbsnp + fasta // channel: [mandatory] fasta + fasta_fai // channel: [optional] fasta_fai + germline_resource // channel: [optional] germline_resource + known_indels // channel: [optional] known_indels + known_snps // channel: [optional] known_snps + pon // channel: [optional] pon + + + main: + + fasta = fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } + versions = Channel.empty() + + // If aligner is bwa-mem + BWAMEM1_INDEX(fasta) // If aligner is bwa-mem + BWAMEM2_INDEX(fasta) // If aligner is bwa-mem2 + DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap + + + GATK4_CREATESEQUENCEDICTIONARY(fasta) + SAMTOOLS_FAIDX(fasta, [['id':null], []]) + + + // the following are flattened and mapped in case the user supplies more than one value for the param + // written for KNOWN_INDELS, but preemptively applied to the rest + // [ file1, file2 ] becomes [ [ meta1, file1 ], [ meta2, file2 ] ] + // outputs are collected to maintain a single channel for relevant TBI files + TABIX_DBSNP(dbsnp.flatten().map{ it -> [ [ id:it.baseName ], it ] }) + TABIX_GERMLINE_RESOURCE(germline_resource.flatten().map{ it -> [ [ id:it.baseName ], it ] }) + TABIX_KNOWN_SNPS(known_snps.flatten().map{ it -> [ [ id:it.baseName ], it ] } ) + TABIX_KNOWN_INDELS(known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } ) + TABIX_PON(pon.flatten().map{ it -> [ [ id:it.baseName ], it ] }) + + // + // Uncompress GTF annotation file or create from GFF3 if required + // + ch_gffread_version = Channel.empty() + if (params.gtf) { + if (params.gtf.endsWith('.gz')) { + GUNZIP_GTF ( + Channel.fromPath(params.gtf).map{ it -> [[id:it[0].baseName], it] } + ) + ch_gtf = GUNZIP_GTF.out.gunzip.map{ meta, gtf -> [gtf] }.collect() + versions = versions.mix(GUNZIP_GTF.out.versions) + } else { + ch_gtf = Channel.fromPath(params.gtf).collect() + } + } else if (params.gff) { + if (params.gff.endsWith('.gz')) { + GUNZIP_GFF ( + Channel.fromPath(params.gff).map{ it -> [[id:it[0].baseName], it] } + ) + ch_gff = GUNZIP_GFF.out.gunzip.map{ meta, gff -> [gff] }.collect() + versions = versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = Channel.fromPath(params.gff).collect() + } + + GFFREAD ( + ch_gff + ) + .gtf + .set { ch_gtf } + + versions = versions.mix(GFFREAD.out.versions) + } + + // + // Uncompress exon BED annotation file or create from GTF if required + // + if 
(params.exon_bed) { + if (params.exon_bed.endsWith('.gz')) { + GUNZIP_GENE_BED ( + Channel.fromPath(params.exon_bed).map{ it -> [[id:it[0].baseName], it] } + ) + ch_gene_bed = GUNZIP_GENE_BED.out.gunzip.map{ meta, bed -> [bed] }.collect() + versions = versions.mix(GUNZIP_GENE_BED.out.versions) + } else { + ch_gene_bed = Channel.fromPath(params.exon_bed).collect() + } + } else { + ch_exon_bed = GTF2BED ( ch_gtf ).bed.collect() + versions = versions.mix(GTF2BED.out.versions) + } + + // + // Uncompress STAR index or generate from scratch if required + // + ch_star_index = Channel.empty() + if (params.star_index) { + if (params.star_index.endsWith('.tar.gz')) { + UNTAR_STAR_INDEX ( + Channel.fromPath(params.star_index).map{ it -> [[id:it[0].baseName], it] } + ) + ch_star_index = UNTAR_STAR_INDEX.out.untar.map{ meta, star_index -> [star_index] }.collect() + versions = versions.mix(UNTAR_STAR_INDEX.out.versions) + } else { + ch_star_index = Channel.fromPath(params.star_index).collect() + } + } + else { + STAR_GENOMEGENERATE ( fasta.map{meta, fasta -> fasta},ch_gtf ) + ch_star_index = STAR_GENOMEGENERATE.out.index + versions = versions.mix(STAR_GENOMEGENERATE.out.versions) + } + + + // HISAT2 not necessary if second pass skipped + if (!params.skip_tools.split(',').contains("second_pass")){ + if (params.splicesites) { + ch_splicesites = Channel.fromPath(params.splicesites).collect() + } else{ + ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf ).txt + versions = versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) + + } + + if (params.hisat2_index) { + ch_hisat2_index = Channel.fromPath(params.hisat2_index).collect() + } else{ + ch_hisat2_index = HISAT2_BUILD ( fasta, ch_gtf, ch_splicesites ).index + versions = versions.mix(HISAT2_BUILD.out.versions) + } + } else { + ch_hisat2_index = Channel.empty() + ch_splicesites = Channel.empty() + } + + + // Gather versions of all tools used + versions = versions.mix(SAMTOOLS_FAIDX.out.versions) + versions = versions.mix(BWAMEM1_INDEX.out.versions) + versions = versions.mix(BWAMEM2_INDEX.out.versions) + versions = versions.mix(DRAGMAP_HASHTABLE.out.versions) + versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + versions = versions.mix(TABIX_DBSNP.out.versions) + versions = versions.mix(TABIX_GERMLINE_RESOURCE.out.versions) + versions = versions.mix(TABIX_KNOWN_SNPS.out.versions) + versions = versions.mix(TABIX_KNOWN_INDELS.out.versions) + versions = versions.mix(TABIX_PON.out.versions) + + + emit: + bwa = BWAMEM1_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwa/* + bwamem2 = BWAMEM2_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwamem2/* + hashtable = DRAGMAP_HASHTABLE.out.hashmap.map{ meta, index -> [index] }.collect() // path: dragmap/* + dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi + dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict + fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai + germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi + known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi + known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi + pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi + star_index = ch_star_index // path: star/index/ + gtf 
= ch_gtf // path: genome.gtf + exon_bed = ch_exon_bed // path: exon.bed + hisat2_index = ch_hisat2_index // path: hisat2/index/ + splicesites = ch_splicesites // path: genome.splicesites.txt + versions = versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals/main.nf similarity index 90% rename from subworkflows/local/prepare_intervals.nf rename to subworkflows/local/prepare_intervals/main.nf index 2a84942..2d5e42b 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -1,11 +1,11 @@ // // PREPARE INTERVALS // -include { BUILD_INTERVALS } from '../../modules/local/build_intervals/main' -include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' -include { GATK4_INTERVALLISTTOBED } from '../../modules/nf-core/modules/gatk4/intervallisttobed/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../modules/nf-core/modules/tabix/bgziptabix/main' +include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main' +include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' +include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix/main' workflow PREPARE_INTERVALS { take: @@ -19,6 +19,7 @@ workflow PREPARE_INTERVALS { intervals_bed = Channel.empty() // List of [ bed, num_intervals ], one for each region intervals_bed_gz_tbi = Channel.empty() // List of [ bed.gz, bed,gz.tbi, num_intervals ], one for each region intervals_combined = Channel.empty() // Single bed file containing all intervals + if (no_intervals) { file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" file("${params.outdir}/no_intervals.bed.gz").text = "no_intervals\n" @@ -43,7 +44,6 @@ workflow PREPARE_INTERVALS { } else { intervals_combined = Channel.fromPath(file(intervals)).map{it -> [ [ id:it.baseName ], it ] } intervals_bed = CREATE_INTERVALS_BED(file(intervals)).bed - intervals_bed.dump(tag:"intervals_bed") versions = versions.mix(CREATE_INTERVALS_BED.out.versions) diff --git a/subworkflows/local/prepare_reference_and_intervals.nf b/subworkflows/local/prepare_reference_and_intervals.nf index 09f63fe..9e4ec35 100644 --- a/subworkflows/local/prepare_reference_and_intervals.nf +++ b/subworkflows/local/prepare_reference_and_intervals.nf @@ -1,10 +1,10 @@ // // PREPARE REFERENCE AND INTERVAL FILES FOR PIPELINE // -include { PREPARE_GENOME } from './prepare_genome' -include { PREPARE_INTERVALS } from './prepare_intervals' -include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/modules/gatk4/bedtointervallist/main' -include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/modules/gatk4/intervallisttools/main' +include { PREPARE_GENOME } from './prepare_genome/main' +include { PREPARE_INTERVALS } from './prepare_intervals/main' +include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' workflow PREPARE_REFERENCE_AND_INTERVALS { @@ -90,13 +90,13 @@ workflow 
PREPARE_REFERENCE_AND_INTERVALS { } emit: - fasta = fasta - fasta_fai = fasta_fai - dict = dict - bwa = bwa - germline_resource = germline_resource - germline_resource_tbi = germline_resource_tbi - bwamem2 = bwamem2 + fasta = fasta + fasta_fai = fasta_fai + dict = dict + bwa = bwa + germline_resource = germline_resource + germline_resource_tbi = germline_resource_tbi + bwamem2 = bwamem2 dragmap = dragmap star_index = PREPARE_GENOME.out.star_index gtf = PREPARE_GENOME.out.gtf diff --git a/subworkflows/nf-core/align_star.nf b/subworkflows/nf-core/align_star.nf deleted file mode 100644 index 979cd3c..0000000 --- a/subworkflows/nf-core/align_star.nf +++ /dev/null @@ -1,55 +0,0 @@ -// -// Alignment with STAR -// - -include { STAR_ALIGN } from '../../modules/nf-core/modules/star/align/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_STAR { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/star/index/ - gtf // channel: /path/to/genome.gtf - star_ignore_sjdbgtf // value: ignore gtf - seq_platform // value: sequencing platform - seq_center // value: sequencing centre - fasta - - main: - - ch_versions = Channel.empty() - // Map reads with STAR - STAR_ALIGN ( - reads, - index, - gtf, - star_ignore_sjdbgtf, - seq_platform, - seq_center - ) - ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) - // Sort, index BAM file and run samtools stats, flagstat and idxstats - BAM_SORT_SAMTOOLS ( - STAR_ALIGN.out.bam, - fasta - ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] - log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] - log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] - log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] - bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] - bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] - fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] - tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - csi = BAM_SORT_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - versions = ch_versions // channel: [ versions.yml ] - -} diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf index 8d8a347..c071268 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -2,9 +2,9 @@ // Sort, index BAM file and run samtools stats, flagstat and idxstats // -include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools/main' workflow BAM_SORT_SAMTOOLS { take: diff --git a/subworkflows/nf-core/merge_index_bam.nf 
b/subworkflows/nf-core/merge_index_bam.nf deleted file mode 100644 index b260507..0000000 --- a/subworkflows/nf-core/merge_index_bam.nf +++ /dev/null @@ -1,45 +0,0 @@ -// -// MERGE INDEX BAM -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { SAMTOOLS_INDEX as INDEX_MERGE_BAM } from '../../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_MERGE as MERGE_BAM } from '../../modules/nf-core/modules/samtools/merge/main' - -workflow BAM_MERGE_INDEX_SAMTOOLS { - take: - bam // channel: [mandatory] meta, bam - - main: - versions = Channel.empty() - - // Figuring out if there is one or more bam(s) from the same sample - bam_to_merge = bam.branch{ meta, bam -> - // bam is a list, so use bam.size() to asses number of intervals - single: bam.size() <= 1 - return [ meta, bam[0] ] - multiple: bam.size() > 1 - } - - // Only when using intervals - MERGE_BAM(bam_to_merge.multiple, [ [ id:'null' ], []], [ [ id:'null' ], []]) - - // Mix intervals and no_intervals channels together - bam_all = MERGE_BAM.out.bam.mix(bam_to_merge.single) - - // Index bam - INDEX_MERGE_BAM(bam_all) - - // Join with the bai file - bam_bai = bam_all.join(INDEX_MERGE_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true) - - // Gather versions of all tools used - versions = versions.mix(INDEX_MERGE_BAM.out.versions) - versions = versions.mix(MERGE_BAM.out.versions) - - emit: - bam_bai - - versions -} \ No newline at end of file From fac25eacc62e6a6b0eeffd1cc0b246e4106cd21a Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Mon, 21 Aug 2023 10:05:51 +0100 Subject: [PATCH 12/56] Updating schema and check_samplesheet.py --- assets/schema_input.json | 141 +++++++++++++++++++++++++++++++++++++-- bin/check_samplesheet.py | 2 +- 2 files changed, 135 insertions(+), 8 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 237ad38..2775191 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,15 +7,47 @@ "items": { "type": "object", "properties": { + "patient": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Patient ID must be provided and cannot contain spaces", + "meta": ["patient"] + }, "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample ID must be provided and cannot contain spaces", + "meta": ["sample"] }, - "fastq_1": { + "status": { + "type": "integer", + "errorMessage": "Status can only be 0 (normal), 1 (tumor) or 2 (rna tumour). 
Defaults to 1, if none is supplied.", + "meta": ["status"], + "default": "1", + "minimum": 0, + "maximum": 2 + }, + "lane": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+$", + "unique": ["patient", "sample"], + "dependentRequired": ["fastq_1"], + "meta": ["lane"] + }, + "fastq_1": { + "errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", @@ -28,9 +60,104 @@ "type": "string", "maxLength": 0 } - ] + ], + "format": "file-path", + "exists": true + }, + "table": { + "errorMessage": "Recalibration table cannot contain spaces and must have extension '.table'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.table$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "cram": { + "errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.cram$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "crai": { + "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.crai$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "bam": { + "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.bam$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "bai": { + "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.bai$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "vcf": { + "errorMessage": "VCF file for reads 1 cannot contain spaces and must have extension '.vcf' or '.vcf.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "variantcaller": { + "type": "string" } }, - "required": ["sample", "fastq_1"] + "required": ["patient", "sample"] } -} +} \ No newline at end of file diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 7fae8ed..b1de246 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -71,7 +71,7 @@ def validate_and_transform(self, row): Perform all validations on the given row and insert the read pairing status. Args: - row (dict): A mapping from column headers (keys) to elements of that row + row (dict): A bam_align from column headers (keys) to elements of that row (values). """ From 770495e59888da772c6ded32fb0b2ff4f3e61cf5 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Mon, 21 Aug 2023 10:07:02 +0100 Subject: [PATCH 13/56] Starting to generate separated config files. 
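
The per-step config files added below (conf/modules/bam_align/bam_align.config, conf/modules/prepare_genome_and_intervals/prepare_genome.config and conf/modules/prepare_genome_and_intervals/prepare_intervals.config) only take effect once the main configuration loads them, and that wiring is not part of this patch. A minimal sketch, assuming the usual Nextflow includeConfig mechanism in nextflow.config (the include list itself is an assumption, for illustration only):

    // Illustrative only: load the split per-module configs from nextflow.config
    // so that their process { withName: '...' { ext.args / ext.prefix / publishDir } }
    // scopes apply to the corresponding subworkflows.
    includeConfig 'conf/modules/bam_align/bam_align.config'
    includeConfig 'conf/modules/prepare_genome_and_intervals/prepare_genome.config'
    includeConfig 'conf/modules/prepare_genome_and_intervals/prepare_intervals.config'

Splitting the options this way keeps each withName selector next to the subworkflow it configures, rather than collecting everything in a single monolithic modules.config.
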
--- conf/modules/bam_align/bam_align.config | 174 ++++++++++++++++++ .../prepare_genome.config | 138 ++++++++++++++ .../prepare_intervals.config | 45 +++++ .../main.nf | 161 ++++++++++++++++ 4 files changed, 518 insertions(+) create mode 100644 conf/modules/bam_align/bam_align.config create mode 100644 conf/modules/prepare_genome_and_intervals/prepare_genome.config create mode 100644 conf/modules/prepare_genome_and_intervals/prepare_intervals.config create mode 100644 subworkflows/local/bam_variant_calling_pre_post_processing/main.nf diff --git a/conf/modules/bam_align/bam_align.config b/conf/modules/bam_align/bam_align.config new file mode 100644 index 0000000..2888658 --- /dev/null +++ b/conf/modules/bam_align/bam_align.config @@ -0,0 +1,174 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// BAM_ALIGN config + +process { + + if (params.step == 'mapping'){ + + // DNA aligners + + withName: "BWAMEM1_MEM" { + ext.when = { params.aligner == "bwa-mem" } + } + + withName: "BWAMEM2_MEM" { + ext.when = { params.aligner == "bwa-mem2" } + } + + withName: "DRAGMAP_ALIGN" { + ext.when = { params.aligner == "dragmap" } + ext.args = { "--RGSM ${meta.patient}_${meta.sample} --RGID ${meta.read_group}" } + } + + withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*bam", + // Only save if save_output_as_bam AND + // (save_mapped OR no_markduplicates OR sentieon_dedup) AND + // only a single BAM file per sample + saveAs: { + if (params.save_output_as_bam && + ( + params.save_mapped || + (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) + ) && (meta.size * meta.num_lanes == 1) + ) { "mapped/${meta.id}/${it}" } + else { null } + } + ] + } + + withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { + // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof + // However if it's skipped, reads need to be coordinate-sorted + // Only name sort if Spark for Markduplicates + duplicate marking is not skipped + ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } + } + + withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" { + // Using -B 3 for tumor samples + ext.args = { meta.status == 1 ? 
"-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } + } + } + + + withName: 'MERGE_BAM|INDEX_MERGE_BAM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*{bam,bai}", + // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped )) + saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null } + ] + } + + withName: 'MERGE_BAM' { + ext.prefix = { "${meta.id}.sorted" } + } + + + // RNA aligners + withName: 'STAR_GENOMEGENERATE' { + ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '' + } + + withName: 'UNTAR_.*|STAR_GENOMEGENERATE|HISAT2_BUILD|HISAT2_EXTRACTSPLICESITES' { + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/index" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: STAR_ALIGN { + ext.args = [ + '--outSAMtype BAM SortedByCoordinate', + '--readFilesCommand zcat', + '--outFilterMultimapScoreRange 1', + '--outFilterMultimapNmax 20', + '--outFilterMismatchNmax 10', + '--alignMatesGapMax 1000000', + '--sjdbScore 2', + '--alignSJDBoverhangMin 1', + '--genomeLoad NoSharedMemory', + '--outFilterMatchNminOverLread 0.33', + '--outFilterScoreMinOverLread 0.33', + '--twopass1readsN -1', + params.save_unaligned ? '--outReadsUnmapped Fastx' : '', + params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '', + params.star_twopass ? '--twopassMode Basic' : '', + params.star_max_memory_bamsort > 0 ? "--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "", + params.star_bins_bamsort > 0 ? "--outBAMsortingBinsN ${params.star_bins_bamsort}" : "", + params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : "" + ].join(' ').trim() + ext.args2 = { "--outSAMattrRGline ${meta.read_group}" } + ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" } + publishDir = [ + [ + path: { "${params.outdir}/reports/star/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.{out,tab}', + enabled: params.save_align_intermeds + ], + [ + path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/mapped/" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ], + [ + path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/unmapped/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_align_intermeds + ] + ] + } + + // HISAT2 for second run + withName: '.*:ALIGN_HISAT2:HISAT2_ALIGN' { + ext.args = '--met-stderr --new-summary' + publishDir = [ + [ + path: { "${params.outdir}/report/hisat2/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log', + enabled: params.save_align_intermeds + ], + [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ], + [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] + } + + } + + + + + +} \ No newline at end of file diff --git a/conf/modules/prepare_genome_and_intervals/prepare_genome.config b/conf/modules/prepare_genome_and_intervals/prepare_genome.config new file mode 100644 index 0000000..d218301 --- /dev/null +++ b/conf/modules/prepare_genome_and_intervals/prepare_genome.config @@ -0,0 +1,138 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// PREPARE_GENOME TODO: add stuff and remove redundant code + +process { + + withName: 'BWAMEM1_INDEX' { + ext.when = { !params.bwa && params.step == "mapping" && (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem")} + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "bwa" + ] + } + + withName: 'BWAMEM2_INDEX' { + ext.when = { !params.bwamem2 && params.step == "mapping" && params.aligner == "bwa-mem2" } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "bwamem2" + ] + } + + withName: 'DRAGMAP_HASHTABLE' { + ext.when = { !params.dragmap && params.step == "mapping" && params.aligner == "dragmap" } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "dragmap" + ] + } + + withName: 'GATK4_CREATESEQUENCEDICTIONARY' { + ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/dict" }, + pattern: "*dict" + ] + } + + withName: 'MSISENSORPRO_SCAN' { + ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/msi" }, + pattern: "*list" + ] + } + + withName: 'SAMTOOLS_FAIDX' { + ext.when = { !params.fasta_fai && params.step != "annotate" } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/fai" }, + pattern: "*fai" + ] + } + + withName: 'TABIX_DBSNP' { + ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('mutect2'))) } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/dbsnp" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_GERMLINE_RESOURCE' { + ext.when = { !params.germline_resource_tbi && params.germline_resource && params.tools && params.tools.split(',').contains('mutect2') } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/germline_resource" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_KNOWN_INDELS' { + ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/known_indels" 
}, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_KNOWN_SNPS' { + ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/known_snps" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_PON' { + ext.when = { !params.pon_tbi && params.pon && params.tools && params.tools.split(',').contains('mutect2') } + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/pon" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { + ext.when = { params.tools && params.tools.split(',').contains('ascat')} + publishDir = [ + enabled: false + ] + } + + withName: 'UNTAR_CHR_DIR' { + ext.when = { params.tools && params.tools.split(',').contains('controlfreec')} + } +} \ No newline at end of file diff --git a/conf/modules/prepare_genome_and_intervals/prepare_intervals.config b/conf/modules/prepare_genome_and_intervals/prepare_intervals.config new file mode 100644 index 0000000..e760a1c --- /dev/null +++ b/conf/modules/prepare_genome_and_intervals/prepare_intervals.config @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// PREPARE INTERVALS + +process { + + withName: 'CREATE_INTERVALS_BED' { + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed" + ] + } + + withName: 'GATK4_INTERVALLISTTOBED' { + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed" + ] + } + + withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' { + ext.prefix = {"${meta.id}"} + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed.gz" + ] + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf new file mode 100644 index 0000000..bd1fd3d --- /dev/null +++ b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf @@ -0,0 +1,161 @@ +// +// Core workflow of the RNA/DNA variant calling pipeline +// +include { BAM_GATK_PREPROCESSING } from '../gatk_preprocessing/main' +// For now only matched supported +// include { BAM_VARIANT_CALLING } from '../variant_calling/main' +// // Can we just call normalization here? +// include { VCF_NORMALIZE } from '../normalize_vcf_variants/main' +// // Can we just call the consensus module here? +// include { VCF_CONSENSUS } from '../consensus/main' +// // maybe just call VEP here? +// include { VCF_ANNOTATE } from '../annotate/main' +// include { MAF_BASIC_FILTERING as FILTERING } from '../../../modules/local/filter_variants' + + +workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { + take: + step // step to start with + tools + skip_tools + ch_input_sample // input from CSV if applicable + ch_genome_bam // input from mapping + fasta // fasta reference file + fasta_fai // fai for fasta file + dict // + dbsnp + dbsnp_tbi + pon + pon_tbi + germline_resource + germline_resource_tbi + intervals + intervals_for_preprocessing + ch_interval_list_split + intervals_bed_gz_tbi + intervals_bed_combined + vcf_consensus_dna // to repeat rescue consensus + vcfs_status_dna // to repeat rescue consensus + + main: + ch_reports = Channel.empty() + ch_versions = Channel.empty() + ch_genome_bam.dump(tag:"ch_genome_bam") + // STEP 1: Mapping done elsewhere + // STEP 2: GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery + GATK_PREPROCESSING( + step, // Mandatory, step to start with - should be mapping for second pass + tools, + ch_genome_bam, // channel: [mandatory] [meta, [bam]] + skip_tools, // channel: [mandatory] skip_tools + params.save_output_as_bam, // channel: [mandatory] save_output_as_bam + fasta, // channel: [mandatory] fasta + fasta_fai , // channel: [mandatory] fasta_fai + dict, + germline_resource, // channel: [optional] germline_resource + germline_resource_tbi, // channel: [optional] germline_resource_tbi + intervals, // channel: [mandatory] intervals/target regions + intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes + ch_interval_list_split, + ch_input_sample + ) + + ch_cram_variant_calling = GATK_PREPROCESSING.out.ch_cram_variant_calling + ch_versions = ch_versions.mix(GATK_PREPROCESSING.out.versions) + 
ch_reports = ch_reports.mix(GATK_PREPROCESSING.out.ch_reports) + + ch_cram_variant_calling.dump(tag:"[STEP8 RNA_FILTERING] ch_cram_variant_calling") + intervals_bed_gz_tbi.dump(tag:"[STEP8 RNA_FILTERING] intervals_bed_gz_tbi") + pon.dump(tag:"[STEP8 RNA_FILTERING] pon") + // STEP 3: VARIANT CALLING +// VARIANT_CALLING( tools, +// ch_cram_variant_calling, +// fasta, +// fasta_fai, +// dbsnp, +// dbsnp_tbi, +// dict, +// germline_resource, +// germline_resource_tbi, +// intervals, +// intervals_bed_gz_tbi, +// intervals_bed_combined, +// pon, +// pon_tbi, +// ch_input_sample +// ) +// cram_vc_pair = VARIANT_CALLING.out.cram_vc_pair // use same crams for force calling later +// vcf_to_normalize = VARIANT_CALLING.out.vcf +// contamination = VARIANT_CALLING.out.contamination_table +// segmentation = VARIANT_CALLING.out.segmentation_table +// orientation = VARIANT_CALLING.out.artifact_priors +// ch_versions = ch_versions.mix(VARIANT_CALLING.out.versions) +// ch_reports = ch_reports.mix(VARIANT_CALLING.out.reports) +// +// +// // STEP 4: NORMALIZE +// NORMALIZE (tools, +// vcf_to_normalize, +// fasta, +// ch_input_sample) +// ch_versions = ch_versions.mix(NORMALIZE.out.versions) +// vcf_normalized = NORMALIZE.out.vcf +// +// +// // STEP 5: ANNOTATE +// ANNOTATE(tools, +// vcf_normalized, // second pass TODO: make it optional +// fasta, +// ch_input_sample // first pass +// ) +// +// ch_versions = ch_versions.mix(ANNOTATE.out.versions) +// ch_reports = ch_reports.mix(ANNOTATE.out.reports) +// +// // STEP 6: CONSENSUS +// CONSENSUS ( tools, +// ANNOTATE.out.maf_ann, +// cram_vc_pair, // from previous variant calling +// dict, +// fasta, +// fasta_fai, +// germline_resource, +// germline_resource_tbi, +// intervals, +// intervals_bed_gz_tbi, +// intervals_bed_combined, +// pon, +// pon_tbi, +// vcf_consensus_dna, // null when first pass +// vcfs_status_dna, // null when first pass +// ch_input_sample, +// contamination, +// segmentation, +// orientation +// ) +// // STEP 7: FILTERING +// if (tools.split(',').contains('filtering')) { +// FILTERING(CONSENSUS.out.maf, fasta) +// +// FILTERING.out.maf.branch{ +// dna: it[0].status < 2 +// rna: it[0].status == 2 +// }.set{filtered_maf} +// filtered_maf_rna = filtered_maf.rna +// filtered_maf_dna = filtered_maf.dna +// } else{ +// filtered_maf = Channel.empty() +// filtered_maf_rna = Channel.empty() +// filtered_maf_dna = Channel.empty() +// +// } +// +// emit: +// vcf_consensus_dna = CONSENSUS.out.vcf_consensus_dna +// vcfs_status_dna = CONSENSUS.out.vcfs_status_dna +// maf = filtered_maf +// maf_rna = filtered_maf_rna +// maf_dna = filtered_maf_dna +// versions = ch_versions // channel: [ versions.yml ] +// reports = ch_reports +} From 912ff434fe55008d502d2b9c6c571e65b5c16fb8 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Thu, 24 Aug 2023 10:32:35 +0100 Subject: [PATCH 14/56] Structured config files into sub-folders for easy access. Removed redundant code and moved subworkflows to correspondent folders. Fixes in rnadnavar.nf workflow and input check. Removed dependency in schema_input.json for lane. Added modules and subwrokflows the proper way. 
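
Because this commit also touches the input check and drops the lane dependency from assets/schema_input.json, a small worked example of a samplesheet accepted by the updated schema may help. Only patient and sample are required; status defaults to 1 when omitted (0 = normal DNA, 1 = tumour DNA, 2 = tumour RNA). The IDs and file names below are invented for illustration:

    patient,sample,status,lane,fastq_1,fastq_2
    patient1,normal_dna,0,lane_1,normal_dna_L001_R1.fastq.gz,normal_dna_L001_R2.fastq.gz
    patient1,tumour_dna,1,lane_1,tumour_dna_L001_R1.fastq.gz,tumour_dna_L001_R2.fastq.gz
    patient1,tumour_rna,2,lane_1,tumour_rna_L001_R1.fastq.gz,tumour_rna_L001_R2.fastq.gz

Rows that start from existing alignments or calls would instead fill the bam/bai, cram/crai (optionally table) or vcf plus variantcaller columns defined in the same schema.
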
--- assets/schema_input.json | 1 - .../alignment/alignment_to_fastq.config | 87 +++ .../{bam_align => alignment}/bam_align.config | 114 ++- conf/modules/annotate/annotate.config | 69 ++ conf/modules/consensus/normalise.config | 58 ++ conf/modules/consensus/vcf_consensus.config | 54 ++ conf/modules/filtering/maf_filtering.config | 81 +++ .../gatk4_preprocessing/markduplicates.config | 124 ++++ .../prepare_recalibration.config | 38 + .../gatk4_preprocessing/recalibrate.config | 61 ++ .../splitncigarreads.config | 68 ++ .../prepare_resources/prepare_cache.config | 27 + .../prepare_genome.config | 47 +- .../prepare_intervals.config | 21 +- .../quality_control/quality_control.config | 122 ++++ conf/modules/quality_control/trimming.config | 42 ++ conf/modules/variant_calling/freebayes.config | 79 ++ conf/modules/variant_calling/manta.config | 28 + conf/modules/variant_calling/mutect2.config | 128 ++++ conf/modules/variant_calling/strelka.config | 56 ++ lib/WorkflowRnadnavar.groovy | 39 +- modules.json | 5 - modules/nf-core/dragmap/align/main.nf | 46 ++ modules/nf-core/dragmap/align/meta.yml | 47 ++ modules/nf-core/dragmap/hashtable/main.nf | 36 + modules/nf-core/dragmap/hashtable/meta.yml | 40 + modules/nf-core/ensemblvep/download/main.nf | 45 ++ modules/nf-core/ensemblvep/download/meta.yml | 43 ++ nextflow.config | 171 +++-- nextflow_schema.json | 76 +- subworkflows/local/bam_align/main.nf | 18 +- .../main.nf} | 2 +- .../main.nf | 7 +- subworkflows/local/core_workflow_pass.nf | 157 ---- subworkflows/local/prepare_intervals/main.nf | 5 + .../local/prepare_reference_and_intervals.nf | 14 - .../nf-core/bam_markduplicates_picard/main.nf | 52 ++ .../bam_markduplicates_picard/meta.yml | 62 ++ subworkflows/nf-core/bam_qc_picard/main.nf | 45 ++ subworkflows/nf-core/bam_qc_picard/meta.yml | 84 +++ .../nf-core/bam_sort_stats_samtools/main.nf | 50 ++ .../nf-core/bam_sort_stats_samtools/meta.yml | 67 ++ .../nf-core/bam_stats_samtools/main.nf | 32 + .../nf-core/bam_stats_samtools/meta.yml | 41 ++ .../main.nf | 139 ++++ .../meta.yml | 116 +++ subworkflows/nf-core/fastq_align_bwa/main.nf | 43 ++ subworkflows/nf-core/fastq_align_bwa/meta.yml | 72 ++ .../nf-core/fastq_align_hisat2/main.nf | 44 ++ .../nf-core/fastq_align_hisat2/meta.yml | 89 +++ subworkflows/nf-core/fastq_align_star/main.nf | 49 ++ .../nf-core/fastq_align_star/meta.yml | 108 +++ workflows/rnadnavar.nf | 681 ++++++------------ 53 files changed, 3060 insertions(+), 770 deletions(-) create mode 100644 conf/modules/alignment/alignment_to_fastq.config rename conf/modules/{bam_align => alignment}/bam_align.config (56%) create mode 100644 conf/modules/annotate/annotate.config create mode 100644 conf/modules/consensus/normalise.config create mode 100644 conf/modules/consensus/vcf_consensus.config create mode 100644 conf/modules/filtering/maf_filtering.config create mode 100644 conf/modules/gatk4_preprocessing/markduplicates.config create mode 100644 conf/modules/gatk4_preprocessing/prepare_recalibration.config create mode 100644 conf/modules/gatk4_preprocessing/recalibrate.config create mode 100644 conf/modules/gatk4_preprocessing/splitncigarreads.config create mode 100644 conf/modules/prepare_resources/prepare_cache.config rename conf/modules/{prepare_genome_and_intervals => prepare_resources}/prepare_genome.config (77%) rename conf/modules/{prepare_genome_and_intervals => prepare_resources}/prepare_intervals.config (72%) create mode 100644 conf/modules/quality_control/quality_control.config create mode 100644 
conf/modules/quality_control/trimming.config create mode 100644 conf/modules/variant_calling/freebayes.config create mode 100644 conf/modules/variant_calling/manta.config create mode 100644 conf/modules/variant_calling/mutect2.config create mode 100644 conf/modules/variant_calling/strelka.config create mode 100644 modules/nf-core/dragmap/align/main.nf create mode 100644 modules/nf-core/dragmap/align/meta.yml create mode 100644 modules/nf-core/dragmap/hashtable/main.nf create mode 100644 modules/nf-core/dragmap/hashtable/meta.yml create mode 100644 modules/nf-core/ensemblvep/download/main.nf create mode 100644 modules/nf-core/ensemblvep/download/meta.yml rename subworkflows/local/{gatk_preprocessing.nf => bam_gatk_preprocessing/main.nf} (99%) delete mode 100644 subworkflows/local/core_workflow_pass.nf create mode 100644 subworkflows/nf-core/bam_markduplicates_picard/main.nf create mode 100644 subworkflows/nf-core/bam_markduplicates_picard/meta.yml create mode 100644 subworkflows/nf-core/bam_qc_picard/main.nf create mode 100644 subworkflows/nf-core/bam_qc_picard/meta.yml create mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/main.nf create mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/meta.yml create mode 100644 subworkflows/nf-core/bam_stats_samtools/main.nf create mode 100644 subworkflows/nf-core/bam_stats_samtools/meta.yml create mode 100644 subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf create mode 100644 subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_bwa/main.nf create mode 100644 subworkflows/nf-core/fastq_align_bwa/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_hisat2/main.nf create mode 100644 subworkflows/nf-core/fastq_align_hisat2/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_star/main.nf create mode 100644 subworkflows/nf-core/fastq_align_star/meta.yml diff --git a/assets/schema_input.json b/assets/schema_input.json index 2775191..6b8708f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -31,7 +31,6 @@ "type": "string", "pattern": "^\\S+$", "unique": ["patient", "sample"], - "dependentRequired": ["fastq_1"], "meta": ["lane"] }, "fastq_1": { diff --git a/conf/modules/alignment/alignment_to_fastq.config b/conf/modules/alignment/alignment_to_fastq.config new file mode 100644 index 0000000..30afa67 --- /dev/null +++ b/conf/modules/alignment/alignment_to_fastq.config @@ -0,0 +1,87 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// BAM TO FASTQ + +process { // alignment_to_fastq + + withName: 'COLLATE_FASTQ_MAP' { + ext.args2 = '-N' + ext.prefix = {"${meta.id}.mapped"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'COLLATE_FASTQ_UNMAP' { + ext.args2 = '-N' + ext.prefix = {"${meta.id}.unmapped"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_MAP' { + ext.args = '-b -f1 -F12' + ext.prefix = {"${meta.id}.map_map"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_UNMAP' { + ext.args = '-b -f8 -F260' + ext.prefix = {"${meta.id}.map_unmap"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_MAP' { + ext.args = '-b -f4 -F264' + ext.prefix = {"${meta.id}.unmap_map"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' { + ext.args = '-b -f12 -F256' + ext.prefix = {"${meta.id}.unmap_unmap"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_MERGE_UNMAP' { + ext.prefix = {"${meta.id}.merged_unmap"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + withName: 'CAT_FASTQ' { + publishDir = [ + enabled: params.save_split_fastqs, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/bed" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} \ No newline at end of file diff --git a/conf/modules/bam_align/bam_align.config b/conf/modules/alignment/bam_align.config similarity index 56% rename from conf/modules/bam_align/bam_align.config rename to conf/modules/alignment/bam_align.config index 2888658..602651e 100644 --- a/conf/modules/bam_align/bam_align.config +++ b/conf/modules/alignment/bam_align.config @@ -12,7 +12,7 @@ // BAM_ALIGN config -process { +process { // bam_align if (params.step == 'mapping'){ @@ -50,22 +50,22 @@ process { else { null } } ] - } + } withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof // However if it's skipped, reads need to be coordinate-sorted // Only name sort if Spark for Markduplicates + duplicate marking is not skipped ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } - } + } - withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" { + withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" { // Using -B 3 for tumor samples ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } - } } + withName: 'MERGE_BAM|INDEX_MERGE_BAM' { publishDir = [ mode: params.publish_dir_mode, @@ -95,9 +95,9 @@ process { ] } - withName: STAR_ALIGN { + withName: 'STAR_ALIGN' { ext.args = [ - '--outSAMtype BAM SortedByCoordinate', + '--outSAMtype BAM Unsorted', '--readFilesCommand zcat', '--outFilterMultimapScoreRange 1', '--outFilterMultimapNmax 20', @@ -113,11 +113,9 @@ process { params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '', params.star_twopass ? '--twopassMode Basic' : '', params.star_max_memory_bamsort > 0 ? 
"--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "", - params.star_bins_bamsort > 0 ? "--outBAMsortingBinsN ${params.star_bins_bamsort}" : "", params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : "" - ].join(' ').trim() - ext.args2 = { "--outSAMattrRGline ${meta.read_group}" } - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" } + ].flatten().unique(false).join(' ').trim() + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).baseName.tokenize('.')[0]) : "" } publishDir = [ [ path: { "${params.outdir}/reports/star/${meta.patient}/${meta.id}/" }, @@ -165,10 +163,102 @@ process { ] } - } + // POST ALIGNMENT AND PREPROCESSING BAM TODO: check if it follows new pattern + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('') : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/preprocessing/" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } + + ] + } + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]) : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.flagstat", + enabled: params.save_align_intermeds + ] + } + + + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_STATS' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.')[1]) : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.stats", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.')[1]) : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.idxstats", + enabled: params.save_align_intermeds + ] + } + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned') : "${meta.id}.aligned" } + publishDir = [ + path: { "${params.outdir}/preprocessing/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } + ] + } + } + // Second run alignment + if (params.skip_tools && !params.skip_tools.split(',').contains('second_run')){ + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_align_intermeds + ] + } + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + withName: '.*:ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.aligned_hs2')[1]) : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.idxstats", + enabled: params.save_align_intermeds + ] + } + } } \ No newline at end of file diff --git a/conf/modules/annotate/annotate.config b/conf/modules/annotate/annotate.config new file mode 100644 index 0000000..83a22a3 --- /dev/null +++ b/conf/modules/annotate/annotate.config @@ -0,0 +1,69 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// ANNOTATE + +process { // annotate + + // VEP TODO: is vep_custom_args working?? + if (params.tools && params.tools.split(',').contains('vep')) { + withName: 'ENSEMBLVEP_VEP' { + ext.args = { [ + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? 
'--plugin SpliceRegion' : '', + (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf', + (params.vep_custom_args) ?: '' + ].join(' ').trim() } + // If just VEP: _VEP.ann.vcf + ext.prefix = { vcf.baseName - ".vcf" + "_VEP.ann" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz}" + ] + ] + } + } + + // ALL ANNOTATION TOOLS + if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { + withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)" { + ext.prefix = { input.name - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz.tbi}" + ] + } + } + + if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}", + saveAs: { params.tools.split(',').contains('snpeff') ? it : null } + ] + } + } +} \ No newline at end of file diff --git a/conf/modules/consensus/normalise.config b/conf/modules/consensus/normalise.config new file mode 100644 index 0000000..85ba365 --- /dev/null +++ b/conf/modules/consensus/normalise.config @@ -0,0 +1,58 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
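    Example override (illustrative only, e.g. supplied through a custom config passed with -c):
        process { withName: 'VT_DECOMPOSE' { ext.prefix = { "${vcf.baseName.minus('.vcf')}.decomposed" } } }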
+---------------------------------------------------------------------------------------- +*/ + +// NORMALISE + +process { // normalise + + if (params.skip_tools && !params.skip_tools.split(',').contains('normalise')) { + // VT + // TODO: stats are not going to the report dir - no idea why + withName: 'VT_DECOMPOSE'{ + ext.args = "" + ext.prefix = { "${vcf.baseName.minus(".vcf")}.dec" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vt/" }, + pattern: "*dec.stats", + saveAs: {"${meta.variantcaller}/${meta.patient}/${meta.id}/${it}"}, + enabled: true + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: {"${meta.variantcaller}/${meta.patient}/${meta.id}/${it}"}, + enabled: false // store normalised results only + ] + ] + } + + withName: 'VT_NORMALIZE'{ + ext.args = {"-n"} + ext.prefix = { "${vcf.baseName.minus(".dec.vcf")}.norm" } + publishDir = [[ + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi,norm.stats}", + saveAs: { "${meta.variantcaller}/${meta.patient}/${meta.id}/${it}" }, + enabled: true // just store normalised results + ], + [mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vt/${meta.variantcaller}/${meta.patient}/${meta.id}/" }, + pattern: "*stats" + ]] + } + + } +} diff --git a/conf/modules/consensus/vcf_consensus.config b/conf/modules/consensus/vcf_consensus.config new file mode 100644 index 0000000..bc63ea0 --- /dev/null +++ b/conf/modules/consensus/vcf_consensus.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// CONSENSUS + +process { // consensus + + if (params.tools && params.tools.split(',').contains('consensus')) { + + withName: 'RUN_CONSENSUS' { + ext.prefix = { "${meta.id}.consensus"} + ext.args = {"--id=${meta.id}"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'RUN_CONSENSUS_RESCUE_DNA' { + ext.prefix = { "${meta.id}.withRNA.consensus"} + ext.args = {"--id=${meta.id}_withRNAConsensus"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'RUN_CONSENSUS_RESCUE_RNA' { + ext.prefix = { "${meta.id}.withDNA.consensus"} + ext.args = {"--id=${meta.id}_withDNAConsensus"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
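// Note (standard Nextflow behaviour): a saveAs closure that returns null skips publishing of that
// file, so the per-task versions.yml is kept out of the consensus results directory while all other
// outputs are published under their original names.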
null : filename }, + enabled: true + ] + } + + } +} diff --git a/conf/modules/filtering/maf_filtering.config b/conf/modules/filtering/maf_filtering.config new file mode 100644 index 0000000..97a2dde --- /dev/null +++ b/conf/modules/filtering/maf_filtering.config @@ -0,0 +1,81 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MAF FILTERING + +process { // maf filtering + if (params.tools && params.tools.split(',').contains('filtering')) { + + + withName: "VCF2MAF" { + ext.args = { [ + "--inhibit-vep", + "--normal-id ${meta.normal_id}", + "--tumor-id ${meta.tumor_id}", + "--vcf-tumor-id ${meta.tumor_id}", + "--vcf-normal-id ${meta.normal_id}", + "--max-subpop-af 0.0001", + "--retain-ann gnomADg_AF,MAX_AF,MAX_AF_POPS", + "--retain-fmt AD,DP,AF,GT", + params.vep_genome ? "--ncbi-build ${params.vep_genome}" : '', + meta.variantcaller == "strelka"? "--vcf-tumor-id TUMOR --vcf-normal-id NORMAL" : '' + ].join(' ').trim() } + ext.prefix = { "${meta.id}.${meta.variantcaller}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/unfiltered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf,maf.gz}" + ] + } + + withName: "FILTERING" { + ext.prefix = { "${meta.id}.filtered"} + ext.args = { [params.whitelist? "--whitelist ${params.whitelist}": "", + params.blacklist? "--blacklist ${params.blacklist}": ""].join(' ').trim() } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf,maf.gz}" + ] + } + + withName: 'SAMTOOLS_MERGE_SECOND_PASS' { + ext.prefix = {"${meta.id}.merged_2ndpass"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/input/${meta.patient}/${meta.id}/" }, + pattern: "*{bam}", + enabled: params.save_align_intermeds + ] + } + + + + withName: 'RNA_FILTERING' { + ext.prefix = {"${meta.id}.rna_filt"} + ext.args = { [params.rnaedits? "--rnaedits ${params.rnaedits}": "", + params.rna_pon? "--pon ${params.rna_pon}" : "", + params.chain? "--chain ${params.chain}" : "", + params.fasta19? "--ref19 ${params.fasta19}" : "", + params.rna_pon19? 
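// Illustrative resolution (hypothetical paths, not defaults): with only rnaedits and chain set, this
// list collapses to "--rnaedits <rnaedits file> --chain <chain file>"; unset references simply add
// nothing to the RNA filtering command.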
"--pon19 ${params.rna_pon19}" : "" + ].join(' ').trim() } + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf}", + enabled: true + ] + } + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/markduplicates.config b/conf/modules/gatk4_preprocessing/markduplicates.config new file mode 100644 index 0000000..bb12432 --- /dev/null +++ b/conf/modules/gatk4_preprocessing/markduplicates.config @@ -0,0 +1,124 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MARKDUPLICATES + +process { // markduplicates + + withName: 'CRAM_TO_BAM' { + ext.args = "-b" + } + + withName: 'BAM_TO_CRAM' { + // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) + // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram + // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram + ext.args = "-C" + ext.prefix = { "${meta.id}.converted" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, + pattern: "*{cram,crai}" + ] + } + // TODO: is this necessary? the id should be different +// withName: '.*:BAM_TO_CRAM_SNCR:BAM_TO_CRAM' { +// // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) +// // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram +// // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram +// ext.args = "-C" +// ext.prefix = { "${meta.id}.converted." 
} +// publishDir = [ +// enabled: !params.save_output_as_bam, +// mode: params.publish_dir_mode, +// path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, +// pattern: "*{cram,crai}" +// ] +// } + + withName: 'BAM_TO_CRAM_MAPPING' { + // Run only when mapping should be saved as CRAM or when no MD is done + ext.when = (params.save_mapped && !params.save_output_as_bam) || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + // Never publish if BAM only should be published + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/mapped/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/markduplicates/${meta.id}" }, + pattern: "*metrics" + ] + } + // TODO: do we need to create index here? (--CREATE_INDEX true) + withName: 'GATK4_MARKDUPLICATES' { + ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT' + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) } + publishDir = [ + [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/" }, + pattern: "*metrics", + saveAs: { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) ? "markduplicates/${meta.id}/${it}" : null} + ] + ] + } + + withName: 'GATK4_MARKDUPLICATES_SPARK' { + ext.args = '--remove-sequencing-duplicates false -VS LENIENT' + ext.prefix = { "${meta.id}.md.cram" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'INDEX_MARKDUPLICATES' { + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + + withName: 'NFCORE_RNADNAVAR:RNADNAVAR:CRAM_TO_BAM' { + ext.prefix = { "${meta.id}.md" } + ext.when = { params.save_output_as_bam } + publishDir = [ + enabled: params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{md.bam,md.bam.bai}" + ] + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/prepare_recalibration.config b/conf/modules/gatk4_preprocessing/prepare_recalibration.config new file mode 100644 index 0000000..21e38a9 --- /dev/null +++ b/conf/modules/gatk4_preprocessing/prepare_recalibration.config @@ -0,0 +1,38 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_RECALIBRATION + +process { //prepare_recalibration + + withName: 'GATK4_BASERECALIBRATOR' { + ext.args = { meta.status >= 2 ? "--lenient" : "" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*table", + saveAs: { meta.num_intervals > 1 ? null : "recal_table/${meta.id}/${it}" } + ] + } + + withName: 'GATK4_GATHERBQSRREPORTS' { + ext.prefix = {"${meta.id}.recal"} + ext.when = { meta.num_intervals > 1 } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recal_table/${meta.id}/" }, + pattern: "*table", + ] + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/recalibrate.config b/conf/modules/gatk4_preprocessing/recalibrate.config new file mode 100644 index 0000000..1a1ce1a --- /dev/null +++ b/conf/modules/gatk4_preprocessing/recalibrate.config @@ -0,0 +1,61 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// RECALIBRATE + +process { // recalibrate + + withName: 'GATK4_APPLYBQSR' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*cram", + saveAs: { meta.num_intervals > 1 ? 
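// Note on the scatter/gather naming used here: when BQSR is scattered over interval groups
// (meta.num_intervals > 1) each piece gets the interval name in its prefix, e.g. a hypothetical
// 'sample1_chr21.recal', and is withheld from publishing; only the merged recalibrated CRAM produced
// downstream is written to preprocessing/recalibrated/.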
null : "recalibrated/${meta.id}/${it}" } + ] + } + + if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { + withName: '.*:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { meta.num_intervals > 1 } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*cram" + ] + } + + withName: '.*:BAM_APPLYBQSR::CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.cram,recal.cram.crai}" + ] + } + } + + withName: 'CRAM_TO_BAM_RECAL' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { params.save_output_as_bam} + publishDir = [ + enabled: params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.bam,recal.bam.bai}" + ] + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/splitncigarreads.config b/conf/modules/gatk4_preprocessing/splitncigarreads.config new file mode 100644 index 0000000..58f8c35 --- /dev/null +++ b/conf/modules/gatk4_preprocessing/splitncigarreads.config @@ -0,0 +1,68 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// RECALIBRATE + +process { //splitncigar + // TODO: check SECOND_RUN and suffixes + withName: '.*:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { + ext.args = ['-rf ReassignOneMappingQuality', + '-RMQF 255 ', + '-RMQT 60', + '-U ALLOW_N_CIGAR_READS'].join(' ').trim() + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + pattern: "*{bam,bai}", + enabled: params.save_align_intermeds // will be saved as CRAM + ] + } + + withName: '.*:SECOND_RUN:GATK_PREPROCESSING:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { + ext.prefix = {"${meta.id}.sncr"} + ext.args = ['-rf ReassignOneMappingQuality', + '-RMQF 255 ', + '-RMQT 60', + '-U ALLOW_N_CIGAR_READS'].join(' ').trim() + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + pattern: "*{bam,bai}", + enabled: params.save_align_intermeds // will be saved as CRAM + ] + } + + withName: ".*:PREPARE_SECOND_RUN:MERGE_ALIGN:INDEX_MERGE_BAM" { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*.{bai,csi}", + saveAs: { params.save_bam_mapped ? 
"second_run/${meta.patient}/${meta.id}/${it}" : null }, + enabled: params.save_align_intermeds + ] + + } + + withName: '.*:SPLITNCIGAR:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + publishDir = [ + path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + + + +} diff --git a/conf/modules/prepare_resources/prepare_cache.config b/conf/modules/prepare_resources/prepare_cache.config new file mode 100644 index 0000000..cf922fb --- /dev/null +++ b/conf/modules/prepare_resources/prepare_cache.config @@ -0,0 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_CACHE + +process { // prepare_cache + + // VEP + withName: 'ENSEMBLVEP_DOWNLOAD' { + ext.when = { params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge')) } + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? "${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } +} \ No newline at end of file diff --git a/conf/modules/prepare_genome_and_intervals/prepare_genome.config b/conf/modules/prepare_resources/prepare_genome.config similarity index 77% rename from conf/modules/prepare_genome_and_intervals/prepare_genome.config rename to conf/modules/prepare_resources/prepare_genome.config index d218301..6554ec1 100644 --- a/conf/modules/prepare_genome_and_intervals/prepare_genome.config +++ b/conf/modules/prepare_resources/prepare_genome.config @@ -13,7 +13,7 @@ // PREPARE_GENOME TODO: add stuff and remove redundant code -process { +process { // prepare_genome withName: 'BWAMEM1_INDEX' { ext.when = { !params.bwa && params.step == "mapping" && (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem")} @@ -45,23 +45,26 @@ process { ] } - withName: 'GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } - publishDir = [ - enabled: (params.save_reference || params.build_only_index), + withName: 'STAR_GENOMEGENERATE' { + ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '' + } + + withName: 'UNTAR_.*|STAR_GENOMEGENERATE|HISAT2_BUILD|HISAT2_EXTRACTSPLICESITES' { + publishDir = [ + enabled: params.save_reference, mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dict" }, - pattern: "*dict" + path: { "${params.outdir}/reference/index" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: 'MSISENSORPRO_SCAN' { - ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } + withName: 'GATK4_CREATESEQUENCEDICTIONARY' { + ext.when = { !params.dict && params.step != "annotate"} publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/msi" }, - pattern: "*list" + path: { "${params.outdir}/reference/dict" }, + pattern: "*dict" ] } @@ -76,7 +79,7 @@ process { } withName: 'TABIX_DBSNP' { - ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('mutect2'))) } + ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && params.tools.split(',').contains('mutect2')) } publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, @@ -96,7 +99,7 @@ process { } withName: 'TABIX_KNOWN_INDELS' { - ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) } + ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' ) } publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, @@ -106,7 +109,7 @@ process { } withName: 'TABIX_KNOWN_SNPS' { - ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) } + ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' ) } publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, @@ -125,14 +128,12 @@ process { ] } - withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { - ext.when = { params.tools && params.tools.split(',').contains('ascat')} - publishDir = [ - enabled: false - ] - } - - withName: 'UNTAR_CHR_DIR' { - ext.when = { params.tools && params.tools.split(',').contains('controlfreec')} + withName: "GTF2BED" { + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*bed" + ] } } \ No newline at end of file diff --git a/conf/modules/prepare_genome_and_intervals/prepare_intervals.config b/conf/modules/prepare_resources/prepare_intervals.config similarity index 72% rename from conf/modules/prepare_genome_and_intervals/prepare_intervals.config rename to conf/modules/prepare_resources/prepare_intervals.config index e760a1c..9ee572e 100644 --- a/conf/modules/prepare_genome_and_intervals/prepare_intervals.config +++ 
b/conf/modules/prepare_resources/prepare_intervals.config @@ -13,7 +13,7 @@ // PREPARE INTERVALS -process { +process { // prepare_intervals withName: 'CREATE_INTERVALS_BED' { publishDir = [ @@ -33,8 +33,18 @@ process { ] } + withName: 'GATK4_BEDTOINTERVALLIST' { + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/interval_list" }, + pattern: "*.interval_list" + ] + } + withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' { ext.prefix = {"${meta.id}"} + ext.args2 = "-0 -p bed" publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, @@ -42,4 +52,13 @@ process { pattern: "*bed.gz" ] } + + withName: 'BUILD_INTERVALS' { + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/bed" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } \ No newline at end of file diff --git a/conf/modules/quality_control/quality_control.config b/conf/modules/quality_control/quality_control.config new file mode 100644 index 0000000..af9189c --- /dev/null +++ b/conf/modules/quality_control/quality_control.config @@ -0,0 +1,122 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// QC config + +process { // quality_control + + withName: 'FASTQC' { + ext.args = '--quiet' + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc')) } + publishDir = [ + [ + path: { "${params.outdir}/reports/fastqc/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*{html,zip}" + ] + ] + } + + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/reports"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: !(params.skip_tools && (params.skip_tools.split(',').contains('multiqc'))) + ] + errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} + } + // TODO check this is correct + withName: '.*:CRAM_QC_NO_MD:SAMTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { "${meta.id}.sorted.cram" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + if (params.skip_tools && !params.skip_tools.split(',').contains('mosdepth')){ + withName: 'MOSDEPTH' { + ext.args = { !params.wes ? 
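// For WGS data (the !params.wes branch below) mosdepth runs with "-n --fast-mode --by 500", i.e.
// per-base output is suppressed and coverage is summarised in 500 bp windows to keep runtime and
// output size down; WES/targeted runs fall back to the module defaults.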
"-n --fast-mode --by 500" : ""} + ext.prefix = { + if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) { + "${meta.id}.sorted" + } else { + "${meta.id}.md" + } + } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('mosdepth')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/mosdepth/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + // TODO: check that this is capturing what it should + if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { + withName: '.*:CRAM_QC_RECAL:MOSDEPTH' { + ext.prefix = { "${meta.id}.recal" } + } + + withName: '.*:CRAM_QC_RECAL:SAMTOOLS_STATS' { + ext.prefix = { "${meta.id}.recal.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + + if (params.tools && params.tools.split(',').contains('vcf_qc')){ + + // VCF + withName: 'BCFTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } + ext.prefix = { vcf.baseName - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/bcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCFTOOLS_.*' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('vcftools')) } + ext.prefix = { variant_file.baseName - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCFTOOLS_TSTV_COUNT' { + ext.args = "--TsTv-by-count" + } + + withName: 'VCFTOOLS_TSTV_QUAL' { + ext.args = "--TsTv-by-qual" + } + + withName: 'VCFTOOLS_SUMMARY' { + ext.args = "--FILTER-summary" + + } + } +} \ No newline at end of file diff --git a/conf/modules/quality_control/trimming.config b/conf/modules/quality_control/trimming.config new file mode 100644 index 0000000..c2d2639 --- /dev/null +++ b/conf/modules/quality_control/trimming.config @@ -0,0 +1,42 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TRIMMING + +process { // trimming + + withName: 'FASTP' { + ext.args = [ "-Q", + !params.trim_fastq ? "--disable_adapter_trimming" : "", // Disable adapter trimming + params.clip_r1 > 0 ? 
"--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1 + params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : "", // Remove bp from the 5' end of read 2 + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed + params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed + params.trim_nextseq ? "--trim_poly_g" : "", // Apply the --nextseq=X option, to trim based on quality after removing poly-G tails + params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : "" + ].join(" ").trim() + publishDir = [ + [ + path: { "${params.outdir}/reports/fastp/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,json,log}" + ], + [ + enabled: params.save_trimmed || params.save_split_fastqs, + path: { "${params.outdir}/preprocessing/fastp/${meta.sample}/" }, + mode: params.publish_dir_mode, + pattern: "*.fastp.fastq.gz" + ] + ] + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/freebayes.config b/conf/modules/variant_calling/freebayes.config new file mode 100644 index 0000000..8ba6f62 --- /dev/null +++ b/conf/modules/variant_calling/freebayes.config @@ -0,0 +1,79 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// FREEBAYES + +process { // freebayes + + if (params.tools && params.tools.split(',').contains('freebayes')) { + + withName: 'MERGE_FREEBAYES' { + ext.prefix = { "${meta.id}.freebayes" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FREEBAYES' { + ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' + //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('freebayes') } + publishDir = [ + enabled: false + ] + } + + withName: 'BCFTOOLS_SORT' { + ext.prefix = { meta.num_intervals <= 1 ? meta.id + ".freebayes" : vcf.name - ".vcf" + ".sort" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*vcf.gz", + saveAs: { meta.num_intervals > 1 ? null : "freebayes/${meta.id}/${it}" } + ] + } + + withName : 'TABIX_VC_FREEBAYES' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // PAIR_VARIANT_CALLING + if (params.tools && params.tools.split(',').contains('freebayes')) { + withName: '.*:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_FREEBAYES:FREEBAYES' { + ext.args = "--pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 " + } + } + + withName: 'VCFFILTER' { + //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder + ext.prefix = { "${vcf.baseName.minus(".vcf")}.filtered" } + ext.args = '-f "QUAL > 1 & QUAL / AO > 10 & SAF > 0 & SAR > 0 & RPR > 1 & RPL > 1" -t PASS -F FAIL' + ext.when = { params.tools && params.tools.split(',').contains('freebayes') } + publishDir = [enabled: false] + } + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/manta.config b/conf/modules/variant_calling/manta.config new file mode 100644 index 0000000..1770beb --- /dev/null +++ b/conf/modules/variant_calling/manta.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MANTA + +process { // manta + if (params.tools && params.tools.split(',').contains('manta')) { + withName: 'MANTA_SOMATIC' { + ext.args = {params.wes || meta.status >= 2 ? "--exome" : "" } + ext.prefix = { "${meta.id}.manta" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" + ] + } + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/mutect2.config b/conf/modules/variant_calling/mutect2.config new file mode 100644 index 0000000..2226407 --- /dev/null +++ b/conf/modules/variant_calling/mutect2.config @@ -0,0 +1,128 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MUTECT2 + +process { // mutect2 + if (params.tools && params.tools.split(',').contains('mutect2')) { + +// withName: 'GATK4_MUTECT2' { +// ext.prefix = { meta.num_intervals <= 1 ? 
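// Note on the VCFFILTER expression in the freebayes config above (a commonly recommended freebayes
// hard filter): it keeps calls with site quality above 1, quality per alternate observation above 10,
// and alternate support on both strands (SAF/SAR) and both read placements (RPL/RPR); passing
// variants are tagged PASS, the rest FAIL.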
"${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } +// ext.when = { params.tools && params.tools.split(',').contains('mutect2') } +// ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" } +// publishDir = [ +// mode: params.publish_dir_mode, +// path: { "${params.outdir}/variant_calling/" }, +// pattern: "*{vcf.gz,vcf.gz.tbi,stats}", +// saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } +// ] +// } + + // PAIR_VARIANT_CALLING + withName: 'MUTECT2_PAIRED' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('mutect2') } + ext.args = { params.ignore_soft_clipped_bases ? + "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.normal_id} --callable-depth 1 " : + "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.patient}_${meta.normal_id} --callable-depth 1 " } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi,stats}", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_MUTECT2.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + // TODO: FILTERMUTECTCALLS is a patch for second run [!!] + withName: 'FILTERMUTECTCALLS.*' { + ext.prefix = {"${meta.id}.mutect2.filtered"} + ext.args = { [meta.status >= 2 ? '--max-events-in-region 5': '', + meta.cont && !(meta.cont.endswith("NO_TABLE")) ? '--contamination-table ${meta.cont}' :'', + meta.seg && !(meta.seg.endswith("NO_SEG")) ? '--tumor-segmentation ${meta.seg}':'', + meta.orient && !(meta.orient.endswith("NO_ARTPRIOR"))? '--orientation-bias-artifact-priors ${meta.orient}':'' + ].join(' ').trim() + } + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } + ] + } + + withName: 'CALCULATECONTAMINATION' { + ext.prefix = { "${meta.id}.mutect2" } + ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'LEARNREADORIENTATIONMODEL' { + ext.prefix = { "${meta.id}.mutect2.artifactprior" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'GATHERPILEUPSUMMARIES.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GETPILEUPSUMMARIES.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*.table", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + if (params.joint_mutect2) { + withName: 'CALCULATECONTAMINATION' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.patient}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/strelka.config b/conf/modules/variant_calling/strelka.config new file mode 100644 index 0000000..99d0480 --- /dev/null +++ b/conf/modules/variant_calling/strelka.config @@ -0,0 +1,56 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// STRELKA + +process { // strelka + + if (params.tools && params.tools.split(',').contains('strelka')) { + + withName: 'STRELKA_.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.simpleName}" } + ext.args = {params.wes || meta.status >= 2 ? "--exome" : "" } + ext.when = { params.tools && params.tools.split(',').contains('strelka') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
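// Note on the "--exome" switch above: Strelka's depth-based filters assume roughly uniform
// whole-genome coverage, so the flag is applied both to targeted/WES data and to samples with
// meta.status >= 2, whose coverage is likewise highly non-uniform.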
null : "strelka/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_STRELKA.*' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_STRELKA' { + ext.prefix = {"${meta.id}.strelka.variants"} + } + + withName: 'MERGE_STRELKA_GENOME' { + ext.prefix = {"${meta.id}.strelka.genome"} + } + + // PAIR_VARIANT_CALLING + withName: 'MERGE_STRELKA_INDELS' { + ext.prefix = {"${meta.id}.strelka.somatic_indels"} + } + withName: 'MERGE_STRELKA_SNVS' { + ext.prefix = {"${meta.id}.strelka.somatic_snvs"} + } + } +} \ No newline at end of file diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index a80e6ca..d745470 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -45,15 +45,52 @@ class WorkflowRnadnavar { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>
  • " + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/modules.json b/modules.json index bf5dfad..fc17001 100644 --- a/modules.json +++ b/modules.json @@ -60,11 +60,6 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, - "ensemblvep": { - "branch": "master", - "git_sha": "29984d70aea47d06f0062a1785d76c357dd40ea9", - "installed_by": ["modules"] - }, "ensemblvep/download": { "branch": "master", "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf new file mode 100644 index 0000000..6221fde --- /dev/null +++ b/modules/nf-core/dragmap/align/main.nf @@ -0,0 +1,46 @@ +process DRAGMAP_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "bioconda::dragmap=1.2.1 bioconda::samtools=1.15.1 conda-forge::pigz=2.3.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0': + 'biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(hashmap) + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path('*.log'), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}" + def samtools_command = sort_bam ? 'sort' : 'view' + + """ + dragen-os \\ + -r $hashmap \\ + $args \\ + --num-threads $task.cpus \\ + $reads_command \\ + 2> ${prefix}.dragmap.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/align/meta.yml b/modules/nf-core/dragmap/align/meta.yml new file mode 100644 index 0000000..763e005 --- /dev/null +++ b/modules/nf-core/dragmap/align/meta.yml @@ -0,0 +1,47 @@ +name: dragmap_align +description: Performs fastq alignment to a reference using DRAGMAP +keywords: + - alignment + - map + - fastq + - bam + - sam +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "Directory containing DRAGMAP hash table *.{cmp,.bin,.txt}" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/dragmap/hashtable/main.nf b/modules/nf-core/dragmap/hashtable/main.nf new file mode 100644 index 0000000..529b438 --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/main.nf @@ -0,0 +1,36 @@ +process DRAGMAP_HASHTABLE { + tag "$fasta" + label 'process_high' + + conda "bioconda::dragmap=1.3.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/dragmap:1.3.0--h72d16da_1': + 'biocontainers/dragmap:1.3.0--h72d16da_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("dragmap") , emit: hashmap + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir dragmap + dragen-os \\ + --build-hash-table true \\ + --ht-reference $fasta \\ + --output-directory dragmap \\ + $args \\ + --ht-num-threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/hashtable/meta.yml b/modules/nf-core/dragmap/hashtable/meta.yml new file mode 100644 index 0000000..133cc9f --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/meta.yml @@ -0,0 +1,40 @@ +name: dragmap_hashtable +description: Create DRAGEN hashtable for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "*.{cmp,.bin,.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf new file mode 100644 index 0000000..4873b91 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/main.nf @@ -0,0 +1,45 @@ +process ENSEMBLVEP_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::ensembl-vep=110.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
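// Note (nf-core module convention): the ternary below selects a Singularity image when the workflow
// runs with Singularity and direct docker-image pulls are disabled, and the matching biocontainers
// Docker image otherwise, so one module definition works across container engines.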
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), val(assembly), val(species), val(cache_version) + + output: + tuple val(meta), path("vep_cache"), emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + vep_install \\ + --CACHEDIR vep_cache \\ + --SPECIES $species \\ + --ASSEMBLY $assembly \\ + --CACHE_VERSION $cache_version \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir vep_cache + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml new file mode 100644 index 0000000..acb337c --- /dev/null +++ b/modules/nf-core/ensemblvep/download/meta.yml @@ -0,0 +1,43 @@ +name: ENSEMBLVEP_DOWNLOAD +description: Ensembl Variant Effect Predictor (VEP). The cache downloading options are controlled through `task.ext.args`. +keywords: + - annotation + - cache + - download +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - assembly: + type: string + description: | + Genome assembly + - species: + type: string + description: | + Species + - cache_version: + type: string + description: | + cache version +output: + - cache: + type: file + description: cache + pattern: "*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" diff --git a/nextflow.config b/nextflow.config index abe78c8..63b495a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -2,7 +2,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/rnadnavar Nextflow config file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments + Default config options for all compute environments ---------------------------------------------------------------------------------------- */ @@ -10,25 +10,25 @@ params { // Input options - mandatory - input = null // sample sheet - step = 'mapping' // Starts with mapping - skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default - save_bam_mapped = false // Mapped BAMs not saved - save_output_as_bam = false //Output files from preprocessing are saved as bam and not as cram files + input = null // sample sheet + step = 'mapping' // Starts with mapping + skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default + save_bam_mapped = false // Mapped BAMs not saved + save_output_as_bam = false // Output files from preprocessing are saved as bam and not as cram files // Genome and reference options - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + save_reference = false + build_only_index = false // Only build the reference indexes + download_cache = false // Do not download annotation cache - // Output options - save_reference = false - save_merged_fastq = false // Sequence read information read_length = 76 // Required for STAR to build index and align reads TODO: automate - wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers + wes = false // Set to true, if data is exome/targeted sequencing data. Used to select the correct models in various variant callers // Alignment aligner = 'bwa-mem' // Only STAR is currently supported.
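For orientation, everything in this params block is an ordinary Nextflow parameter, so the defaults above can be overridden per run with a small custom config passed via -c (or with the matching --<param> command-line flags). A minimal, illustrative sketch only: the file name and values are hypothetical, while the parameter names are the ones defined in this hunk and in nextflow_schema.json.

    // my_params.config (hypothetical) -- override a few of the defaults shown above
    params {
        input       = 'samplesheet.csv'  // sample sheet
        wes         = true               // exome/targeted sequencing data
        read_length = 101                // read length used for the STAR index
        aligner     = 'bwa-mem2'         // any aligner listed in the schema enum
        skip_tools  = 'baserecalibrator' // skip base recalibration
    }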
@@ -42,13 +42,10 @@ params { bam_csi_index = false save_unaligned = false save_align_intermeds = false - bwa = null - bwamem2 = null - hisat2_build_memory = null - - - // Preprocessing of alignment + hisat2_build_memory = null remove_duplicates = false + save_mapped = false // Mapped BAMs not saved + // Modify fastqs (trim/split) with FASTP trim_fastq = false // No trimming @@ -59,7 +56,6 @@ params { trim_nextseq = 0 split_fastq = 50000000 // FASTQ files will not be split by default by FASTP save_trimmed = false - save_trimmed_fail = false save_split_fastqs = false // Variant calling @@ -80,22 +76,31 @@ params { // GATK intervallist parameters gatk_interval_scatter_count = 25 ignore_soft_clipped_bases = true + // Variant annotation tools = null // No default Variant_Calling or Annotation tools genesplicer = null // genesplicer disabled within VEP + dbnsfp = null // No dbnsfp processed file + dbnsfp_consequence = null // No default consequence for dbnsfp plugin + dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin + dbnsfp_tbi = null // No dbnsfp processed file index + spliceai_indel = null // No spliceai_indel file + spliceai_indel_tbi = null // No spliceai_indel file index + spliceai_snv = null // No spliceai_snv file + spliceai_snv_tbi = null // No spliceai_snv file index + vep_custom_args= "--no_progress --offline --shift_hgvs 1 --check_existing --tsl --domains --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --biotype --force_overwrite --sift p --polyphen p --variant_class --regulatory --allele_number --af_gnomad --af_gnomadg --gene_phenotype --hgvs --hgvsg --max_af" vep_cache = null // No directory for VEP cache - vep_genome = null // No default genome for VEP -// vep_cache_version = '106' // No default cache version for VEP -// vep_version = '106.1' // No default cache version for VEP + vep_include_fasta = false // Don't use fasta file for annotation with VEP vep_dbnsfp = null vep_loftee = null vep_spliceai = null vep_spliceregion = null vep_out_format = 'vcf' + outdir_cache = null // No default outdir cache //filtering whitelist = null - + blacklist = null // MultiQC options multiqc_config = null multiqc_title = null @@ -105,7 +110,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -122,17 +126,18 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnadnavar' // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' - max_cpus = 32 + max_cpus = 16 max_time = '240.h' // Schema validation default options validationFailUnrecognisedParams = false - validationLenientMode = false + validationLenientMode = true validationSchemaIgnoreParams = 'genomes' validationShowHiddenParams = false validate_params = true @@ -158,95 +163,99 @@ try { // } profiles { debug { + cleanup = false dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false } conda { + apptainer.enabled = false + charliecloud.enabled = false conda.enabled = true docker.enabled = false - singularity.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled 
= false - apptainer.enabled = false + singularity.enabled = false } mamba { + apptainer.enabled = false + charliecloud.enabled = false conda.enabled = true conda.useMamba = true + charliecloud.enabled = false docker.enabled = false - singularity.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = false } docker { + apptainer.enabled = false + charliecloud.enabled = false + conda.enabled = false docker.enabled = true docker.userEmulation = true - conda.enabled = false - singularity.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = false } arm { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true + apptainer.enabled = false + charliecloud.enabled = false conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.autoMounts = true + singularity.enabled = true } podman { - podman.enabled = true + apptainer.enabled = false + charliecloud.enabled = false conda.enabled = false docker.enabled = false - singularity.enabled = false + podman.enabled = true shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = false } shifter { - shifter.enabled = true + apptainer.enabled = false + charliecloud.enabled = false conda.enabled = false docker.enabled = false - singularity.enabled = false podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + singularity.enabled = false } charliecloud { + apptainer.enabled = false charliecloud.enabled = true conda.enabled = false docker.enabled = false - singularity.enabled = false podman.enabled = false shifter.enabled = false - apptainer.enabled = false + singularity.enabled = false } apptainer { apptainer.enabled = true + charliecloud.enabled = false conda.enabled = false docker.enabled = false - singularity.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = false } gitpod { - executor.name = 'local' executor.cpus = 16 executor.memory = 60.GB + executor.name = 'local' } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + // Basic test profile for CI + test { includeConfig 'conf/test.config' } + test_cache { includeConfig 'conf/test/cache.config' } + } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -268,6 +277,7 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
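Site- or user-specific profiles follow exactly the same shape as the engine profiles above: each entry in the profiles block simply sets the relevant process, executor and container-engine scopes. A hypothetical example, written in the same style; all names and values are illustrative and not part of the template:

    // Hypothetical site profile following the pattern of the blocks above
    my_cluster {
        process.executor       = 'slurm'
        singularity.enabled    = true
        singularity.autoMounts = true
        params.max_cpus        = 48
        params.max_memory      = '384.GB'
    }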
@@ -282,22 +292,24 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +def tracedir = params.outdir + "/pipeline_info" timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + file = "${tracedir}/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + file = "${tracedir}/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + file = "${tracedir}/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -312,7 +324,42 @@ manifest { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules/modules.config' + +// prepare reference +includeConfig 'conf/modules/prepare_resources/prepare_cache.config' +includeConfig 'conf/modules/prepare_resources/prepare_genome.config' +includeConfig 'conf/modules/prepare_resources/prepare_intervals.config' + +// quality control +includeConfig 'conf/modules/quality_control/quality_control.config' +includeConfig 'conf/modules/quality_control/trimming.config' + +// alignment +includeConfig 'conf/modules/alignment/bam_align.config' +includeConfig 'conf/modules/alignment/alignment_to_fastq.config' + +// preprocessing +includeConfig 'conf/modules/gatk4_preprocessing/markduplicates.config' +includeConfig 'conf/modules/gatk4_preprocessing/prepare_recalibration.config' +includeConfig 'conf/modules/gatk4_preprocessing/recalibrate.config' + +// variant calling +includeConfig 'conf/modules/variant_calling/freebayes.config' +includeConfig 'conf/modules/variant_calling/strelka.config' +includeConfig 'conf/modules/variant_calling/mutect2.config' +includeConfig 'conf/modules/variant_calling/sage.config' + +// annotate +includeConfig 'conf/modules/annotate/annotate.config' + +// consensus +includeConfig 'conf/modules/consensus/vcf_consensus.config' + +// filtering +includeConfig 'conf/modules/filtering/maf_filtering.config' + + // Function to ensure that resource requirements don't go beyond // a maximum limit @@ -345,4 +392,4 @@ def check_max(obj, type) { return obj } } -} +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index 4cec090..6386b13 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,10 +84,6 @@ "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" }, - "save_merged_fastq": { - "type": "boolean", - "description": "Save FastQ files after merging re-sequenced libraries in the results directory." - }, "save_bam_mapped": { "type": "boolean", "fa_icon": "fas fa-download", @@ -110,9 +106,9 @@ "genome": { "type": "string", "description": "Name of iGenomes reference.", - "default": "GRCh38", + "default": "GATK.GRCh38", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. 
\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "bwa": { "type": "string", @@ -277,13 +273,6 @@ "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively cache version can be use to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers", "hidden": true }, - "vep_version": { - "type": "string", - "fa_icon": "fas fa-tag", - "description": "VEP version.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the VEP version when using the container with pre-downloaded cache.", - "hidden": true - }, "save_reference": { "type": "boolean", "fa_icon": "fas fa-download", @@ -349,19 +338,11 @@ "default": "", "fa_icon": "fas fa-cut", "properties": { - "trim_fastq": { + "trim_fastq": { "type": "boolean", "fa_icon": "fas fa-cut", "description": "Run FastP for read trimming", - "help_text": "Use this to perform adapter trimming. Adapter are detected automatically by using the FastP flag `--detect_adapter_for_pe`. For more info see [FastP](https://github.com/OpenGene/fastp) ", - "hidden": true - }, - "save_trimmed_fail": { - "type": "boolean", - "fa_icon": "fas fa-cut", - "description": "Save failed fastq from FastP", - "help_text": "Use this to saved failed FastP results ", - "hidden": false + "help_text": "Use this to perform adapter trimming. Adapter are detected automatically by using the FastP flag `--detect_adapter_for_pe`. For more info see [FastP](https://github.com/OpenGene/fastp)." }, "clip_r1": { "type": "integer", @@ -409,26 +390,12 @@ "description": "Save trimmed FastQ file intermediates.", "hidden": true }, - "umi_read_structure": { - "type": "string", - "fa_icon": "fas fa-tape", - "description": "Specify UMI read structure", - "hidden": true, - "help_text": "One structure if UMI is present on one end (i.e. '+T 2M11S+T'), or two structures separated by a blank space if UMIs a present on both ends (i.e. '2M11S+T 2M11S+T'); please note, this does not handle duplex-UMIs.\n\nFor more info on UMI usage in the pipeline, also check docs [here](./docs/usage.md/#how-to-handle-umis)." - }, - "group_by_umi_strategy": { - "type": "string", - "default": "Adjacency", - "description": "Default strategy with UMI", - "hidden": true, - "help_text": "Available values: Identity, Edit, Adjacency, Paired" - }, - "save_split_fastqs": { + "save_split_fastqs": { "type": "boolean", "fa_icon": "fas fa-vial", "description": "If set, publishes split FASTQ files. 
Intended for testing purposes.", "hidden": true - } + } } }, "pipeline_stage_options": { @@ -546,6 +513,34 @@ } } }, + "preprocessing": { + "title": "Preprocessing", + "type": "object", + "description": "Configure preprocessing tools", + "default": "", + "fa_icon": "fas fa-toolbox", + "properties": { + "aligner": { + "type": "string", + "default": "bwa-mem", + "fa_icon": "fas fa-puzzle-piece", + "enum": ["bwa-mem", "bwa-mem2", "dragmap", "sentieon-bwamem"], + "description": "Specify aligner to be used to map reads to reference genome.", + "help_text": "Sarek will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf DragMap is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. For more info see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)." + }, + "save_mapped": { + "type": "boolean", + "fa_icon": "fas fa-download", + "description": "Save mapped files.", + "help_text": "If the parameter `--split-fastq` is used, the sharded bam files are merged and converted to CRAM before saving them." + }, + "save_output_as_bam": { + "type": "boolean", + "description": "Saves output from mapping (if `--save_mapped`), Markduplicates & Baserecalibration as BAM file instead of CRAM", + "fa_icon": "fas fa-download" + } + } + }, "variant_calling": { "title": "Variant calling", "type": "object", @@ -1023,6 +1018,9 @@ { "$ref": "#/definitions/variant_calling" }, + { + "$ref": "#/definitions/preprocessing" + }, { "$ref": "#/definitions/annotation" }, diff --git a/subworkflows/local/bam_align/main.nf b/subworkflows/local/bam_align/main.nf index 9d77841..a4ca5c6 100644 --- a/subworkflows/local/bam_align/main.nf +++ b/subworkflows/local/bam_align/main.nf @@ -137,12 +137,12 @@ workflow BAM_ALIGN { // and not stall the workflow until all reads from all channels are mapped [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ] }.groupTuple() - bam_mapped_dna,dump(tag:"bam_mapped_dna") + bam_mapped_dna.dump(tag:"bam_mapped_dna") // RNA will be aligned with STAR // Run STAR - ALIGN_STAR ( - ch_reads_to_map_status.rna, + FASTQ_ALIGN_STAR ( + reads_for_alignment_status.rna, star_index, gtf, params.star_ignore_sjdbgtf, @@ -151,7 +151,7 @@ workflow BAM_ALIGN { [ [ id:"fasta" ], [] ] // fasta ) // Grouping the bams from the same samples not to stall the workflow - bam_mapped_rna = ALIGN_STAR.out.bam.map{ meta, bam -> + bam_mapped_rna = FASTQ_ALIGN_STAR.out.bam.map{ meta, bam -> // Update meta.id to be meta.sample, ditching sample-lane that is not needed anymore // Update meta.data_type @@ -164,11 +164,11 @@ workflow BAM_ALIGN { // and not stall the workflow until all reads from all channels are mapped [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ] }.groupTuple() - bam_mapped_rna,dump(tag:"bam_mapped_rna") + bam_mapped_rna.dump(tag:"bam_mapped_rna") // Gather QC reports - reports = reports.mix(ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([])) - reports = reports.mix(ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([])) - versions = versions.mix(ALIGN_STAR.out.versions) + reports = reports.mix(FASTQ_ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([])) + reports = 
reports.mix(FASTQ_ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([])) + versions = versions.mix(FASTQ_ALIGN_STAR.out.versions) // mix dna and rna in one channel bam_mapped = bam_mapped_dna.mix(bam_mapped_rna) @@ -192,7 +192,7 @@ workflow BAM_ALIGN { // Gather used softwares versions versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions) versions = versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP.out.versions) - versions = versions.mix(ALIGN_STAR.out.versions) + versions = versions.mix(FASTQ_ALIGN_STAR.out.versions) } diff --git a/subworkflows/local/gatk_preprocessing.nf b/subworkflows/local/bam_gatk_preprocessing/main.nf similarity index 99% rename from subworkflows/local/gatk_preprocessing.nf rename to subworkflows/local/bam_gatk_preprocessing/main.nf index 3a9fc40..56b3b96 100644 --- a/subworkflows/local/gatk_preprocessing.nf +++ b/subworkflows/local/bam_gatk_preprocessing/main.nf @@ -16,7 +16,7 @@ include { PREPARE_RECALIBRATION_CSV } from '../local/ include { RECALIBRATE } from '../nf-core/gatk4/recalibrate/main' include { RECALIBRATE_CSV } from '../local/recalibrate_csv' -workflow GATK_PREPROCESSING { +workflow BAM_GATK_PREPROCESSING { take: step // Mandatory, step to start with tools diff --git a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf index bd1fd3d..5e69f66 100644 --- a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf +++ b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf @@ -1,7 +1,7 @@ // // Core workflow of the RNA/DNA variant calling pipeline // -include { BAM_GATK_PREPROCESSING } from '../gatk_preprocessing/main' +include { BAM_GATK_PREPROCESSING } from '../bam_gatk_preprocessing/main' // For now only matched supported // include { BAM_VARIANT_CALLING } from '../variant_calling/main' // // Can we just call normalization here? 
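The groupKey pattern used in the BAM_ALIGN changes above deserves a brief note: wrapping the grouping meta map in groupKey() together with an expected group size lets groupTuple emit each sample as soon as all of its lane-level BAMs have arrived, instead of stalling until every upstream alignment task has finished. A stripped-down, illustrative sketch; the channel and meta field names are hypothetical:

    // Illustrative only: group lane-level BAMs per sample without waiting for all samples
    ch_lane_bams
        .map { meta, bam ->
            def key = groupKey(meta.subMap('sample') + [ id: meta.sample ], meta.num_lanes ?: 1)
            [ key, bam ]          // the key carries the expected number of items per group
        }
        .groupTuple()             // emits [ key, [ bam, bam, ... ] ] once num_lanes BAMs are seen
        .set { ch_sample_bams }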
@@ -41,9 +41,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { ch_reports = Channel.empty() ch_versions = Channel.empty() ch_genome_bam.dump(tag:"ch_genome_bam") - // STEP 1: Mapping done elsewhere - // STEP 2: GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery - GATK_PREPROCESSING( + // GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery + BAM_GATK_PREPROCESSING( step, // Mandatory, step to start with - should be mapping for second pass tools, ch_genome_bam, // channel: [mandatory] [meta, [bam]] diff --git a/subworkflows/local/core_workflow_pass.nf b/subworkflows/local/core_workflow_pass.nf deleted file mode 100644 index 4a2dc11..0000000 --- a/subworkflows/local/core_workflow_pass.nf +++ /dev/null @@ -1,157 +0,0 @@ -// -// Core workflow of the RNA/DNA variant calling pipeline -// -include { GATK_PREPROCESSING } from './gatk_preprocessing' -include { VARIANT_CALLING } from './variant_calling' -include { NORMALIZE } from './normalize_vcf_variants' -include { CONSENSUS } from './consensus' -include { ANNOTATE } from './annotate' -include { BASIC_FILTERING as FILTERING } from '../../modules/local/filter_variants' - - -workflow CORE_RUN { - take: - step // step to start with - tools - skip_tools - ch_input_sample // input from CSV if applicable - ch_genome_bam // input from mapping - fasta // fasta reference file - fasta_fai // fai for fasta file - dict // - dbsnp - dbsnp_tbi - pon - pon_tbi - germline_resource - germline_resource_tbi - intervals - intervals_for_preprocessing - ch_interval_list_split - intervals_bed_gz_tbi - intervals_bed_combined - vcf_consensus_dna // to repeat rescue consensus - vcfs_status_dna // to repeat rescue consensus - - main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - ch_genome_bam.dump(tag:"ch_genome_bam") - // STEP 1: Mapping done elsewhere - // STEP 2: GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery - GATK_PREPROCESSING( - step, // Mandatory, step to start with - should be mapping for second pass - tools, - ch_genome_bam, // channel: [mandatory] [meta, [bam]] - skip_tools, // channel: [mandatory] skip_tools - params.save_output_as_bam, // channel: [mandatory] save_output_as_bam - fasta, // channel: [mandatory] fasta - fasta_fai , // channel: [mandatory] fasta_fai - dict, - germline_resource, // channel: [optional] germline_resource - germline_resource_tbi, // channel: [optional] germline_resource_tbi - intervals, // channel: [mandatory] intervals/target regions - intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes - ch_interval_list_split, - ch_input_sample - ) - - ch_cram_variant_calling = GATK_PREPROCESSING.out.ch_cram_variant_calling - ch_versions = ch_versions.mix(GATK_PREPROCESSING.out.versions) - ch_reports = ch_reports.mix(GATK_PREPROCESSING.out.ch_reports) - - ch_cram_variant_calling.dump(tag:"[STEP8 RNA_FILTERING] ch_cram_variant_calling") - intervals_bed_gz_tbi.dump(tag:"[STEP8 RNA_FILTERING] intervals_bed_gz_tbi") - pon.dump(tag:"[STEP8 RNA_FILTERING] pon") - // STEP 3: VARIANT CALLING - VARIANT_CALLING( tools, - ch_cram_variant_calling, - fasta, - fasta_fai, - dbsnp, - dbsnp_tbi, - dict, - germline_resource, - germline_resource_tbi, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined, - pon, - pon_tbi, - ch_input_sample - ) - cram_vc_pair = 
VARIANT_CALLING.out.cram_vc_pair // use same crams for force calling later - vcf_to_normalize = VARIANT_CALLING.out.vcf - contamination = VARIANT_CALLING.out.contamination_table - segmentation = VARIANT_CALLING.out.segmentation_table - orientation = VARIANT_CALLING.out.artifact_priors - ch_versions = ch_versions.mix(VARIANT_CALLING.out.versions) - ch_reports = ch_reports.mix(VARIANT_CALLING.out.reports) - - - // STEP 4: NORMALIZE - NORMALIZE (tools, - vcf_to_normalize, - fasta, - ch_input_sample) - ch_versions = ch_versions.mix(NORMALIZE.out.versions) - vcf_normalized = NORMALIZE.out.vcf - - - // STEP 5: ANNOTATE - ANNOTATE(tools, - vcf_normalized, // second pass TODO: make it optional - fasta, - ch_input_sample // first pass - ) - - ch_versions = ch_versions.mix(ANNOTATE.out.versions) - ch_reports = ch_reports.mix(ANNOTATE.out.reports) - - // STEP 6: CONSENSUS - CONSENSUS ( tools, - ANNOTATE.out.maf_ann, - cram_vc_pair, // from previous variant calling - dict, - fasta, - fasta_fai, - germline_resource, - germline_resource_tbi, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined, - pon, - pon_tbi, - vcf_consensus_dna, // null when first pass - vcfs_status_dna, // null when first pass - ch_input_sample, - contamination, - segmentation, - orientation - ) - // STEP 7: FILTERING - if (tools.split(',').contains('filtering')) { - FILTERING(CONSENSUS.out.maf, fasta) - - FILTERING.out.maf.branch{ - dna: it[0].status < 2 - rna: it[0].status == 2 - }.set{filtered_maf} - filtered_maf_rna = filtered_maf.rna - filtered_maf_dna = filtered_maf.dna - } else{ - filtered_maf = Channel.empty() - filtered_maf_rna = Channel.empty() - filtered_maf_dna = Channel.empty() - - } - - emit: - vcf_consensus_dna = CONSENSUS.out.vcf_consensus_dna - vcfs_status_dna = CONSENSUS.out.vcfs_status_dna - maf = filtered_maf - maf_rna = filtered_maf_rna - maf_dna = filtered_maf_dna - versions = ch_versions // channel: [ versions.yml ] - reports = ch_reports -} \ No newline at end of file diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index 2d5e42b..b3e5bf5 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -1,6 +1,11 @@ // // PREPARE INTERVALS // + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main' include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' diff --git a/subworkflows/local/prepare_reference_and_intervals.nf b/subworkflows/local/prepare_reference_and_intervals.nf index 9e4ec35..e659593 100644 --- a/subworkflows/local/prepare_reference_and_intervals.nf +++ b/subworkflows/local/prepare_reference_and_intervals.nf @@ -4,7 +4,6 @@ include { PREPARE_GENOME } from './prepare_genome/main' include { PREPARE_INTERVALS } from './prepare_intervals/main' include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' workflow PREPARE_REFERENCE_AND_INTERVALS { @@ -77,18 +76,6 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list ch_versions = 
ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) - // STEP 0.D: Scatter one interval-list into many interval-files using GATK4 IntervalListTools - ch_interval_list_split = Channel.empty() - if (!params.skip_intervallisttools) { - GATK4_INTERVALLISTTOOLS( - ch_interval_list - ) - ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten() - } - else { - ch_interval_list_split = ch_interval_list - } - emit: fasta = fasta fasta_fai = fasta_fai @@ -101,7 +88,6 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { star_index = PREPARE_GENOME.out.star_index gtf = PREPARE_GENOME.out.gtf ch_interval_list = ch_interval_list - ch_interval_list_split = ch_interval_list_split intervals = intervals intervals_bed_gz_tbi = intervals_bed_gz_tbi intervals_for_preprocessing = intervals_for_preprocessing diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf new file mode 100644 index 0000000..6e3df33 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -0,0 +1,52 @@ +// +// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +// + +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_MARKDUPLICATES_PICARD { + + take: + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + ch_fai // channel: [ path(fai) ] + + main: + + ch_versions = Channel.empty() + + PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai ) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + + SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_bam_bai = PICARD_MARKDUPLICATES.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(bam) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml new file mode 100644 index 0000000..b924596 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_markduplicates_picard" +description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +keywords: + - markduplicates + - bam + - sam + - cram + +components: + - picard/markduplicates + - samtools/index + - samtools/stats 
+ - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools + +input: + - ch_bam: + description: | + BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of the reference genome fasta file + Structure: [ path(fai) ] +output: + - bam: + description: | + processed BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAM/CRAM/SAM samtools index + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@dmarron" + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_qc_picard/main.nf b/subworkflows/nf-core/bam_qc_picard/main.nf new file mode 100644 index 0000000..f42b600 --- /dev/null +++ b/subworkflows/nf-core/bam_qc_picard/main.nf @@ -0,0 +1,45 @@ +// +// Run QC steps on BAM/CRAM files using Picard +// + +include { PICARD_COLLECTMULTIPLEMETRICS } from '../../../modules/nf-core/picard/collectmultiplemetrics/main' +include { PICARD_COLLECTWGSMETRICS } from '../../../modules/nf-core/picard/collectwgsmetrics/main' +include { PICARD_COLLECTHSMETRICS } from '../../../modules/nf-core/picard/collecthsmetrics/main' + +workflow BAM_QC_PICARD { + take: + ch_bam_bai_bait_target // channel: [ val(meta), [bam], [bai], [bait_interval], [target_interval]] + ch_fasta // channel: [ val(meta), fasta ] + ch_fasta_fai // channel: [ val(meta), fasta_fai ] + ch_fasta_dict // channel: [ val(meta), fasta_dict ] + + main: + ch_versions = Channel.empty() + ch_coverage_metrics = Channel.empty() + + ch_bam_bai = ch_bam_bai_bait_target.map{meta, bam, bai, bait, target -> return [meta,bam,bai]} + + PICARD_COLLECTMULTIPLEMETRICS( ch_bam_bai, ch_fasta, ch_fasta_fai ) + ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) + + ch_bam_bai_bait_target_branched = ch_bam_bai_bait_target.branch { + hsmetrics : it.size == 5 && it[3] != [] && it[4] != [] + return it + wgsmetrics : true + return [ it[0], it[1], it[2] ] + } + + PICARD_COLLECTHSMETRICS( ch_bam_bai_bait_target_branched.hsmetrics, ch_fasta, ch_fasta_fai, ch_fasta_dict ) + ch_coverage_metrics = ch_coverage_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) + ch_versions = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions.first()) + + PICARD_COLLECTWGSMETRICS( ch_bam_bai_bait_target_branched.wgsmetrics, ch_fasta, ch_fasta_fai, [] ) + ch_versions = ch_versions.mix(PICARD_COLLECTWGSMETRICS.out.versions.first()) + ch_coverage_metrics = ch_coverage_metrics.mix(PICARD_COLLECTWGSMETRICS.out.metrics) + + emit: + coverage_metrics = ch_coverage_metrics // channel: [ val(meta), [ coverage_metrics ] ] + multiple_metrics = PICARD_COLLECTMULTIPLEMETRICS.out.metrics // channel: [ val(meta), [ multiple_metrics ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_qc_picard/meta.yml b/subworkflows/nf-core/bam_qc_picard/meta.yml new file mode 100644 index 0000000..c9d7aa6 --- /dev/null +++
b/subworkflows/nf-core/bam_qc_picard/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_qc_picard +description: Produces comprehensive statistics from BAM file +keywords: + - statistics + - counts + - hs_metrics + - wgs_metrics + - bam + - sam + - cram +components: + - picard/collectmultiplemetrics + - picard/collectwgsmetrics + - picard/collecthsmetrics +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM file index + pattern: "*.{bai,crai,sai}" + - bait_intervals: + type: optional file + description: An interval list or bed file that contains the locations of the baits used. + pattern: "baits.{interval_list,bed,bed.gz}" + - target_intervals: + type: optional file + description: An interval list or bed file that contains the locations of the targets. + pattern: "targets.{interval_list,bed,bed.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: optional file + description: Reference fasta file + pattern: "*.{fasta,fa,fna}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta_fai: + type: optional file + description: Reference fasta file index + pattern: "*.{fasta,fa,fna}.fai" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta_dict: + type: optional file + description: Reference fasta sequence dictionary + pattern: "*.{dict}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - coverage_metrics: + type: file + description: Alignment metrics files generated by picard CollectHsMetrics or CollectWgsMetrics + pattern: "*_metrics.txt" + - multiple_metrics: + type: file + description: Alignment metrics files generated by picard CollectMultipleMetrics + pattern: "*_{metrics}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@matthdsm" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 0000000..fc1c652 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 0000000..69c16be --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 0000000..44d4c01 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 0000000..87863b1 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf 
b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf new file mode 100644 index 0000000..94b1fce --- /dev/null +++ b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf @@ -0,0 +1,139 @@ +// +// Run GATK mutect2 in tumor normal mode, getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls +// + +include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main' +include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL} from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' + +workflow BAM_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING_GATK { + take: + ch_input // channel: [ val(meta), path(input), path(input_index), val(which_norm) ] + ch_fasta // channel: /path/to/reference/fasta + ch_fai // channel: /path/to/reference/fasta/index + ch_dict // channel: /path/to/reference/fasta/dictionary + ch_germline_resource // channel: /path/to/germline/resource + ch_germline_resource_tbi // channel: /path/to/germline/index + ch_panel_of_normals // channel: /path/to/panel/of/normals + ch_panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + ch_interval_file // channel: /path/to/interval/file + + main: + ch_versions = Channel.empty() + + // + // Perform variant calling using mutect2 module in tumor normal mode. + // + MUTECT2 ( + ch_input, + ch_fasta, + ch_fai, + ch_dict, + ch_germline_resource, + ch_germline_resource_tbi, + ch_panel_of_normals, + ch_panel_of_normals_tbi + ) + + ch_versions = ch_versions.mix(MUTECT2.out.versions) + + // + // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2. + // + LEARNREADORIENTATIONMODEL (MUTECT2.out.f1r2.collect()) + ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions) + + // + // Generate pileup summary tables using getpileupsummaries. The tumor sample should always be passed in as the first entry of the input and index lists of ch_input, + // to ensure correct file order for calculatecontamination. + // + ch_pileup_tumor_input = ch_input.combine(ch_interval_file).map { + meta, input_file, input_index, which_norm, intervals -> + [meta, input_file[0], input_index[0], intervals] + } + + ch_pileup_normal_input = ch_input.combine(ch_interval_file).map { + meta, input_file, input_index, which_norm, intervals -> + [meta, input_file[1], input_index[1], intervals] + } + + GETPILEUPSUMMARIES_TUMOR ( + ch_pileup_tumor_input, + ch_fasta, + ch_fai, + ch_dict, + ch_germline_resource, + ch_germline_resource_tbi + ) + + GETPILEUPSUMMARIES_NORMAL ( + ch_pileup_normal_input, + ch_fasta, + ch_fai, + ch_dict, + ch_germline_resource, + ch_germline_resource_tbi + ) + + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_TUMOR.out.versions.first()) + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions.first()) + + // + // Contamination and segmentation tables created using calculatecontamination on the pileup summary table.
+ // + ch_pileup_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() + ch_pileup_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() + ch_calccon_in = ch_pileup_tumor.join(ch_pileup_normal, failOnDuplicate: true, failOnMismatch: true) + CALCULATECONTAMINATION ( ch_calccon_in ) + ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) + + // + // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. + // + ch_vcf = MUTECT2.out.vcf.collect() + ch_tbi = MUTECT2.out.tbi.collect() + ch_stats = MUTECT2.out.stats.collect() + ch_orientation = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() + ch_segment = CALCULATECONTAMINATION.out.segmentation.collect() + ch_contamination = CALCULATECONTAMINATION.out.contamination.collect() + + //[] is used as a placeholder for optional input to specify the contamination estimate as a value, since the contamination table is used, this is not needed. + ch_contamination.add([]) + ch_filtermutect_in = ch_vcf + .join(ch_tbi, failOnDuplicate: true, failOnMismatch: true) + .join(ch_stats, failOnDuplicate: true, failOnMismatch: true) + .join(ch_orientation, failOnDuplicate: true, failOnMismatch: true) + .join(ch_segment, failOnDuplicate: true, failOnMismatch: true) + .join(ch_contamination, failOnDuplicate: true, failOnMismatch: true) + + FILTERMUTECTCALLS ( + ch_filtermutect_in, + ch_fasta, + ch_fai, + ch_dict + ) + ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions.first()) + + emit: + mutect2_vcf = MUTECT2.out.vcf.collect() // channel: [ val(meta), path(vcf) ] + mutect2_tbi = MUTECT2.out.tbi.collect() // channel: [ val(meta), path(tbi) ] + mutect2_stats = MUTECT2.out.stats.collect() // channel: [ val(meta), path(stats) ] + mutect2_f1r2 = MUTECT2.out.f1r2.collect() // channel: [ val(meta), path(f1r2) ] + + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() // channel: [ val(meta), path(artifactprior) ] + + pileup_table_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() // channel: [ val(meta), path(table) ] + pileup_table_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() // channel: [ val(meta), path(table) ] + + contamination_table = CALCULATECONTAMINATION.out.contamination.collect() // channel: [ val(meta), path(table) ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation.collect() // channel: [ val(meta), path(table) ] + + filtered_vcf = FILTERMUTECTCALLS.out.vcf.collect() // channel: [ val(meta), path(vcf) ] + filtered_tbi = FILTERMUTECTCALLS.out.tbi.collect() // channel: [ val(meta), path(tbi) ] + filtered_stats = FILTERMUTECTCALLS.out.stats.collect() // channel: [ val(meta), path(stats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml new file mode 100644 index 0000000..1f08e23 --- /dev/null +++ b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml @@ -0,0 +1,116 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_tumor_normal_somatic_variant_calling_gatk +description: | + Perform variant calling on a paired tumor normal set of samples using mutect2 tumor normal mode. + f1r2 output of mutect2 is run through learnreadorientationmodel to get the artifact priors. 
+ Run the input bam files through getpileupsummarries and then calculatecontamination to get the contamination and segmentation tables. + Filter the mutect2 output vcf using filtermutectcalls, artifact priors and the contamination & segmentation tables for additional filtering. +keywords: + - gatk4 + - mutect2 + - learnreadorientationmodel + - getpileupsummaries + - calculatecontamination + - filtermutectcalls + - variant_calling + - tumor_only + - filtered_vcf +components: + - gatk4/mutect2 + - gatk4/learnreadorientationmodel + - gatk4/getpileupsummaries + - gatk4/calculatecontamination + - gatk4/filtermutectcalls +input: + - ch_input: + description: | + The tumor and normal BAM files, in that order, also able to take CRAM as an input + Can contain an optional list of sample headers contained in the normal sample input file. + Structure: [ val(meta), path(input), path(input_index), val(which_norm) ] + - ch_fasta: + description: | + The reference fasta file + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of reference fasta file + Structure: [ path(fai) ] + - ch_dict: + description: | + GATK sequence dictionary + Structure: [ path(dict) ] + - ch_germline_resource: + description: | + Population vcf of germline sequencing, containing allele fractions. + Structure: [ path(germline_resources) ] + - ch_germline_resource_tbi: + description: | + Index file for the germline resource. + Structure: [ path(germline_resources_tbi) ] + - ch_panel_of_normals: + description: | + Vcf file to be used as a panel of normals. + Structure: [ path(panel_of_normals) ] + - ch_panel_of_normals_tbi: + description: | + Index for the panel of normals. + Structure: [ path(panel_of_normals_tbi) ] + - ch_interval_file: + description: | + File containing intervals. + Structure: [ path(interval_files) ] +output: + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] + - mutect2_vcf: + description: | + Compressed vcf file to be used for variant_calling. + Structure: [ val(meta), path(vcf) ] + - mutect2_tbi: + description: | + Indexes of the mutect2_vcf file + Structure: [ val(meta), path(tbi) ] + - mutect2_stats: + description: | + Stats files for the mutect2 vcf + Structure: [ val(meta), path(stats) ] + - mutect2_f1r2: + description: | + File containing information to be passed to LearnReadOrientationModel. + Structure: [ val(meta), path(f1r2) ] + - artifact_priors: + description: | + File containing artifact-priors to be used by filtermutectcalls. + Structure: [ val(meta), path(artifact_priors) ] + - pileup_table_tumor: + description: | + File containing the tumor pileup summary table, kept separate as calculatecontamination needs them individually specified. + Structure: [ val(meta), path(table) ] + - pileup_table_normal: + description: | + File containing the normal pileup summary table, kept separate as calculatecontamination needs them individually specified. + Structure: [ val(meta), path(table) ] + - contamination_table: + description: | + File containing the contamination table. + Structure: [ val(meta), path(table) ] + - segmentation_table: + description: | + Output table containing segmentation of tumor minor allele fractions. + Structure: [ val(meta), path(table) ] + - filtered_vcf: + description: | + File containing filtered mutect2 calls. + Structure: [ val(meta), path(vcf) ] + - filtered_tbi: + description: | + Tbi file that pairs with filtered vcf. 
+ Structure: [ val(meta), path(tbi) ] + - filtered_stats: + description: | + File containing statistics of the filtermutectcalls run. + Structure: [ val(meta), path(stats) ] +authors: + - "@GCJMackenzie" diff --git a/subworkflows/nf-core/fastq_align_bwa/main.nf b/subworkflows/nf-core/fastq_align_bwa/main.nf new file mode 100644 index 0000000..4ce4f88 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/main.nf @@ -0,0 +1,43 @@ +// +// Alignment with BWA +// + +include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BWA { + take: + ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ] + ch_index // channel (mandatory): [ val(meta2), path(index) ] + val_sort_bam // boolean (mandatory): true or false + ch_fasta // channel (optional) : [ path(fasta) ] + + main: + ch_versions = Channel.empty() + + // + // Map reads with BWA + // + + BWA_MEM ( ch_reads, ch_index, val_sort_bam ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_bwa/meta.yml b/subworkflows/nf-core/fastq_align_bwa/meta.yml new file mode 100644 index 0000000..618a69d --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: fastq_align_bwa +description: Align reads to a reference genome using bwa then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - bwa/mem + - bwa/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + description: | + BWA genome index files + Structure: [ val(meta2), path(index) ] + - val_sort_bam: + type: boolean + description: If true bwa modules sort resulting bam files + pattern: "true|false" + - ch_fasta: + type: file + description: | + Optional reference fasta file. 
This only needs to be given if val_sort_bam = true + Structure: [ path(fasta) ] + +output: + - bam_orig: + description: | + BAM file produced by bwa + Structure: [ val(meta), path(bam) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI index of the ordered BAM file + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@JoseEspinosa" diff --git a/subworkflows/nf-core/fastq_align_hisat2/main.nf b/subworkflows/nf-core/fastq_align_hisat2/main.nf new file mode 100644 index 0000000..a2ec1cf --- /dev/null +++ b/subworkflows/nf-core/fastq_align_hisat2/main.nf @@ -0,0 +1,44 @@ +include { HISAT2_ALIGN } from '../../../modules/nf-core/hisat2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_HISAT2 { + + take: + reads // channel: [ val(meta), [ reads ] ] + index // channel: /path/to/hisat2/index + splicesites // channel: /path/to/genome.splicesites.txt + ch_fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + + // + // Map reads with HISAT2 + // + HISAT2_ALIGN ( reads, index, splicesites ) + ch_versions = ch_versions.mix(HISAT2_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( HISAT2_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + + emit: + orig_bam = HISAT2_ALIGN.out.bam // channel: [ val(meta), bam ] + summary = HISAT2_ALIGN.out.summary // channel: [ val(meta), log ] + fastq = HISAT2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/nf-core/fastq_align_hisat2/meta.yml b/subworkflows/nf-core/fastq_align_hisat2/meta.yml new file mode 100644 index 0000000..2b05beb --- /dev/null +++ b/subworkflows/nf-core/fastq_align_hisat2/meta.yml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_hisat2" +description: Align reads to a reference genome using hisat2 then sort with samtools +keywords: + - align + - sort + - rnaseq + - genome + - fastq + - bam + - sam + - cram +components: + - hisat2/align + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: HISAT2 genome index file + pattern: "*.ht2" + - splicesites: + type: file + description: Splices sites in gtf file + pattern: "*.{txt}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - summary: + type: file + description: Aligment log + pattern: "*.log" + - fastq: + type: file + description: Optional output FASTQ file containing unaligned reads + pattern: ".fastq.gz" + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" diff --git a/subworkflows/nf-core/fastq_align_star/main.nf b/subworkflows/nf-core/fastq_align_star/main.nf new file mode 100644 index 0000000..8c11057 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/main.nf @@ -0,0 +1,49 @@ +include { STAR_ALIGN } from '../../../modules/nf-core/star/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_STAR { + + take: + ch_reads // channel: [ val(meta), [ path(reads) ] ] + ch_index // channel: [ path(index) ] + ch_gtf // channel: [ path(gtf) ] + val_star_ignore_sjdbgtf // boolean: when using pre-built STAR indices do not re-extract and use splice junctions from the GTF file + val_seq_platform // string : sequencing platform + val_seq_center // string : sequencing center + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( ch_reads, ch_index, ch_gtf, val_star_ignore_sjdbgtf, val_seq_platform, val_seq_center ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( STAR_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), path(bam) ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), path(log_final) ] + log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), path(log_out) ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), path(log_progress) ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), path(bam) ] + bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), path(bam) ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), path(fastq) ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), path(tab) ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + 
bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_star/meta.yml b/subworkflows/nf-core/fastq_align_star/meta.yml new file mode 100644 index 0000000..1f03985 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/meta.yml @@ -0,0 +1,108 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_star" +description: Align reads to a reference genome using bowtie2 then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - star/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + type: directory + description: STAR genome index + pattern: "star" + - ch_gtf: + type: file + description: | + GTF file used to set the splice junctions with the --sjdbGTFfile flag + pattern: "*.gtf" + - val_star_ignore_sjdbgtf: + type: boolean + description: | + If true the --sjdbGTFfile flag is set + pattern: "true|false" + - val_seq_platform: + type: string + description: | + Sequencing platform to be added to the bam header using the --outSAMattrRGline flag + - val_seq_center: + type: string + description: | + Sequencing center to be added to the bam header using the --outSAMattrRGline flag + - ch_fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa,fna}" + +output: + - orig_bam: + description: | + Output BAM file containing read alignments + Structure: [ val(meta), path(bam) ] + - log_final: + description: | + STAR final log file + Structure: [ val(meta), path(log_final) ] + - log_out: + description: | + STAR log out file + Structure: [ val(meta), path(log_out) ] + - log_progress: + description: | + STAR log progress file + Structure: [ val(meta), path(log_progress) ] + - bam_sorted: + description: | + Sorted BAM file of read alignments (optional) + Structure: [ val(meta), path(bam) ] + - bam_transcript: + description: | + Output BAM file of transcriptome alignment (optional) + Structure: [ val(meta), path(bam) ] + - fastq: + description: | + Unmapped FastQ files (optional) + Structure: [ val(meta), path(fastq) ] + - tab: + description: | + STAR output tab file(s) (optional) + Structure: [ val(meta), path(tab) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" diff --git 
a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 955383c..6462e15 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -37,36 +37,163 @@ def checkPathParamList = [ params.whitelist ] +// Validate input parameters +WorkflowRnadnavar.initialise(params, log) + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Check mandatory parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -for (param in checkPathParamList) { - if (param) { - file(param, checkIfExists: true) - } - } + +for (param in checkPathParamList) if (param) file(param, checkIfExists: true) + // Set input, can either be from --input or from automatic retrieval in lib/WorkflowRnadnavar.groovy -ch_input_sample = extract_csv(file(params.input)) +if (params.input) { + ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input") +} else { + ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input_restart") +} + +// Format samplesheet channel +input_sample = ch_from_samplesheet + .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> + // generate patient_sample key to group lanes together + [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] + } + .tap{ ch_with_patient_sample } // save the channel + .groupTuple() //group by patient_sample to get all lanes + .map { patient_sample, ch_items -> + // get number of lanes per sample + [ patient_sample, ch_items.size() ] + } + .combine(ch_with_patient_sample, by: 0) // for each entry add numLanes + .map { patient_sample, num_lanes, ch_items -> + + (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items + if (meta.lane && fastq_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + + def flowcell = flowcellLaneFromFastq(fastq_1) + // Don't use a random element for ID, it breaks resuming + def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] + + if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] + else { + error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // start from BAM + } else if (meta.lane && bam) { + if (params.step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + + if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // recalibration + } else if (table && cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // recalibration when skipping MarkDuplicates + } else if (table && bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] + else { + error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // prepare_recalibration or variant_calling + } else if (cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` + } else if (bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is 2 `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // annotation + } else if (vcf) { + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] + else { + error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + } else { + error("Missing or unknown field in csv file header. Please check your samplesheet") + } + } + + + +// Check params logic +if (params.step != 'annotate' && params.tools && !params.build_only_index) { + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
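+    // Both checks below use the filter/ifEmpty pattern: filtering input_sample on meta.status
+    // yields an empty channel when no tumor (status == 1) or no normal (status == 0) samples are
+    // present in the samplesheet, and only in that case does the ifEmpty{} closure run and raise
+    // the error naming the tools that need the missing sample type.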
+ input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples + if (!params.build_only_index) { + def tools_tumor = ['sage','mutect2', 'strelka', 'freebayes'] + def tools_tumor_asked = [] + tools_tumor.each{ tool -> + if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool) + } + if (!tools_tumor_asked.isEmpty()) { + error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + } + } + } + input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['sage','mutect2', 'strelka', 'freebayes'] // Will implement tumour only in the near future + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> + if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + } + } +} // Fails when wrongful extension for intervals file if (params.wes && !params.step == 'annotate') { - if (params.intervals && !params.intervals.endsWith("bed")) exit 1, "Target file specified with `--intervals` must be in BED format for targeted data" + if (params.intervals && !params.intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") -} else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("interval_list")) exit 1, "Intervals file must end with .bed or .interval_list" +} else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") -if(params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.split(',').contains("baserecalibrator"))){ - log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") -} // Fails when missing params for STAR -if (!params.star_index && !params.gtf && !params.gff) - { - exit 1, - "GTF|GFF3 file is required to build a STAR reference index! Use option --gtf|--gff to provide a GTF|GFF file." - } +if (!params.star_index && !params.gtf && !params.gff){ + exit 1,"GTF|GFF3 file is required to build a STAR reference index! Use option --gtf|--gff to provide a GTF|GFF file." 
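+    // (context) A STAR index is generated from the genome plus gene annotation: STAR derives its
+    // splice-junction database from the GTF/GFF, so the annotation is mandatory whenever no
+    // pre-built --star_index is supplied.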
+} // Warns when missing files or params for mutect2 if(params.tools && params.tools.split(',').contains('mutect2')){ @@ -91,23 +218,16 @@ if(!params.dbsnp && !params.known_indels){ // Fails when missing tools for variant_calling or annotate if ((params.step == 'variant_calling' || params.step == 'annotate') && !params.tools) { - log.error "Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/rnadnavar/parameters#tools" - exit 1 + error("Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/rnadnavar/parameters#tools") } -// Save AWS IGenomes file containing annotation version -def anno_readme = params.genomes[params.genome]?.readme -if (anno_readme && file(anno_readme).exists()) { - file("${params.outdir}/genome/").mkdirs() - file(anno_readme).copyTo("${params.outdir}/genome/") +if ((params.download_cache) && (params.snpeff_cache || params.vep_cache)) { + error("Please specify either `--download_cache` or `--vep_cache`.\nhttps://nf-co.re/rnadnavar/dev/usage#how-to-customise-vep-annotation") } -file("${params.outdir}").mkdirs() - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES + IMPORT LOCAL/NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ @@ -125,12 +245,13 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -// Input checks -include { INPUT_CHECK } from '../subworkflows/local/input_check' - // Build the genome index and other reference files -include { PREPARE_REFERENCE_AND_INTERVALS } from '../subworkflows/local/prepare_reference_and_intervals' -include { MAPPING } from '../subworkflows/local/mapping' +include { PREPARE_REFERENCE_AND_INTERVALS } from '../subworkflows/local/prepare_reference_and_intervals' +// Download annotation cache if needed +include { ENSEMBLVEP_DOWNLOAD } from '../modules/nf-core/ensemblvep/download/main' + +// Alignment +include { BAM_ALIGN } from '../subworkflows/local/bam_align/main' // Core subworkflows of the pipeline include { CORE_RUN } from '../subworkflows/local/core_workflow_pass' @@ -143,16 +264,30 @@ include { FILTERING_RNA } from '../subworkflows/local/rna_filtering' // // MODULE: Installed directly from nf-core/modules // +//FASTQC +include { FASTQC } from '../modules/nf-core/fastqc/main' +// MULTIQC +include { MULTIQC } from '../modules/nf-core/multiqc/main' // REPORTING VERSIONS OF SOFTWARE USED -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + + /* ======================================================================================== - VARIABLES + VARIABLES ======================================================================================== */ @@ -168,191 +303,94 @@ def multiqc_report = [] /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow RNADNAVAR { + // Initialise MULTIQC + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + // To gather all QC reports for MultiQC - ch_reports = Channel.empty() + reports = Channel.empty() // To gather used softwares versions for MultiQC - ch_versions = Channel.empty() + versions = Channel.empty() -// -// SUBWORKFLOW: Read in samplesheet, validate and stage input files -// - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! There is currently no tooling to help you write a sample sheet schema - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // Download cache if needed + // Assuming that if the cache is provided, the user has already downloaded it + ensemblvep_info = params.vep_cache ? 
[] : Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + if (params.download_cache) { + ENSEMBLVEP_DOWNLOAD(ensemblvep_info) + vep_cache = ENSEMBLVEP_DOWNLOAD.out.cache.collect().map{ meta, cache -> [ cache ] } -// STEP 0: Build reference and indices if needed - PREPARE_REFERENCE_AND_INTERVALS() - ch_versions = ch_versions.mix(PREPARE_REFERENCE_AND_INTERVALS.out.versions) - - // Reference and intervals variables - fasta = PREPARE_REFERENCE_AND_INTERVALS.out.fasta - fasta_fai = PREPARE_REFERENCE_AND_INTERVALS.out.fasta_fai - dict = PREPARE_REFERENCE_AND_INTERVALS.out.dict - germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource - germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi - intervals = PREPARE_REFERENCE_AND_INTERVALS.out.intervals - intervals_for_preprocessing = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_for_preprocessing - ch_interval_list_split = PREPARE_REFERENCE_AND_INTERVALS.out.ch_interval_list_split - // specific for variant calling - intervals_bed_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi - dbsnp = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp - dbsnp_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp_tbi - pon = PREPARE_REFERENCE_AND_INTERVALS.out.pon - pon_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.pon_tbi - germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource - germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi - - -// STEP 1: ALIGNMENT PREPROCESSING - MAPPING( - PREPARE_REFERENCE_AND_INTERVALS.out.bwa, - PREPARE_REFERENCE_AND_INTERVALS.out.bwamem2, - PREPARE_REFERENCE_AND_INTERVALS.out.dragmap, - PREPARE_REFERENCE_AND_INTERVALS.out.star_index, - PREPARE_REFERENCE_AND_INTERVALS.out.gtf, - ch_input_sample - ) - ch_reports = ch_reports.mix(MAPPING.out.reports) - ch_versions = ch_versions.mix(MAPPING.out.versions) - - // 5 MAIN STEPS: GATK PREPROCESING - VARIANT CALLING - NORMALIZATION - CONSENSUS - ANNOTATION - CORE_RUN( - params.step, - params.tools, - params.skip_tools, - ch_input_sample, // input from CSV if applicable - MAPPING.out.ch_bam_mapped, // input from mapping - fasta, // fasta reference file - fasta_fai, // fai for fasta file - dict, // - dbsnp, - dbsnp_tbi, - pon, - pon_tbi, - germline_resource, - germline_resource_tbi, - intervals, - intervals_for_preprocessing, - ch_interval_list_split, - intervals_bed_gz_tbi, - intervals_bed_combined, - null, // to repeat rescue consensus - null // to repeat rescue consensus - ) - - - ch_reports = ch_reports.mix(CORE_RUN.out.reports) - ch_versions = ch_versions.mix(CORE_RUN.out.versions) - - if (params.tools.split(',').contains('second_run')) { - PREPARE_SECOND_RUN(ch_input_sample, // input from CSV if applicable - params.tools, - CORE_RUN.out.maf, - MAPPING.out.bwa_bams, // for dna re-alignments - MAPPING.out.star_bams, // for rnare-alignments - fasta, - fasta_fai, - dict, - PREPARE_REFERENCE_AND_INTERVALS.out.hisat2_index, - PREPARE_REFERENCE_AND_INTERVALS.out.splicesites - ) // do mapping with hisat2 - - ch_reports = ch_reports.mix(PREPARE_SECOND_RUN.out.reports) - ch_versions = ch_versions.mix(PREPARE_SECOND_RUN.out.versions) - SECOND_RUN( - "markduplicates", // step to start with - params.tools, - "baserecalibrator,baserecalibrator_report,contamination,learnreadorientation", - ch_input_sample, // input from CSV if applicable - 
PREPARE_SECOND_RUN.out.ch_bam_mapped, // input from mapping - fasta, // fasta reference file - fasta_fai, // fai for fasta file - dict, // - dbsnp, - dbsnp_tbi, - pon, - pon_tbi, - germline_resource, - germline_resource_tbi, - intervals, - intervals_for_preprocessing, - ch_interval_list_split, - intervals_bed_gz_tbi, - intervals_bed_combined, - CORE_RUN.out.vcf_consensus_dna, // to repeat rescue consensus - CORE_RUN.out.vcfs_status_dna // to repeat rescue consensus - ) - - ch_reports = ch_reports.mix(SECOND_RUN.out.reports) - ch_versions = ch_versions.mix(SECOND_RUN.out.versions) - second_run_maf = SECOND_RUN.out.maf_rna - } else{ - second_run_maf = Channel.empty() + versions = versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions) } - FILTERING_RNA(params.tools, - CORE_RUN.out.maf_rna, - second_run_maf, - fasta) - ch_versions = ch_versions.mix(FILTERING_RNA.out.versions) +// STEP 0: Build reference and indices if needed + PREPARE_REFERENCE_AND_INTERVALS() + versions = versions.mix(PREPARE_REFERENCE_AND_INTERVALS.out.versions) + + // Reference and intervals variables + fasta = PREPARE_REFERENCE_AND_INTERVALS.out.fasta + fasta_fai = PREPARE_REFERENCE_AND_INTERVALS.out.fasta_fai + dict = PREPARE_REFERENCE_AND_INTERVALS.out.dict + germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource + germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi + intervals = PREPARE_REFERENCE_AND_INTERVALS.out.intervals + intervals_for_preprocessing = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_for_preprocessing + // specific for variant calling + intervals_bed_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi + dbsnp = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp + dbsnp_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp_tbi + pon = PREPARE_REFERENCE_AND_INTERVALS.out.pon + pon_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.pon_tbi + germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource + germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi -// REPORTING - ch_version_yaml = Channel.empty() +// STEP 1: ALIGNMENT PREPROCESSING + BAM_ALIGN( + PREPARE_REFERENCE_AND_INTERVALS.out.bwa, + PREPARE_REFERENCE_AND_INTERVALS.out.bwamem2, + PREPARE_REFERENCE_AND_INTERVALS.out.dragmap, + PREPARE_REFERENCE_AND_INTERVALS.out.star_index, + PREPARE_REFERENCE_AND_INTERVALS.out.gtf, + input_sample + ) + reports = reports.mix(BAM_ALIGN.out.reports) + versions = versions.mix(BAM_ALIGN.out.versions) + + + version_yaml = Channel.empty() if (!(params.skip_tools && params.skip_tools.split(',').contains('versions'))) { - CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml')) - ch_version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() + CUSTOM_DUMPSOFTWAREVERSIONS(versions.unique().collectFile(name: 'collated_versions.yml')) + version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() } - // MODULE: MultiQC - // Present summary of reads, alignment, duplicates, BSQR stats for all samples as well as workflow summary/parameters as single report if (!(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))) { workflow_summary = WorkflowRnadnavar.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = 
Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() + methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) + + multiqc_files = Channel.empty() + multiqc_files = multiqc_files.mix(version_yaml) + multiqc_files = multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + multiqc_files = multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + multiqc_files = multiqc_files.mix(reports.collect().ifEmpty([])) + + MULTIQC(multiqc_files.collect(), ch_multiqc_config.collect().ifEmpty([]), ch_multiqc_custom_config.collect().ifEmpty([]), ch_multiqc_logo.collect().ifEmpty([])) + + multiqc_report = MULTIQC.out.report.toList() + versions = versions.mix(MULTIQC.out.versions) + } } /* @@ -362,13 +400,9 @@ workflow RNADNAVAR { */ workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } + if (params.email || params.email_on_fail) NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } + if (params.hook_url) NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } /* @@ -376,241 +410,6 @@ workflow.onComplete { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Function to extract information (meta data + file(s)) from csv file(s) -def extract_csv(csv_file) { - - // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so. - file(csv_file).withReader('UTF-8') { reader -> - def line, numberOfLinesInSampleSheet = 0; - while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} - if (numberOfLinesInSampleSheet < 2) { - log.error "Samplesheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." - System.exit(1) - } - } - - // Additional check of sample sheet: - // 1. If params.step == "mapping", then each row should specify a lane and the same combination of patient, sample and lane shouldn't be present in different rows. - // 2. The same sample shouldn't be listed for different patients. 
- def patient_sample_lane_combinations_in_samplesheet = [] - def sample2patient = [:] - - Channel.from(csv_file).splitCsv(header: true) - .map{ row -> - if (params.step == "mapping") { - if ( !row.lane ) { // This also handles the case where the lane is left as an empty string - log.error('The sample sheet should specify a lane for patient "' + row.patient.toString() + '" and sample "' + row.sample.toString() + '".') - System.exit(1) - } - def patient_sample_lane = [row.patient.toString(), row.sample.toString(), row.lane.toString()] - if (patient_sample_lane in patient_sample_lane_combinations_in_samplesheet) { - log.error('The patient-sample-lane combination "' + row.patient.toString() + '", "' + row.sample.toString() + '", and "' + row.lane.toString() + '" is present multiple times in the sample sheet.') - System.exit(1) - } else { - patient_sample_lane_combinations_in_samplesheet.add(patient_sample_lane) - } - } - if (!sample2patient.containsKey(row.sample.toString())) { - sample2patient[row.sample.toString()] = row.patient.toString() - } else if (sample2patient[row.sample.toString()] != row.patient.toString()) { - log.error('The sample "' + row.sample.toString() + '" is registered for both patient "' + row.patient.toString() + '" and "' + sample2patient[row.sample.toString()] + '" in the sample sheet.') - System.exit(1) - } - } - // keep count of the number of samples - sample_count_all = 0 - sample_count_normal = 0 - sample_count_tumor = 0 - sample_count_rna = 0 - - Channel.from(csv_file).splitCsv(header: true) - // Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination - .map{ row -> - sample_count_all++ - if (!(row.patient && row.sample)){ - log.error "Missing field in csv file header. The csv file must have fields named 'patient' and 'sample'." - System.exit(1) - } - [[row.patient.toString(), row.sample.toString()], row] - }.groupTuple() - .map{ meta, rows -> - size = rows.size() - [rows, size] - }.transpose() - .map{ row, numLanes -> //from here do the usual thing for csv parsing - - def meta = [:] - - // Meta data to identify samplesheet - // Both patient and sample are mandatory - // Several sample can belong to the same patient - // Sample should be unique for the patient - if (row.patient) meta.patient = row.patient.toString() - if (row.sample) meta.sample = row.sample.toString() - - // If no status specified, sample is assumed normal - if (row.status) meta.status = row.status.toInteger() - else meta.status = 0 - - if (meta.status == 0) sample_count_normal++ - else if (meta.status == 1) sample_count_tumor++ // TODO check if elif is valid in here - else sample_count_rna++ - // TODO: think about what other condition we will have here now - // Two checks for ensuring that the pipeline stops with a meaningful error message if - // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and - // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
- if ((sample_count_normal == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no tumor-samples - def tools_tumor = ['sage', 'mutect2', 'strelka2'] // This will be applied to tumour DNA and tumour RNA - def tools_tumor_asked = [] - tools_tumor.each{ tool -> - if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool) - } - if (!tools_tumor_asked.isEmpty()) { - log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) - System.exit(1) - } - // TODO no need to do anything with the germline - can this be removed? - } else if ((sample_count_tumor == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples - def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller'] - def requested_tools_requiring_normal_samples = [] - tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> - if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) - } - if (!requested_tools_requiring_normal_samples.isEmpty()) { - log.error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) - System.exit(1) - } - } - - // mapping with fastq - if (row.lane && row.fastq_2) { - meta.id = "${row.sample}-${row.lane}".toString() - def fastq_1 = file(row.fastq_1, checkIfExists: true) - def fastq_2 = file(row.fastq_2, checkIfExists: true) - def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - - def flowcell = flowcellLaneFromFastq(fastq_1) - //Don't use a random element for ID, it breaks resuming - def read_group = "\"@RG\\tID:${flowcell}.${row.sample}.${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.patient}_${row.sample}\\tLB:${row.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - if (meta.status == 2) { // STAR does not need '@RG' - read_group = "ID:${flowcell}.${row.sample}.${row.lane} ${CN}PU:${row.lane} SM:${row.patient}_${row.sample} LB:${row.sample} DS:${params.fasta} PL:${params.seq_platform}" - } - - meta.numLanes = numLanes.toInteger() - meta.read_group = read_group.toString() - meta.data_type = 'fastq' - - meta.size = 1 // default number of splitted fastq - - if (params.step == 'mapping') return [meta, [fastq_1, fastq_2]] - else { - log.error "Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - - // start from BAM - } else if (row.lane && row.bam) { - if (!row.bai) { - log.error "BAM index (bai) should be provided." - } - meta.id = "${row.sample}-${row.lane}".toString() - def bam = file(row.bam, checkIfExists: true) - def bai = file(row.bai, checkIfExists: true) - def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" - if (meta.status == 2) { // STAR does not need '@RG' - read_group = "ID:${row.sample}_${row.lane} ${CN}PU:${row.lane} SM:${row.sample} LB:${row.sample} PL:${params.seq_platform}" - } - - meta.numLanes = numLanes.toInteger() - meta.read_group = read_group.toString() - meta.data_type = 'bam' - - meta.size = 1 // default number of splitted fastq - - if (params.step != 'annotate') return [meta, bam, bai] - else { - log.error "Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - - // recalibration - } else if (row.table && row.cram) { - meta.id = meta.sample - def cram = file(row.cram, checkIfExists: true) - def crai = file(row.crai, checkIfExists: true) - def table = file(row.table, checkIfExists: true) - - meta.data_type = 'cram' - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai, table] - else { - log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - - // recalibration when skipping MarkDuplicates - } else if (row.table && row.bam) { - meta.id = meta.sample - def bam = file(row.bam, checkIfExists: true) - def bai = file(row.bai, checkIfExists: true) - def table = file(row.table, checkIfExists: true) - - meta.data_type = 'bam' - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai, table] - else { - log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - - // prepare_recalibration or variant_calling - } else if (row.cram) { - meta.id = meta.sample - def cram = file(row.cram, checkIfExists: true) - def crai = file(row.crai, checkIfExists: true) - - meta.data_type = 'cram' - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai] - else { - log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - - // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` - } else if (row.bam) { - meta.id = meta.sample - def bam = file(row.bam, checkIfExists: true) - def bai = file(row.bai, checkIfExists: true) - - meta.data_type = 'bam' - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai] - else { - log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - - // annotation - } else if (row.vcf) { - meta.id = meta.sample - def vcf = file(row.vcf, checkIfExists: true) - - meta.data_type = 'vcf' - meta.variantcaller = row.variantcaller ?: '' - - if (params.step == 'annotate') return [meta, vcf] - else { - log.error "Samplesheet contains vcf files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations" - System.exit(1) - } - } else { - log.error "Missing or unknown field in csv file header. Please check your samplesheet" - System.exit(1) - } - } -} // Parse first line of a FASTQ file, return the flowcell id and lane number. def flowcellLaneFromFastq(path) { // expected format: From 97c41fab917ff2e18a6a8c4fa80b2f24767ca682 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 27 Aug 2023 18:15:00 +0100 Subject: [PATCH 15/56] Finished update of preprocessing steps. Fix issue with intervals and created a patch for splincigarreads. --- conf/base.config | 5 - conf/modules/alignment/bam_align.config | 49 +- .../gatk4_preprocessing/markduplicates.config | 37 +- .../prepare_intervals.config | 10 + modules.json | 6 +- .../nf-core/gatk4/splitncigarreads/main.nf | 30 +- modules/nf-core/samtools/stats/main.nf | 4 +- subworkflows/local/bam_align/main.nf | 12 +- subworkflows/local/bam_applybqsr/main.nf | 47 ++ .../local/bam_baserecalibrator/main.nf | 54 ++ .../local/bam_gatk_preprocessing/main.nf | 608 +++++++++--------- subworkflows/local/bam_markduplicates/main.nf | 43 ++ .../local/bam_splitncigarreads/main.nf | 51 ++ .../local/bam_variant_calling/main.nf | 159 +++++ .../channel_applybqsr_create_csv/main.nf | 23 + .../main.nf | 49 ++ .../channel_markduplicates_create_csv/main.nf | 28 + .../main.nf | 28 + .../local/cram_merge_index_samtools/main.nf | 47 ++ .../local/cram_qc_mosdepth_samtools/main.nf | 4 +- .../local/prepare_reference_and_intervals.nf | 105 --- .../prepare_reference_and_intervals/main.nf | 120 ++++ subworkflows/local/variant_calling.nf | 127 ---- subworkflows/nf-core/splitncigar.nf | 43 -- 24 files changed, 1018 insertions(+), 671 deletions(-) create mode 100644 subworkflows/local/bam_applybqsr/main.nf create mode 100644 subworkflows/local/bam_baserecalibrator/main.nf create mode 100644 subworkflows/local/bam_markduplicates/main.nf create mode 100644 subworkflows/local/bam_splitncigarreads/main.nf create mode 100644 subworkflows/local/bam_variant_calling/main.nf create mode 100644 subworkflows/local/channel_applybqsr_create_csv/main.nf create mode 100644 subworkflows/local/channel_baserecalibrator_create_csv/main.nf create mode 100644 subworkflows/local/channel_markduplicates_create_csv/main.nf create mode 100644 subworkflows/local/channel_splitncigarreads_create_csv/main.nf create mode 100644 subworkflows/local/cram_merge_index_samtools/main.nf delete mode 100644 subworkflows/local/prepare_reference_and_intervals.nf create mode 100644 subworkflows/local/prepare_reference_and_intervals/main.nf delete mode 100644 subworkflows/local/variant_calling.nf delete mode 100644 subworkflows/nf-core/splitncigar.nf diff --git a/conf/base.config b/conf/base.config index 15c8de3..df6e9e9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -149,11 +149,6 @@ process { time = { check_max( 12.h * task.attempt, 'time' ) } } - - withName: 'SAMTOOLS_STATS.*' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 21.GB * task.attempt, 'memory')} - } withName: 'SAGE|SAMTOOLS_BAMTOCRAM' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 41.GB * task.attempt, 'memory')} diff --git a/conf/modules/alignment/bam_align.config b/conf/modules/alignment/bam_align.config index 602651e..c1e5094 100644 --- a/conf/modules/alignment/bam_align.config +++ b/conf/modules/alignment/bam_align.config @@ -52,14 +52,7 @@ process { // bam_align ] } - 
withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { - // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof - // However if it's skipped, reads need to be coordinate-sorted - // Only name sort if Spark for Markduplicates + duplicate marking is not skipped - ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } - } - - withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" { + withName: "BWAMEM.*_MEM" { // Using -B 3 for tumor samples ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } } @@ -174,36 +167,6 @@ process { // bam_align ] } - withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]) : "${meta.id}" } - publishDir = [ - path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.flagstat", - enabled: params.save_align_intermeds - ] - } - - - withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_STATS' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.')[1]) : "${meta.id}" } - publishDir = [ - path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.stats", - enabled: params.save_align_intermeds - ] - } - - withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.')[1]) : "${meta.id}" } - publishDir = [ - path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.idxstats", - enabled: params.save_align_intermeds - ] - } withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { ext.args = params.bam_csi_index ? '-c' : '' @@ -220,10 +183,10 @@ process { // bam_align // Second run alignment if (params.skip_tools && !params.skip_tools.split(',').contains('second_run')){ - withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } publishDir = [ - path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/preprocessing/hisat2/" }, mode: params.publish_dir_mode, pattern: "*.bam", enabled: params.save_align_intermeds @@ -241,17 +204,17 @@ process { // bam_align ] } - withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } publishDir = [ - path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/reports/samtools/" }, mode: params.publish_dir_mode, pattern: "*.{bai,csi}", enabled: params.save_align_intermeds ] } - withName: '.*:ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { + withName: '.*:ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(input.name.tokenize('.aligned_hs2')[1]) : "${meta.id}.aligned_hs2" } publishDir = [ path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, diff --git a/conf/modules/gatk4_preprocessing/markduplicates.config b/conf/modules/gatk4_preprocessing/markduplicates.config index bb12432..5a5ab9c 100644 --- a/conf/modules/gatk4_preprocessing/markduplicates.config +++ b/conf/modules/gatk4_preprocessing/markduplicates.config @@ -32,20 +32,6 @@ process { // markduplicates pattern: "*{cram,crai}" ] } - // TODO: is this necessary? the id should be different -// withName: '.*:BAM_TO_CRAM_SNCR:BAM_TO_CRAM' { -// // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) -// // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram -// // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram -// ext.args = "-C" -// ext.prefix = { "${meta.id}.converted." } -// publishDir = [ -// enabled: !params.save_output_as_bam, -// mode: params.publish_dir_mode, -// path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, -// pattern: "*{cram,crai}" -// ] -// } withName: 'BAM_TO_CRAM_MAPPING' { // Run only when mapping should be saved as CRAM or when no MD is done @@ -60,15 +46,6 @@ process { // markduplicates ] } - withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { - ext.prefix = { "${meta.id}.md.cram" } - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/markduplicates/${meta.id}" }, - pattern: "*metrics" - ] - } // TODO: do we need to create index here? (--CREATE_INDEX true) withName: 'GATK4_MARKDUPLICATES' { ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT' @@ -90,17 +67,6 @@ process { // markduplicates ] } - withName: 'GATK4_MARKDUPLICATES_SPARK' { - ext.args = '--remove-sequencing-duplicates false -VS LENIENT' - ext.prefix = { "${meta.id}.md.cram" } - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, - pattern: "*{cram,crai}" - ] - } - withName: 'INDEX_MARKDUPLICATES' { publishDir = [ enabled: !params.save_output_as_bam, @@ -110,8 +76,7 @@ process { // markduplicates ] } - - withName: 'NFCORE_RNADNAVAR:RNADNAVAR:CRAM_TO_BAM' { + withName: '.*:BAM_GATK_PREPROCESSING:CRAM_TO_BAM' { ext.prefix = { "${meta.id}.md" } ext.when = { params.save_output_as_bam } publishDir = [ diff --git a/conf/modules/prepare_resources/prepare_intervals.config b/conf/modules/prepare_resources/prepare_intervals.config index 9ee572e..5c68522 100644 --- a/conf/modules/prepare_resources/prepare_intervals.config +++ b/conf/modules/prepare_resources/prepare_intervals.config @@ -61,4 +61,14 @@ process { // prepare_intervals saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + + withName: 'TABIX_BGZIPTABIX_INTERVAL_COMBINED' { + ext.prefix = {"${meta.id}"} + publishDir = [ + enabled: (params.save_reference || params.build_only_index), + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed.gz" + ] + } } \ No newline at end of file diff --git a/modules.json b/modules.json index fc17001..cfa52f9 100644 --- a/modules.json +++ b/modules.json @@ -213,7 +213,8 @@ "gatk4/splitncigarreads": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/gatk4/splitncigarreads/gatk4-splitncigarreads.diff" }, "gatk4/variantfiltration": { "branch": "master", @@ -348,7 +349,8 @@ "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": ["bam_stats_samtools", "modules"], + "patch": "modules/nf-core/samtools/stats/samtools-stats.diff" }, "samtools/view": { "branch": "master", diff --git a/modules/nf-core/gatk4/splitncigarreads/main.nf b/modules/nf-core/gatk4/splitncigarreads/main.nf index 0178976..2bb397a 100644 --- a/modules/nf-core/gatk4/splitncigarreads/main.nf +++ b/modules/nf-core/gatk4/splitncigarreads/main.nf @@ -4,17 +4,20 @@ process GATK4_SPLITNCIGARREADS { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" input: - tuple val(meta), path(bam), path(bai), path(intervals) + tuple val(meta), path(input), path(input_index), path(intervals) path fasta path fai path dict output: - tuple val(meta), path('*.bam'), emit: bam + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true path "versions.yml" , emit: versions when: @@ -22,7 +25,11 @@ process GATK4_SPLITNCIGARREADS { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + + prefix = task.ext.prefix ?: "${meta.id}.bam" + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + def interval_command = intervals ? "--intervals $intervals" : "" def avail_mem = 3072 @@ -33,16 +40,25 @@ process GATK4_SPLITNCIGARREADS { } """ gatk --java-options "-Xmx${avail_mem}M" SplitNCigarReads \\ - --input $bam \\ - --output ${prefix}.bam \\ + --input $input \\ + --output ${prefix_bam} \\ --reference $fasta \\ $interval_command \\ --tmp-dir . 
\\ $args + # If CRAM files are requested as output, then run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 4a2607d..a9f7c1d 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -8,7 +8,7 @@ process SAMTOOLS_STATS { 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(bam), path(bai) tuple val(meta2), path(fasta) output: @@ -27,7 +27,7 @@ stats \\ --threads ${task.cpus} \\ ${reference} \\ - ${input} \\ + ${bam} \\ > ${prefix}.stats cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/local/bam_align/main.nf b/subworkflows/local/bam_align/main.nf index a4ca5c6..ea3c073 100644 --- a/subworkflows/local/bam_align/main.nf +++ b/subworkflows/local/bam_align/main.nf @@ -37,6 +37,12 @@ workflow BAM_ALIGN { reports = Channel.empty() versions = Channel.empty() + // Initialise outputs to emit + bam_mapped_rna = Channel.empty() + bam_mapped_dna = Channel.empty() + bam_mapped = Channel.empty() + cram_mapped = Channel.empty() + // Gather index for mapping given the chosen aligner for DNA index_alignement = params.aligner == "bwa-mem" ? bwa : params.aligner == "bwa-mem2" ? bwamem2 : @@ -181,6 +187,8 @@ workflow BAM_ALIGN { BAM_MERGE_INDEX_SAMTOOLS(bam_mapped) BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai) + cram_mapped = BAM_TO_CRAM_MAPPING.out.alignment_index + // Create CSV to restart from this step params.save_output_as_bam ? CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai) : CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.alignment_index) @@ -198,10 +206,10 @@ workflow BAM_ALIGN { emit: // TODO: do I need to output RNA and DNA separately or can I directly use bam_mapped but separating them?
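One way to resolve the TODO above is to emit only the combined bam_mapped channel and let downstream consumers split it themselves. A minimal sketch, assuming the status convention used elsewhere in this pipeline (0/1 for DNA normal/tumour, 2 for RNA) and hypothetical sample names:

workflow {
    // Hypothetical stand-in for the single bam_mapped channel emitted by BAM_ALIGN
    bam_mapped = Channel.of(
        [ [ id:'normal1', status:0 ], 'normal1.bam' ],
        [ [ id:'rna1',    status:2 ], 'rna1.bam' ]
    )

    // Split by meta.status instead of carrying separate RNA/DNA output channels
    bam_mapped_status = bam_mapped.branch{ meta, bam ->
        dna: meta.status < 2
        rna: meta.status == 2
    }

    bam_mapped_status.dna.view{ meta, bam -> "DNA: ${meta.id}" }
    bam_mapped_status.rna.view{ meta, bam -> "RNA: ${meta.id}" }
}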
- bam_mapped_rna = bam_mapped_rna //second pass with RG tags + bam_mapped_rna = bam_mapped_rna // second pass with RG tags bam_mapped_dna = bam_mapped_dna // second pass with RG tags bam_mapped = bam_mapped // for preprocessing + cram_mapped = cram_mapped // for preprocessing reports = reports versions = versions - } \ No newline at end of file diff --git a/subworkflows/local/bam_applybqsr/main.nf b/subworkflows/local/bam_applybqsr/main.nf new file mode 100644 index 0000000..e07cc4f --- /dev/null +++ b/subworkflows/local/bam_applybqsr/main.nf @@ -0,0 +1,47 @@ +// +// RECALIBRATE +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_APPLYBQSR } from '../../../modules/nf-core/gatk4/applybqsr/main' +include { CRAM_MERGE_INDEX_SAMTOOLS } from '../cram_merge_index_samtools/main' + +workflow BAM_APPLYBQSR { + take: + cram // channel: [mandatory] [ meta, cram, crai, recal ] + dict // channel: [mandatory] [ dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram, crai, recal, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, recal, intervals ] } + + // RUN APPLYBQSR + GATK4_APPLYBQSR(cram_intervals, fasta, fasta_fai, dict.map{ meta, it -> [ it ] }) + + // Gather the recalibrated cram files + cram_to_merge = GATK4_APPLYBQSR.out.cram.map{ meta, cram -> [ groupKey(meta, meta.num_intervals), cram ] }.groupTuple() + + // Merge and index the recalibrated cram files + CRAM_MERGE_INDEX_SAMTOOLS(cram_to_merge, fasta, fasta_fai) + + cram_recal = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai + // Remove no longer necessary field: num_intervals + .map{ meta, cram, crai -> [ meta - meta.subMap('num_intervals'), cram, crai ] } + + // Gather versions of all tools used + versions = versions.mix(GATK4_APPLYBQSR.out.versions) + versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions) + + emit: + cram = cram_recal // channel: [ meta, cram, crai ] + + versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/bam_baserecalibrator/main.nf b/subworkflows/local/bam_baserecalibrator/main.nf new file mode 100644 index 0000000..0c9406a --- /dev/null +++ b/subworkflows/local/bam_baserecalibrator/main.nf @@ -0,0 +1,54 @@ +// +// PREPARE RECALIBRATION +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_BASERECALIBRATOR } from '../../../modules/nf-core/gatk4/baserecalibrator/main' +include { GATK4_GATHERBQSRREPORTS } from '../../../modules/nf-core/gatk4/gatherbqsrreports/main' + +workflow BAM_BASERECALIBRATOR { + take: + cram // channel: [mandatory] [ meta, cram_markduplicates, crai ] + dict // channel: [mandatory] [ dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) + known_sites // channel: [optional] [ known_sites ] + known_sites_tbi // channel: [optional] [ known_sites_tbi ] + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather 
strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals ] } + + // RUN BASERECALIBRATOR + GATK4_BASERECALIBRATOR(cram_intervals, fasta, fasta_fai, dict.map{ meta, it -> [ it ] }, known_sites, known_sites_tbi) + + // Figuring out if there is one or more table(s) from the same sample + table_to_merge = GATK4_BASERECALIBRATOR.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{ + // Use meta.num_intervals to asses number of intervals + single: it[0].num_intervals <= 1 + multiple: it[0].num_intervals > 1 + } + + // Only when using intervals + GATK4_GATHERBQSRREPORTS(table_to_merge.multiple) + + // Mix intervals and no_intervals channels together + table_bqsr = GATK4_GATHERBQSRREPORTS.out.table.mix(table_to_merge.single.map{ meta, table -> [ meta, table[0] ] }) + // Remove no longer necessary field: num_intervals + .map{ meta, table -> [ meta - meta.subMap('num_intervals'), table ] } + + // Gather versions of all tools used + versions = versions.mix(GATK4_BASERECALIBRATOR.out.versions) + versions = versions.mix(GATK4_GATHERBQSRREPORTS.out.versions) + + emit: + table_bqsr // channel: [ meta, table ] + + versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/bam_gatk_preprocessing/main.nf b/subworkflows/local/bam_gatk_preprocessing/main.nf index 56b3b96..6a6f00b 100644 --- a/subworkflows/local/bam_gatk_preprocessing/main.nf +++ b/subworkflows/local/bam_gatk_preprocessing/main.nf @@ -1,327 +1,341 @@ // // GATK pre-processing best practices // -include { SAMTOOLS_CONVERT as SAMTOOLS_CRAMTOBAM } from '../../modules/nf-core/modules/samtools/convert/main' -include { SAMTOOLS_CONVERT as SAMTOOLS_CRAMTOBAM_RECAL } from '../../modules/nf-core/modules/samtools/convert/main' -include { SAMTOOLS_CONVERT as SAMTOOLS_CRAMTOBAM_SNCR } from '../../modules/nf-core/modules/samtools/convert/main' -include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM } from '../../modules/nf-core/modules/samtools/convert/main' -include { BAM_MARKDUPLICATES as MARKDUPLICATES } from '../nf-core/gatk4/markduplicates/main' -include { MARKDUPLICATES_CSV } from '../local/markduplicates_csv' -include { SPLITNCIGAR } from '../nf-core/splitncigar' // Splits reads that contain Ns in their cigar string -include { BAM_TO_CRAM } from '../nf-core/bam_to_cram' -include { BAM_TO_CRAM as BAM_TO_CRAM_SNCR } from '../nf-core/bam_to_cram' -include { CRAM_QC } from '../nf-core/cram_qc' -include { BAM_BASERECALIBRATOR } from '../nf-core/gatk4/prepare_recalibration/main' -include { PREPARE_RECALIBRATION_CSV } from '../local/prepare_recalibration_csv' -include { RECALIBRATE } from '../nf-core/gatk4/recalibrate/main' -include { RECALIBRATE_CSV } from '../local/recalibrate_csv' +// Markduplicates +include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../../modules/nf-core/samtools/convert/main' +include { BAM_MARKDUPLICATES } from '../../local/bam_markduplicates/main' +include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../../local/channel_markduplicates_create_csv/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../../local/cram_qc_mosdepth_samtools/main' +// Splitncigarreads +include { BAM_SPLITNCIGARREADS } from '../../local/bam_splitncigarreads/main' +include { CHANNEL_SPLITNCIGARREADS_CREATE_CSV } from 
'../../local/channel_splitncigarreads_create_csv/main' +// Create recalibration tables +include { BAM_BASERECALIBRATOR } from '../../local/bam_baserecalibrator/main' +include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../../local/channel_baserecalibrator_create_csv/main' +// Create recalibrated cram files to use for variant calling (+QC) +include { BAM_APPLYBQSR } from '../../local/bam_applybqsr/main' +include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../../local/channel_applybqsr_create_csv/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../../local/cram_qc_mosdepth_samtools/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../../modules/nf-core/samtools/convert/main' + workflow BAM_GATK_PREPROCESSING { take: - step // Mandatory, step to start with - tools - ch_bam_mapped // channel: [mandatory] ch_bam_mapped - skip_tools // channel: [mandatory] skip_tools - save_output_as_bam // channel: [mandatory] save_output_as_bam - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - dict - germline_resource // channel: [optional] germline_resource - germline_resource_tbi // channel: [optional] germline_resource_tbi - intervals // channel: [mandatory] intervals/target regions - intervals_for_preprocessing // channel: [mandatory] intervals/wes - ch_interval_list_split - ch_input_sample + input_sample // channel: [optional] input from CSV if applicable + bam_mapped // channel: [mandatory] bam_mapped + cram_mapped // channel: [mandatory] cram_mapped + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + dict // channel: [mandatory] dict + known_sites_indels // channel: [optional] known_sites + known_sites_indels_tbi // channel: [optional] known_sites + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + intervals // channel: [mandatory] intervals/target regions + intervals_for_preprocessing // channel: [mandatory] intervals/wes + intervals_and_num_intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - - // Select inputs for makduplicates/recalibration - ch_bam_for_markduplicates = Channel.empty() - ch_input_cram_indexed = Channel.empty() - ch_cram_no_markduplicates_restart = Channel.empty() - ch_cram_markduplicates = Channel.empty() - ch_cram_variant_calling = Channel.empty() - // input from mapping - if (tools.split(',').contains('preprocessing')) { - if (step == 'mapping' | !ch_input_sample) { - ch_bam_for_markduplicates = ch_bam_mapped - ch_input_sample = ch_bam_mapped - } else { - ch_bam_mapped.dump(tag:"ch_bam_mapped2") - // input from samplesheet was a BAM and there is no need for alignment - ch_bam_mapped.branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" - }.set{ch_convert} - ch_bam_for_markduplicates = ch_convert.bam.map{ meta, bam, bai -> [meta, bam]} - // If CRAM files, convert to BAM, because the tool only runs on BAM files. 
- if (!(skip_tools && skip_tools.split(',').contains('markduplicates'))){ - // SAMTOOLS_CRAMTOBAM ( to speed up computation) - SAMTOOLS_CRAMTOBAM(ch_convert.cram, fasta, fasta_fai) - ch_versions = ch_versions.mix(SAMTOOLS_CRAMTOBAM.out.versions) - ch_bam_for_markduplicates = ch_bam_for_markduplicates.mix(SAMTOOLS_CRAMTOBAM.out.alignment_index - .map{ meta, bam, bai -> [meta, bam]}) - } else { - ch_cram_no_markduplicates_restart = ch_convert.cram - ch_cram_markduplicates = Channel.empty() - // ch_bam_for_markduplicates will countain bam mapped with GATK4_MAPPING when step is mapping - // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration - - ch_bam_for_markduplicates = ch_convert.bam - ch_input_cram_indexed = Channel.empty() - } - } + reports = Channel.empty() + versions = Channel.empty() - // STEP 1: mark duplicates - if (step in ['mapping', 'markduplicates'] ) { - // NO markduplicates, just convert BAM to CRAM - if (skip_tools && skip_tools.split(',').contains('markduplicates')) { - // ch_bam_indexed will countain bam mapped with GATK4_MAPPING when step is mapping - // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration - ch_bam_indexed = step == 'mapping' ? ch_bam_mapped : ch_convert.bam - BAM_TO_CRAM( - ch_bam_indexed, - ch_input_cram_indexed, - fasta, - fasta_fai, - intervals_for_preprocessing) - ch_cram_no_markduplicates_restart = BAM_TO_CRAM.out.cram_converted - // Gather QC reports - ch_reports = ch_reports.mix(BAM_TO_CRAM.out.qc.collect{meta, report -> report}) - // Gather used softwares versions - ch_versions = ch_versions.mix(BAM_TO_CRAM.out.versions) - } - // MARKDUPLICATES - else { - // ch_bam_for_markduplicates = ch_bam_for_markduplicates.map{meta, bams ->[ meta, bams.sort()]} - MARKDUPLICATES( - ch_bam_for_markduplicates, - fasta, - fasta_fai, - intervals_for_preprocessing) - ch_cram_markduplicates = MARKDUPLICATES.out.cram - // Gather QC reports - ch_reports = ch_reports.mix(MARKDUPLICATES.out.reports.collect{ meta, report -> report }) - // Gather used softwares versions - ch_versions = ch_versions.mix(MARKDUPLICATES.out.versions) - } - // ch_md_cram_for_restart contains either: - // - crams from markduplicates - // - crams converted from bam mapped when skipping markduplicates - ch_md_cram_for_restart = Channel.empty() - .mix( - ch_cram_markduplicates, - ch_cram_no_markduplicates_restart) - .map { - meta, cram, crai -> - //Make sure correct data types are carried through - [[ - data_type: "cram", - id: meta.id, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - lane: meta.lane - ], - cram, crai] - } - // CSV should be written for the file actually out, either CRAM or BAM - // Create CSV to restart from this step - if (!(skip_tools && skip_tools.split(',').contains('markduplicates'))) { - MARKDUPLICATES_CSV(ch_md_cram_for_restart) - } - } - // STEP 1b: SplitNCigarReads for RNA - if (step in ['mapping', 'markduplicates', 'splitncigar']) { - if (step == 'splitncigar') { - ch_md_cram_for_restart = ch_bam_mapped + + // check if preprocessing is skipped + if (params.skip_tools && !params.skip_tools.split(',').contains('preprocessing')) { + + // Markduplicates + if (params.step in ['mapping', 'markduplicates'] ) { + + cram_markduplicates_no_spark = Channel.empty() + + // cram_for_markduplicates will contain bam mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP when step is mapping + // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration + cram_for_markduplicates = 
params.step == 'mapping' ? bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] } + + // if no MD is done, then run QC on mapped & converted CRAM files + // or the input BAM (+converted) or CRAM files + cram_skip_markduplicates = Channel.empty() + + // Should it be possible to restart from converted crams? + // For now, conversion from bam to cram is only done when skipping markduplicates + if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) { + if (params.step == 'mapping') { + cram_skip_markduplicates = cram_mapped + } else { + input_markduplicates_convert = input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + } + + // Convert any input BAMs to CRAM + BAM_TO_CRAM(input_markduplicates_convert.bam, fasta, fasta_fai) + versions = versions.mix(BAM_TO_CRAM.out.versions) + + cram_skip_markduplicates = Channel.empty().mix(input_markduplicates_convert.cram, BAM_TO_CRAM.out.alignment_index) + } + + CRAM_QC_NO_MD(cram_skip_markduplicates, fasta, intervals_for_preprocessing) + + // Gather QC reports + reports = reports.mix(CRAM_QC_NO_MD.out.reports.collect{ meta, report -> report }) + + // Gather used softwares versions + versions = versions.mix(CRAM_QC_NO_MD.out.versions) + } else { + BAM_MARKDUPLICATES( + cram_for_markduplicates, + fasta, + fasta_fai, + intervals_for_preprocessing) + + cram_markduplicates_no_spark = BAM_MARKDUPLICATES.out.cram + + // Gather QC reports + reports = reports.mix(BAM_MARKDUPLICATES.out.reports.collect{ meta, report -> report }) + + // Gather used softwares versions + versions = versions.mix(BAM_MARKDUPLICATES.out.versions) + } + + // ch_md_cram_for_restart contains crams from markduplicates + ch_md_cram_for_restart = Channel.empty().mix(cram_markduplicates_no_spark) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + // If params.save_output_as_bam, then convert CRAM files to BAM + CRAM_TO_BAM(ch_md_cram_for_restart, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM.out.versions) + + // CSV should be written for the file actually out, either CRAM or BAM + // Create CSV to restart from this step + csv_subfolder = 'markduplicates' + params.save_output_as_bam ? CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.alignment_index, csv_subfolder, params.outdir, params.save_output_as_bam) : CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) + } + + + // SplitNCigarReads for RNA + if (params.step in ['mapping', 'markduplicates', 'splitncigar']) { + if (params.step == 'mapping') { + cram_skip_splitncigar = cram_skip_markduplicates + } else { + // Support if starting from BAM or CRAM files + input_sncr_convert = input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" } - // Separate RNA from DNA to do SPLITNCIGARREADS from GATK - ch_md_cram_for_restart.branch{ - dna: it[0].status < 2 - rna: it[0].status == 2 - }.set{ch_md_for_splitncigar} - // RNA samples only - ch_for_splitncigar = ch_md_for_splitncigar.rna - ch_md_cram_dna = ch_md_for_splitncigar.dna - // If CRAM files, convert to BAM, because the tool only runs on BAM files. 
- ch_for_splitncigar.branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" - }.set{ch_for_splitncigar_input} - SAMTOOLS_CRAMTOBAM_SNCR(ch_for_splitncigar_input.cram, fasta, fasta_fai) - ch_md_cram_for_splitncigar = ch_for_splitncigar_input.bam.mix(SAMTOOLS_CRAMTOBAM_SNCR.out.alignment_index) - SPLITNCIGAR ( - ch_md_cram_for_splitncigar, - fasta, - fasta_fai, - dict, - intervals_for_preprocessing + input_sncr_convert = input_sncr_convert.bam.map{ meta, bam, bai, table -> [ meta, bam, bai ] } + // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format + BAM_TO_CRAM(input_only_bam, fasta, fasta_fai) + versions = versions.mix(BAM_TO_CRAM.out.versions) + + ch_cram_from_bam = BAM_TO_CRAM.out.alignment_index + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + + cram_skip_splitncigar = Channel.empty().mix(ch_cram_from_bam, input_sncr_convert.cram) + } + + // cram_for_bam_splitncigar contains either: + // - crams from markduplicates + // - crams converted from bam mapped when skipping markduplicates + // - input cram files, when start from step markduplicates + cram_for_splitncigar = Channel.empty().mix(ch_md_cram_for_restart, cram_skip_markduplicates ) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + + if (!(params.skip_tools && params.skip_tools.split(',').contains('splitncigar'))) { + + cram_for_splitncigar_status = cram_for_splitncigar.branch{ + dna: it[0].status < 2 + rna: it[0].status >= 2 + } + BAM_SPLITNCIGARREADS ( + cram_for_splitncigar_status.rna, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals ) - ch_splitncigar_bam_bai = SPLITNCIGAR.out.bam_bai - ch_versions = ch_versions.mix(SPLITNCIGAR.out.versions) - // empty channel as BAM_TO_CRAM needs it as input - ch_input_cram_indexed = Channel.empty() - // SPLINCIGAR BAM to CRAM - BAM_TO_CRAM_SNCR( - ch_splitncigar_bam_bai, - ch_input_cram_indexed, - fasta, - fasta_fai, - intervals_for_preprocessing) - ch_cram_splitncigar = BAM_TO_CRAM_SNCR.out.cram_converted - // Gather QC reports - ch_reports = ch_reports.mix(BAM_TO_CRAM_SNCR.out.qc.collect{meta, report -> report}) - // Gather used softwares versions - ch_versions = ch_versions.mix(BAM_TO_CRAM_SNCR.out.versions) - // join again DNA and RNA to continue pre-processing - ch_cram_for_recalibration = Channel.empty() - ch_splitncigar_cram_for_restart = ch_cram_for_recalibration.mix( - ch_md_cram_dna, - ch_cram_splitncigar) - ch_cram_for_recal = ch_splitncigar_cram_for_restart.map{ meta, cram, crai -> - [[ - data_type: "cram", - id: meta.id, - patient: meta.patient, - sample: meta.sample, - status: meta.status - ], - cram, crai] - } + cram_splitncigar_no_spark = BAM_SPLITNCIGARREADS.out.cram.mix(cram_for_splitncigar_status.dna) + + // Gather used softwares versions + versions = versions.mix(BAM_SPLITNCIGARREADS.out.versions) + + } else { + + // ch_cram_for_bam_baserecalibrator contains either: + // - crams from markduplicates + // - crams converted from bam mapped when skipping markduplicates + // - input cram files, when start from step markduplicates + ch_cram_for_bam_baserecalibrator = Channel.empty().mix(cram_for_splitncigar) } - // STEP 2: Create recalibration tables - if (step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration'] ) { - // Run if starting from step "prepare_recalibration" or from "splitncigar" but skipping splitncigar (not 
much sense but just in case) - if (step == 'prepare_recalibration' || (step == "splitncigar" && skip_tools.split(',').contains('splitncigar'))){ - // Known issue: if you try to start the pipeline with a csv with a file in the table column - // but want to start from prepare_recalibration (re-do the table) then it will throw an error - // because the input object already has a table - this is actually bad practice for the pipeline so should not be used. - //Support if starting from BAM or CRAM files - ch_input_sample.branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" - }.set{ch_convert} - //BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format - SAMTOOLS_BAMTOCRAM(ch_convert.bam, fasta, fasta_fai) - ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions) - ch_cram_for_prepare_recalibration = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index, ch_convert.cram) - ch_cram_for_recal = SAMTOOLS_BAMTOCRAM.out.alignment_index - } - else { - // ch_cram_for_prepare_recalibration contains either: - // - crams converted from bam mapped when skipping markduplicates - // - input cram files, when start from step markduplicates - ch_cram_for_prepare_recalibration = Channel.empty().mix(ch_cram_for_recal, ch_input_cram_indexed) - } - // BASERECALIBRATOR - if (!(skip_tools && skip_tools.split(',').contains('baserecalibrator'))) { - ch_table_bqsr = Channel.empty() - - ch_cram_for_prepare_recalibration.dump(tag:"[STEP2_GATKPREPROCESSING] cram_input_for_recal") - BAM_BASERECALIBRATOR( - ch_cram_for_prepare_recalibration, - dict, - fasta, - fasta_fai, - intervals, - germline_resource, - germline_resource_tbi) - - ch_table_bqsr = BAM_BASERECALIBRATOR.out.table_bqsr - // Gather used softwares versions - ch_versions = ch_versions.mix(BAM_BASERECALIBRATOR.out.versions) - - - ch_reports = ch_reports.mix(ch_table_bqsr.collect{ meta, table -> table}) - ch_cram_applybqsr = ch_cram_for_prepare_recalibration.join(ch_table_bqsr) - // Create CSV to restart from this step - PREPARE_RECALIBRATION_CSV(ch_cram_for_recal.join(ch_table_bqsr), skip_tools) - } + // ch_md_cram_for_restart contains crams from markduplicates + ch_sncr_cram_for_restart = Channel.empty().mix(cram_splitncigar_no_spark) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + } + + // BQSR + if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration']) { + + // Run if starting from step "prepare_recalibration" + if (params.step == 'prepare_recalibration') { + + // Support if starting from BAM or CRAM files + input_prepare_recal_convert = input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + } + + // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format + BAM_TO_CRAM(input_prepare_recal_convert.bam, fasta, fasta_fai) + versions = versions.mix(BAM_TO_CRAM.out.versions) + + ch_cram_from_bam = BAM_TO_CRAM.out.alignment_index + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + + ch_cram_for_bam_baserecalibrator = Channel.empty().mix(ch_cram_for_bam_baserecalibrator, input_prepare_recal_convert.cram) + ch_sncr_cram_for_restart = ch_cram_from_bam + + } else { + + // ch_cram_for_bam_baserecalibrator contains either: + // - crams from markduplicates + // - crams from splitncigarreads + // - crams converted from bam 
mapped when skipping markduplicates + // - crams converted from bam mapped when skipping splitncigarreads + // - input cram files, when start from step markduplicates + // - input cram files, when start from step splitncigarreads + ch_cram_for_bam_baserecalibrator = Channel.empty().mix(ch_sncr_cram_for_restart, cram_skip_splitncigar ) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + + } + + // Create recalibration tables + if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { + + ch_table_bqsr_no_spark = Channel.empty() + + BAM_BASERECALIBRATOR( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, + known_sites_indels_tbi) + + ch_table_bqsr_no_spark = BAM_BASERECALIBRATOR.out.table_bqsr + + // Gather used softwares versions + versions = versions.mix(BAM_BASERECALIBRATOR.out.versions) + + + // ch_table_bqsr contains either: + // - bqsr table from baserecalibrator + ch_table_bqsr = Channel.empty().mix( + ch_table_bqsr_no_spark) + + reports = reports.mix(ch_table_bqsr.collect{ meta, table -> table }) + + cram_applybqsr = ch_cram_for_bam_baserecalibrator.join(ch_table_bqsr, failOnDuplicate: true, failOnMismatch: true) + + // Create CSV to restart from this step + CHANNEL_BASERECALIBRATOR_CREATE_CSV(ch_sncr_cram_for_restart.join(ch_table_bqsr, failOnDuplicate: true), params.tools, params.skip_tools, params.save_output_as_bam, params.outdir) + } + } + + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { + + // Run if starting from step "prepare_recalibration" + if (params.step == 'recalibrate') { + + // Support if starting from BAM or CRAM files + input_recal_convert = input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + } + + // If BAM file, split up table and mapped file to convert BAM to CRAM + input_only_table = input_recal_convert.bam.map{ meta, bam, bai, table -> [ meta, table ] } + input_only_bam = input_recal_convert.bam.map{ meta, bam, bai, table -> [ meta, bam, bai ] } + + // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format + BAM_TO_CRAM(input_only_bam, fasta, fasta_fai) + versions = versions.mix(BAM_TO_CRAM.out.versions) + + cram_applybqsr = Channel.empty().mix( + BAM_TO_CRAM.out.alignment_index.join(input_only_table, failOnDuplicate: true, failOnMismatch: true), + input_recal_convert.cram) + // Join together converted cram with input tables + .map{ meta, cram, crai, table -> [ meta + [data_type: "cram"], cram, crai, table ]} } - // STEP 3: RECALIBRATING - if (step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate'] ) { - // Run if starting from step "prepare_recalibration" - if(step == 'recalibrate'){ - //Support if starting from BAM or CRAM files - ch_input_sample.branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" - }.set{ch_convert} - //If BAM file, split up table and mapped file to convert BAM to CRAM - ch_bam_table = ch_convert.bam.map{ meta, bam, bai, table -> [meta, table]} - ch_bam_bam = ch_convert.bam.map{ meta, bam, bai, table -> [meta, bam, bai]} - - ch_bam_table.dump(tag:"ch_bam_table") - ch_bam_bam.dump(tag:"ch_bam_bam") - // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format - SAMTOOLS_BAMTOCRAM(ch_bam_bam, fasta, fasta_fai) - ch_versions = 
ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions) - - ch_cram_applybqsr = Channel.empty().mix( - SAMTOOLS_BAMTOCRAM.out.alignment_index.join(ch_bam_table), - ch_convert.cram) // Join together converted cram with input tables - } - if (!(skip_tools && skip_tools.split(',').contains('baserecalibrator'))) { - // RECALIBRATION - // ch_cram_applybqsr.dump(tag:"[STEP2_GATKPREPROCESSING] PREPARE_RECALIBRATION") + if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { + + cram_variant_calling_no_spark = Channel.empty() - RECALIBRATE( - ch_cram_applybqsr, + BAM_APPLYBQSR( + cram_applybqsr, dict, fasta, fasta_fai, - intervals) + intervals_and_num_intervals) - ch_cram_variant_calling = RECALIBRATE.out.cram - ch_versions = ch_versions.mix(RECALIBRATE.out.versions) + cram_variant_calling_no_spark = BAM_APPLYBQSR.out.cram - // QC for resulting CRAM(s) - CRAM_QC( - ch_cram_variant_calling, - fasta, - fasta_fai, - intervals_for_preprocessing) - // Gather QC reports - ch_reports = ch_reports.mix(CRAM_QC.out.qc.collect{meta, report -> report}) // Gather used softwares versions - ch_versions = ch_versions.mix(CRAM_QC.out.versions) - - //If save_output_as_bam, then convert CRAM files to BAM - SAMTOOLS_CRAMTOBAM_RECAL(ch_cram_variant_calling, fasta, fasta_fai) - ch_versions = ch_versions.mix(SAMTOOLS_CRAMTOBAM_RECAL.out.versions) - - // CSV should be written for the file actually out out, either CRAM or BAM - csv_recalibration = Channel.empty() - csv_recalibration = save_output_as_bam ? SAMTOOLS_CRAMTOBAM_RECAL.out.alignment_index : ch_cram_variant_calling - // Create CSV to restart from this step - RECALIBRATE_CSV(csv_recalibration.transpose()) - - } else if (step == 'recalibrate'){ - // ch_cram_variant_calling contains either: - // - input bams converted to crams, if started from step recal + skip BQSR - // - input crams if started from step recal + skip BQSR - ch_cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM.out.alignment_index, - ch_convert.cram.map{ meta, cram, crai, table -> [meta, cram, crai]}) - } else { - // ch_cram_variant_calling contains either: - // - crams from markduplicates = ch_cram_for_prepare_recalibration if skip BQSR but not started from step recalibration - ch_cram_variant_calling = ch_cram_for_prepare_recalibration - } - } - } + versions = versions.mix(BAM_APPLYBQSR.out.versions) + + cram_variant_calling = Channel.empty().mix( + cram_variant_calling_no_spark) + + CRAM_QC_RECAL( + cram_variant_calling, + fasta, + intervals_for_preprocessing) + + // Gather QC reports + reports = reports.mix(CRAM_QC_RECAL.out.reports.collect{ meta, report -> report }) + // Gather used softwares versions + versions = versions.mix(CRAM_QC_RECAL.out.versions) + + // If params.save_output_as_bam, then convert CRAM files to BAM + CRAM_TO_BAM_RECAL(cram_variant_calling, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM_RECAL.out.versions) + + // CSV should be written for the file actually out out, either CRAM or BAM + csv_recalibration = Channel.empty() + csv_recalibration = params.save_output_as_bam ? 
CRAM_TO_BAM_RECAL.out.alignment_index : cram_variant_calling + + // Create CSV to restart from this step + CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration) + + } else if (params.step == 'recalibrate') { + // cram_variant_calling contains either: + // - input bams converted to crams, if started from step recal + skip BQSR + // - input crams if started from step recal + skip BQSR + cram_variant_calling = Channel.empty().mix( + BAM_TO_CRAM.out.alignment_index, + input_recal_convert.cram.map{ meta, cram, crai, table -> [ meta, cram, crai ] }) + } else { + // cram_variant_calling contains either: + // - crams from markduplicates = ch_cram_for_bam_baserecalibrator if skip BQSR but not started from step recalibration + cram_variant_calling = Channel.empty().mix(ch_cram_for_bam_baserecalibrator) + } + } + } emit: - ch_cram_variant_calling = ch_cram_variant_calling - versions = ch_versions - ch_reports = ch_reports + cram_variant_calling = cram_variant_calling + versions = versions + reports = reports } \ No newline at end of file diff --git a/subworkflows/local/bam_markduplicates/main.nf b/subworkflows/local/bam_markduplicates/main.nf new file mode 100644 index 0000000..f305e8b --- /dev/null +++ b/subworkflows/local/bam_markduplicates/main.nf @@ -0,0 +1,43 @@ +// +// MARKDUPLICATES AND QC after mapping +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' +include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main' + +workflow BAM_MARKDUPLICATES { + take: + bam // channel: [mandatory] [ meta, bam ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals_bed_combined // channel: [optional] [ intervals_bed ] + + main: + versions = Channel.empty() + reports = Channel.empty() + + // RUN MARKUPDUPLICATES + GATK4_MARKDUPLICATES(bam, fasta, fasta_fai) + + // Join with the crai file + cram = GATK4_MARKDUPLICATES.out.cram.join(GATK4_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true) + + // QC on CRAM + CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined) + + // Gather all reports generated + reports = reports.mix(GATK4_MARKDUPLICATES.out.metrics) + reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports) + + // Gather versions of all tools used + versions = versions.mix(GATK4_MARKDUPLICATES.out.versions) + versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions) + + emit: + cram + reports + + versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/bam_splitncigarreads/main.nf b/subworkflows/local/bam_splitncigarreads/main.nf new file mode 100644 index 0000000..95ffb16 --- /dev/null +++ b/subworkflows/local/bam_splitncigarreads/main.nf @@ -0,0 +1,51 @@ +// +// SPLITNCIGARREADS +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_SPLITNCIGARREADS } from '../../../modules/nf-core/gatk4/splitncigarreads/main' +include { CRAM_MERGE_INDEX_SAMTOOLS } from '../cram_merge_index_samtools/main' + +workflow BAM_SPLITNCIGARREADS { + take: + cram // channel: [mandatory] [ meta, cram_markduplicates, crai ] + dict // channel: [mandatory] [ dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals 
] (or [ [], 0 ] if no intervals) + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals ] } + + GATK4_SPLITNCIGARREADS ( + cram_intervals, + fasta, + fasta_fai, + dict.map{ meta, dict -> [ dict ] } + ) + + // Gather the recalibrated cram files + cram_to_merge = GATK4_SPLITNCIGARREADS.out.cram.map{ meta, cram -> [ groupKey(meta, meta.num_intervals), cram ] }.groupTuple() + + // Merge and index the recalibrated cram files + CRAM_MERGE_INDEX_SAMTOOLS(cram_to_merge, fasta, fasta_fai) + + cram_recal = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai + // Remove no longer necessary field: num_intervals + .map{ meta, cram, crai -> [ meta - meta.subMap('num_intervals'), cram, crai ] } + + // Gather versions of all tools used + versions = versions.mix(GATK4_SPLITNCIGARREADS.out.versions) + versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions) + + + emit: + cram = cram_recal // channel: [ meta, cram, crai ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_variant_calling/main.nf b/subworkflows/local/bam_variant_calling/main.nf new file mode 100644 index 0000000..d681240 --- /dev/null +++ b/subworkflows/local/bam_variant_calling/main.nf @@ -0,0 +1,159 @@ +include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM_VARIANTCALLING } from '../../modules/nf-core/modules/samtools/convert/main' +include { PAIR_VARIANT_CALLING } from './pair_variant_calling' +include { VCF_QC } from '../nf-core/vcf_qc' +include { VARIANTCALLING_CSV } from './variantcalling_csv' + + +workflow VARIANT_CALLING { + + take: + tools + cram_variant_calling + fasta + fasta_fai + dict + germline_resource + germline_resource_tbi + intervals + intervals_bed_gz_tbi + intervals_bed_combined + pon + pon_tbi + input_sample + + main: + reports = Channel.empty() + versions = Channel.empty() + + if (params.step == 'variant_calling') { + + input_variant_calling_convert = input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + } + + // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format + BAM_TO_CRAM(input_variant_calling_convert.bam, fasta, fasta_fai) + versions = versions.mix(BAM_TO_CRAM.out.versions) + + cram_variant_calling = Channel.empty().mix(BAM_TO_CRAM.out.alignment_index, input_variant_calling_convert.cram) + + } + + if (params.tools) { + if (params.step == 'annotate') cram_variant_calling = Channel.empty() + + // + // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs + // + cram_variant_calling_status = cram_variant_calling.branch{ + normal: it[0].status == 0 + tumor: it[0].status == 1 + rna: it[0].status == 2 + } + + // All Germline samples + cram_variant_calling_normal_to_cross = cram_variant_calling_status.normal.map{ meta, cram, crai -> [ meta.patient, meta, cram, crai ] } + + // All tumor samples + cram_variant_calling_pair_to_cross = cram_variant_calling_status.tumor.map{ meta, cram, crai -> [ meta.patient, meta, cram, crai ] } + + // Tumor only samples + // 1. 
Group together all tumor samples by patient ID [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ] ] + + // Downside: this only works by waiting for all tumor samples to finish preprocessing, since no group size is provided + cram_variant_calling_tumor_grouped = cram_variant_calling_pair_to_cross.groupTuple() + + // 2. Join with normal samples, in each channel there is one key per patient now. Patients without matched normal end up with: [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ], null ] + cram_variant_calling_tumor_joined = cram_variant_calling_tumor_grouped.join(cram_variant_calling_normal_to_cross, failOnDuplicate: true, remainder: true) + + // 3. Filter out entries with last entry null + cram_variant_calling_tumor_filtered = cram_variant_calling_tumor_joined.filter{ it -> !(it.last()) } + + // 4. Transpose [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ] ] back to [ patient1, meta1, [ cram1, crai1 ], null ] [ patient1, meta2, [ cram2, crai2 ], null ] + // and remove patient ID field & null value for further processing [ meta1, [ cram1, crai1 ] ] [ meta2, [ cram2, crai2 ] ] + cram_variant_calling_tumor_only = cram_variant_calling_tumor_filtered.transpose().map{ it -> [it[1], it[2], it[3]] } + + only_paired_variant_calling = true // for now only this supported + if (only_paired_variant_calling) { + // Normal only samples + + // 1. Join with tumor samples, in each channel there is one key per patient now. Patients without matched tumor end up with: [ patient1, [ meta1 ], [ cram1, crai1 ], null ] as there is only one matched normal possible + cram_variant_calling_normal_joined = cram_variant_calling_normal_to_cross.join(cram_variant_calling_tumor_grouped, failOnDuplicate: true, remainder: true) + + // 2. Filter out entries with last entry null + cram_variant_calling_normal_filtered = cram_variant_calling_normal_joined.filter{ it -> !(it.last()) } + + // 3. Remove patient ID field & null value for further processing [ meta1, [ cram1, crai1 ] ] [ meta2, [ cram2, crai2 ] ] (no transposing needed since only one normal per patient ID) + cram_variant_calling_status_normal = cram_variant_calling_normal_filtered.map{ it -> [it[1], it[2], it[3]] } + } else { + cram_variant_calling_status_normal = cram_variant_calling_status.normal + } + + // Tumor - normal pairs + // Use cross to combine normal with all tumor samples, i.e. 
multi tumor samples from recurrences + cram_variant_calling_pair = cram_variant_calling_normal_to_cross.cross(cram_variant_calling_pair_to_cross) + .map { normal, tumor -> + def meta = [:] + + meta.id = "${tumor[1].sample}_vs_${normal[1].sample}".toString() + meta.normal_id = normal[1].sample + meta.patient = normal[0] + meta.status = tumor[1].status + meta.tumor_id = tumor[1].sample + + [ meta, normal[2], normal[3], tumor[2], tumor[3] ] + } + + + // PAIR VARIANT CALLING + BAM_VARIANT_CALLING_SOMATIC( + tools, + cram_variant_calling_pair, + dict, + fasta, + fasta_fai, + germline_resource, + germline_resource_tbi, + intervals, + intervals_bed_gz_tbi, + intervals_bed_combined, + pon, + pon_tbi + ) + + // POST VARIANTCALLING + POST_VARIANTCALLING(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, + params.concatenate_vcfs) + + // Gather vcf files for annotation and QC + vcf_to_normalize = Channel.empty() + vcf_to_normalize = vcf_to_normalize.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all) + + // QC + VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_normalize, intervals_bed_combined) + + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect{ meta, stats -> stats }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect{ meta, counts -> counts }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> qual }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> summary }) + + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate) + + // Gather used variant calling softwares versions + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions) + versions = versions.mix(POST_VARIANTCALLING.out.versions) + versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) + } + + + emit: + cram_vc_pair = ch_cram_variant_calling_pair + vcf = vcf_to_normalize + contamination_table = PAIR_VARIANT_CALLING.out.contamination_table + segmentation_table = PAIR_VARIANT_CALLING.out.segmentation_table + artifact_priors = PAIR_VARIANT_CALLING.out.artifact_priors + reports = reports + versions = versions + +} \ No newline at end of file diff --git a/subworkflows/local/channel_applybqsr_create_csv/main.nf b/subworkflows/local/channel_applybqsr_create_csv/main.nf new file mode 100644 index 0000000..336a90f --- /dev/null +++ b/subworkflows/local/channel_applybqsr_create_csv/main.nf @@ -0,0 +1,23 @@ +// +// CHANNEL_APPLYBQSR_CREATE_CSV +// + +workflow CHANNEL_APPLYBQSR_CREATE_CSV { + take: + cram_recalibrated_index // channel: [mandatory] meta, cram, crai + + main: + // Creating csv files to restart from this step + cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index -> + patient = meta.patient + sample = meta.sample + status = meta.status + file = "${params.outdir}/preprocessing/recalibrated/${sample}/${file.name}" + index = "${params.outdir}/preprocessing/recalibrated/${sample}/${index.name}" + + type = params.save_output_as_bam ? "bam" : "cram" + type_index = params.save_output_as_bam ? 
"bai" : "crai" + + ["recalibrated.csv", "patient,status,sample,${type},${type_index}\n${patient},${status},${sample},${file},${index}\n"] + } +} \ No newline at end of file diff --git a/subworkflows/local/channel_baserecalibrator_create_csv/main.nf b/subworkflows/local/channel_baserecalibrator_create_csv/main.nf new file mode 100644 index 0000000..9f1b48a --- /dev/null +++ b/subworkflows/local/channel_baserecalibrator_create_csv/main.nf @@ -0,0 +1,49 @@ +// +// CHANNEL_BASERECALIBRATOR_CREATE_CSV +// + +workflow CHANNEL_BASERECALIBRATOR_CREATE_CSV { + take: + cram_table_bqsr // channel: [mandatory] meta, cram, crai, table + tools + skip_tools + save_output_as_bam + outdir + + main: + // Creating csv files to restart from this step + if (!(skip_tools && (skip_tools.split(',').contains('markduplicates')))) { + cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, cram, crai, table -> + + patient = meta.patient + sample = meta.sample + status = meta.status + suffix_aligned = save_output_as_bam ? "bam" : "cram" + suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + cram = "${outdir}/preprocessing/markduplicates/${sample}/${cram.baseName}.${suffix_aligned}" + crai = "${outdir}/preprocessing/markduplicates/${sample}/${crai.baseName.minus(".cram")}.${suffix_index}" + table = "${outdir}/preprocessing/recal_table/${sample}/${sample}.recal.table" + + type = save_output_as_bam ? "bam" : "cram" + type_index = save_output_as_bam ? "bai" : "crai" + + ["markduplicates.csv", "patient,status,sample,${type},${type_index},table\n${patient},${status},${sample},${cram},${crai},${table}\n"] + } + } else { + cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, cram, crai, table -> + patient = meta.patient + sample = meta.sample + status = meta.status + suffix_aligned = save_output_as_bam ? "bam" : "cram" + suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + cram = "${outdir}/preprocessing/${sample}/mapped/${cram.baseName}.${suffix_aligned}" + crai = "${outdir}/preprocessing/${sample}/mapped/${crai.baseName.minus(".cram")}.${suffix_index}" + table = "${outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" + + type = save_output_as_bam ? "bam" : "cram" + type_index = save_output_as_bam ? "bai" : "crai" + + ["sorted.csv", "patient,status,sample,${type},${type_index},table\n${patient},${status},${sample},${cram},${crai},${table}\n"] + } + } +} \ No newline at end of file diff --git a/subworkflows/local/channel_markduplicates_create_csv/main.nf b/subworkflows/local/channel_markduplicates_create_csv/main.nf new file mode 100644 index 0000000..4f022ed --- /dev/null +++ b/subworkflows/local/channel_markduplicates_create_csv/main.nf @@ -0,0 +1,28 @@ +// +// CHANNEL_MARKDUPLICATES_CREATE_CSV +// + +workflow CHANNEL_MARKDUPLICATES_CREATE_CSV { + take: + cram_markduplicates // channel: [mandatory] meta, cram, crai + csv_subfolder + outdir + save_output_as_bam + + main: + // Creating csv files to restart from this step + cram_markduplicates.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index -> + patient = meta.patient + sample = meta.sample + status = meta.status + suffix_aligned = save_output_as_bam ? "bam" : "cram" + suffix_index = save_output_as_bam ? 
"bam.bai" : "cram.crai" + file = "${outdir}/preprocessing/${csv_subfolder}/${sample}/${file.baseName}.${suffix_aligned}" + index = "${outdir}/preprocessing/${csv_subfolder}/${sample}/${index.baseName.minus(".cram")}.${suffix_index}" + + type = save_output_as_bam ? "bam" : "cram" + type_index = save_output_as_bam ? "bai" : "crai" + + ["markduplicates_no_table.csv", "patient,status,sample,${type},${type_index}\n${patient},${status},${sample},${file},${index}\n"] + } +} \ No newline at end of file diff --git a/subworkflows/local/channel_splitncigarreads_create_csv/main.nf b/subworkflows/local/channel_splitncigarreads_create_csv/main.nf new file mode 100644 index 0000000..3acb4fa --- /dev/null +++ b/subworkflows/local/channel_splitncigarreads_create_csv/main.nf @@ -0,0 +1,28 @@ +// +// CHANNEL_SPLITNCIGARREADS_CREATE_CSV +// + +workflow CHANNEL_SPLITNCIGARREADS_CREATE_CSV { + take: + cram_splitncigarreads // channel: [mandatory] meta, cram, crai + csv_subfolder + outdir + save_output_as_bam + + main: + // Creating csv files to restart from this step + cram_splitncigarreads.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index -> + patient = meta.patient + sample = meta.sample + status = meta.status + suffix_aligned = save_output_as_bam ? "bam" : "cram" + suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + file = "${outdir}/preprocessing/${csv_subfolder}/${sample}/${file.baseName}.${suffix_aligned}" + index = "${outdir}/preprocessing/${csv_subfolder}/${sample}/${index.baseName.minus(".cram")}.${suffix_index}" + + type = save_output_as_bam ? "bam" : "cram" + type_index = save_output_as_bam ? "bai" : "crai" + + ["splitncigarreads_no_table.csv", "patient,status,sample,${type},${type_index}\n${patient},${status},${sample},${file},${index}\n"] + } +} \ No newline at end of file diff --git a/subworkflows/local/cram_merge_index_samtools/main.nf b/subworkflows/local/cram_merge_index_samtools/main.nf new file mode 100644 index 0000000..87be834 --- /dev/null +++ b/subworkflows/local/cram_merge_index_samtools/main.nf @@ -0,0 +1,47 @@ +// +// MERGE INDEX CRAM +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_INDEX as INDEX_CRAM } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_MERGE as MERGE_CRAM } from '../../../modules/nf-core/samtools/merge/main' + +workflow CRAM_MERGE_INDEX_SAMTOOLS { + take: + cram // channel: [mandatory] meta, cram + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fai for fasta + + main: + versions = Channel.empty() + + // Figuring out if there is one or more cram(s) from the same sample + cram_to_merge = cram.branch{ meta, cram -> + // cram is a list, so use cram.size() to asses number of intervals + single: cram.size() <= 1 + return [ meta, cram[0] ] + multiple: cram.size() > 1 + } + + // Only when using intervals + MERGE_CRAM(cram_to_merge.multiple, fasta.map{ it -> [ [ id:'fasta' ], it ] }, fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }) + + // Mix intervals and no_intervals channels together + cram_all = MERGE_CRAM.out.cram.mix(cram_to_merge.single) + + // Index cram + INDEX_CRAM(cram_all) + + // Join with the crai file + cram_crai = cram_all.join(INDEX_CRAM.out.crai, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + versions = versions.mix(INDEX_CRAM.out.versions.first()) + versions = versions.mix(MERGE_CRAM.out.versions.first()) 
+ + emit: + cram_crai + + versions +} \ No newline at end of file diff --git a/subworkflows/local/cram_qc_mosdepth_samtools/main.nf b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf index 1a7f6db..0bd6093 100644 --- a/subworkflows/local/cram_qc_mosdepth_samtools/main.nf +++ b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf @@ -4,8 +4,8 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { SAMTOOLS_STATS } from '../../../modules/nf-core/modules/samtools/stats/main' -include { MOSDEPTH } from '../../../modules/nf-core/modules/mosdepth/main' +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth/main' workflow CRAM_QC_MOSDEPTH_SAMTOOLS { take: diff --git a/subworkflows/local/prepare_reference_and_intervals.nf b/subworkflows/local/prepare_reference_and_intervals.nf deleted file mode 100644 index e659593..0000000 --- a/subworkflows/local/prepare_reference_and_intervals.nf +++ /dev/null @@ -1,105 +0,0 @@ -// -// PREPARE REFERENCE AND INTERVAL FILES FOR PIPELINE -// -include { PREPARE_GENOME } from './prepare_genome/main' -include { PREPARE_INTERVALS } from './prepare_intervals/main' -include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' - - -workflow PREPARE_REFERENCE_AND_INTERVALS { - - main: - ch_versions = Channel.empty() - - // Initialize file channels based on params, defined in the params.genomes[params.genome] scope - dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) - known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) - fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.empty() - fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() - germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) //Mutec2 does not require a germline resource, so set to optional input - known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) - known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) - pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) //PON is optional for Mutect2 (but highly recommended) - whitelist = params.whitelist ? Channel.fromPath(params.whitelist).collect() : Channel.value([]) - - // STEP 0.A: Build indices if needed - PREPARE_GENOME( - dbsnp, - fasta, - fasta_fai, - germline_resource, - known_indels, - known_snps, - pon) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - - // Gather built indices or get them from the params - bwa = params.fasta ? params.bwa ? Channel.fromPath(params.bwa).collect() : PREPARE_GENOME.out.bwa : [] - bwamem2 = params.fasta ? params.bwamem2 ? Channel.fromPath(params.bwamem2).collect() : PREPARE_GENOME.out.bwamem2 : [] - dragmap = params.fasta ? params.dragmap ? Channel.fromPath(params.dragmap).collect() : PREPARE_GENOME.out.hashtable : [] - hisat2_index = params.fasta ? params.hisat2_index ? Channel.fromPath(params.hisat2_index).collect() : PREPARE_GENOME.out.hisat2_index : [] - splicesites = params.fasta ? params.splicesites ? Channel.fromPath(params.splicesites).collect() : PREPARE_GENOME.out.splicesites : [] - dict = params.fasta ? params.dict ? 
Channel.fromPath(params.dict).collect() : PREPARE_GENOME.out.dict : [] - fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_GENOME.out.fasta_fai : [] - dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) - germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] - known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) - known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) - pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : [] - // known_sites is made by grouping both the dbsnp and the known snps/indels resources - // Which can either or both be optional - known_sites_indels = dbsnp.concat(known_indels).collect() - known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() - - known_sites_snps = dbsnp.concat(known_snps).collect() - known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() - - // STEP 0.B: Build intervals if needed - PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals) - ch_versions = ch_versions.mix(PREPARE_INTERVALS.out.versions) - - // Intervals for speed up preprocessing/variant calling by spread/gather - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined // [interval.bed] all intervals in one file - intervals_for_preprocessing = params.wes ? 
intervals_bed_combined : [] // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) - intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather - - - // STEP 0.C: Prepare the interval list from the GTF file using GATK4 BedToIntervalList - ch_genome_bed = Channel.from([id:'genome.bed']).combine(PREPARE_GENOME.out.exon_bed) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - ch_interval_list = Channel.empty() - GATK4_BEDTOINTERVALLIST( - ch_genome_bed, - dict - ) - ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list - ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) - - emit: - fasta = fasta - fasta_fai = fasta_fai - dict = dict - bwa = bwa - germline_resource = germline_resource - germline_resource_tbi = germline_resource_tbi - bwamem2 = bwamem2 - dragmap = dragmap - star_index = PREPARE_GENOME.out.star_index - gtf = PREPARE_GENOME.out.gtf - ch_interval_list = ch_interval_list - intervals = intervals - intervals_bed_gz_tbi = intervals_bed_gz_tbi - intervals_for_preprocessing = intervals_for_preprocessing - intervals_bed_combined = intervals_bed_combined - dbsnp = dbsnp - dbsnp_tbi = dbsnp_tbi - pon = pon - pon_tbi = pon_tbi - germline_resource = germline_resource - germline_resource_tbi = germline_resource_tbi - hisat2_index = hisat2_index - splicesites = splicesites - versions = ch_versions // channel: [ versions.yml ] - -} \ No newline at end of file diff --git a/subworkflows/local/prepare_reference_and_intervals/main.nf b/subworkflows/local/prepare_reference_and_intervals/main.nf new file mode 100644 index 0000000..2242621 --- /dev/null +++ b/subworkflows/local/prepare_reference_and_intervals/main.nf @@ -0,0 +1,120 @@ +// +// PREPARE REFERENCE AND INTERVAL FILES FOR PIPELINE +// +include { PREPARE_GENOME } from './../prepare_genome/main' +include { PREPARE_INTERVALS } from './../prepare_intervals/main' +include { GATK4_BEDTOINTERVALLIST } from '../../../modules/nf-core/gatk4/bedtointervallist/main' + + +workflow PREPARE_REFERENCE_AND_INTERVALS { + + main: + versions = Channel.empty() + + // Initialize file channels based on params, defined in the params.genomes[params.genome] scope + dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) + known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) + fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.empty() + fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() + germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) //Mutec2 does not require a germline resource, so set to optional input + known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) + known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) + pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) //PON is optional for Mutect2 (but highly recommended) + whitelist = params.whitelist ? 
Channel.fromPath(params.whitelist).collect() : Channel.value([]) + + // STEP 0.A: Build indices if needed + PREPARE_GENOME( + dbsnp, + fasta, + fasta_fai, + germline_resource, + known_indels, + known_snps, + pon) + versions = versions.mix(PREPARE_GENOME.out.versions) + + // Gather built indices or get them from the params + bwa = params.fasta ? params.bwa ? Channel.fromPath(params.bwa).collect() : PREPARE_GENOME.out.bwa : [] + bwamem2 = params.fasta ? params.bwamem2 ? Channel.fromPath(params.bwamem2).collect() : PREPARE_GENOME.out.bwamem2 : [] + dragmap = params.fasta ? params.dragmap ? Channel.fromPath(params.dragmap).collect() : PREPARE_GENOME.out.hashtable : [] + hisat2_index = params.fasta ? params.hisat2_index ? Channel.fromPath(params.hisat2_index).collect() : PREPARE_GENOME.out.hisat2_index : [] + splicesites = params.fasta ? params.splicesites ? Channel.fromPath(params.splicesites).collect() : PREPARE_GENOME.out.splicesites : [] + dict = params.fasta ? params.dict ? Channel.fromPath(params.dict).collect() : PREPARE_GENOME.out.dict : [] + fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_GENOME.out.fasta_fai : [] + dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) + germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] + known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) + known_snps_tbi = params.known_snps ? params.known_snps_tbi ? Channel.fromPath(params.known_snps_tbi).collect() : PREPARE_GENOME.out.known_snps_tbi : Channel.value([]) + pon_tbi = params.pon ? params.pon_tbi ? Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : [] + // known_sites is made by grouping both the dbsnp and the known snps/indels resources + // Which can either or both be optional + known_sites_indels = dbsnp.concat(known_indels).collect() + known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() + + known_sites_snps = dbsnp.concat(known_snps).collect() + known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() + +// STEP 0.B: Build intervals if needed + PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals) + versions = versions.mix(PREPARE_INTERVALS.out.versions) + + // Intervals for speed up preprocessing/variant calling by spread/gather + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) + intervals_for_preprocessing = params.wes ? 
+ intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : + Channel.value([ [ id:'null' ], [] ]) + intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + + intervals_and_num_intervals = intervals.map{ interval, num_intervals -> + if ( num_intervals < 1 ) [ [], num_intervals ] + else [ interval, num_intervals ] + } + + intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map{ intervals, num_intervals -> + if ( num_intervals < 1 ) [ [], [], num_intervals ] + else [ intervals[0], intervals[1], num_intervals ] + } + + // STEP 0.C: Prepare the interval list from the GTF file using GATK4 BedToIntervalList + ch_genome_bed = Channel.from([id:'genome.bed']).combine(PREPARE_GENOME.out.exon_bed) + versions = versions.mix(PREPARE_GENOME.out.versions) + ch_interval_list = Channel.empty() + GATK4_BEDTOINTERVALLIST( + ch_genome_bed, + dict + ) + ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list + versions = versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) + + emit: + fasta = fasta + fasta_fai = fasta_fai + dict = dict + bwa = bwa + germline_resource = germline_resource + germline_resource_tbi = germline_resource_tbi + bwamem2 = bwamem2 + dragmap = dragmap + star_index = PREPARE_GENOME.out.star_index + gtf = PREPARE_GENOME.out.gtf + ch_interval_list = ch_interval_list + intervals = intervals + intervals_bed_gz_tbi = intervals_bed_gz_tbi + intervals_for_preprocessing = intervals_for_preprocessing + intervals_bed_combined = intervals_bed_combined + dbsnp = dbsnp + dbsnp_tbi = dbsnp_tbi + pon = pon + pon_tbi = pon_tbi + germline_resource = germline_resource + germline_resource_tbi = germline_resource_tbi + hisat2_index = hisat2_index + splicesites = splicesites + known_sites_indels = known_sites_indels + known_sites_indels_tbi = known_sites_indels_tbi + known_sites_snps = known_sites_snps + known_sites_snps_tbi = known_sites_snps_tbi + versions = versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/variant_calling.nf b/subworkflows/local/variant_calling.nf deleted file mode 100644 index 3544adf..0000000 --- a/subworkflows/local/variant_calling.nf +++ /dev/null @@ -1,127 +0,0 @@ -include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM_VARIANTCALLING } from '../../modules/nf-core/modules/samtools/convert/main' -include { PAIR_VARIANT_CALLING } from './pair_variant_calling' -include { VCF_QC } from '../nf-core/vcf_qc' -include { VARIANTCALLING_CSV } from './variantcalling_csv' - - -workflow VARIANT_CALLING { - - take: - tools - ch_cram_variant_calling - fasta - fasta_fai - dbsnp - dbsnp_tbi - dict - germline_resource - germline_resource_tbi - intervals - intervals_bed_gz_tbi - intervals_bed_combined - pon - pon_tbi - ch_input_sample - - main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - - // get input for variant calling - if (params.step == 'variant_calling') { - // if input is a BAM file for variant calling we need to convert it to CRAM - ch_input_sample.branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" - }.set{ch_convert} - //BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format - SAMTOOLS_BAMTOCRAM_VARIANTCALLING(ch_convert.bam, fasta, fasta_fai) - 
ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.versions) - ch_cram_variant_calling = Channel.empty().mix(SAMTOOLS_BAMTOCRAM_VARIANTCALLING.out.alignment_index, ch_convert.cram) - } - - if (params.step == 'annotate') { - // no variant calling will be performed - ch_cram_variant_calling = Channel.empty() - } - // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs - ch_cram_variant_calling.branch{ - normal: it[0].status == 0 - tumor: it[0].status >= 1 - }.set{ch_cram_variant_calling_status} - - // All Germline samples -- will be the same for DNA and RNA - ch_cram_variant_calling_normal_to_cross = ch_cram_variant_calling_status.normal.map{ meta, cram, crai -> [meta.patient, meta, cram, crai] } -// ch_cram_variant_calling_normal_to_cross.dump(tag: "[STEP3_VARIANTCALLING] normals to cross") - // All tumor samples - ch_cram_variant_calling_tumor_pair_to_cross = ch_cram_variant_calling_status.tumor.map{ meta, cram, crai -> [meta.patient, meta, cram, crai] } -// ch_cram_variant_calling_tumor_pair_to_cross.dump(tag: "[STEP3_VARIANTCALLING] tumors to cross") - - // Tumor - normal pairs - // Use cross to combine normal with all tumor samples, i.e. multi tumor samples from recurrences - ch_cram_variant_calling_pair = ch_cram_variant_calling_normal_to_cross.cross(ch_cram_variant_calling_tumor_pair_to_cross) - .map { normal, tumor -> - def meta = [:] - meta.patient = normal[0] - meta.normal_id = normal[1].sample - meta.tumor_id = tumor[1].sample - meta.status = tumor[1].status - meta.id = "${meta.tumor_id}_vs_${meta.normal_id}".toString() - meta.alleles = null - [meta, normal[2], normal[3], tumor[2], tumor[3]] - } -// ch_cram_variant_calling_pair.dump(tag:"[STEP3_VARIANTCALLING] variant_calling_pairs") - PAIR_VARIANT_CALLING( - params.tools, - ch_cram_variant_calling_pair, - dbsnp, - dbsnp_tbi, - dict, - fasta, - fasta_fai, - germline_resource, - germline_resource_tbi, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined, - pon, - pon_tbi, - params.highconfidence, - params.actionablepanel, - params.knownhot, - params.ensbl_sage, - params.skip_tools - ) - ch_versions = ch_versions.mix(PAIR_VARIANT_CALLING.out.versions) - - // Gather vcf files for annotation and QC - vcf_to_normalize = Channel.empty() - vcf_to_normalize = vcf_to_normalize.mix(PAIR_VARIANT_CALLING.out.strelka_vcf) - vcf_to_normalize = vcf_to_normalize.mix(PAIR_VARIANT_CALLING.out.mutect2_vcf) - vcf_to_normalize = vcf_to_normalize.mix(PAIR_VARIANT_CALLING.out.freebayes_vcf) - vcf_to_normalize = vcf_to_normalize.mix(PAIR_VARIANT_CALLING.out.sage_vcf) - // ch_cram_variant_calling_pair.dump(tag:"[STEP3_VARIANTCALLING] all_vcfs") - - //QC - if (tools.split(',').contains('vcf_qc')) { - VCF_QC(vcf_to_normalize, intervals_bed_combined) - VARIANTCALLING_CSV(vcf_to_normalize) - - ch_versions = ch_versions.mix(VCF_QC.out.versions) - ch_reports = ch_reports.mix(VCF_QC.out.bcftools_stats.collect{meta, stats -> stats}) - ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_counts.collect{ meta, counts -> counts}) - ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_qual.collect{ meta, qual -> qual }) - ch_reports = ch_reports.mix(VCF_QC.out.vcftools_filter_summary.collect{meta, summary -> summary}) - } - - - emit: - cram_vc_pair = ch_cram_variant_calling_pair - vcf = vcf_to_normalize - contamination_table = PAIR_VARIANT_CALLING.out.contamination_table - segmentation_table = PAIR_VARIANT_CALLING.out.segmentation_table - artifact_priors = 
PAIR_VARIANT_CALLING.out.artifact_priors - reports = ch_reports - versions = ch_versions - -} \ No newline at end of file diff --git a/subworkflows/nf-core/splitncigar.nf b/subworkflows/nf-core/splitncigar.nf deleted file mode 100644 index 66f4468..0000000 --- a/subworkflows/nf-core/splitncigar.nf +++ /dev/null @@ -1,43 +0,0 @@ -// -// Subworkflow: Run GATK4 SplitNCigarReads without intervals, merge and index BAM file. -// -include { GATK4_SPLITNCIGARREADS } from '../../modules/nf-core/modules/gatk4/splitncigarreads/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' - -workflow SPLITNCIGAR { - take: - bam // channel: [ val(meta), [ bam ], [bai] ] - fasta // channel: [ fasta ] - fasta_fai // channel: [ fai ] - fasta_dict // channel: [ dict ] - intervals // channel: [ interval_list] - - main: - - ch_versions = Channel.empty() - bam = bam.map{meta, bam, bai -> [meta, bam, bai, []]} - GATK4_SPLITNCIGARREADS ( - bam, - fasta, - fasta_fai, - fasta_dict - ) - bam_splitncigar = GATK4_SPLITNCIGARREADS.out.bam - ch_versions = ch_versions.mix(GATK4_SPLITNCIGARREADS.out.versions.first()) - - SAMTOOLS_INDEX ( - bam_splitncigar - ) - splitncigar_bam_bai = bam_splitncigar - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map{meta, bam, bai, csi -> - if (bai) [meta, bam, bai] - else [meta, bam, csi] - } - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - emit: - bam_bai = splitncigar_bam_bai - versions = ch_versions -} From fac07867f64110df60c185d90431c012d15a2bcb Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 27 Aug 2023 18:15:56 +0100 Subject: [PATCH 16/56] Added a patch version of samtools stats to be consistent with names. --- .../samtools/stats/samtools-stats.diff | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 modules/nf-core/samtools/stats/samtools-stats.diff diff --git a/modules/nf-core/samtools/stats/samtools-stats.diff b/modules/nf-core/samtools/stats/samtools-stats.diff new file mode 100644 index 0000000..50015e3 --- /dev/null +++ b/modules/nf-core/samtools/stats/samtools-stats.diff @@ -0,0 +1,23 @@ +Changes in module 'nf-core/samtools/stats' +--- modules/nf-core/samtools/stats/main.nf ++++ modules/nf-core/samtools/stats/main.nf +@@ -8,7 +8,7 @@ + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: +- tuple val(meta), path(input), path(input_index) ++ tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + + output: +@@ -27,7 +27,7 @@ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ +- ${input} \\ ++ ${bam} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + +************************************************************ From 948087ca88bb78f8028d05ad6fa7cec1101a9d75 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 27 Aug 2023 18:18:13 +0100 Subject: [PATCH 17/56] Added a patch version of splitncigarreads to make it compatible with CRAM outputs. 
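The patched module decides whether to convert its BAM output to CRAM from the file
extension of task.ext.prefix (see the module diff below). As a rough sketch of how a
pipeline can opt in, the prefix can be given a .cram extension in conf/modules.config;
the selector and prefix shown here are illustrative only and may not match what
rnadnavar actually uses:

    process {
        withName: 'GATK4_SPLITNCIGARREADS' {
            // A prefix ending in .cram makes the patched module convert its BAM
            // output to CRAM with samtools and index the resulting file
            ext.prefix = { "${meta.id}.sncr.cram" }
        }
    }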
--- .../gatk4-splitncigarreads.diff | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 modules/nf-core/gatk4/splitncigarreads/gatk4-splitncigarreads.diff diff --git a/modules/nf-core/gatk4/splitncigarreads/gatk4-splitncigarreads.diff b/modules/nf-core/gatk4/splitncigarreads/gatk4-splitncigarreads.diff new file mode 100644 index 0000000..1a2499b --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/gatk4-splitncigarreads.diff @@ -0,0 +1,71 @@ +Changes in module 'nf-core/gatk4/splitncigarreads' +--- modules/nf-core/gatk4/splitncigarreads/main.nf ++++ modules/nf-core/gatk4/splitncigarreads/main.nf +@@ -4,17 +4,20 @@ + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': +- 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" ++ 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': ++ 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" + + input: +- tuple val(meta), path(bam), path(bai), path(intervals) ++ tuple val(meta), path(input), path(input_index), path(intervals) + path fasta + path fai + path dict + + output: +- tuple val(meta), path('*.bam'), emit: bam ++ tuple val(meta), path("*cram"), emit: cram, optional: true ++ tuple val(meta), path("*bam"), emit: bam, optional: true ++ tuple val(meta), path("*.crai"), emit: crai, optional: true ++ tuple val(meta), path("*.bai"), emit: bai, optional: true + path "versions.yml" , emit: versions + + when: +@@ -22,7 +25,11 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ ++ prefix = task.ext.prefix ?: "${meta.id}.bam" ++ // If the extension is CRAM, then change it to BAM ++ prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix ++ + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 +@@ -33,16 +40,25 @@ + } + """ + gatk --java-options "-Xmx${avail_mem}M" SplitNCigarReads \\ +- --input $bam \\ +- --output ${prefix}.bam \\ ++ --input $input \\ ++ --output ${prefix_bam} \\ + --reference $fasta \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + ++ # If cram files are wished as output, the run samtools for conversion ++ if [[ ${prefix} == *.cram ]]; then ++ samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} ++ rm ${prefix_bam} ++ samtools index ${prefix} ++ fi ++ ++ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') ++ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } + +************************************************************ From 40c442f02edece58961cb902fde46babbc21c2d8 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:50:26 +0100 Subject: [PATCH 18/56] Added variant_calling subworkflows from sarek --- .../bam_variant_calling_freebayes/main.nf | 60 +++++ .../bam_variant_calling_somatic_manta/main.nf | 47 ++++ .../main.nf | 212 ++++++++++++++++++ .../meta.yml | 127 +++++++++++ .../main.nf | 63 ++++++ .../local/vcf_qc_bcftools_vcftools/main.nf | 30 +++ .../nf-core/variantcalling/freebayes/main.nf | 88 -------- .../variantcalling/manta/somatic/main.nf | 191 ---------------- .../variantcalling/manta/tumoronly/main.nf | 110 --------- .../nf-core/variantcalling/sage/main.nf | 96 -------- .../variantcalling/strelka/single/main.nf | 86 ------- .../variantcalling/strelka/somatic/main.nf | 100 --------- 12 files changed, 539 insertions(+), 671 deletions(-) create mode 100644 subworkflows/local/bam_variant_calling_freebayes/main.nf create mode 100644 subworkflows/local/bam_variant_calling_somatic_manta/main.nf create mode 100644 subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf create mode 100644 subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml create mode 100644 subworkflows/local/bam_variant_calling_somatic_strelka/main.nf create mode 100644 subworkflows/local/vcf_qc_bcftools_vcftools/main.nf delete mode 100644 subworkflows/nf-core/variantcalling/freebayes/main.nf delete mode 100644 subworkflows/nf-core/variantcalling/manta/somatic/main.nf delete mode 100644 subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf delete mode 100644 subworkflows/nf-core/variantcalling/sage/main.nf delete mode 100644 subworkflows/nf-core/variantcalling/strelka/single/main.nf delete mode 100644 subworkflows/nf-core/variantcalling/strelka/somatic/main.nf diff --git a/subworkflows/local/bam_variant_calling_freebayes/main.nf b/subworkflows/local/bam_variant_calling_freebayes/main.nf new file mode 100644 index 0000000..7a58760 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_freebayes/main.nf @@ -0,0 +1,60 @@ +// +// FREEBAYES variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { FREEBAYES } from '../../../modules/nf-core/freebayes/main' +include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/tabix/tabix/main' + +workflow BAM_VARIANT_CALLING_FREEBAYES { + take: + cram // channel: [mandatory] [ meta, cram1, crai1, cram2, crai2 ] or [ meta, cram, crai, [], [] ] + dict // channel: [mandatory] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() 
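+    // In outline: FREEBAYES is scattered over the interval channel, each per-interval VCF is
+    // sorted with BCFTOOLS_SORT, and the per-sample shards are then merged with GATK4_MERGEVCFS
+    // when intervals were used, or simply indexed with tabix when running without intervals.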
+ + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map and reorganize channel for FREEBAYES module + .map{ meta, cram1, crai1, cram2, crai2, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram1, crai1, cram2, crai2, intervals ]} + + FREEBAYES(cram_intervals, fasta, fasta_fai, [], [], []) + + BCFTOOLS_SORT(FREEBAYES.out.vcf) + + // Figuring out if there is one or more vcf(s) from the same sample + bcftools_vcf_out = BCFTOOLS_SORT.out.vcf.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = bcftools_vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + MERGE_FREEBAYES(vcf_to_merge, dict) + + // Only when no_intervals + TABIX_VC_FREEBAYES(bcftools_vcf_out.no_intervals) + + // Mix intervals and no_intervals channels together + vcf = MERGE_FREEBAYES.out.vcf.mix(bcftools_vcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'freebayes' ], vcf ] } + + versions = versions.mix(BCFTOOLS_SORT.out.versions) + versions = versions.mix(MERGE_FREEBAYES.out.versions) + versions = versions.mix(FREEBAYES.out.versions) + versions = versions.mix(TABIX_VC_FREEBAYES.out.versions) + + emit: + vcf + + versions +} \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf new file mode 100644 index 0000000..31e4155 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf @@ -0,0 +1,47 @@ +// +// MANTA somatic variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { MANTA_SOMATIC } from '../../../modules/nf-core/manta/somatic/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { + take: + cram // channel: [mandatory] [ meta, cram1, crai1, cram2, crai2 ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [ [], [] ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + bed_gz = it.size() > 5 ? it[5] : [] + bed_tbi = it.size() > 5 ? 
it[6] : [] + + [it[0], it[1], it[2], it[3], it[4], bed_gz, bed_tbi] + } + + MANTA_SOMATIC(cram_intervals, fasta, fasta_fai) + + candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf + candidate_small_indels_vcf_tbi = MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi + candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf + diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf + somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf + + // Only diploid and somatic SV should get annotated + // add variantcaller to meta map + vcf = Channel.empty().mix(diploid_sv_vcf, somatic_sv_vcf).map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + + versions = versions.mix(MANTA_SOMATIC.out.versions) + + emit: + candidate_small_indels_vcf + candidate_small_indels_vcf_tbi + vcf + + versions +} \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf new file mode 100644 index 0000000..22b4f6c --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -0,0 +1,212 @@ +// +// +// MUTECT2: tumor-normal mode variantcalling: getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls +// + +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main' +include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats/main' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MUTECT2 as MUTECT2_PAIRED } from '../../../modules/nf-core/gatk4/mutect2/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { + take: + input // channel: [ meta, [ input ], [ input_index ] ] + fasta // channel: /path/to/reference/fasta + fai // channel: /path/to/reference/fasta/index + dict // channel: /path/to/reference/fasta/dictionary + germline_resource // channel: /path/to/germline/resource + germline_resource_tbi // channel: /path/to/germline/index + panel_of_normals // channel: /path/to/panel/of/normals + panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + + main: + versions = Channel.empty() + + //If no germline resource is provided, then create an empty channel to avoid GetPileupsummaries from being run + germline_resource_pileup = germline_resource_tbi ? 
germline_resource : Channel.empty() + germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty() + + // Combine input and intervals for spread and gather strategy + input_intervals = input.combine(intervals) + // Move num_intervals to meta map and reorganize channel for MUTECT2_PAIRED module + .map{ meta, input_list, input_index_list, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], input_list, input_index_list, intervals ] } + + if (joint_mutect2) { + // Separate normal cram files and remove duplicates + ch_normal_cram = input.map{ meta, cram, crai -> [ meta - meta.subMap('tumor_id') + [id:meta.patient], cram[0], crai[0] ] }.unique() + // Extract tumor cram files + ch_tumor_cram = input.map{ meta, cram, crai -> [ meta - meta.subMap('tumor_id') + [id:meta.patient], cram[1], crai[1] ] } + // Merge normal and tumor crams by patient + ch_tn_cram = ch_normal_cram.mix(ch_tumor_cram).groupTuple() + // Combine input and intervals for scatter and gather strategy + ch_tn_intervals = ch_tn_cram.combine(intervals) + // Move num_intervals to meta map and reorganize channel for MUTECT2_PAIRED module + .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals ] } + MUTECT2_PAIRED( ch_tn_intervals, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi) + } + else { + // Perform variant calling using mutect2 module pair mode + MUTECT2_PAIRED( input_intervals, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi) + } + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_branch = MUTECT2_PAIRED.out.vcf.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_branch = MUTECT2_PAIRED.out.tbi.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more vcf(s) from the same sample + stats_branch = MUTECT2_PAIRED.out.stats.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more vcf(s) from the same sample + f1r2_branch = MUTECT2_PAIRED.out.f1r2.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = vcf_branch.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple() + stats_to_merge = stats_branch.intervals.map{ meta, stats -> [ groupKey(meta, meta.num_intervals), stats ] }.groupTuple() + f1r2_to_merge = f1r2_branch.intervals.map{ meta, f1r2 -> [ groupKey(meta, meta.num_intervals), f1r2 ] }.groupTuple() + + MERGE_MUTECT2(vcf_to_merge, dict) + MERGEMUTECTSTATS(stats_to_merge) + + // Mix intervals and no_intervals channels together and remove no longer necessary field: normal_id, tumor_id, num_intervals + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals'), vcf ]} + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('normal_id', 'tumor_id', 
'num_intervals'), tbi ]} + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals'), stats ]} + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals'), f1r2 ]} + + // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 + LEARNREADORIENTATIONMODEL(f1r2) + + pileup = input_intervals.multiMap{ meta, input_list, input_index_list, intervals -> + tumor: [ meta, input_list[1], input_index_list[1], intervals ] + normal: [ meta, input_list[0], input_index_list[0], intervals ] + } + + // Prepare input channel for normal pileup summaries. + // Remember, the input channel contains tumor-normal pairs, so there will be multiple copies of the normal sample for each tumor for a given patient. + // Therefore, we use unique function to generate normal pileup summaries once for each patient for better efficiency. + pileup_normal = pileup.normal.map{ meta, cram, crai, intervals -> [ meta - meta.subMap('tumor_id') + [ id:meta.normal_id ], cram, crai, intervals] }.unique() + // Prepare input channel for tumor pileup summaries. + pileup_tumor = pileup.tumor.map{ meta, cram, crai, intervals -> [ meta + [ id:meta.tumor_id ], cram, crai, intervals ] } + + // Generate pileup summary tables using getepileupsummaries. tumor sample should always be passed in as the first input and input list entries of vcf_to_filter, + GETPILEUPSUMMARIES_NORMAL(pileup_normal, fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi) + GETPILEUPSUMMARIES_TUMOR(pileup_tumor, fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi) + + // Figuring out if there is one or more table(s) from the same sample + pileup_table_normal_branch = GETPILEUPSUMMARIES_NORMAL.out.table.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more table(s) from the same sample + pileup_table_tumor_branch = GETPILEUPSUMMARIES_TUMOR.out.table.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + pileup_table_normal_to_merge = pileup_table_normal_branch.intervals.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple() + pileup_table_tumor_to_merge = pileup_table_tumor_branch.intervals.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple() + + // Merge Pileup Summaries + GATHERPILEUPSUMMARIES_NORMAL(pileup_table_normal_to_merge, dict.map{ meta, dict -> [ dict ] }) + GATHERPILEUPSUMMARIES_TUMOR(pileup_table_tumor_to_merge, dict.map{ meta, dict -> [ dict ] }) + + // Do some channel magic to generate tumor-normal pairs again. + // This is necessary because we generated one normal pileup summary for each patient but we need run calculate contamination for each tumor-normal pair. 
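+    // Concretely: both pileup-table channels are re-keyed on a patient-level meta map (id set to
+    // meta.patient; normal_id, tumor_id and num_intervals dropped) while the original sample id is
+    // carried along as a separate tuple element, so combine(..., by: 0) pairs every tumor table
+    // with its matching normal table and the tumor_vs_normal id can be rebuilt afterwards.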
+ pileup_table_tumor = Channel.empty().mix(GATHERPILEUPSUMMARIES_TUMOR.out.table, pileup_table_tumor_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] } + pileup_table_normal= Channel.empty().mix(GATHERPILEUPSUMMARIES_NORMAL.out.table, pileup_table_normal_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] } + + ch_calculatecontamination_in_tables = pileup_table_tumor.combine( + pileup_table_normal, by:0).map{ + meta, tumor_id, tumor_table, normal_id, normal_table -> [ meta + [ id: tumor_id + "_vs_" + normal_id ], tumor_table, normal_table] + } + + CALCULATECONTAMINATION(ch_calculatecontamination_in_tables) + + // Initialize empty channel: Contamination calculation is run on pileup table, pileup is not run if germline resource is not provided + ch_seg_to_filtermutectcalls = Channel.empty() + ch_cont_to_filtermutectcalls = Channel.empty() + + if (joint_mutect2) { + // Reduce the meta to only patient name + ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple() + ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination.map{ meta, cont -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], cont]}.groupTuple() + } + else { + // Keep tumor_vs_normal ID + ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation + ch_cont_to_filtermutectcalls = CALCULATECONTAMINATION.out.contamination + } + + // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables + vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) + .join(stats, failOnDuplicate: true, failOnMismatch: true) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) + .join(ch_seg_to_filtermutectcalls) + .join(ch_cont_to_filtermutectcalls) + .map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] } + + FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict) + + vcf_filtered = FILTERMUTECTCALLS.out.vcf + // add variantcaller to meta map + .map{ meta, vcf -> [ meta + [ variantcaller:'mutect2' ], vcf ] } + + versions = versions.mix(MERGE_MUTECT2.out.versions) + versions = versions.mix(CALCULATECONTAMINATION.out.versions) + versions = versions.mix(FILTERMUTECTCALLS.out.versions) + versions = versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) + versions = versions.mix(GETPILEUPSUMMARIES_TUMOR.out.versions) + versions = versions.mix(GATHERPILEUPSUMMARIES_NORMAL.out.versions) + versions = versions.mix(GATHERPILEUPSUMMARIES_TUMOR.out.versions) + versions = versions.mix(LEARNREADORIENTATIONMODEL.out.versions) + versions = versions.mix(MERGEMUTECTSTATS.out.versions) + versions = versions.mix(MUTECT2_PAIRED.out.versions) + + emit: + vcf // channel: [ meta, vcf ] + stats // channel: [ meta, stats ] + + vcf_filtered // channel: [ meta, vcf ] + index_filtered = FILTERMUTECTCALLS.out.tbi // channel: [ meta, tbi ] + stats_filtered = FILTERMUTECTCALLS.out.stats // channel: [ meta, stats ] + + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ meta, artifactprior ] + + pileup_table_normal // channel: [ meta, table_normal ] + pileup_table_tumor // channel: [ meta, table_tumor ] + + contamination_table = ch_cont_to_filtermutectcalls // channel: [ meta, 
contamination ] + segmentation_table = ch_seg_to_filtermutectcalls // channel: [ meta, segmentation ] + + versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml b/subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml new file mode 100644 index 0000000..13ffad2 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml @@ -0,0 +1,127 @@ +name: gatk_tumor_normal_somatic_variant_calling +description: | + Perform variant calling on a paired tumor normal set of samples using mutect2 tumor normal mode. + f1r2 output of mutect2 is run through learnreadorientationmodel to get the artifact priors. + Run the input bam files through getpileupsummarries and then calculatecontamination to get the contamination and segmentation tables. + Filter the mutect2 output vcf using filtermutectcalls, artifact priors and the contamination & segmentation tables for additional filtering. +keywords: + - gatk4 + - mutect2 + - learnreadorientationmodel + - getpileupsummaries + - calculatecontamination + - filtermutectcalls + - variant_calling + - tumor_only + - filtered_vcf +modules: + - gatk4/mutect2 + - gatk4/learnreadorientationmodel + - gatk4/getpileupsummaries + - gatk4/calculatecontamination + - gatk4/filtermutectcalls +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input: + type: list + description: list containing the tumor and normal BAM files, in that order, also able to take CRAM as an input + pattern: "[ *.{bam/cram} ]" + - input_index: + type: list + description: list containing the tumor and normal BAM file indexes, in that order, also able to take CRAM index as an input + pattern: "[ *.{bam.bai/cram.crai} ]" + - which_norm: + type: list + description: optional list of sample headers contained in the normal sample input file. + pattern: "testN" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. + pattern: "*.vcf.gz.tbi" + - interval_file: + type: file + description: File containing intervals. + pattern: "*.interval_list" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mutect2_vcf: + type: file + description: Compressed vcf file to be used for variant_calling. + pattern: "[ *.vcf.gz ]" + - mutect2_tbi: + type: file + description: Indexes of the mutect2_vcf file + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: file + description: Stats files for the mutect2 vcf + pattern: "[ *vcf.gz.stats ]" + - mutect2_f1r2: + type: file + description: file containing information to be passed to LearnReadOrientationModel. + pattern: "*.f1r2.tar.gz" + - artifact_priors: + type: file + description: file containing artifact-priors to be used by filtermutectcalls. 
+ pattern: "*.tar.gz" + - pileup_table_tumor: + type: file + description: File containing the tumor pileup summary table, kept separate as calculatecontamination needs them individually specified. + pattern: "*_tumor.pileups.table" + - pileup_table_normal: + type: file + description: File containing the normal pileup summary table, kept separate as calculatecontamination needs them individually specified. + pattern: "*_normal.pileups.table" + - contamination_table: + type: file + description: File containing the contamination table. + pattern: "*.contamination.table" + - segmentation_table: + type: file + description: Output table containing segmentation of tumor minor allele fractions. + pattern: "*.segmentation.table" + - filtered_vcf: + type: file + description: file containing filtered mutect2 calls. + pattern: "*.vcf.gz" + - filtered_tbi: + type: file + description: tbi file that pairs with filtered vcf. + pattern: "*.vcf.gz.tbi" + - filtered_stats: + type: file + description: file containing statistics of the filtermutectcalls run. + pattern: "*.filteringStats.tsv" +authors: + - "@GCJMackenzie" \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf new file mode 100644 index 0000000..45bf5ce --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf @@ -0,0 +1,63 @@ +// +// STRELKA2 tumor-normal variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { STRELKA_SOMATIC } from '../../../modules/nf-core/strelka/somatic/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { + take: + cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi ] manta* are optional + dict // channel: [optional] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals, intervals_index ] } + + STRELKA_SOMATIC(cram_intervals, fasta, fasta_fai ) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_indels = STRELKA_SOMATIC.out.vcf_indels.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_snvs = STRELKA_SOMATIC.out.vcf_snvs.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_indels_to_merge = vcf_indels.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() 
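+    // groupKey(meta, meta.num_intervals) records how many per-interval VCFs to expect for each
+    // sample, so groupTuple can emit a group as soon as all of its shards have arrived instead of
+    // waiting for the whole channel to complete.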
+ vcf_snvs_to_merge = vcf_snvs.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_STRELKA_INDELS(vcf_indels_to_merge, dict) + MERGE_STRELKA_SNVS(vcf_snvs_to_merge, dict) + + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MERGE_STRELKA_INDELS.out.vcf, MERGE_STRELKA_SNVS.out.vcf, vcf_indels.no_intervals, vcf_snvs.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + + versions = versions.mix(MERGE_STRELKA_SNVS.out.versions) + versions = versions.mix(MERGE_STRELKA_INDELS.out.versions) + versions = versions.mix(STRELKA_SOMATIC.out.versions) + + emit: + vcf + + versions +} \ No newline at end of file diff --git a/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf b/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf new file mode 100644 index 0000000..d284a2d --- /dev/null +++ b/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf @@ -0,0 +1,30 @@ +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' +include { VCFTOOLS as VCFTOOLS_SUMMARY } from '../../../modules/nf-core/vcftools/main' +include { VCFTOOLS as VCFTOOLS_TSTV_COUNT } from '../../../modules/nf-core/vcftools/main' +include { VCFTOOLS as VCFTOOLS_TSTV_QUAL } from '../../../modules/nf-core/vcftools/main' + +workflow VCF_QC_BCFTOOLS_VCFTOOLS { + take: + vcf + target_bed + + main: + + versions = Channel.empty() + + BCFTOOLS_STATS(vcf.map{ meta, vcf -> [ meta, vcf, [] ] }, [[:],[]], [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + VCFTOOLS_TSTV_COUNT(vcf, target_bed, []) + VCFTOOLS_TSTV_QUAL(vcf, target_bed, []) + VCFTOOLS_SUMMARY(vcf, target_bed, []) + + versions = versions.mix(BCFTOOLS_STATS.out.versions) + versions = versions.mix(VCFTOOLS_TSTV_COUNT.out.versions) + + emit: + bcftools_stats = BCFTOOLS_STATS.out.stats + vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count + vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual + vcftools_filter_summary = VCFTOOLS_SUMMARY.out.filter_summary + + versions +} \ No newline at end of file diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf deleted file mode 100644 index 93aa581..0000000 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ /dev/null @@ -1,88 +0,0 @@ -include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' -include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' -include { VCFFILTER } from '../../../../modules/nf-core/modules/vcflib/filter/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' - -workflow RUN_FREEBAYES { - take: - cram // channel: [mandatory] [meta, cram, crai, [], [], interval] - dict - fasta // channel: [mandatory] - fasta_fai // channel: [mandatory] - - main: - - ch_versions = Channel.empty() - - FREEBAYES( - cram, - fasta, - fasta_fai, - [], [], []) - - VCFFILTER(FREEBAYES.out.vcf) - - BCFTOOLS_SORT(VCFFILTER.out.vcf) - BCFTOOLS_SORT.out.vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{bcftools_vcf_out} - - // Only when no intervals - TABIX_VC_FREEBAYES(bcftools_vcf_out.no_intervals) - - // Only when using intervals - MERGE_FREEBAYES( - bcftools_vcf_out.intervals 
- .map{ meta, vcf -> - - new_meta = meta.tumor_id ? [ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ] - : [ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - alleles: meta.alleles - ] - [groupKey(new_meta, meta.num_intervals), vcf] - }.groupTuple(), - dict - ) - - // Mix output channels for "no intervals" and "with intervals" results - freebayes_vcf = Channel.empty().mix( - MERGE_FREEBAYES.out.vcf, - bcftools_vcf_out.no_intervals) - .map{ meta, vcf -> - [ [ - id: meta.id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles, - variantcaller: "freebayes" - ], - vcf] - } - - ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - ch_versions = ch_versions.mix(MERGE_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) - - emit: - freebayes_vcf - versions = ch_versions -} diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf deleted file mode 100644 index a845f62..0000000 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ /dev/null @@ -1,191 +0,0 @@ -include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' - -workflow RUN_MANTA_SOMATIC { - take: - cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, interval.bed.gz, interval.bed.gz.tbi] - dict // channel: [optional] - fasta // channel: [mandatory] - fasta_fai // channel: [mandatory] - - main: - - ch_versions = Channel.empty() - - MANTA_SOMATIC(cram, fasta, fasta_fai) - - // Figure out if using intervals or no_intervals - MANTA_SOMATIC.out.candidate_small_indels_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_candidate_small_indels_vcf} - - MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_candidate_small_indels_vcf_tbi} - - MANTA_SOMATIC.out.candidate_sv_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_candidate_sv_vcf} - - MANTA_SOMATIC.out.diploid_sv_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_diploid_sv_vcf} - - MANTA_SOMATIC.out.somatic_sv_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_somatic_sv_vcf} - - //Only when using intervals - MERGE_MANTA_SV( - manta_candidate_small_indels_vcf.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - 
patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - meta.num_intervals), - vcf] - - }.groupTuple(), - dict) - - MERGE_MANTA_SMALL_INDELS( - manta_candidate_sv_vcf.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - meta.num_intervals), - vcf] - - }.groupTuple(), - dict) - - MERGE_MANTA_DIPLOID( - manta_diploid_sv_vcf.intervals.map{ meta, vcf -> - new_meta = [ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ] - - [groupKey([ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - meta.num_intervals), - vcf] - - }.groupTuple(), - dict) - - MERGE_MANTA_SOMATIC( - manta_somatic_sv_vcf.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - meta.num_intervals), - vcf] - - }.groupTuple(), - dict) - - // Mix output channels for "no intervals" and "with intervals" results - manta_vcf = Channel.empty().mix( - MERGE_MANTA_DIPLOID.out.vcf, - MERGE_MANTA_SOMATIC.out.vcf, - manta_diploid_sv_vcf.no_intervals, - manta_somatic_sv_vcf.no_intervals - ).map{ meta, vcf -> - [[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - num_intervals: meta.num_intervals, - normal_id: meta.normal_id, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles, - variantcaller: "manta" - ], - vcf] - } - - // Don't set variantcaller & num_intervals key. 
These files are not annotated, so they don't need it and joining with reads for StrelkaBP then fails - manta_candidate_small_indels_vcf = Channel.empty().mix( - MERGE_MANTA_SMALL_INDELS.out.vcf, - manta_candidate_small_indels_vcf.no_intervals - ).map{ meta, vcf -> - [[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - vcf] - } - - manta_candidate_small_indels_vcf_tbi = Channel.empty().mix( - MERGE_MANTA_SMALL_INDELS.out.tbi, - manta_candidate_small_indels_vcf_tbi.no_intervals - ).map{ meta, vcf -> - [[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - vcf] - } - - ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(MERGE_MANTA_SOMATIC.out.versions) - ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) - - emit: - manta_vcf - manta_candidate_small_indels_vcf - manta_candidate_small_indels_vcf_tbi - versions = ch_versions - -} diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf deleted file mode 100644 index acddb21..0000000 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ /dev/null @@ -1,110 +0,0 @@ -include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' - -// Seems to be the consensus on upstream modules implementation too -workflow RUN_MANTA_TUMORONLY { - take: - cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] - dict // channel: [optional] - fasta // channel: [mandatory] - fasta_fai // channel: [mandatory] - - main: - - ch_versions = Channel.empty() - - MANTA_TUMORONLY(cram, fasta, fasta_fai) - - // Figure out if using intervals or no_intervals - MANTA_TUMORONLY.out.candidate_small_indels_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_small_indels_vcf} - - MANTA_TUMORONLY.out.candidate_sv_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_candidate_sv_vcf} - - MANTA_TUMORONLY.out.tumor_sv_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_tumor_sv_vcf} - - //Only when using intervals - MERGE_MANTA_SMALL_INDELS( - manta_small_indels_vcf.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - ], - meta.num_intervals), - vcf] - - }.groupTuple(sort:true), - dict) - - MERGE_MANTA_SV( - manta_candidate_sv_vcf.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status - ], - meta.num_intervals), - vcf] - - }.groupTuple(sort:true), - dict) - - 
MERGE_MANTA_TUMOR( - manta_tumor_sv_vcf.intervals.map{ meta, vcf -> - - [groupKey( [ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - ], - meta.num_intervals), - vcf] - - }.groupTuple(sort:true), - dict) - - // Mix output channels for "no intervals" and "with intervals" results - // Only tumor sv should get annotated - manta_vcf = Channel.empty().mix( - MERGE_MANTA_TUMOR.out.vcf, - manta_tumor_sv_vcf.no_intervals - ).map{ meta, vcf -> - [[ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - variantcaller: "manta" - ], - vcf] - } - - ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(MERGE_MANTA_TUMOR.out.versions) - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) - - emit: - manta_vcf - versions = ch_versions -} diff --git a/subworkflows/nf-core/variantcalling/sage/main.nf b/subworkflows/nf-core/variantcalling/sage/main.nf deleted file mode 100644 index 46f3b59..0000000 --- a/subworkflows/nf-core/variantcalling/sage/main.nf +++ /dev/null @@ -1,96 +0,0 @@ -include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' -include { GATK4_MERGEVCFS as MERGE_SAGE } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { SAGE } from '../../../../modules/nf-core/modules/sage/main' -include { TABIX_TABIX as TABIX_VC_SAGE } from '../../../../modules/nf-core/modules/tabix/tabix/main' -//include { TABIX_BGZIPTABIX as BGZIPTABIX_VC_SAGE } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' - - -workflow RUN_SAGE { - take: - cram // channel: [mandatory] [meta, cram, crai, [], [], interval] - dict - fasta // channel: [mandatory] - fasta_fai - highconfidence - actionablepanel - knownhot - ensbl_sage - - main: - - ch_versions = Channel.empty() - - - SAGE( - cram, - fasta, - fasta_fai, - dict, - highconfidence, - actionablepanel, - knownhot, - ensbl_sage) - - BCFTOOLS_SORT(SAGE.out.vcf) - BCFTOOLS_SORT.out.vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{bcftools_vcf_out} - - // Only when no intervals - TABIX_VC_SAGE(bcftools_vcf_out.no_intervals) - - // Only when using intervals - MERGE_SAGE( - bcftools_vcf_out.intervals - .map{ meta, vcf -> - - new_meta = meta.tumor_id ? 
[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ] - : [ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - alleles: meta.alleles - ] - [groupKey(new_meta, meta.num_intervals), vcf] - }.groupTuple(), - dict - ) - - // Mix output channels for "no intervals" and "with intervals" results - sage_vcf = Channel.empty().mix( - MERGE_SAGE.out.vcf, - bcftools_vcf_out.no_intervals) - .map{ meta, vcf -> - [ [ - id: meta.id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles, - variantcaller: "sage" - ], - vcf] - } - - - ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - ch_versions = ch_versions.mix(MERGE_SAGE.out.versions) - ch_versions = ch_versions.mix(SAGE.out.versions) - - emit: - sage_vcf = sage_vcf - versions = ch_versions -} diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf deleted file mode 100644 index 2e6f578..0000000 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ /dev/null @@ -1,86 +0,0 @@ -include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' - -workflow RUN_STRELKA_SINGLE { - take: - cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] - dict // channel: [optional] - fasta // channel: [mandatory] - fasta_fai // channel: [mandatory] - - main: - - ch_versions = Channel.empty() - - STRELKA_SINGLE(cram, fasta, fasta_fai) - - // Figure out if using intervals or no_intervals - STRELKA_SINGLE.out.vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{strelka_vcf} - - STRELKA_SINGLE.out.genome_vcf.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{strelka_genome_vcf} - - MERGE_STRELKA( - strelka_vcf.intervals - .map{ meta, vcf -> - new_meta = [ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status - ] - - [groupKey(new_meta, meta.num_intervals), vcf] - }.groupTuple(sort:true), - dict - ) - - MERGE_STRELKA_GENOME( - strelka_genome_vcf.intervals - .map{ meta, vcf -> - - [groupKey([ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - ], - meta.num_intervals), - vcf] - - }.groupTuple(sort:true), - dict - ) - - // Mix output channels for "no intervals" and "with intervals" results - // Only strelka variant vcf should get annotated - strelka_vcf = Channel.empty().mix( - MERGE_STRELKA.out.vcf, - strelka_vcf.no_intervals) - .map{ meta, vcf -> - [[ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - variantcaller: "strelka" - ],vcf] - } - - ch_versions = ch_versions.mix(MERGE_STRELKA.out.versions) - ch_versions = ch_versions.mix(MERGE_STRELKA_GENOME.out.versions) - ch_versions = 
ch_versions.mix(STRELKA_SINGLE.out.versions) - - emit: - strelka_vcf - versions = ch_versions -} diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf deleted file mode 100644 index 783004c..0000000 --- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf +++ /dev/null @@ -1,100 +0,0 @@ -include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' -include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' - -workflow RUN_STRELKA_SOMATIC { - take: - cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, interval.bed.gz, interval.bed.gz.tbi] manta* are optional - dict // channel: [optional] - fasta // channel: [mandatory] - fasta_fai // channel: [mandatory] - - main: - - ch_versions = Channel.empty() - STRELKA_SOMATIC(cram, fasta, fasta_fai ) - - // Figure out if using intervals or no_intervals - STRELKA_SOMATIC.out.vcf_snvs.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{strelka_vcf_snvs} - - STRELKA_SOMATIC.out.vcf_indels.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{strelka_vcf_indels} - - // Only when using intervals - MERGE_STRELKA_SNVS(strelka_vcf_snvs.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - meta.num_intervals), - vcf] - - }.groupTuple(), - dict) - - MERGE_STRELKA_INDELS(strelka_vcf_indels.intervals.map{ meta, vcf -> - - [groupKey([ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - meta.num_intervals), - vcf] - }.groupTuple(), - dict) - - // Mix output channels for "no intervals" and "with intervals" results - strelka_separate_vcf = Channel.empty().mix( - MERGE_STRELKA_SNVS.out.vcf, - strelka_vcf_snvs.no_intervals, - MERGE_STRELKA_INDELS.out.vcf, - strelka_vcf_indels.no_intervals - ) - .map{ meta, vcf -> - [[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles, - variantcaller: "strelka" - ], - vcf] - } - MERGE_STRELKA( - strelka_separate_vcf.groupTuple(), - dict - ) - strelka_vcf = MERGE_STRELKA.out.vcf - - - // add merge between SNVs and indels - - ch_versions = ch_versions.mix(MERGE_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(MERGE_STRELKA_INDELS.out.versions) - ch_versions = ch_versions.mix(MERGE_STRELKA.out.versions) - ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) - - emit: - strelka_vcf - versions = ch_versions -} From f6a78e248293d8022595e821624c85e192e8a119 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:52:01 +0100 Subject: [PATCH 19/56] Added sage variant caller subworkflow. Updated module to fit new structure. 
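The module now takes the tumour/normal CRAM pair plus an optional intervals file in a single input tuple, and the reference channels are passed as [ meta, file ] tuples to match the current nf-core module layout. A minimal sketch of how a subworkflow is expected to feed it (illustrative only; channel names are placeholders, not part of this patch):

    // spread each tumour/normal pair over the scatter intervals
    cram_intervals = cram.combine(intervals)
        .map { meta, normal, normal_idx, tumor, tumor_idx, bed, num ->
            [ meta + [ num_intervals: num ], normal, normal_idx, tumor, tumor_idx, bed ] }

    SAGE(
        cram_intervals,
        fasta.map     { [ [ id:'fasta' ], it ] },   // tuple val(meta2), path(fasta)
        fasta_fai.map { [ [ id:'fai'   ], it ] }    // tuple val(meta3), path(fai)
    )

The per-interval VCFs can then be sorted and merged back per sample downstream, as in the other variant calling subworkflows.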
--- modules/local/sage/main.nf | 137 +++++++++--------- modules/local/sage/meta.yml | 17 +-- .../main.nf | 19 +++ .../local/post_variant_calling/main.nf | 27 ++++ 4 files changed, 114 insertions(+), 86 deletions(-) create mode 100644 subworkflows/local/channel_variant_calling_create_csv/main.nf create mode 100644 subworkflows/local/post_variant_calling/main.nf diff --git a/modules/local/sage/main.nf b/modules/local/sage/main.nf index 1523430..1d5f3b9 100644 --- a/modules/local/sage/main.nf +++ b/modules/local/sage/main.nf @@ -1,91 +1,84 @@ -def VERSION = '3.1' // Version information not provided by tool on CLI - process SAGE { tag "$meta.id" - label 'process_low' + label 'process_medium' - conda (params.enable_conda ? "bioconda::hmftools-sage=3.1" : null) + conda "bioconda::hmftools-sage=3.2.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.1--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-sage:3.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.2.3--hdfd78af_0' : + 'quay.io/biocontainers/hmftools-sage:3.2.3--hdfd78af_0' }" input: - tuple val(meta), path(normal), path(normal_index), path(tumor), path(tumor_index), path(intervals) - path fasta - path fasta_fai - path dict - path highconfidence - path actionablepanel - path knownhot - path ensbl_sage + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when + task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = normal ? "-reference ${meta.normal_id} -reference_bam ${normal}" : "" - def HighConfidence = highconfidence ? "-high_confidence_bed ${highconfidence}" : "" - def ActionableCodingPanel = actionablepanel ? "-panel_bed ${actionablepanel}" : "" - def KnownHotspots = knownhot ? "-hotspots ${knownhot}" : "" - def avail_mem = 4 - if (!task.memory) { - log.info '[SAGE] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - if (intervals){ // If no reads the intervals don't work in sage - """ - export _JAVA_OPTIONS="-Xmx${avail_mem}g" - INTER=\$(sed -E 's/\\s+0\\s+/\\t1\\t/g' $intervals | grep -v chrM | sed 's/\t/:/g' | paste -s -d ';') + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = input_normal ? "-reference ${meta.normal_id} -reference_bam ${input_normal}" : "" + def avail_mem = 3 + if (!task.memory) { + log.info '[SAGE] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+ } else { + avail_mem = task.memory.giga + } + if (intervals){ + """ + echo "[WARNING] If no reads in the intervals from $intervals, sage won't work" + export _JAVA_OPTIONS="-Xmx${avail_mem}g" + INTER=\$(sed -E 's/\\s+0\\s+/\\t1\\t/g' $intervals | sed 's/\t/:/g' | paste -s -d ';') + + SAGE \\ + -out ${prefix}.vcf \\ + -ref_genome $fasta \\ + -threads $task.cpus \\ + -tumor ${meta.tumor_id} -tumor_bam ${input_tumor} \\ + $reference \\ + -specific_regions \$INTER \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + SAGE: \$(grep 'Sage version' .command.log | cut -d " " -f6) + END_VERSIONS + """ - SAGE \\ - -out ${prefix}.vcf \\ - -ref_genome $fasta \\ - -threads $task.cpus \\ - -tumor ${meta.tumor_id} \\ - -tumor_bam ${tumor} \\ - $reference \\ - -ensembl_data_dir $ensbl_sage \\ - $HighConfidence \\ - $ActionableCodingPanel \\ - $KnownHotspots \\ - -specific_regions \$INTER \\ - $args + } else { + """ + export _JAVA_OPTIONS="-Xmx${avail_mem}g" + SAGE \\ + -out ${prefix}.vcf \\ + -ref_genome $fasta \\ + -threads $task.cpus \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${input_tumor} \\ + $reference \\ + $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sage: $VERSION - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + SAGE: \$(grep 'Sage version' .command.log | cut -d " " -f6) + END_VERSIONS + """ + } + stub: - } else { - """ - export _JAVA_OPTIONS="-Xmx${avail_mem}g" - SAGE \\ - -out ${prefix}.vcf \\ - -ref_genome $fasta \\ - -threads $task.cpus \\ - -tumor ${meta.tumor_id} \\ - -tumor_bam ${tumor} \\ - $reference \\ - -ensembl_data_dir $ensbl_sage \\ - $HighConfidence \\ - $ActionableCodingPanel \\ - $KnownHotspots \\ - $args + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sage: $VERSION - END_VERSIONS - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + SAGE: \$(SAGE | grep 'Sage version' .command.log | cut -d " " -f6) + END_VERSIONS + """ diff --git a/modules/local/sage/meta.yml b/modules/local/sage/meta.yml index b54f9dd..d4cfb52 100644 --- a/modules/local/sage/meta.yml +++ b/modules/local/sage/meta.yml @@ -45,18 +45,7 @@ input: type: file description: reference fasta file pattern: ".{fa,fa.gz,fasta,fasta.gz}" - - highconfidence: - type: file - description: Optional. - pattern: "*.bed" - - actionablepanel: - type: file - description: Optional. - pattern: "*.bed" - - knownhot: - type: file - description: Optional. - pattern: "*.bed" + output: - meta: @@ -66,8 +55,8 @@ output: e.g. 
[ id:'test', single_end:false ] - vcf: type: file - description: Compressed VCF file - pattern: "*.vcf.gz" + description: VCF file + pattern: "*.vcf" - version: type: file description: File containing software version diff --git a/subworkflows/local/channel_variant_calling_create_csv/main.nf b/subworkflows/local/channel_variant_calling_create_csv/main.nf new file mode 100644 index 0000000..71383ea --- /dev/null +++ b/subworkflows/local/channel_variant_calling_create_csv/main.nf @@ -0,0 +1,19 @@ +// +// CHANNEL_VARIANT_CALLING_CREATE_CSV +// + +workflow CHANNEL_VARIANT_CALLING_CREATE_CSV { + take: + vcf_to_annotate // channel: [mandatory] meta, vcf + + main: + // Creating csv files to restart from this step + vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf -> + patient = meta.patient + sample = meta.id + variantcaller = meta.variantcaller + status = meta.status + vcf = "${params.outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}" + ["variantcalled.csv", "patient,sample,status,variantcaller,vcf\n${patient},${sample},${status},${variantcaller},${vcf}\n"] + } +} \ No newline at end of file diff --git a/subworkflows/local/post_variant_calling/main.nf b/subworkflows/local/post_variant_calling/main.nf new file mode 100644 index 0000000..8667fdb --- /dev/null +++ b/subworkflows/local/post_variant_calling/main.nf @@ -0,0 +1,27 @@ +// +// POST VARIANT CALLING: processes run on variantcalled but not annotated VCFs +// + +include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline/main' + +workflow POST_VARIANTCALLING { + + take: + vcfs + concatenate_vcfs + + main: + versions = Channel.empty() + + if(concatenate_vcfs){ + CONCATENATE_GERMLINE_VCFS(vcfs) + + vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs) + versions = versions.mix(CONCATENATE_GERMLINE_VCFS.out.versions) + } + + emit: + vcfs // post processed vcfs + + versions // channel: [ versions.yml ] +} \ No newline at end of file From 74d838bbfc5035de779dd53e36a47cc4f2eed28c Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:52:54 +0100 Subject: [PATCH 20/56] Added preffix to freebayes.config --- conf/modules/variant_calling/freebayes.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/variant_calling/freebayes.config b/conf/modules/variant_calling/freebayes.config index 8ba6f62..420c6fc 100644 --- a/conf/modules/variant_calling/freebayes.config +++ b/conf/modules/variant_calling/freebayes.config @@ -29,7 +29,7 @@ process { // freebayes withName: 'FREEBAYES' { ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.freebayes" : "${meta.id}.freebayes.${target_bed.simpleName}" } ext.when = { params.tools && params.tools.split(',').contains('freebayes') } publishDir = [ enabled: false From 3872b28f3e7b6810664c9bb557471e1ecffa7cb5 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:53:35 +0100 Subject: [PATCH 21/56] Added option for when step is variant_calling in bam_gatk_preprocessing --- subworkflows/local/bam_gatk_preprocessing/main.nf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/subworkflows/local/bam_gatk_preprocessing/main.nf b/subworkflows/local/bam_gatk_preprocessing/main.nf index 6a6f00b..c3f150e 100644 --- a/subworkflows/local/bam_gatk_preprocessing/main.nf +++ b/subworkflows/local/bam_gatk_preprocessing/main.nf @@ -332,6 +332,21 @@ workflow BAM_GATK_PREPROCESSING { } } + if (params.step == 'variant_calling') { + + input_variant_calling_convert = input_sample.branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + } + + // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM format + BAM_TO_CRAM(input_variant_calling_convert.bam, fasta, fasta_fai) + versions = versions.mix(BAM_TO_CRAM.out.versions) + + cram_variant_calling = Channel.empty().mix(BAM_TO_CRAM.out.alignment_index, input_variant_calling_convert.cram) + + } + emit: cram_variant_calling = cram_variant_calling versions = versions From d8df7a24ae95e6a256f79fe5c6fd0e0d041cd138 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:55:08 +0100 Subject: [PATCH 22/56] Core workflow now bam_variant_calling_pre_post_processing and bam_variant_calling. Updated variables and modules imports. --- .../local/bam_variant_calling/main.nf | 65 +++++++-------- .../main.nf | 82 ++++++++++++------- 2 files changed, 82 insertions(+), 65 deletions(-) diff --git a/subworkflows/local/bam_variant_calling/main.nf b/subworkflows/local/bam_variant_calling/main.nf index d681240..ed175c8 100644 --- a/subworkflows/local/bam_variant_calling/main.nf +++ b/subworkflows/local/bam_variant_calling/main.nf @@ -1,10 +1,18 @@ -include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM_VARIANTCALLING } from '../../modules/nf-core/modules/samtools/convert/main' -include { PAIR_VARIANT_CALLING } from './pair_variant_calling' -include { VCF_QC } from '../nf-core/vcf_qc' -include { VARIANTCALLING_CSV } from './variantcalling_csv' +// +// Variant Calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Variant calling on tumor/normal pair +include { BAM_VARIANT_CALLING_SOMATIC } from '../bam_variant_calling_somatic/main' +// QC on VCF files +include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../vcf_qc_bcftools_vcftools/main' +// Create samplesheet to restart from different steps +include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../channel_variant_calling_create_csv/main' -workflow VARIANT_CALLING { + +workflow BAM_VARIANT_CALLING { take: tools @@ -17,6 +25,7 @@ workflow VARIANT_CALLING { intervals intervals_bed_gz_tbi intervals_bed_combined + intervals_bed_gz_tbi_combined pon pon_tbi input_sample @@ -25,21 +34,6 @@ workflow VARIANT_CALLING { reports = Channel.empty() versions = Channel.empty() - if (params.step == 'variant_calling') { - - input_variant_calling_convert = input_sample.branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" - } - - // BAM files first must be converted to CRAM files since from this step on we base everything on CRAM 
format - BAM_TO_CRAM(input_variant_calling_convert.bam, fasta, fasta_fai) - versions = versions.mix(BAM_TO_CRAM.out.versions) - - cram_variant_calling = Channel.empty().mix(BAM_TO_CRAM.out.alignment_index, input_variant_calling_convert.cram) - - } - if (params.tools) { if (params.step == 'annotate') cram_variant_calling = Channel.empty() @@ -105,30 +99,28 @@ workflow VARIANT_CALLING { [ meta, normal[2], normal[3], tumor[2], tumor[3] ] } - // PAIR VARIANT CALLING BAM_VARIANT_CALLING_SOMATIC( tools, cram_variant_calling_pair, - dict, fasta, fasta_fai, + dict, germline_resource, germline_resource_tbi, intervals, intervals_bed_gz_tbi, intervals_bed_combined, + intervals_bed_gz_tbi_combined, pon, - pon_tbi + pon_tbi, + params.joint_mutect2 ) - // POST VARIANTCALLING - POST_VARIANTCALLING(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, - params.concatenate_vcfs) // Gather vcf files for annotation and QC vcf_to_normalize = Channel.empty() - vcf_to_normalize = vcf_to_normalize.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all) + vcf_to_normalize = vcf_to_normalize.mix(BAM_VARIANT_CALLING_SOMATIC.out.vcf_all) // QC VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_normalize, intervals_bed_combined) @@ -138,22 +130,21 @@ workflow VARIANT_CALLING { reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> qual }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> summary }) - CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate) + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_normalize) // Gather used variant calling softwares versions - versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions) - versions = versions.mix(POST_VARIANTCALLING.out.versions) + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC.out.versions) versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) } emit: - cram_vc_pair = ch_cram_variant_calling_pair - vcf = vcf_to_normalize - contamination_table = PAIR_VARIANT_CALLING.out.contamination_table - segmentation_table = PAIR_VARIANT_CALLING.out.segmentation_table - artifact_priors = PAIR_VARIANT_CALLING.out.artifact_priors - reports = reports - versions = versions + cram_variant_calling_pair = cram_variant_calling_pair + vcf_to_normalize = vcf_to_normalize + contamination_table = BAM_VARIANT_CALLING_SOMATIC.out.contamination_table_mutect2 + segmentation_table = BAM_VARIANT_CALLING_SOMATIC.out.segmentation_table_mutect2 + artifact_priors = BAM_VARIANT_CALLING_SOMATIC.out.artifact_priors_mutect2 + reports = reports + versions = versions } \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf index 5e69f66..924a15e 100644 --- a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf +++ b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf @@ -1,9 +1,11 @@ // // Core workflow of the RNA/DNA variant calling pipeline // -include { BAM_GATK_PREPROCESSING } from '../bam_gatk_preprocessing/main' +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +include { BAM_GATK_PREPROCESSING } from '../bam_gatk_preprocessing/main' // For now only matched supported -// include { BAM_VARIANT_CALLING } from '../variant_calling/main' + include { BAM_VARIANT_CALLING } from '../bam_variant_calling/main' // // Can we just call normalization here? 
// include { VCF_NORMALIZE } from '../normalize_vcf_variants/main' // // Can we just call the consensus module here? @@ -38,30 +40,54 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { vcfs_status_dna // to repeat rescue consensus main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - ch_genome_bam.dump(tag:"ch_genome_bam") - // GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery - BAM_GATK_PREPROCESSING( - step, // Mandatory, step to start with - should be mapping for second pass - tools, - ch_genome_bam, // channel: [mandatory] [meta, [bam]] - skip_tools, // channel: [mandatory] skip_tools - params.save_output_as_bam, // channel: [mandatory] save_output_as_bam - fasta, // channel: [mandatory] fasta - fasta_fai , // channel: [mandatory] fasta_fai - dict, - germline_resource, // channel: [optional] germline_resource - germline_resource_tbi, // channel: [optional] germline_resource_tbi - intervals, // channel: [mandatory] intervals/target regions - intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes - ch_interval_list_split, - ch_input_sample - ) + reports = Channel.empty() + versions = Channel.empty() - ch_cram_variant_calling = GATK_PREPROCESSING.out.ch_cram_variant_calling - ch_versions = ch_versions.mix(GATK_PREPROCESSING.out.versions) - ch_reports = ch_reports.mix(GATK_PREPROCESSING.out.ch_reports) + // GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery + BAM_GATK_PREPROCESSING( + input_sample, + bam_mapped, // channel: [mandatory] [meta, [bam]] + cram_mapped, // channel: [mandatory] [meta, [cram]] + fasta, // channel: [mandatory] fasta + fasta_fai , // channel: [mandatory] fasta_fai + dict, // channel: [mandatory] dict + known_sites_indels, // channel: [optional] known_sites + known_sites_indels_tbi, // channel: [optional] known_sites + germline_resource, // channel: [optional] germline_resource + germline_resource_tbi, // channel: [optional] germline_resource_tbi + intervals, // channel: [mandatory] intervals/target regions + intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes + intervals_and_num_intervals // channel: [mandatory] intervals_for_preprocessing/wes + ) + + cram_variant_calling = BAM_GATK_PREPROCESSING.out.cram_variant_calling + versions = versions.mix(BAM_GATK_PREPROCESSING.out.versions) + reports = reports.mix(BAM_GATK_PREPROCESSING.out.reports) + + // VARIANT CALLING + BAM_VARIANT_CALLING( + params.tools, + cram_variant_calling, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + intervals, + intervals_bed_gz_tbi, + intervals_bed_combined, + intervals_bed_gz_tbi_combined, + pon, + pon_tbi, + input_sample + ) + cram_variant_calling_pair = BAM_VARIANT_CALLING.out.cram_variant_calling_pair // use same crams for force calling later + vcf_to_normalize = BAM_VARIANT_CALLING.out.vcf_to_normalize + contamination = BAM_VARIANT_CALLING.out.contamination_table + segmentation = BAM_VARIANT_CALLING.out.segmentation_table + orientation = BAM_VARIANT_CALLING.out.artifact_priors + versions = versions.mix(BAM_VARIANT_CALLING.out.versions) + reports = reports.mix(BAM_VARIANT_CALLING.out.reports) ch_cram_variant_calling.dump(tag:"[STEP8 RNA_FILTERING] ch_cram_variant_calling") intervals_bed_gz_tbi.dump(tag:"[STEP8 RNA_FILTERING] intervals_bed_gz_tbi") @@ -149,12 +175,12 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { 
// // } // -// emit: + emit: // vcf_consensus_dna = CONSENSUS.out.vcf_consensus_dna // vcfs_status_dna = CONSENSUS.out.vcfs_status_dna // maf = filtered_maf // maf_rna = filtered_maf_rna // maf_dna = filtered_maf_dna -// versions = ch_versions // channel: [ versions.yml ] -// reports = ch_reports + versions = versions // channel: [ versions.yml ] + reports = reports } From 4fcb3b8fc969d4837a1dd463a9ab417531032f0f Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:55:44 +0100 Subject: [PATCH 23/56] Added bam_variant_calling_somatic from bam_variant_calling_somatic_all from sarek. Removed '_all' because I just use a selection. --- .../local/bam_variant_calling_somatic/main.nf | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 subworkflows/local/bam_variant_calling_somatic/main.nf diff --git a/subworkflows/local/bam_variant_calling_somatic/main.nf b/subworkflows/local/bam_variant_calling_somatic/main.nf new file mode 100644 index 0000000..dca4d9f --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic/main.nf @@ -0,0 +1,155 @@ +// +// PAIRED VARIANT CALLING +// + +include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' +include { BAM_VARIANT_CALLING_SOMATIC_MANTA } from '../bam_variant_calling_somatic_manta/main' +include { BAM_VARIANT_CALLING_SOMATIC_MUTECT2 } from '../bam_variant_calling_somatic_mutect2/main' +include { BAM_VARIANT_CALLING_SOMATIC_STRELKA } from '../bam_variant_calling_somatic_strelka/main' +include { BAM_VARIANT_CALLING_SOMATIC_SAGE } from '../bam_variant_calling_somatic_sage/main' + +workflow BAM_VARIANT_CALLING_SOMATIC { + take: + tools // Mandatory, list of tools to apply + cram // channel: [mandatory] cram + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + dict // channel: [mandatory] dict + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // [] if no_intervals, else interval_bed_combined_gz, interval_bed_combined_gz_tbi + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi + joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + + main: + versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + vcf_freebayes = Channel.empty() + vcf_manta = Channel.empty() + vcf_strelka = Channel.empty() + vcf_mutect2 = Channel.empty() + vcf_sage = Channel.empty() + + // TODO: unify fasta/fasta_fai/dict structure + // FREEBAYES + if (tools.split(',').contains('freebayes')) { + BAM_VARIANT_CALLING_FREEBAYES( + cram, + dict, + fasta, + fasta_fai, + intervals + ) + + vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) + } + + // SAGE + if (tools.split(',').contains('sage')) { + + BAM_VARIANT_CALLING_SOMATIC_SAGE( + cram, + // Remap channel to match module/subworkflow + dict.map{ it -> [ [ id:'dict' ], it ] }, + // Remap channel to match module/subworkflow + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + // Remap 
channel to match module/subworkflow + fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }, + intervals + ) + + vcf_sage = BAM_VARIANT_CALLING_SOMATIC_SAGE.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_SAGE.out.versions) + } + + // MANTA + if (tools.split(',').contains('manta')) { + BAM_VARIANT_CALLING_SOMATIC_MANTA( + cram, + fasta, + fasta_fai, + intervals_bed_gz_tbi_combined + ) + + vcf_manta = BAM_VARIANT_CALLING_SOMATIC_MANTA.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.versions) + } + + // STRELKA + if (tools.split(',').contains('strelka')) { + // Remap channel to match module/subworkflow + cram_strelka = (tools.split(',').contains('manta')) ? + cram.join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf, failOnDuplicate: true, failOnMismatch: true).join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf_tbi, failOnDuplicate: true, failOnMismatch: true) : + cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [] ] } + + BAM_VARIANT_CALLING_SOMATIC_STRELKA( + cram_strelka, + // Remap channel to match module/subworkflow + dict, + fasta, + fasta_fai, + intervals_bed_gz_tbi + ) + + vcf_strelka = Channel.empty().mix(BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.vcf) + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.versions) + } + + // MUTECT2 + if (tools.split(',').contains('mutect2')) { + BAM_VARIANT_CALLING_SOMATIC_MUTECT2( + // Remap channel to match module/subworkflow + cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] }, + // Remap channel to match module/subworkflow + fasta, + // Remap channel to match module/subworkflow + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals, + joint_mutect2 + ) + + vcf_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.vcf_filtered + contamination_table_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.contamination_table + segmentation_table_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.segmentation_table + artifact_priors_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.artifact_priors + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.versions) + } else { + + contamination_table_mutect2 = Channel.empty() + segmentation_table_mutect2 = Channel.empty() + artifact_priors_mutect2 = Channel.empty() + + + } + + vcf_all = Channel.empty().mix( + vcf_freebayes, + vcf_manta, + vcf_mutect2, + vcf_strelka, + vcf_sage + ) + + emit: + vcf_all + vcf_freebayes + vcf_manta + vcf_mutect2 + vcf_strelka + vcf_sage + contamination_table_mutect2 = contamination_table_mutect2 + segmentation_table_mutect2 = segmentation_table_mutect2 + artifact_priors_mutect2 = artifact_priors_mutect2 + versions +} \ No newline at end of file From 6a879f9d0252c39dfc53fb8da7b247414369d018 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:55:58 +0100 Subject: [PATCH 24/56] Added SAGE subworkflow --- .../bam_variant_calling_somatic_sage/main.nf | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 subworkflows/local/bam_variant_calling_somatic_sage/main.nf diff --git a/subworkflows/local/bam_variant_calling_somatic_sage/main.nf b/subworkflows/local/bam_variant_calling_somatic_sage/main.nf new file mode 100644 index 0000000..1a7bdf8 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_sage/main.nf @@ -0,0 
+1,60 @@ +// +// SAGE variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { SAGE } from '../../../modules/local/sage/main' +include { GATK4_MERGEVCFS as MERGE_SAGE } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { TABIX_TABIX as TABIX_VC_SAGE } from '../../../modules/nf-core/tabix/tabix/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_SAGE { + take: + cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi ] manta* are optional + dict // channel: [mandatory] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map and reorganize channel for SAGE module + .map{ meta, cram1, crai1, cram2, crai2, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram1, crai1, cram2, crai2, intervals ]} + + SAGE(cram_intervals, fasta, fasta_fai) + + BCFTOOLS_SORT(SAGE.out.vcf) + + // Figuring out if there is one or more vcf(s) from the same sample + bcftools_vcf_out = BCFTOOLS_SORT.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = bcftools_vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + MERGE_SAGE(vcf_to_merge, dict) + + // Only when no_intervals + TABIX_VC_SAGE(bcftools_vcf_out.no_intervals) + + // Mix intervals and no_intervals channels together + vcf = MERGE_SAGE.out.vcf.mix(bcftools_vcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'sage' ], vcf ] } + + versions = versions.mix(BCFTOOLS_SORT.out.versions) + versions = versions.mix(MERGE_SAGE.out.versions) + versions = versions.mix(SAGE.out.versions) + versions = versions.mix(TABIX_VC_SAGE.out.versions) + + emit: + vcf + + versions +} \ No newline at end of file From 948f0457152279cdfe5c90faafcf852aaf0acccf Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:56:35 +0100 Subject: [PATCH 25/56] Added interval variables that were missing. --- .../prepare_reference_and_intervals/main.nf | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/subworkflows/local/prepare_reference_and_intervals/main.nf b/subworkflows/local/prepare_reference_and_intervals/main.nf index 2242621..90aea5a 100644 --- a/subworkflows/local/prepare_reference_and_intervals/main.nf +++ b/subworkflows/local/prepare_reference_and_intervals/main.nf @@ -64,10 +64,11 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { intervals_for_preprocessing = params.wes ?
intervals_bed_combined.map{it -> [ [ id:it.baseName ], it ]}.collect() : Channel.value([ [ id:'null' ], [] ]) - intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi_combined = PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined // [ intervals.bed.gz, intervals.bed.gz.tbi] - intervals_and_num_intervals = intervals.map{ interval, num_intervals -> + intervals_and_num_intervals = intervals.map{ interval, num_intervals -> if ( num_intervals < 1 ) [ [], num_intervals ] else [ interval, num_intervals ] } @@ -89,32 +90,33 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { versions = versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) emit: - fasta = fasta - fasta_fai = fasta_fai - dict = dict - bwa = bwa - germline_resource = germline_resource - germline_resource_tbi = germline_resource_tbi - bwamem2 = bwamem2 - dragmap = dragmap - star_index = PREPARE_GENOME.out.star_index - gtf = PREPARE_GENOME.out.gtf - ch_interval_list = ch_interval_list - intervals = intervals - intervals_bed_gz_tbi = intervals_bed_gz_tbi - intervals_for_preprocessing = intervals_for_preprocessing - intervals_bed_combined = intervals_bed_combined - dbsnp = dbsnp - dbsnp_tbi = dbsnp_tbi - pon = pon - pon_tbi = pon_tbi - germline_resource = germline_resource - germline_resource_tbi = germline_resource_tbi - hisat2_index = hisat2_index - splicesites = splicesites - known_sites_indels = known_sites_indels - known_sites_indels_tbi = known_sites_indels_tbi - known_sites_snps = known_sites_snps - known_sites_snps_tbi = known_sites_snps_tbi - versions = versions // channel: [ versions.yml ] + fasta = fasta + fasta_fai = fasta_fai + dict = dict + bwa = bwa + germline_resource = germline_resource + germline_resource_tbi = germline_resource_tbi + bwamem2 = bwamem2 + dragmap = dragmap + star_index = PREPARE_GENOME.out.star_index + gtf = PREPARE_GENOME.out.gtf + ch_interval_list = ch_interval_list + intervals = intervals + intervals_bed_gz_tbi = intervals_bed_gz_tbi + intervals_for_preprocessing = intervals_for_preprocessing + intervals_bed_combined = intervals_bed_combined + intervals_bed_gz_tbi_combined = intervals_bed_gz_tbi_combined + dbsnp = dbsnp + dbsnp_tbi = dbsnp_tbi + pon = pon + pon_tbi = pon_tbi + germline_resource = germline_resource + germline_resource_tbi = germline_resource_tbi + hisat2_index = hisat2_index + splicesites = splicesites + known_sites_indels = known_sites_indels + known_sites_indels_tbi = known_sites_indels_tbi + known_sites_snps = known_sites_snps + known_sites_snps_tbi = known_sites_snps_tbi + versions = versions // channel: [ versions.yml ] } \ No newline at end of file From c09051a1d71da5991fad9653fd20c820d7064b8f Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 29 Aug 2023 11:57:04 +0100 Subject: [PATCH 26/56] Small meta updates --- modules/nf-core/vcftools/meta.yml | 4 ++-- .../bam_tumor_normal_somatic_variant_calling_gatk/meta.yml | 2 +- 
.../gatk4/tumor_normal_somatic_variant_calling/meta.yml | 4 ++-- .../nf-core/gatk4/tumor_only_somatic_variant_calling/meta.yml | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/nf-core/vcftools/meta.yml b/modules/nf-core/vcftools/meta.yml index 04b786f..a634e55 100644 --- a/modules/nf-core/vcftools/meta.yml +++ b/modules/nf-core/vcftools/meta.yml @@ -214,11 +214,11 @@ output: pattern: "*.012" - genotypes_matrix_individual: type: file - description: Details the individuals included in the main genotypes_matrix file (optional) + description: Details the individuals included in the main.nf genotypes_matrix file (optional) pattern: "*.012.indv" - genotypes_matrix_position: type: file - description: Details the site locations included in the main genotypes_matrix file (optional) + description: Details the site locations included in the main.nf genotypes_matrix file (optional) pattern: "*.012.pos" - impute_hap: type: file diff --git a/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml index 1f08e23..50b7d4d 100644 --- a/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml +++ b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml @@ -12,7 +12,7 @@ keywords: - getpileupsummaries - calculatecontamination - filtermutectcalls - - variant_calling + - bam_variant_calling - tumor_only - filtered_vcf components: diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/meta.yml b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/meta.yml index d5abdca..9d3a273 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/meta.yml +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/meta.yml @@ -11,7 +11,7 @@ keywords: - getpileupsummaries - calculatecontamination - filtermutectcalls - - variant_calling + - bam_variant_calling - tumor_only - filtered_vcf modules: @@ -77,7 +77,7 @@ output: pattern: "versions.yml" - mutect2_vcf: type: file - description: Compressed vcf file to be used for variant_calling. + description: Compressed vcf file to be used for bam_variant_calling. pattern: "[ *.vcf.gz ]" - mutect2_tbi: type: file diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/meta.yml b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/meta.yml index 4c41f1f..bed635b 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/meta.yml +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/meta.yml @@ -9,7 +9,7 @@ keywords: - getpileupsummaries - calculatecontamination - filtermutectcalls - - variant_calling + - bam_variant_calling - tumor_only - filtered_vcf modules: @@ -70,7 +70,7 @@ output: pattern: "versions.yml" - mutect2_vcf: type: file - description: Compressed vcf file to be used for variant_calling. + description: Compressed vcf file to be used for bam_variant_calling. 
pattern: "[ *.vcf.gz ]" - mutect2_tbi: type: file From c1bdc0a648eb6eba1f3ca5d38b2d539caa37cd39 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 30 Aug 2023 15:13:35 +0100 Subject: [PATCH 27/56] Adding dict to sage, needed to run --- modules/local/sage/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/sage/main.nf b/modules/local/sage/main.nf index 1d5f3b9..477784e 100644 --- a/modules/local/sage/main.nf +++ b/modules/local/sage/main.nf @@ -11,6 +11,7 @@ process SAGE { tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(intervals) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("*.vcf"), emit: vcf From 375cb4ea7ef586e661dd426a25bf253095554e45 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 30 Aug 2023 15:23:51 +0100 Subject: [PATCH 28/56] Added normalise workflow, some final fixes in variant calling subworkflows. --- conf/modules/annotate/annotate.config | 6 +- conf/modules/consensus/normalise.config | 58 ----- .../gatk4_preprocessing/recalibrate.config | 2 +- .../splitncigarreads.config | 69 +++--- conf/modules/normalise/vt.config | 54 +++++ conf/modules/variant_calling/manta.config | 21 +- conf/modules/variant_calling/mutect2.config | 169 +++++++------- conf/modules/variant_calling/sage.config | 67 ++++++ conf/modules/variant_calling/strelka.config | 75 ++++--- .../variant_calling/variant_calling.config | 30 +++ modules/local/vt/decompose/main.nf | 37 ++-- .../local/vt/{normalize => normalise}/main.nf | 9 +- .../vt/{normalize => normalise}/meta.yml | 12 +- nextflow.config | 18 +- nextflow_schema.json | 144 ++++-------- .../local/bam_variant_calling/main.nf | 15 +- .../bam_variant_calling_somatic_sage/main.nf | 2 +- .../main.nf | 9 +- .../main.nf | 3 +- subworkflows/local/normalize_vcf_variants.nf | 50 ----- subworkflows/local/pair_variant_calling.nf | 206 ------------------ .../local/prepare_recalibration_csv.nf | 41 ---- subworkflows/local/variantcalling_csv.nf | 18 -- subworkflows/local/vcf_normalise/main.nf | 50 +++++ 24 files changed, 476 insertions(+), 689 deletions(-) delete mode 100644 conf/modules/consensus/normalise.config create mode 100644 conf/modules/normalise/vt.config create mode 100644 conf/modules/variant_calling/sage.config create mode 100644 conf/modules/variant_calling/variant_calling.config rename modules/local/vt/{normalize => normalise}/main.nf (83%) rename modules/local/vt/{normalize => normalise}/meta.yml (81%) delete mode 100644 subworkflows/local/normalize_vcf_variants.nf delete mode 100644 subworkflows/local/pair_variant_calling.nf delete mode 100644 subworkflows/local/prepare_recalibration_csv.nf delete mode 100644 subworkflows/local/variantcalling_csv.nf create mode 100644 subworkflows/local/vcf_normalise/main.nf diff --git a/conf/modules/annotate/annotate.config b/conf/modules/annotate/annotate.config index 83a22a3..b6e4992 100644 --- a/conf/modules/annotate/annotate.config +++ b/conf/modules/annotate/annotate.config @@ -15,7 +15,7 @@ process { // annotate - // VEP TODO: is vep_custom_args working?? + // VEP if (params.tools && params.tools.split(',').contains('vep')) { withName: 'ENSEMBLVEP_VEP' { ext.args = { [ @@ -23,9 +23,9 @@ process { // annotate (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', (params.vep_loftee) ? 
"--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', - (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_spliceregion) ? "--plugin SpliceRegion" : '', (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf', - (params.vep_custom_args) ?: '' + (params.vep_custom_args) ?: "" ].join(' ').trim() } // If just VEP: _VEP.ann.vcf ext.prefix = { vcf.baseName - ".vcf" + "_VEP.ann" } diff --git a/conf/modules/consensus/normalise.config b/conf/modules/consensus/normalise.config deleted file mode 100644 index 85ba365..0000000 --- a/conf/modules/consensus/normalise.config +++ /dev/null @@ -1,58 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. - ext.when = When to run the module. ----------------------------------------------------------------------------------------- -*/ - -// NORMALISE - -process { // normalise - - if (params.skip_toos && params.skip_toos.split(',').contains('normalise')) { - // VT - // TODO: stats are not going to the report dir - no idea why - withName: 'VT_DECOMPOSE'{ - ext.args = "" - ext.prefix = { "${vcf.baseName.minus(".vcf")}.dec" } - publishDir = [ - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/vt/" }, - pattern: {'*dec.stats'}, - saveAs: {"${meta.variantcaller}/${meta.patient}/${meta.id}/${it}"}, - enabled: true - ], - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: {"*{vcf.gz,vcf.gz.tbi}"}, - saveAs: {"${meta.variantcaller}/${meta.patient}/${meta.id}/${it}"}, - enabled: false // store normalised results only - ] - ] - } - - withName: 'VT_NORMALIZE'{ - ext.args = {"-n"} - ext.prefix = { "${vcf.baseName.minus(".dec.vcf")}.norm" } - publishDir = [[ - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{vcf.gz,vcf.gz.tbi,norm.stats}", - saveAs: { "${meta.variantcaller}/${meta.patient}/${meta.id}/${it}" }, - enabled: true // just store normalised results - ], - [mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/vt/${meta.variantcaller}/${meta.patient}/${meta.id}/" }, - pattern: "*stats" - ]] - } - - } -} diff --git a/conf/modules/gatk4_preprocessing/recalibrate.config b/conf/modules/gatk4_preprocessing/recalibrate.config index 1a1ce1a..71b7ed2 100644 --- a/conf/modules/gatk4_preprocessing/recalibrate.config +++ b/conf/modules/gatk4_preprocessing/recalibrate.config @@ -38,7 +38,7 @@ process { // recalibrate ] } - withName: '.*:BAM_APPLYBQSR::CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { + withName: '.*:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { publishDir = [ enabled: !params.save_output_as_bam, mode: params.publish_dir_mode, diff --git a/conf/modules/gatk4_preprocessing/splitncigarreads.config b/conf/modules/gatk4_preprocessing/splitncigarreads.config 
index 58f8c35..ad5e7bd 100644 --- a/conf/modules/gatk4_preprocessing/splitncigarreads.config +++ b/conf/modules/gatk4_preprocessing/splitncigarreads.config @@ -15,54 +15,63 @@ process { //splitncigar // TODO: check SECOND_RUN and suffixes - withName: '.*:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { - ext.args = ['-rf ReassignOneMappingQuality', - '-RMQF 255 ', - '-RMQT 60', - '-U ALLOW_N_CIGAR_READS'].join(' ').trim() - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, - pattern: "*{bam,bai}", - enabled: params.save_align_intermeds // will be saved as CRAM - ] + withName: 'GATK4_SPLITNCIGARREADS' { + ext.args = '--create-output-bam-index false' + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.sncr.cram" : "${meta.id}_${intervals.simpleName}.sncr.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('splitncigar')) } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/splitncigarreads/${meta.id}/" }, + pattern: "*{cram,crai}" + ] } withName: '.*:SECOND_RUN:GATK_PREPROCESSING:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { - ext.prefix = {"${meta.id}.sncr"} - ext.args = ['-rf ReassignOneMappingQuality', - '-RMQF 255 ', - '-RMQT 60', - '-U ALLOW_N_CIGAR_READS'].join(' ').trim() + ext.args = '--create-output-bam-index false' + ext.prefix = { "${meta.id}.sncr.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('splitncigar')) } publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/preprocessing/splitncigarreads/${meta.id}/" }, pattern: "*{bam,bai}", enabled: params.save_align_intermeds // will be saved as CRAM ] } - withName: ".*:PREPARE_SECOND_RUN:MERGE_ALIGN:INDEX_MERGE_BAM" { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*.{bai,csi}", - saveAs: { params.save_bam_mapped ? "second_run/${meta.patient}/${meta.id}/${it}" : null }, - enabled: params.save_align_intermeds - ] + withName: ".*:PREPARE_SECOND_RUN:MERGE_ALIGN:INDEX_MERGE_BAM" { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*.{bai,csi}", + saveAs: { params.save_bam_mapped ? "second_run/${meta.patient}/${meta.id}/${it}" : null }, + enabled: params.save_align_intermeds + ] - } + } - withName: '.*:SPLITNCIGAR:SAMTOOLS_INDEX' { + withName: '.*:BAM_SPLITNCIGARREADS:CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { ext.args = params.bam_csi_index ? 
'-c' : '' publishDir = [ - path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/preprocessing/splitncigarreads/${meta.id}/" }, mode: params.publish_dir_mode, - pattern: "*.{bai,csi}", - enabled: params.save_align_intermeds + pattern: "*{recal.cram,recal.cram.crai}", + enabled: !params.save_output_as_bam, ] } + withName: '.*:BAM_SPLITNCIGARREADS:CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { + ext.prefix = { "${meta.id}.sncr" } + ext.when = { meta.num_intervals > 1 } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/splitncigarreads/${meta.id}/" }, + pattern: "*cram" + ] + } + + } diff --git a/conf/modules/normalise/vt.config b/conf/modules/normalise/vt.config new file mode 100644 index 0000000..b0cf3f5 --- /dev/null +++ b/conf/modules/normalise/vt.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// VT + +process { // vt + + withName: 'VT_DECOMPOSE'{ + ext.args = "" + ext.prefix = { vcf.baseName - ".vcf.gz" + ".dec" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vt/${meta.id}" }, + pattern: "*.dec.stats" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.variantcaller}/${meta.id}" }, + pattern: {"*{vcf.gz,vcf.gz.tbi}"}, + enabled: false + ] + ] + } + + withName: 'VT_NORMALISE'{ + ext.args = {"-n"} + ext.prefix = { vcf.baseName - ".vcf.gz" + ".norm" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.variantcaller}/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vt/${meta.id}" }, + pattern: {"*.stats"}, + enabled: true + ] + ] + } + +} \ No newline at end of file diff --git a/conf/modules/variant_calling/manta.config b/conf/modules/variant_calling/manta.config index 1770beb..5e96235 100644 --- a/conf/modules/variant_calling/manta.config +++ b/conf/modules/variant_calling/manta.config @@ -14,15 +14,14 @@ // MANTA process { // manta - if (params.tools && params.tools.split(',').contains('manta')) { - withName: 'MANTA_SOMATIC' { - ext.args = {params.wes || meta.status >= 2 ? "--exome" : "" } - ext.prefix = { "${meta.id}.manta" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, - pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" - ] - } + withName: 'MANTA_SOMATIC' { + ext.args = {params.wes || meta.status >= 2 ?
"--exome" : "" } + ext.prefix = { "${meta.id}.manta" } + ext.when = { params.tools && params.tools.split(',').contains('manta') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" + ] } -} \ No newline at end of file +} diff --git a/conf/modules/variant_calling/mutect2.config b/conf/modules/variant_calling/mutect2.config index 2226407..3aa45c9 100644 --- a/conf/modules/variant_calling/mutect2.config +++ b/conf/modules/variant_calling/mutect2.config @@ -14,7 +14,6 @@ // MUTECT2 process { // mutect2 - if (params.tools && params.tools.split(',').contains('mutect2')) { // withName: 'GATK4_MUTECT2' { // ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } @@ -28,101 +27,101 @@ process { // mutect2 // ] // } - // PAIR_VARIANT_CALLING - withName: 'MUTECT2_PAIRED' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } - ext.when = { params.tools && params.tools.split(',').contains('mutect2') } - ext.args = { params.ignore_soft_clipped_bases ? - "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.normal_id} --callable-depth 1 " : - "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.patient}_${meta.normal_id} --callable-depth 1 " } + // PAIR_VARIANT_CALLING + withName: 'MUTECT2_PAIRED' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('mutect2') } + ext.args = { params.ignore_soft_clipped_bases ? + "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.patient}_${meta.normal_id} --callable-depth 1 " : + "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.patient}_${meta.normal_id} --callable-depth 1 " } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi,stats}", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_MUTECT2.*' { + ext.prefix = { "${meta.id}.mutect2" } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{vcf.gz,vcf.gz.tbi,stats}", - saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } - ] - } + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + // TODO: FILTERMUTECTCALLS is a patch for second run [!!] + withName: 'FILTERMUTECTCALLS.*' { + ext.prefix = {"${meta.id}.mutect2.filtered"} + ext.args = { [meta.status >= 2 ? '--max-events-in-region 5': '', + meta.cont && !(meta.cont.endswith("NO_TABLE")) ? '--contamination-table ${meta.cont}' :'', + meta.seg && !(meta.seg.endswith("NO_SEG")) ? '--tumor-segmentation ${meta.seg}':'', + meta.orient && !(meta.orient.endswith("NO_ARTPRIOR"))? 
'--orientation-bias-artifact-priors ${meta.orient}':'' + ].join(' ').trim() + } - withName: 'MERGE_MUTECT2.*' { - ext.prefix = { "${meta.id}.mutect2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - // TODO: FILTERMUTECTCALLS is a patch for second run [!!] - withName: 'FILTERMUTECTCALLS.*' { - ext.prefix = {"${meta.id}.mutect2.filtered"} - ext.args = { [meta.status >= 2 ? '--max-events-in-region 5': '', - meta.cont && !(meta.cont.endswith("NO_TABLE")) ? '--contamination-table ${meta.cont}' :'', - meta.seg && !(meta.seg.endswith("NO_SEG")) ? '--tumor-segmentation ${meta.seg}':'', - meta.orient && !(meta.orient.endswith("NO_ARTPRIOR"))? '--orientation-bias-artifact-priors ${meta.orient}':'' - ].join(' ').trim() - } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } + ] + } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } - ] - } + withName: 'CALCULATECONTAMINATION' { + ext.prefix = { "${meta.id}.mutect2" } + ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'CALCULATECONTAMINATION' { - ext.prefix = { "${meta.id}.mutect2" } - ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: 'LEARNREADORIENTATIONMODEL' { + ext.prefix = { "${meta.id}.mutect2.artifactprior" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'LEARNREADORIENTATIONMODEL' { - ext.prefix = { "${meta.id}.mutect2.artifactprior" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'MERGEMUTECTSTATS' { - ext.prefix = { "${meta.id}.mutect2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: 'GATHERPILEUPSUMMARIES.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'GATHERPILEUPSUMMARIES.*' { - ext.prefix = { "${meta.id}.mutect2" } + withName: 'GETPILEUPSUMMARIES.*' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*.table", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + if (params.joint_mutect2) { + withName: 'CALCULATECONTAMINATION' { publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + path: { "${params.outdir}/variant_calling/mutect2/${meta.patient}" }, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: 'GETPILEUPSUMMARIES.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*.table", - saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } - ] - } - - if (params.joint_mutect2) { - withName: 'CALCULATECONTAMINATION' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.patient}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } } + } \ No newline at end of file diff --git a/conf/modules/variant_calling/sage.config b/conf/modules/variant_calling/sage.config new file mode 100644 index 0000000..e79cc84 --- /dev/null +++ b/conf/modules/variant_calling/sage.config @@ -0,0 +1,67 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SAGE + +process { // sage + + withName: '.*SAGE.*' { + ext.when = { params.tools && params.tools.split(',').contains('sage') } + } + + // SAGE + withName: 'SAGE' { + ext.args = { [ + (params.sage_custom_args) ?:"", + (params.sage_highconfidence) ? "-high_confidence_bed ${params.sage_highconfidence}" : "", + (params.sage_actionablepanel) ? "-panel_bed ${params.sage_actionablepanel}": "", + (params.sage_knownhotspots) ? "-hotspots ${params.sage_knownhotspots}" : "", + (params.sage_ensembl_dir) ? "-ensembl_data_dir ${params.sage_ensembl_dir}" : "" + ].join(' ').trim() } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.sage_out" : "${meta.id}.sage_out.${target_bed.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('sage') } + publishDir = [ + enabled: false + ] + + } + + withName: 'MERGE_SAGE' { + ext.prefix = {"${meta.id}.sage"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sage/${meta.patient}/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName : 'TABIX_VC_SAGE' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sage/${meta.patient}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*:BAM_VARIANT_CALLING_SOMATIC_SAGE:BCFTOOLS_SORT' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.sage" : vcf.baseName - ".vcf" + ".sage.sort" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "sage/${meta.patient}/${meta.id}/${it}" }, + enabled: true + ] + } + +} diff --git a/conf/modules/variant_calling/strelka.config b/conf/modules/variant_calling/strelka.config index 99d0480..14bfcea 100644 --- a/conf/modules/variant_calling/strelka.config +++ b/conf/modules/variant_calling/strelka.config @@ -15,42 +15,41 @@ process { // strelka - if (params.tools && params.tools.split(',').contains('strelka')) { - - withName: 'STRELKA_.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.simpleName}" } - ext.args = {params.wes || meta.status >= 2 ? "--exome" : "" } - ext.when = { params.tools && params.tools.split(',').contains('strelka') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? null : "strelka/${meta.id}/${it}" } - ] - } - - withName: 'MERGE_STRELKA.*' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - - withName: 'MERGE_STRELKA' { - ext.prefix = {"${meta.id}.strelka.variants"} - } - - withName: 'MERGE_STRELKA_GENOME' { - ext.prefix = {"${meta.id}.strelka.genome"} - } - - // PAIR_VARIANT_CALLING - withName: 'MERGE_STRELKA_INDELS' { - ext.prefix = {"${meta.id}.strelka.somatic_indels"} - } - withName: 'MERGE_STRELKA_SNVS' { - ext.prefix = {"${meta.id}.strelka.somatic_snvs"} - } - } + + withName: 'STRELKA_.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.simpleName}" } + ext.args = {params.wes || meta.status >= 2 ? "--exome" : "" } + ext.when = { params.tools && params.tools.split(',').contains('strelka') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "strelka/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_STRELKA.*' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_STRELKA' { + ext.prefix = {"${meta.id}.strelka.variants"} + } + + withName: 'MERGE_STRELKA_GENOME' { + ext.prefix = {"${meta.id}.strelka.genome"} + } + + // PAIR_VARIANT_CALLING + withName: 'MERGE_STRELKA_INDELS' { + ext.prefix = {"${meta.id}.strelka.somatic_indels"} + } + withName: 'MERGE_STRELKA_SNVS' { + ext.prefix = {"${meta.id}.strelka.somatic_snvs"} + } + } \ No newline at end of file diff --git a/conf/modules/variant_calling/variant_calling.config b/conf/modules/variant_calling/variant_calling.config new file mode 100644 index 0000000..35ec577 --- /dev/null +++ b/conf/modules/variant_calling/variant_calling.config @@ -0,0 +1,30 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// VARIANT CALLING + +process { // variant_calling + + if (params.skip_toos && params.skip_toos.split(',').contains('variant_calling')) { + withName: 'SAMTOOLS_BAMTOCRAM_VARIANTCALLING' { + ext.prefix = { "${meta.id}.recal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.patient}/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + } + + +} \ No newline at end of file diff --git a/modules/local/vt/decompose/main.nf b/modules/local/vt/decompose/main.nf index 6589cae..850804b 100644 --- a/modules/local/vt/decompose/main.nf +++ b/modules/local/vt/decompose/main.nf @@ -1,30 +1,31 @@ process VT_DECOMPOSE { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::vt-0.57721-h17a1952_6" : null ) + conda "bioconda::vt-0.57721-h17a1952_6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/vt:0.57721--h17a1952_6' : 'quay.io/biocontainers/vt:0.57721--h17a1952_6' }" input: - tuple val(meta), path(vcf) + tuple val(meta), path(vcf) output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "*.stats" , emit: stats + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when + task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def vcf_decompressed = vcf.baseName.minus(".gz") - """ - gzip -d $vcf -c > ${vcf_decompressed} - # GQ is a float when empty which can happen with some tools like freebayes - this is a fix - sed -i -E 's/(##FORMAT= ${vcf_decompressed} + # GQ is a float when empty which can happen with some tools like freebayes - this is a fix + sed -i -E 's/(##FORMAT= ${prefix}.stats gzip ${prefix}.vcf - cat <<-END_VERSIONS > versions.yml - "${task.process}": - vt decompose: \$(vt decompose -? 2>&1 | head -n1 | sed 's/^.*decompose //; s/ .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vt decompose: \$(vt decompose -? 2>&1 | head -n1 | sed 's/^.*decompose //; s/ .*\$//') + END_VERSIONS + """ } \ No newline at end of file diff --git a/modules/local/vt/normalize/main.nf b/modules/local/vt/normalise/main.nf similarity index 83% rename from modules/local/vt/normalize/main.nf rename to modules/local/vt/normalise/main.nf index 8798de2..e7bb533 100644 --- a/modules/local/vt/normalize/main.nf +++ b/modules/local/vt/normalise/main.nf @@ -1,18 +1,19 @@ -process VT_NORMALIZE { +process VT_NORMALISE { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::vt-0.57721-h17a1952_6" : null ) + conda "bioconda::vt-0.57721-h17a1952_6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/vt:0.57721--h17a1952_6' : 'quay.io/biocontainers/vt:0.57721--h17a1952_6' }" input: tuple val(meta), path(vcf) - path fasta + tuple val(meta1), path(fasta) output: tuple val(meta), path("*.vcf.gz"), emit: vcf + path "*.stats" , emit: stats path "versions.yml" , emit: versions when: diff --git a/modules/local/vt/normalize/meta.yml b/modules/local/vt/normalise/meta.yml similarity index 81% rename from modules/local/vt/normalize/meta.yml rename to modules/local/vt/normalise/meta.yml index 32bf1b6..5d42373 100644 --- a/modules/local/vt/normalize/meta.yml +++ b/modules/local/vt/normalise/meta.yml @@ -1,12 +1,12 @@ -name: vt_normalize -description: Normalize VCF files +name: vt_normalise +description: normalise VCF files keywords: - - normalize + - normalise - VCF - variant calling tools: - decompose: - description: Normalize variants in a VCF file. + description: normalise variants in a VCF file. homepage: https://genome.sph.umich.edu/wiki/Vt documentation: https://genome.sph.umich.edu/wiki/Vt tool_dev_url: https://github.com/atks/vt @@ -21,7 +21,7 @@ input: e.g. 
[ id:'test', single_end:false ] - vcf: type: file - description: The VCF/BCF file to be normalized + description: The VCF/BCF file to be normalised pattern: "*.{vcf.gz,vcf,bcf}" - fasta: type: file @@ -40,7 +40,7 @@ output: pattern: "versions.yml" - vcf: type: file - description: Normalized VCF file + description: normalised VCF file pattern: "*.{vcf.gz}" authors: diff --git a/nextflow.config b/nextflow.config index 63b495a..bd503e4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,7 @@ params { save_unaligned = false save_align_intermeds = false hisat2_build_memory = null + splicesites = null remove_duplicates = false save_mapped = false // Mapped BAMs not saved @@ -61,16 +62,20 @@ params { // Variant calling no_intervals = false intervals = null - nucleotides_per_second = 1000 + nucleotides_per_second = 200000 // Default interval size germline_resource = null germline_resource_tbi = null known_snps = null known_snps_tbi = null pon = null pon_tbi = null - actionablepanel = null - highconfidence = null - knownhot = null + sage_actionablepanel = null + sage_highconfidence = null + sage_knownhotspots = null + sage_ensembl_dir = null + sage_custom_args = false + joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling + // GATK intervallist parameters @@ -341,15 +346,20 @@ includeConfig 'conf/modules/alignment/alignment_to_fastq.config' // preprocessing includeConfig 'conf/modules/gatk4_preprocessing/markduplicates.config' +includeConfig 'conf/modules/gatk4_preprocessing/splitncigarreads.config' includeConfig 'conf/modules/gatk4_preprocessing/prepare_recalibration.config' includeConfig 'conf/modules/gatk4_preprocessing/recalibrate.config' // variant calling includeConfig 'conf/modules/variant_calling/freebayes.config' includeConfig 'conf/modules/variant_calling/strelka.config' +includeConfig 'conf/modules/variant_calling/manta.config' includeConfig 'conf/modules/variant_calling/mutect2.config' includeConfig 'conf/modules/variant_calling/sage.config' +// normalisation +includeConfig 'conf/modules/normalise/vt.config' + // annotate includeConfig 'conf/modules/annotate/annotate.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 6386b13..77e8ee5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -72,17 +72,11 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" + "save_mapped": { + "type": "boolean", + "fa_icon": "fas fa-download", + "description": "Save mapped files.", + "help_text": "If the parameter `--split-fastq` is used, the sharded bam files are merged and converted to CRAM before saving them." 
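[Aside] For orientation, a minimal sketch of how the new parameters added in the nextflow.config hunk above might be supplied in a user config; all paths and the extra SAGE argument below are hypothetical placeholders, not values taken from the pipeline, and sage_custom_args is simply appended verbatim by sage.config:

    // my_params.config (hypothetical), passed with -c; parameter names are the ones defined above
    params {
        tools                = 'sage,mutect2,strelka,manta'
        joint_mutect2        = true                                  // patient-wise multi-sample Mutect2
        splicesites          = '/refs/genes.splicesites.txt'         // placeholder; required for HISAT2
        sage_highconfidence  = '/refs/HighConfidence.38.bed'         // placeholder path
        sage_actionablepanel = '/refs/ActionableCodingPanel.38.bed'  // placeholder path
        sage_knownhotspots   = '/refs/KnownHotspots.38.vcf.gz'       // placeholder path
        sage_ensembl_dir     = '/refs/ensembl_data_cache'            // placeholder path
        sage_custom_args     = '-min_tumor_qual 40'                  // hypothetical extra flag, passed through as-is
    }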
}, "save_bam_mapped": { "type": "boolean", @@ -131,6 +125,18 @@ "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.", "hidden": true }, + "hisat2_index": { + "type": "string", + "description": "Path to STAR index folder or compressed file (tar.gz)", + "help_text": "This parameter can be used if there is an pre-defined STAR index available. You can either give the full path to the index directory or a compressed file in tar.gz format." + }, + "splicesites": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "fa_icon": "fas fa-hand-scissors", + "description": "Splice sites file required for HISAT2." + }, "star_index": { "type": "string", "description": "Path to STAR index folder or compressed file (tar.gz)", @@ -436,66 +442,9 @@ "type": "string", "default": "bwa-mem", "fa_icon": "fas fa-puzzle-piece", - "enum": ["bwa-mem", "bwa-mem2", "dragmap"], + "enum": ["bwa-mem", "bwa-mem2", "dragmap", "sentieon-bwamem"], "description": "Specify aligner to be used to map reads to reference genome.", - "help_text": "`Rnadnavar` will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf `DragMap` is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. See [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode) for more info.\n", - "hidden": true - }, - "star_index": { - "type": "string", - "description": "Path to STAR index folder or compressed file (tar.gz)", - "help_text": "This parameter can be used if there is an pre-defined STAR index available. You can either give the full path to the index directory or a compressed file in tar.gz format." - }, - "star_twopass": { - "type": "boolean", - "description": "Enable STAR 2-pass mapping mode.", - "help_text": "This parameter enables STAR to perform 2-pass mapping. Default true." - }, - "star_ignore_sjdbgtf": { - "type": "boolean", - "description": "Do not use GTF file during STAR index buidling step", - "help_text": "Do not use parameter --sjdbGTFfile during the STAR genomeGenerate process." - }, - "star_max_memory_bamsort": { - "type": "integer", - "default": 0, - "description": "Option to limit RAM when sorting BAM file. Value to be specified in bytes. If 0, will be set to the genome index size.", - "help_text": "This parameter specifies the maximum available RAM (bytes) for sorting BAM during STAR alignment." - }, - "star_bins_bamsort": { - "type": "integer", - "default": 50, - "description": "Specifies the number of genome bins for coordinate-sorting", - "help_text": "This parameter specifies the number of bins to be used for coordinate sorting during STAR alignment step." - }, - "star_max_collapsed_junc": { - "type": "integer", - "default": 1000000, - "description": "Specifies the maximum number of collapsed junctions" - }, - "hisat2_index": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-bezier-curve", - "description": "Path to directory or tar.gz archive for pre-built HISAT2 index." 
- }, - "splicesites": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "fa_icon": "fas fa-hand-scissors", - "description": "Splice sites file required for HISAT2." - }, - "seq_center": { - "type": "string", - "description": "Sequencing center information to be added to read group of BAM files.", - "help_text": "This parameter is required for creating a proper BAM header to use in the downstream analysis of GATK. " - }, - "seq_platform": { - "type": "string", - "default": "illumina", - "description": "Specify the sequencing platform used", - "help_text": "This parameter is required for creating a proper BAM header to use in the downstream analysis of GATK. " + "help_text": "Sarek will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf DragMap is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. For more info see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)." }, "save_unaligned": { "type": "boolean", @@ -513,34 +462,7 @@ } } }, - "preprocessing": { - "title": "Preprocessing", - "type": "object", - "description": "Configure preprocessing tools", - "default": "", - "fa_icon": "fas fa-toolbox", - "properties": { - "aligner": { - "type": "string", - "default": "bwa-mem", - "fa_icon": "fas fa-puzzle-piece", - "enum": ["bwa-mem", "bwa-mem2", "dragmap", "sentieon-bwamem"], - "description": "Specify aligner to be used to map reads to reference genome.", - "help_text": "Sarek will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf DragMap is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. For more info see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)." - }, - "save_mapped": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Save mapped files.", - "help_text": "If the parameter `--split-fastq` is used, the sharded bam files are merged and converted to CRAM before saving them." - }, - "save_output_as_bam": { - "type": "boolean", - "description": "Saves output from mapping (if `--save_mapped`), Markduplicates & Baserecalibration as BAM file instead of CRAM", - "fa_icon": "fas fa-download" - } - } - }, + "variant_calling": { "title": "Variant calling", "type": "object", @@ -580,24 +502,41 @@ "help_text": "If none provided, will be generated automatically from the PON bgzipped VCF file.", "hidden": true }, - "highconfidence": { + "joint_mutect2": { + "type": "boolean", + "fa_icon": "fas fa-angle-double-right", + "description": "Runs Mutect2 in joint (multi-sample) mode for better concordance among variant calls of tumor samples from the same patient. Mutect2 outputs will be stored in a subfolder named with patient ID under `variant_calling/mutect2/` folder. Only a single normal sample per patient is allowed. Tumor-only mode is also supported." 
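[Aside] The ext.when clauses used throughout the module configs in this patch all rely on the same comma-split convention over params.tools and params.skip_tools. A small Groovy sketch of how such a clause evaluates, with illustrative values only:

    // mirrors e.g. ext.when = { params.tools && params.tools.split(',').contains('mutect2') }
    def params = [tools: 'sage,mutect2,strelka', skip_tools: 'splitncigar']
    assert params.tools && params.tools.split(',').contains('mutect2')                        // tool requested -> module runs
    assert !(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))  // not in skip list -> not skipped
    assert params.skip_tools.split(',').contains('splitncigar')                               // splitncigar would be skipped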
+ }, + "sage_highconfidence": { "type": "string", "fa_icon": "fas fa-file", "description": "Bed file with known high confidence used as input in Sage variant caller", "hidden": true }, - "actionablepanel": { + "sage_actionablepanel": { "type": "string", "fa_icon": "fas fa-file", "description": "Bed file with ac actionable list of variants used as input in Sage variant caller", "hidden": true }, - "knownhot": { + "sage_knownhotspots": { "type": "string", "fa_icon": "fas fa-file", "description": "Known hotspots used as input in Sage variant caller", "hidden": true }, + "sage_ensembl_dir": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Directory to ensembl cache for SAGE", + "hidden": true + }, + "sage_custom_args": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Custom parameters for SAGE", + "hidden": true + }, "ignore_soft_clipped_bases": { "type": "boolean", "fa_icon": "fas fa-ban", @@ -1018,9 +957,6 @@ { "$ref": "#/definitions/variant_calling" }, - { - "$ref": "#/definitions/preprocessing" - }, { "$ref": "#/definitions/annotation" }, diff --git a/subworkflows/local/bam_variant_calling/main.nf b/subworkflows/local/bam_variant_calling/main.nf index ed175c8..8ed8986 100644 --- a/subworkflows/local/bam_variant_calling/main.nf +++ b/subworkflows/local/bam_variant_calling/main.nf @@ -42,11 +42,10 @@ workflow BAM_VARIANT_CALLING { // cram_variant_calling_status = cram_variant_calling.branch{ normal: it[0].status == 0 - tumor: it[0].status == 1 - rna: it[0].status == 2 + tumor: it[0].status >= 1 // DNA and RNA should NOT have same sample id } - // All Germline samples + // All Germline samples cram_variant_calling_normal_to_cross = cram_variant_calling_status.normal.map{ meta, cram, crai -> [ meta.patient, meta, cram, crai ] } // All tumor samples @@ -119,18 +118,18 @@ workflow BAM_VARIANT_CALLING { // Gather vcf files for annotation and QC - vcf_to_normalize = Channel.empty() - vcf_to_normalize = vcf_to_normalize.mix(BAM_VARIANT_CALLING_SOMATIC.out.vcf_all) + vcf_to_normalise = Channel.empty() + vcf_to_normalise = vcf_to_normalise.mix(BAM_VARIANT_CALLING_SOMATIC.out.vcf_all) // QC - VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_normalize, intervals_bed_combined) + VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_normalise, intervals_bed_combined) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect{ meta, stats -> stats }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect{ meta, counts -> counts }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> qual }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> summary }) - CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_normalize) + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_normalise, "variantcalled") // Gather used variant calling softwares versions versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC.out.versions) @@ -140,7 +139,7 @@ workflow BAM_VARIANT_CALLING { emit: cram_variant_calling_pair = cram_variant_calling_pair - vcf_to_normalize = vcf_to_normalize + vcf_to_normalise = vcf_to_normalise contamination_table = BAM_VARIANT_CALLING_SOMATIC.out.contamination_table_mutect2 segmentation_table = BAM_VARIANT_CALLING_SOMATIC.out.segmentation_table_mutect2 artifact_priors = BAM_VARIANT_CALLING_SOMATIC.out.artifact_priors_mutect2 diff --git a/subworkflows/local/bam_variant_calling_somatic_sage/main.nf b/subworkflows/local/bam_variant_calling_somatic_sage/main.nf index 1a7bdf8..bf82393 100644 --- 
a/subworkflows/local/bam_variant_calling_somatic_sage/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_sage/main.nf @@ -25,7 +25,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_SAGE { // Move num_intervals to meta map and reorganize channel for SAGE module .map{ meta, cram1, crai1, cram2, crai2, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram1, crai1, cram2, crai2, intervals ]} - SAGE(cram_intervals, fasta, fasta_fai) + SAGE(cram_intervals, fasta, fasta_fai, dict) BCFTOOLS_SORT(SAGE.out.vcf) diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf index 45bf5ce..8c9c70f 100644 --- a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf @@ -6,6 +6,7 @@ include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { STRELKA_SOMATIC } from '../../../modules/nf-core/strelka/somatic/main' workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { @@ -22,7 +23,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { // Combine cram and intervals for spread and gather strategy cram_intervals = cram.combine(intervals) // Move num_intervals to meta map - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals, intervals_index ] } + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals_gz_tbi, num_intervals -> [ meta + [ num_intervals:num_intervals ], normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals_gz_tbi[0], intervals_gz_tbi[1] ] } STRELKA_SOMATIC(cram_intervals, fasta, fasta_fai ) @@ -52,12 +53,16 @@ workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { // add variantcaller to meta map and remove no longer necessary field: num_intervals .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + // Merge SNVs and indels + MERGE_STRELKA(vcf, dict) + versions = versions.mix(MERGE_STRELKA_SNVS.out.versions) versions = versions.mix(MERGE_STRELKA_INDELS.out.versions) + versions = versions.mix(MERGE_STRELKA.out.versions) versions = versions.mix(STRELKA_SOMATIC.out.versions) emit: - vcf + vcf = MERGE_STRELKA.out.vcf versions } \ No newline at end of file diff --git a/subworkflows/local/channel_variant_calling_create_csv/main.nf b/subworkflows/local/channel_variant_calling_create_csv/main.nf index 71383ea..f3a43ce 100644 --- a/subworkflows/local/channel_variant_calling_create_csv/main.nf +++ b/subworkflows/local/channel_variant_calling_create_csv/main.nf @@ -5,6 +5,7 @@ workflow CHANNEL_VARIANT_CALLING_CREATE_CSV { take: vcf_to_annotate // channel: [mandatory] meta, vcf + csv_name main: // Creating csv files to restart from this step @@ -14,6 +15,6 @@ workflow CHANNEL_VARIANT_CALLING_CREATE_CSV { variantcaller = meta.variantcaller status = meta.status vcf = "${params.outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}" - ["variantcalled.csv", "patient,sample,status,variantcaller,vcf\n${patient},${sample},${status},${variantcaller},${vcf}\n"] + ["${csv_name}.csv", 
"patient,sample,status,variantcaller,vcf\n${patient},${sample},${status},${variantcaller},${vcf}\n"] } } \ No newline at end of file diff --git a/subworkflows/local/normalize_vcf_variants.nf b/subworkflows/local/normalize_vcf_variants.nf deleted file mode 100644 index aa53a2b..0000000 --- a/subworkflows/local/normalize_vcf_variants.nf +++ /dev/null @@ -1,50 +0,0 @@ -// -// NORMALIZATION OF VCF VARIANTS WITH VT -// - -include { VT_DECOMPOSE } from '../../modules/local/vt/decompose/main' -include { VT_NORMALIZE } from '../../modules/local/vt/normalize/main' - -workflow NORMALIZE { - take: - tools - vcf - fasta - ch_input_sample - - main: - ch_versions = Channel.empty() - ch_vcf_norm = Channel.empty() - - if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalize'] ) { - if (params.step == 'normalize') - // TODO - test this - vcf_to_normalize = ch_input_sample - else { - vcf_to_normalize = vcf - } - vcf_to_normalize.dump(tag:'[STEP4] vcf_to_normalize') - if (tools.split(',').contains('normalise') | tools.split(',').contains('normalize') ) { - ch_vcf_decomp = Channel.empty() - ch_vcf_norm = Channel.empty() - // Separate variantss - VT_DECOMPOSE(vcf) - - ch_vcf_decomp = ch_vcf_decomp.mix(VT_DECOMPOSE.out.vcf) - ch_versions = ch_versions.mix(VT_DECOMPOSE.out.versions.first()) - - // Normalize variants - VT_NORMALIZE(ch_vcf_decomp, - fasta) - - ch_vcf_norm = ch_vcf_norm.mix(VT_NORMALIZE.out.vcf) - ch_versions = ch_versions.mix(VT_NORMALIZE.out.versions.first()) - ch_vcf_norm.dump(tag:'[STEP4] vcf_normalized') - } - } - - emit: - vcf = ch_vcf_norm // channel: [ [meta], vcf ] - versions = ch_versions // channel: [ versions.yml ] - -} diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf deleted file mode 100644 index b63e50b..0000000 --- a/subworkflows/local/pair_variant_calling.nf +++ /dev/null @@ -1,206 +0,0 @@ -// -// PAIRED VARIANT CALLING -// -include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' -include { RUN_MANTA_SOMATIC } from '../nf-core/variantcalling/manta/somatic/main.nf' -include { RUN_FREEBAYES as RUN_FREEBAYES_SOMATIC } from '../nf-core/variantcalling/freebayes/main.nf' -include { RUN_SAGE as RUN_SAGE_SOMATIC } from '../nf-core/variantcalling/sage/main.nf' -include { RUN_STRELKA_SOMATIC } from '../nf-core/variantcalling/strelka/somatic/main.nf' - -workflow PAIR_VARIANT_CALLING { - take: - tools // Mandatory, list of tools to apply - cram_pair // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - germline_resource // channel: [optional] germline_resource - germline_resource_tbi // channel: [optional] germline_resource_tbi - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped - panel_of_normals // channel: [optional] panel_of_normals - panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi - highconfidence - actionablepanel - knownhot - ensbl_sage - skip_tools - - main: - - ch_versions = Channel.empty() - - manta_vcf = Channel.empty() - strelka_vcf = Channel.empty() - mutect2_vcf = Channel.empty() - 
freebayes_vcf = Channel.empty() - sage_vcf = Channel.empty() - - // Remap channel with intervals - cram_pair_intervals = cram_pair.combine(intervals) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals, num_intervals -> - // If no interval file provided (0) then add empty list - intervals_new = num_intervals == 0 ? [] : intervals - - [[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id - ], - normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new] - } - cram_pair_intervals.dump(tag:'[STEP3] variant_calling_pairs_with_intervals') - // Remap channel with gzipped intervals + indexes - cram_pair_intervals_gz_tbi = cram_pair.combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_tbi, num_intervals -> - - //If no interval file provided (0) then add empty list - bed_new = num_intervals == 0 ? [] : bed_tbi[0] - tbi_new = num_intervals == 0 ? [] : bed_tbi[1] - - [[ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id - ], - normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new] - - } - if (tools.split(',').contains('manta')) { - // MANTA - RUN_MANTA_SOMATIC( - cram_pair_intervals_gz_tbi, - dict, - fasta, - fasta_fai - ) - manta_vcf = RUN_MANTA_SOMATIC.out.manta_vcf - manta_candidate_small_indels_vcf = RUN_MANTA_SOMATIC.out.manta_candidate_small_indels_vcf - manta_candidate_small_indels_vcf_tbi = RUN_MANTA_SOMATIC.out.manta_candidate_small_indels_vcf_tbi - ch_versions = ch_versions.mix(RUN_MANTA_SOMATIC.out.versions) - } - if (tools.split(',').contains('strelka')) { - // STRELKA - if (tools.split(',').contains('manta')) { - cram_pair_strelka = cram_pair.join(manta_candidate_small_indels_vcf) - .join(manta_candidate_small_indels_vcf_tbi) - .combine(intervals_bed_gz_tbi) - .map{ - meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_tbi, num_intervals -> - //If no interval file provided (0) then add empty list - bed_new = num_intervals <= 1 ? [] : bed_tbi[0] - tbi_new = num_intervals <= 1 ? 
[] : bed_tbi[1] - - [ - [ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: num_intervals, - patient: meta.patient, - status: meta.status, - tumor_id: meta.tumor_id, - alleles: meta.alleles - ], - normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new - ] - } - } else { - cram_pair_strelka = cram_pair_intervals_gz_tbi.map{ - meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], bed, tbi] - } - } - RUN_STRELKA_SOMATIC( - cram_pair_strelka, - dict, - fasta, - fasta_fai - ) - strelka_vcf = Channel.empty().mix(RUN_STRELKA_SOMATIC.out.strelka_vcf) - ch_versions = ch_versions.mix(RUN_STRELKA_SOMATIC.out.versions) - } - if (tools.split(',').contains('strelka')) { - // FREEBAYES - RUN_FREEBAYES_SOMATIC( - cram_pair_intervals, - dict, - fasta, - fasta_fai - ) - - freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf - ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions) - } - if (tools.split(',').contains('sage')) { - // SAGE - RUN_SAGE_SOMATIC( - cram_pair_intervals, - dict, - fasta, - fasta_fai, - highconfidence, - actionablepanel, - knownhot, - ensbl_sage - ) - sage_vcf = RUN_SAGE_SOMATIC.out.sage_vcf - ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions) - } - if (tools.split(',').contains('mutect2')) { - // MUTECT2 - cram_pair_mutect2 = cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - if (meta.num_intervals == 1){ - [meta, [normal_cram[0], tumor_cram[0]], [normal_crai, tumor_crai], intervals] - } else{ - [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals]} - } - cram_pair_mutect2.dump(tag:'[STEP3] variant_calling_pairs_with_intervals - mutect2') - // TODO: add BCFTOOLS call for mutect2 - GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING( - cram_pair_mutect2, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi, - skip_tools, - null, // contamination table from previous mutect2 run - null, // segmentation table from previous mutect2 run - null // orientation from previous mutect2 run - ) - - mutect2_vcf = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.filtered_vcf - contamination_table = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.contamination_table - segmentation_table = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.segmentation_table - artifact_priors = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.artifact_priors - ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) - } else { - contamination_table = Channel.empty() - segmentation_table = Channel.empty() - artifact_priors = Channel.empty() - } - - - emit: - manta_vcf = manta_vcf - strelka_vcf = strelka_vcf - freebayes_vcf = freebayes_vcf - sage_vcf = sage_vcf - mutect2_vcf = mutect2_vcf - contamination_table = contamination_table - segmentation_table = segmentation_table - artifact_priors = artifact_priors - - versions = ch_versions -} diff --git a/subworkflows/local/prepare_recalibration_csv.nf b/subworkflows/local/prepare_recalibration_csv.nf deleted file mode 100644 index e2656fc..0000000 --- a/subworkflows/local/prepare_recalibration_csv.nf +++ /dev/null @@ -1,41 +0,0 @@ -// -// PREPARE_RECALIBRATION_CSV -// - -workflow PREPARE_RECALIBRATION_CSV { - take: - cram_table_bqsr // channel: [mandatory] meta, cram, crai, table - skip_tools - - main: - // Creating csv files to restart from this step - if 
(!(skip_tools && (skip_tools.split(',').contains('markduplicates')))) { - cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, cram, crai, table -> - patient = meta.patient - id = meta.id - lane = meta.lane - sample = meta.sample - status = meta.status - suffix_aligned = params.save_output_as_bam ? "bam" : "cram" - suffix_index = params.save_output_as_bam ? "bam.bai" : "cram.crai" - cram = status <2 ? "${params.outdir}/preprocessing/markduplicates/${patient}/${id}/${cram.baseName}.${suffix_aligned}" : "${params.outdir}/preprocessing/splitncigar/${patient}/${id}/${cram.baseName}.${suffix_aligned}" - crai = status <2 ? "${params.outdir}/preprocessing/markduplicates/${patient}/${id}/${crai.baseName.minus(".cram")}.${suffix_index}" : "${params.outdir}/preprocessing/splitncigar/${patient}/${id}/${crai.baseName.minus(".cram")}.${suffix_index}" - table = "${params.outdir}/preprocessing/recal_table/${patient}/${id}/${id}.recal.table" - ["markduplicates.csv", "patient,status,sample,lane,fastq_1,fastq_2,bam,bai,cram,crai,table,vcf\n${patient},${status},${sample},${lane},,,,,${cram},${crai},${table},\n"] - } - } else { - cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, cram, crai, table -> - patient = meta.patient - id = meta.id - lane = meta.lane - sample = meta.sample - status = meta.status - suffix_aligned = params.save_output_as_bam ? "bam" : "cram" - suffix_index = params.save_output_as_bam ? "bam.bai" : "cram.crai" - cram = status <2 ? "${params.outdir}/preprocessing/mapped/${patient}/${id}/${cram.baseName}.${suffix_aligned}" : "${params.outdir}/preprocessing/splitncigar/${patient}/${id}/${cram.baseName}.${suffix_aligned}" - crai = status <2 ? 
"${params.outdir}/preprocessing/mapped/${patient}/${id}/${crai.baseName.minus(".cram")}.${suffix_index}" : "${params.outdir}/preprocessing/splitncigar/${patient}/${id}/${crai.baseName.minus(".cram")}.${suffix_index}" - table = "${params.outdir}/preprocessing/${patient}/${id}/recal_table/${id}.recal.table" - ["sorted.csv", "ppatient,status,sample,lane,fastq_1,fastq_2,bam,bai,cram,crai,table,vcf\n${patient},${status},${sample},${lane},,,,,${cram},${crai},${table},\n"] - } - } -} diff --git a/subworkflows/local/variantcalling_csv.nf b/subworkflows/local/variantcalling_csv.nf deleted file mode 100644 index 99e9beb..0000000 --- a/subworkflows/local/variantcalling_csv.nf +++ /dev/null @@ -1,18 +0,0 @@ -// -// VARIANTCALLING_CSV -// - -workflow VARIANTCALLING_CSV { - take: - vcf_to_annotate // channel: [mandatory] meta, vcf - - main: - // Creating csv files to restart from this step - vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf -> - patient = meta.patient - sample = meta.id - variantcaller = meta.variantcaller - vcf = "${params.outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}" - ["variantcalled.csv", "patient,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"] - } -} diff --git a/subworkflows/local/vcf_normalise/main.nf b/subworkflows/local/vcf_normalise/main.nf new file mode 100644 index 0000000..0d200f7 --- /dev/null +++ b/subworkflows/local/vcf_normalise/main.nf @@ -0,0 +1,50 @@ +// +// Normalise VCFs with VT +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// VT steps +include { VT_DECOMPOSE } from '../../../modules/local/vt/decompose/main' +include { VT_NORMALISE } from '../../../modules/local/vt/normalise/main' +// Create samplesheet to restart from different steps +include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../channel_variant_calling_create_csv/main' + + +workflow VCF_NORMALISE { + take: + vcf_to_normalise + fasta + input_sample + + main: + version = Channel.empty() + vcf_to_consensus = Channel.empty() + + if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalise'] ) { + + if (params.step == 'normalise') vcf_to_normalise = input_sample + + vcf_decomposed = Channel.empty() + // Separate variantss + VT_DECOMPOSE(vcf_to_normalise) + + vcf_decomposed = vcf_decomposed.mix(VT_DECOMPOSE.out.vcf) + version = version.mix(VT_DECOMPOSE.out.versions.first()) + + // Normalise variants + VT_NORMALISE(vcf_decomposed, + fasta) + + vcf_to_consensus = vcf_to_consensus.mix(VT_NORMALISE.out.vcf) + version = version.mix(VT_NORMALISE.out.versions.first()) + + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_normalise, "variantcallednormalised") + + + } + + emit: + vcf = vcf_to_consensus // channel: [ [meta], vcf ] + versions = version // channel: [ versions.yml ] + +} From 43e9f79cb5da6cc49bc6821806eadb6e58bcb42d Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 30 Aug 2023 17:54:48 +0100 Subject: [PATCH 29/56] Fixed star config read groups and SM --- conf/modules/alignment/bam_align.config | 1 + subworkflows/local/bam_align/main.nf | 2 +- workflows/rnadnavar.nf | 16 ++++++++++++---- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/conf/modules/alignment/bam_align.config b/conf/modules/alignment/bam_align.config index c1e5094..2a1e390 100644 --- a/conf/modules/alignment/bam_align.config +++ 
b/conf/modules/alignment/bam_align.config @@ -102,6 +102,7 @@ process { // bam_align '--outFilterMatchNminOverLread 0.33', '--outFilterScoreMinOverLread 0.33', '--twopass1readsN -1', + '--outSAMattrRGline \'ID:${meta.read_group}\' \'SM:${meta.patient}_${meta.sample}\'', params.save_unaligned ? '--outReadsUnmapped Fastx' : '', params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '', params.star_twopass ? '--twopassMode Basic' : '', diff --git a/subworkflows/local/bam_align/main.nf b/subworkflows/local/bam_align/main.nf index ea3c073..368195e 100644 --- a/subworkflows/local/bam_align/main.nf +++ b/subworkflows/local/bam_align/main.nf @@ -15,7 +15,6 @@ include { FASTQ_ALIGN_STAR } from '../../nf-core/fa include { BAM_MERGE_INDEX_SAMTOOLS } from '../bam_merge_index_samtools/main' // Create samplesheets to restart from mapping include { CHANNEL_ALIGN_CREATE_CSV } from '../channel_align_create_csv/main' - // MODULES // Run FASTQC include { FASTQC } from '../../../modules/nf-core/fastqc/main' @@ -144,6 +143,7 @@ workflow BAM_ALIGN { [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ] }.groupTuple() bam_mapped_dna.dump(tag:"bam_mapped_dna") + reads_for_alignment_status.rna.dump(tag:"reads_for_alignment_status.rna") // RNA will be aligned with STAR // Run STAR diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 6462e15..8a08bac 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -13,7 +13,11 @@ def summary_params = paramsSummaryMap(workflow) // Print parameter summary log to screen log.info logo + paramsSummaryLog(workflow) + citation -WorkflowRnadnavar.initialise(params, log) +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ // Check input path parameters to see if they exist def checkPathParamList = [ @@ -80,7 +84,9 @@ input_sample = ch_from_samplesheet def flowcell = flowcellLaneFromFastq(fastq_1) // Don't use a random element for ID, it breaks resuming def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - + if (meta.status >= 2) { // STAR does not need '@RG' + read_group = "ID:${flowcell}.${meta.sample}.${meta.lane} ${CN}PU:${meta.lane} SM:${meta.patient}_${meta.sample} LB:${meta.sample} DS:${params.fasta} PL:${params.seq_platform}" + } meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] @@ -96,7 +102,9 @@ input_sample = ch_from_samplesheet meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - + if (meta.status >= 2) { // STAR does not need '@RG' + read_group = "ID:${meta.sample}.${meta.lane} ${CN}PU:${meta.lane} SM:${meta.patient}_${meta.sample} LB:${meta.sample} DS:${params.fasta} PL:${params.seq_platform}" + } meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] From a12f72cee05538852288157ef8d1915d9c44796d Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 5 Sep 2023 22:27:37 +0100 Subject: [PATCH 30/56] Removing files --- conf/modules/variant_calling/freebayes.config | 79 ------- subworkflows/local/filtering/filtering.nf | 180 --------------- subworkflows/local/markduplicates_csv.nf | 23 -- subworkflows/local/tumor_variant_calling.nf | 217 ------------------ 4 files changed, 499 deletions(-) delete mode 100644 conf/modules/variant_calling/freebayes.config delete mode 100644 subworkflows/local/filtering/filtering.nf delete mode 100644 subworkflows/local/markduplicates_csv.nf delete mode 100644 subworkflows/local/tumor_variant_calling.nf diff --git a/conf/modules/variant_calling/freebayes.config b/conf/modules/variant_calling/freebayes.config deleted file mode 100644 index 420c6fc..0000000 --- a/conf/modules/variant_calling/freebayes.config +++ /dev/null @@ -1,79 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. - ext.when = When to run the module. ----------------------------------------------------------------------------------------- -*/ - -// FREEBAYES - -process { // freebayes - - if (params.tools && params.tools.split(',').contains('freebayes')) { - - withName: 'MERGE_FREEBAYES' { - ext.prefix = { "${meta.id}.freebayes" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'FREEBAYES' { - ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' - //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.freebayes" : "${meta.id}.freebayes.${target_bed.simpleName}" } - ext.when = { params.tools && params.tools.split(',').contains('freebayes') } - publishDir = [ - enabled: false - ] - } - - withName: 'BCFTOOLS_SORT' { - ext.prefix = { meta.num_intervals <= 1 ? meta.id + ".freebayes" : vcf.name - ".vcf" + ".sort" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*vcf.gz", - saveAs: { meta.num_intervals > 1 ? 
null : "freebayes/${meta.id}/${it}" } - ] - } - - withName : 'TABIX_VC_FREEBAYES' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // PAIR_VARIANT_CALLING - if (params.tools && params.tools.split(',').contains('freebayes')) { - withName: '.*:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_FREEBAYES:FREEBAYES' { - ext.args = "--pooled-continuous \ - --pooled-discrete \ - --genotype-qualities \ - --report-genotype-likelihood-max \ - --allele-balance-priors-off \ - --min-alternate-fraction 0.03 \ - --min-repeat-entropy 1 \ - --min-alternate-count 2 " - } - } - - withName: 'VCFFILTER' { - //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder - ext.prefix = { "${vcf.baseName.minus(".vcf")}.filtered" } - ext.args = '-f "QUAL > 1 & QUAL / AO > 10 & SAF > 0 & SAR > 0 & RPR > 1 & RPL > 1" -t PASS -F FAIL' - ext.when = { params.tools && params.tools.split(',').contains('freebayes') } - publishDir = [enabled: false] - } - } -} \ No newline at end of file diff --git a/subworkflows/local/filtering/filtering.nf b/subworkflows/local/filtering/filtering.nf deleted file mode 100644 index ec89e09..0000000 --- a/subworkflows/local/filtering/filtering.nf +++ /dev/null @@ -1,180 +0,0 @@ -// -// STEP7 : FILTERING VARIANTS -// - -include { BASIC_FILTERING } from '../../../modules/local/filter_variants' -include { BASIC_FILTERING as BASIC_FILTERING_RNA } from '../../../modules/local/filter_variants' -include { SECOND_PASS as SECOND_PASS_RNA } from './second_pass' -include { VCF2MAF } from '../../../modules/local/vcf2maf/vcf2maf/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_SECOND_PASS } from '../../../modules/nf-core/modules/samtools/merge/main' -include { RNA_FILTERING } from '../../../modules/local/rna_filtering' - - - -workflow FILTERING { - - take: - ch_input_sample - vcf_to_maf - vcf_consensus_dna // to repeat rescue consensus - vcfs_status_dna // to repeat rescue consensus - star_bams - bwa_bams - fasta - fasta_fai - dict - hisat2_index - splicesites - dbsnp - dbsnp_tbi - pon - pon_tbi - germline_resource - germline_resource_tbi - intervals - intervals_for_preprocessing - ch_interval_list_split - intervals_bed_gz_tbi - intervals_bed_combined - - - main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - - if (params.step == 'filtering') vcf_to_maf = ch_input_sample - - // Initiate resources variables - whitelist = params.whitelist ? Channel.fromPath(params.whitelist).collect() : Channel.value([]) - darned = params.darned ? Channel.fromPath(params.darned).collect() : Channel.value([]) - radar = params.radar ? Channel.fromPath(params.radar).collect() : Channel.value([]) - nat = params.nat ? Channel.fromPath(params.nat).collect() : Channel.value([]) - redi = params.redi ? Channel.fromPath(params.redi).collect() : Channel.value([]) - - - // First we transform the vcf to MAF - VCF2MAF(vcf_to_maf, - fasta) - maf_to_filter = VCF2MAF.out.maf - maf_to_filter.dump(tag:"[STEP7: FILTERING] maf input") - whitelist.dump(tag:"[STEP7: FILTERING] whitelist") - ch_versions = ch_versions.mix(VCF2MAF.out.versions) - - // BASIC FILTERING - BASIC_FILTERING(maf_to_filter, whitelist, fasta) - BASIC_FILTERING.out.maf.dump(tag:"[STEP7: FILTERING] maf filtered") - // Once this is done DNA is ready, RNA still has a 2nd PASS TODO: optional? 
- BASIC_FILTERING.out.maf.branch{ - dna: it[0].status < 2 - rna: it[0].status == 2 - }.set{maf_to_filter_status} - ch_versions = ch_versions.mix(BASIC_FILTERING.out.versions) - - // STEP 8: RNA FILTERING - TODO: make it optional and run just rna filtering - // RNA specific filtering (2nd PASS) - this fast BUT it increases the length of the pipeline considerably - - // 1 We take the previous aligned reads with star for tumor RNA and DNA normal - bwa_bams.branch{ - normal: it[0].status == 0 - tumor: it[0].status == 1 - }.set{previous_dna_alignment} - // we only need normals - dna tumour will NOT be realigned - previous_normal_alignment = previous_dna_alignment.normal.groupTuple() - // 2. Group them and merge if applicable - previous_alignment = star_bams - .mix(previous_normal_alignment) - SAMTOOLS_MERGE_SECOND_PASS(previous_alignment, fasta) - ch_versions = ch_versions.mix(SAMTOOLS_MERGE_SECOND_PASS.out.versions) - - previous_alignment_merged = SAMTOOLS_MERGE_SECOND_PASS.out.bam - .map{meta, bam -> [ - [ - id:meta.sample, - data_type:"bam", - patient:meta.patient, - sample:meta.sample, - read_group:meta.read_group, - status:meta.status - ], - bam - ] - } - previous_alignment_merged.dump(tag:"[STEP7: FILTERING] bams for realignment") - SECOND_PASS_RNA( - ch_input_sample, - maf_to_filter_status.rna, - previous_alignment_merged, - vcf_consensus_dna, - vcfs_status_dna, - fasta, - fasta_fai, - dict, - hisat2_index, - splicesites, - dbsnp, - dbsnp_tbi, - pon, - pon_tbi, - germline_resource, - germline_resource_tbi, - intervals, - intervals_for_preprocessing, - ch_interval_list_split, - intervals_bed_gz_tbi, - intervals_bed_combined - ) - ch_versions = ch_versions.mix(SECOND_PASS_RNA.out.versions) - ch_reports = ch_versions.mix(SECOND_PASS_RNA.out.reports) - - maf_to_filter = SECOND_PASS_RNA.out.maf // TODO: optional? 
- maf_to_filter.dump(tag:"[STEP7: FILTERING] maf_to_filter_rna_2pass") - BASIC_FILTERING_RNA(maf_to_filter, whitelist, fasta) - - maf_to_filter_status.rna.dump(tag:"[STEP7: FILTERING] maf_to_filter_status.rna") - BASIC_FILTERING_RNA.out.maf.dump(tag:"[STEP7: FILTERING] BASIC_FILTERING_RNA.out.maf") - - maf_to_cross_first_pass = maf_to_filter_status.rna - .map{meta, maf -> [meta.patient, meta, maf]} - maf_to_cross_second_pass = BASIC_FILTERING_RNA.out.maf - .map{meta, maf -> [meta.patient, meta, maf]} - - maf_to_cross_first_pass.dump(tag:"[STEP7: FILTERING] maf_to_cross_first_pass") - maf_to_cross_second_pass.dump(tag:"[STEP7: FILTERING] maf_to_cross_second_pass") - maf_to_cross_first_pass - .cross(maf_to_cross_second_pass).dump(tag:"[STEP7: FILTERING] maf_to_cross_crossed_pass") - - maf_crossed = maf_to_cross_first_pass - .cross(maf_to_cross_second_pass) - .map{first, second -> - def meta = [:] - meta.patient = first[0] - meta.first_id = first[1].tumor_id - meta.second_id = second[1].tumor_id - meta.status = first[1].status - meta.tumor_id = first[1].tumor_id - meta.id = first[1].tumor_id - meta.normal_id = first[1].normal_id - [meta, first[2], second[2]] - } - maf_crossed.dump(tag:"[STEP7: FILTERING] maf_crossed") -// maf_to_filter_status_dna = maf_to_filter_status.dna.map{meta, maf -> [meta, maf, file("NO_FILE.maf")]} -// maf_to_filter_status_dna.dump(tag:"[STEP7: FILTERING] maf_to_filter_status_dna") -// maf_crossed = maf_crossed.mix(maf_to_filter_status.dna) - RNA_FILTERING(maf_crossed, - fasta) - ch_versions = ch_versions.mix(RNA_FILTERING.out.versions) - - // TODO RNA PON - // TODO produce some stats DNA vs RNA, oncoprint, etc -// FINAL_REPORT(RNA_FILTERING.out.maf, -// BASIC_FILTERING.out, -// CONSENSUS.out.txt) - - - - - - emit: - versions = ch_versions // channel: [ versions.yml ] - reports = ch_reports // channel: [ versions.yml ] -} \ No newline at end of file diff --git a/subworkflows/local/markduplicates_csv.nf b/subworkflows/local/markduplicates_csv.nf deleted file mode 100644 index 0f7c671..0000000 --- a/subworkflows/local/markduplicates_csv.nf +++ /dev/null @@ -1,23 +0,0 @@ -// -// MARKDUPLICATES_CSV -// - -workflow MARKDUPLICATES_CSV { - take: - cram_markduplicates // channel: [mandatory] meta, cram, crai - - main: - // Creating csv files to restart from this step - cram_markduplicates.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, file, index -> - id = meta.id - lane = meta.lane - patient = meta.patient - sample = meta.sample - status = meta.status - suffix_aligned = params.save_output_as_bam ? "bam" : "cram" - suffix_index = params.save_output_as_bam ? 
"bam.bai" : "cram.crai" - file = "${params.outdir}/preprocessing/markduplicates/${patient}/${id}/${file.baseName}.${suffix_aligned}" - index = "${params.outdir}/preprocessing/markduplicates/${patient}/${id}/${index.baseName.minus(".cram")}.${suffix_index}" - ["markduplicates_no_table.csv", "patient,status,sample,lane,fastq_1,fastq_2,bam,bai,cram,crai,table,vcf\n${patient},${status},${sample},${lane},,,,,${file},${index},,\n"] - } -} diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf deleted file mode 100644 index ecd4c20..0000000 --- a/subworkflows/local/tumor_variant_calling.nf +++ /dev/null @@ -1,217 +0,0 @@ -// TODO: ADAPT TO RNADNAVAR -// TUMOR VARIANT CALLING -// Should be only run on patients without normal sample -// - -include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf' -include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf' -include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' -include { RUN_CONTROLFREEC_TUMORONLY } from '../nf-core/variantcalling/controlfreec/tumoronly/main.nf' -include { RUN_CNVKIT } from '../nf-core/variantcalling/cnvkit/main.nf' -include { RUN_MPILEUP } from '../nf-core/variantcalling/mpileup/main' -include { RUN_TIDDIT } from '../nf-core/variantcalling/tiddit/single/main.nf' - -workflow TUMOR_ONLY_VARIANT_CALLING { - take: - tools // Mandatory, list of tools to apply - cram_recalibrated // channel: [mandatory] cram - bwa // channel: [optional] bwa - cf_chrom_len // channel: [optional] controlfreec length file - chr_files - cnvkit_reference - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - germline_resource // channel: [optional] germline_resource - germline_resource_tbi // channel: [optional] germline_resource_tbi - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped - mappability - panel_of_normals // channel: [optional] panel_of_normals - panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi - - main: - - ch_versions = Channel.empty() - - //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config - freebayes_vcf = Channel.empty() - manta_vcf = Channel.empty() - mutect2_vcf = Channel.empty() - strelka_vcf = Channel.empty() - tiddit_vcf = Channel.empty() - - // Remap channel with intervals - cram_recalibrated_intervals = cram_recalibrated.combine(intervals) - .map{ meta, cram, crai, intervals, num_intervals -> - - //If no interval file provided (0) then add empty list - intervals_new = num_intervals == 0 ? 
[] : intervals - - [[ - data_type: meta.data_type, - id: meta.sample, - num_intervals: num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - ], - cram, crai, intervals_new] - } - - // Remap channel with gzipped intervals + indexes - cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi) - .map{ meta, cram, crai, bed_tbi, num_intervals -> - - //If no interval file provided (0) then add empty list - bed_new = num_intervals == 0 ? [] : bed_tbi[0] - tbi_new = num_intervals == 0 ? [] : bed_tbi[1] - - [[ - data_type: meta.data_type, - id: meta.sample, - num_intervals: num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - ], - cram, crai, bed_new, tbi_new] - } - - if (tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec')){ - cram_intervals_no_index = cram_recalibrated_intervals.map { meta, cram, crai, intervals -> - [meta, cram, intervals] - } - RUN_MPILEUP( - cram_intervals_no_index, - fasta - ) - - ch_versions = ch_versions.mix(RUN_MPILEUP.out.versions) - } - - if (tools.split(',').contains('controlfreec')){ - controlfreec_input = RUN_MPILEUP.out.mpileup - .map{ meta, pileup_tumor -> - [meta, [], pileup_tumor, [], [], [], []] - } - - length_file = cf_chrom_len ?: fasta_fai - RUN_CONTROLFREEC_TUMORONLY( - controlfreec_input, - fasta, - length_file, - dbsnp, - dbsnp_tbi, - chr_files, - mappability, - intervals_bed_combined - ) - - ch_versions = ch_versions.mix(RUN_CONTROLFREEC_TUMORONLY.out.versions) - } - - if(tools.split(',').contains('cnvkit')){ - cram_recalibrated_cnvkit_tumoronly = cram_recalibrated - .map{ meta, cram, crai -> - [meta, cram, []] - } - - RUN_CNVKIT ( - cram_recalibrated_cnvkit_tumoronly, - fasta, - fasta_fai, - [], - cnvkit_reference - ) - - ch_versions = ch_versions.mix(RUN_CNVKIT.out.versions) - } - - if (tools.split(',').contains('freebayes')){ - // Remap channel for Freebayes - cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals - .map{ meta, cram, crai, intervals -> - [meta, cram, crai, [], [], intervals] - } - - RUN_FREEBAYES( - cram_recalibrated_intervals_freebayes, - dict, - fasta, - fasta_fai - ) - - freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf - ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) - } - - if (tools.split(',').contains('mutect2')) { - GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING( - cram_recalibrated_intervals, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi - ) - - mutect2_vcf = GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.filtered_vcf - ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions) - } - - if (tools.split(',').contains('manta')){ - - RUN_MANTA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - dict, - fasta, - fasta_fai - ) - - manta_vcf = RUN_MANTA_TUMORONLY.out.manta_vcf - ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions) - } - - if (tools.split(',').contains('strelka')) { - - RUN_STRELKA_SINGLE( - cram_recalibrated_intervals_gz_tbi, - dict, - fasta, - fasta_fai - ) - - strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf - ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) - } - - //TIDDIT - if (tools.split(',').contains('tiddit')){ - - RUN_TIDDIT( - cram_recalibrated, - fasta, - bwa - ) - - tiddit_vcf = RUN_TIDDIT.out.tiddit_vcf - ch_versions = ch_versions.mix(RUN_TIDDIT.out.versions) - } - - - emit: - freebayes_vcf - manta_vcf - mutect2_vcf - strelka_vcf - tiddit_vcf - - 
versions = ch_versions -} From 23df38682bf586621e1447249c239d8a1804a387 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 5 Sep 2023 22:28:26 +0100 Subject: [PATCH 31/56] STAR issue fix --- modules/nf-core/star/align/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 8cb8e9a..04326c6 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -45,7 +45,7 @@ process STAR_ALIGN { def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" def seq_center = seq_center ? "'CN:$seq_center'" : "" - def attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" + def attrRG = (args.contains("--outSAMattrRGline")) ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' """ From 2a9a696d15ed10100b5f05d2d8c1698a46cc31a7 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 5 Sep 2023 22:43:46 +0100 Subject: [PATCH 32/56] Revised code for last steps of the core run. --- bin/run_consensus.R | 93 +++-- bin/run_consensus.py | 383 ------------------ .../{run_consensus.nf => consensus/main.nf} | 14 +- modules/local/maf_filtering/main.nf | 34 ++ modules/local/vcf2maf/vcf2maf/main.nf | 44 +- nextflow.config | 1 - .../local/bam_variant_calling/main.nf | 20 +- .../main.nf | 222 +++++----- .../local/bam_variant_calling_somatic/main.nf | 40 +- .../bam_variant_calling_somatic_sage/main.nf | 2 +- .../main.nf | 11 +- subworkflows/local/maf_filtering/main.nf | 33 ++ subworkflows/local/prepare_genome/main.nf | 2 +- subworkflows/local/vcf_annotate/main.nf | 65 +++ subworkflows/local/vcf_consensus/main.nf | 153 +++++++ subworkflows/local/vcf_normalise/main.nf | 7 +- workflows/rnadnavar.nf | 8 + 17 files changed, 503 insertions(+), 629 deletions(-) delete mode 100644 bin/run_consensus.py rename modules/local/{run_consensus.nf => consensus/main.nf} (72%) create mode 100644 modules/local/maf_filtering/main.nf create mode 100644 subworkflows/local/maf_filtering/main.nf create mode 100644 subworkflows/local/vcf_annotate/main.nf create mode 100644 subworkflows/local/vcf_consensus/main.nf diff --git a/bin/run_consensus.R b/bin/run_consensus.R index 256e897..aa2f4ac 100644 --- a/bin/run_consensus.R +++ b/bin/run_consensus.R @@ -1,6 +1,6 @@ #!/usr/bin/env Rscript # Date: Sun 20 Sep 2020 -# Author: Raquel Manzano - CRUK CI Caldas lab +# Author: Raquel Manzano - @RaqManzano # Script: Find overlaps between vcf # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Libraries @@ -12,7 +12,7 @@ suppressPackageStartupMessages(library(plyr)) suppressPackageStartupMessages(library(ggpubr)) suppressPackageStartupMessages(library(ComplexHeatmap)) suppressPackageStartupMessages(library(ggrepel)) - +suppressPackageStartupMessages(library(stringr)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ options(datatable.fread.input.cmd.message=FALSE) @@ -30,7 +30,6 @@ if("--help" %in% script_args) { cat("The consensusR script: Arguments: - --id=sampleID - character, sample id to put in the plots --input=input_file.maf - character, VCf/MAF file, can be specfied more than once for several 
inputs --caller=caller_name - character, caller that was used to generate input file - has to be in the SAME order as input --out_prefix=output_prefix - character, preffix for outputs @@ -69,9 +68,11 @@ vcfs <- strsplit(argsL[["input"]], split=",")[[1]] callers <- strsplit(argsL[["caller"]], split=",")[[1]] names(vcfs) <- callers +is.vcf <- grepl(x = vcfs[1], pattern = ".vcf$|.vcf.gz$", perl = T) + # output files pdf.out <- paste0(argsL$out_prefix, ".pdf") -if (grepl(x = vcfs[1], pattern = ".vcf", fixed = T)){ +if (is.vcf){ vcf.out <- paste0(argsL$out_prefix, ".vcf") } else{ vcf.out <- paste0(argsL$out_prefix, ".maf") @@ -85,7 +86,7 @@ contigs_meta <- paste0(contigs_meta$V1, collapse = "\n") callers_meta <- list() for ( c in callers){ vcf_meta <- fread(paste0("zgrep -E '##|#version' ", vcfs[c]), sep = NULL, header = F) - if (grepl(pattern = ".vcf", x = vcfs[c] )){ + if (is.vcf){ vcf_header <- fread(paste0("zgrep '#CHROM' ", vcfs[c]), sep = NULL, header = F) callers_meta[[c]] <- list(meta=paste0(vcf_meta$V1, collapse = "\n"), header=strsplit(vcf_header$V1, "\t")[[1]]) @@ -105,7 +106,7 @@ muts <- list() for(c in callers[1:length(callers)]){ v <- vcfs[c] tmp <- fread(paste0("zgrep -v '##' ", v)) - if (!grepl(x = vcfs[1], pattern = ".vcf", fixed = T)){ + if (!is.vcf){ tmp$`#CHROM` <- tmp$Chromosome tmp$POS <- tmp$Start_Position tmp$REF <- tmp$Reference_Allele @@ -135,7 +136,6 @@ for(c in callers[1:length(callers)]){ message("- Finding overlaps") # Third, we find overlaps -overlaps <- list() overlapping.vars <- data.frame(DNAchange=character(), caller=character(), FILTER=character()) for (c1 in callers){ for (c2 in callers){ @@ -143,46 +143,58 @@ for (c1 in callers){ group_name <- paste0(c1, "vs", c2) # The gap between 2 adjacent ranges is 0. hits <- GenomicRanges::findOverlaps(query = mutsGR[[c1]], subject = mutsGR[[c2]], maxgap = 0) - overlaps[[group_name]] <- hits - m.hits <- muts[[c1]][queryHits(hits)]$DNAchange - filt <- muts[[c1]][queryHits(hits)]$FILTER + dnachange.hits <- muts[[c1]][queryHits(hits)]$DNAchange + filt.hits <- muts[[c1]][queryHits(hits)]$FILTER # due to normalization we might find the same variant with different filters - these come from homopolymer regions - overlapping.vars <- rbind(overlapping.vars, data.frame(DNAchange=m.hits, caller=c1, FILTER=filt)) + overlapping.vars <- rbind(overlapping.vars, unique(data.frame(DNAchange=dnachange.hits, caller=c1, FILTER=filt.hits))) } } } # Finally, extract the set of variants that will be the consensus set overlapping.vars <- overlapping.vars[!duplicated(overlapping.vars),] -overlapping.vars <- aggregate( FILTER ~ DNAchange + caller, overlapping.vars, paste, collapse=";" ) +overlapping.vars <- as.data.table(overlapping.vars)[, .(caller = paste(caller, collapse = "|"), FILTER = paste(FILTER, collapse = "|")), by = DNAchange] +overlapping.vars <- as.data.frame(overlapping.vars) -# Set of variants that are called in at least 2 callers -con.vars <- overlapping.vars$DNAchange -#tail(sort(table(con.vars)[table(con.vars)>=2])) -con.vars.ths <- names(table(con.vars)[table(con.vars)>=2]) +# Overlaps that are adjacent, and only SNVs are removed if not DNP +overlapping.variants.count <- stringr::str_count(string = overlapping.vars$caller, pattern = stringr::fixed("|")) + 1 +names(overlapping.variants.count) <- overlapping.vars$DNAchange +overlapping.variants.count.snvs <- overlapping.variants.count[ grepl(pattern = "[0-9](A|C|G|T)>(A|C|G|T)$", x = names(overlapping.variants.count))] +overlapping.variants.count.indels <- 
overlapping.variants.count[!grepl(pattern = "[0-9](A|C|G|T)>(A|C|G|T)$", x = names(overlapping.variants.count))] -message("- There are ", prettyNum(length(con.vars.ths), big.mark = ','), " variants that are consensus") +# only snvs with exact match will be in the consensus list +con.vars.ths.snv <- names(overlapping.variants.count.snvs[overlapping.variants.count.snvs >= argsL$thr]) +# we let all indels pass as they have shown overlap +con.vars.ths.indel <- names(overlapping.variants.count.indels) +message("- There are ", prettyNum(length(con.vars.ths.snv), big.mark = ','), " SNVs that are consensus") +message("- There are ", prettyNum(length(con.vars.ths.indel), big.mark = ','), " indels that are consensus") + +con.vars.ths <- c(con.vars.ths.snv, con.vars.ths.indel) # The next steps are for the output # To keep the information from the consensus we extract the callers that called each mutation and its correspondent filters. what.caller.called <- function(row, consensus, variants){ variant <- row["DNAchange"] - var.callers <- variants[variants$DNAchange==variant,]$caller - var.callers <- paste(var.callers, collapse = "|") - filters <- variants[variants$DNAchange==variant,]$FILTER - filters <- paste(sub(pattern = ";", - replacement = ",", - x =filters), - collapse = "|") - if (var.callers == ""){ - var.callers <- row["Caller"] - filters <- row["FILTER"] + if (variant %in% consensus){ + var.callers <- variants[variants$DNAchange==variant,]$caller + var.callers <- paste(var.callers, collapse = "|") + filters <- variants[variants$DNAchange==variant,]$FILTER + filters <- paste(sub(pattern = ";", + replacement = ",", + x =filters), + collapse = "|") + if (var.callers == ""){ + var.callers <- row["Caller"] + filters <- row["FILTER"] + } + list(callers=var.callers, filters=filters) + } else { + list(callers=row["Caller"], filters=row['FILTER']) } - list(callers=var.callers, filters=filters) } - for (c in callers){ +for (c in callers){ message("- Annotating calls from ", c) values <- apply(X = muts[[c]], MARGIN = 1, FUN = what.caller.called, consensus=con.vars.ths, variants=overlapping.vars) muts[[c]] <- cbind( muts[[c]], as.data.frame(do.call(rbind, values))) @@ -201,10 +213,10 @@ simplified.filter <- sapply(all.muts$filters, FUN = function(x){ }) simplified.filter <- ifelse(is.na(simplified.filter), "PASS", simplified.filter) all.muts$FILTER_consensus <- simplified.filter -all.muts$INFO_consensus <- paste0("callers=", all.muts$callers, ";filters=", all.muts$filters, ";consensus_filter=", all.muts$FILTER_consensus) +all.muts$INFO_consensus <- paste0("callers=", all.muts$callers, ";filters=", all.muts$filters, ";consensus_filter=", all.muts$FILTER_consensus) ## write consensus -# I want to keep the ingo without duplicating the mutations +# I want to keep the info without duplicating the mutations all.muts$isconsensus <- grepl(pattern = "|", x = all.muts$callers, fixed = T) # WRITE OUTPUTS @@ -214,7 +226,7 @@ meta_consensus <- paste0('##INFO=') extra.cols <- c() for ( c in callers){ - if (grepl(x = vcfs[c], pattern = ".vcf", fixed = T)){ + if (is.vcf){ updated_meta <- paste(callers_meta[[c]]$meta, meta_consensus, sep="\n") @@ -231,8 +243,10 @@ for ( c in callers){ } write(x = updated_meta, file = vcf.out.caller, ncolumns = 1, append = F) extra_cols <- c() - if (grepl(x = vcfs[c], pattern = ".vcf", fixed = T)){ - fwrite(x = all.muts[all.muts$Caller==c,][,callers_meta[[c]]$header], + if (is.vcf){ + fields_to_write <- all.muts[all.muts$Caller==c,][,callers_meta[[c]]$header] + fields_to_write$INFO <- 
paste(fields_to_write$INFO, all.muts[all.muts$Caller==c,]$INFO_consensus, sep=";") + fwrite(x = fields_to_write, file = vcf.out.caller, append = T, sep = "\t", @@ -250,7 +264,7 @@ for ( c in callers){ # Final VCF consensus -if (grepl(x = vcfs[1], pattern = ".vcf", fixed = T)){ +if (is.vcf){ meta <- paste0("##fileformat=VCFv4.2\n##source=Consensus", length(callers), "Callers (", paste0(callers, collapse = ","), ")\n", contigs_meta, '##FILTER=\n##FILTER=\n') # we need the meta contigs and the INFO @@ -260,18 +274,19 @@ if (grepl(x = vcfs[1], pattern = ".vcf", fixed = T)){ meta <- "#version 2.4" } -# to.vcf <- all.muts[all.muts$isconsensus==T,] -if (grepl(x = vcfs[1], pattern = ".vcf", fixed = T)){ +to.vcf <- all.muts[all.muts$isconsensus==T,] +if (is.vcf){ col.out <- c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT") to.vcf$ID <- to.vcf$DNAchange to.vcf$QUAL <- "." - to.vcf$INFO <- to.vcf$INFO_consensus + to.vcf$INFO <- to.vcf$INFO_consensus to.vcf$FORMAT <- "." to.vcf$FILTER <- to.vcf$FILTER_consensus } else{ col.out <- callers_meta[[c]]$header } -to.vcf <- all.muts[,col.out] + +to.vcf <- to.vcf[,col.out][!duplicated(to.vcf),] message("- Total variants ", prettyNum(nrow(to.vcf), big.mark = ",")) message("- Variants in consensus ", prettyNum(nrow(all.muts[(!duplicated(all.muts$DNAchange) & all.muts$isconsensus==T),]), big.mark = ",")) diff --git a/bin/run_consensus.py b/bin/run_consensus.py deleted file mode 100644 index 229ca00..0000000 --- a/bin/run_consensus.py +++ /dev/null @@ -1,383 +0,0 @@ -#!/usr/bin/env python - -""" -Author: Raquel Manzano - @RaqManzano -Script: Performs a consensus with results from different variant callers -""" -import argparse -import pandas as pd -import numpy as np -import gzip - - -def argparser(): - parser = argparse.ArgumentParser(description='') - parser.add_argument("--input", "-i", help="Input VCF files, space separated", nargs='+') - parser.add_argument("--names", "-n", help="Caller names (if not given numbers will be assigned), space separated", - nargs='+') - parser.add_argument("--indel_window", help="Window to look for indels overlap", default=3) - parser.add_argument("--prefix", help="prefix for output files, e.g. {prefix}.vcf", default='consensus') - parser.add_argument("--maf", help="If input is maf select this parameter", action='store_true') - return parser.parse_args() - - -def open_vcf(vcf): - if vcf.endswith("gz"): - vcf = gzip.open(vcf, mode="rt") - else: - vcf = open(vcf, mode="r") - return vcf - - -def maf_to_pandas(maf_file): - maf = pd.read_csv(maf_file, sep="\t", comment="#") - return maf - - -def vcf_to_pandas(vcf_file): - meta = '' - variants = [] - vcf = open_vcf(vcf_file) - for line in vcf: - if line.startswith("##"): - meta += line - elif line.startswith("#"): - header = line.strip().split("\t") - else: - variants += [line.strip().split("\t")] - vcf.close() - # print(variants) - try: - df = pd.DataFrame(variants, columns=header) - except ValueError: - df = pd.read_csv(vcf_file, sep="\t", comment="#") - except UnboundLocalError: - header = '' - df = pd.read_csv(vcf_file, sep="\t", comment="#") - - return (meta, header, df) - - -def add_varid_and_type(df, caller, chrom, pos, ref, alt): - """Adds a variant id using chrom:g.posREF>ALT format""" - try: - df['DNAchange'] = df[chrom].map(str) + ":g." 
+ df[pos].map(str) + df[ref] + ">" + df[alt] - except KeyError: - pos = "Start_position" # common error in maf format - df["Start_Position"] = df["Start_position"] # to fix downstream if this typo happens - df['DNAchange'] = df[chrom].map(str) + ":g." + df[pos].map(str) + df[ref] + ">" + df[alt] - df['Type'] = np.where((df[ref].str.len() == 1) & (df[alt].str.len() == 1), 'SNV', - np.where((df[ref].str.len() == df[alt].str.len()), "MNP", - np.where((df[ref].str.len() > df[alt].str.len()), "DEL", "INS") - ) - ) - df["Caller"] = caller - calls = {"SNVs": df[df["Type"] == "SNV"], - "MNPs": df[df["Type"] == "MNP"], - "INDELs": df[df["Type"].str.contains("INS|DEL")]} - return calls - - -def assign_callers_names(caller_names, vcfs): - """ - Assign a name to a caller - :param caller_names: - :param vcfs: - :return: - """ - if not caller_names: - caller_names = ["CALLER" + str(idx + 1) for idx, _ in enumerate(vcfs)] - print("- Callers names: " + ' - '.join(caller_names)) - return caller_names - - -def consensus_exact_match(list_of_calls, threshold=2): - """ - Consensus for SNVs-like variants, exact match will be expected. - :param list_of_calls: a list of pandas dataframes from a VCF - :param threshold: minimum number of callers to assume a consensus - :return: a list of pandas dataframes - """ - # extract variant id to match between call set - to_concat = [] - for df in list_of_calls: - to_concat += [df['DNAchange']] - # count how many teams we see this variant - all_snvs = pd.concat(to_concat) - # count how many teams we see this variant - all_snvs = all_snvs.value_counts().rename_axis('DNAchange').reset_index(name='consensus_count') - # if count >= threshold then we will assume is a consensus - in_consensus = all_snvs['consensus_count'] >= threshold - consensus_snvs = all_snvs['DNAchange'][in_consensus] - # feed that info back to dataframe - list_of_calls_with_consensus = [] - for calls in list_of_calls: - calls = calls.assign(consensus=calls['DNAchange'].isin(consensus_snvs)) - calls = calls.assign(overlap=np.where(calls['consensus'] == True, "ABSOLUTE", "NONE")) - calls = calls.assign(overlap_id='.') - calls = pd.merge(calls, all_snvs, on='DNAchange') - list_of_calls_with_consensus += [calls] - - return list_of_calls_with_consensus - - -def consensus_overlap(list_of_calls, caller_names, window=3, chrom='#CHROM', pos="POS", alt="ALT"): - """ - Assigns a consensus type when variants overlap in a 3bp window - :param list_of_calls: list of pandas dataframes - :param window: integer - :return: list of pandas dataframes - """ - indels = [] - for df in list_of_calls: - indels += [df] - # make sure it is sorted for iteration - all_indels = pd.concat(indels) - all_indels[pos] = all_indels[pos].astype(int) - all_indels_sorted = all_indels.sort_values([chrom, pos]) - previous_start = 0 - previous_end = 0 - previous_caller = "" - previous_filter = "" - previous_varid = "" - overlaps = [] - overlap_id = [] - for idx, row in all_indels_sorted.iterrows(): - if row['overlap'] == "ABSOLUTE": # if already consensus then there is absolute overlap - overlaps += [row['overlap']] - overlap_id += [row['overlap_id']] - else: - current_start = row[pos] - window - current_end = row[pos] + (len(row[alt]) - 1) + window - current_caller = row["Caller"] - try: - current_filter = row["FILTER"] - except KeyError: # means there is no filter - we assume pass - current_filter = "PASS" - overlap_id, overlaps = seek_overlap(current_caller, current_end, current_filter, current_start, overlap_id, - overlaps, previous_caller, 
previous_end, previous_filter, - previous_start, previous_varid) - # save current values as previous - previous_start = current_start + window - previous_end = current_end - window - previous_caller = current_caller[:] - previous_filter = current_filter[:] - previous_varid = row['DNAchange'] - all_indels_sorted['overlap'] = overlaps - all_indels_sorted['overlap_id'] = overlap_id - # back into a list - indels_list = [] - for caller in caller_names: # to keep order - indels_list += [all_indels_sorted[all_indels_sorted['Caller'] == caller]] - return indels_list - - -def seek_overlap(current_caller, current_end, current_filter, current_start, overlap_id, overlaps, previous_caller, - previous_end, previous_filter, previous_start, previous_varid): - if current_start <= previous_start and current_end >= previous_start: - if previous_end <= current_end: - # |-------| - # |--| - overlaps += ["TOTAL"] - else: - # |-------| - # |---------| - overlaps += ["PARTIAL_RIGHT"] - overlap_id += [previous_varid] - elif current_start >= previous_start and current_start <= previous_end: - if current_end >= previous_end: - # |-------| - # |-----| - overlaps += ["PARTIAL_LEFT"] - else: - # |----| - # |-------| - overlaps += ["TOTAL"] - overlap_id += [previous_varid] - else: - overlaps += ["NONE"] - overlap_id += ['.'] - if current_caller == previous_caller: - overlaps[-1] = "NONE" # this is not really an overlap but a multiallelic variant - overlap_id[-1] = '.' - # if overlap but both filters fail then they overlap but we assign a LOWQ tag - elif current_filter != "PASS" and previous_filter != "PASS" and overlaps[-1] != "NONE": - overlaps[-1] = overlaps[-1] + "_LOWQ" - return overlap_id, overlaps - - -def add_consensus_info_to_meta(meta): - new_info = [ - '##INFO=', - '##INFO=', - '##INFO=', - '##INFO=' - ] - meta += '\n'.join(new_info) + '\n' - return meta - - -def add_consensus_info_to_info(variants, variants_in_consensus): - # add new info to INFO field - for idx, row in variants.iterrows(): - if row["overlap"] == "": - row["overlap"] = "." - if row["overlap_id"] == "": - row["overlap_id"] = "." 
- info = row["INFO"] - info += ";overlap_type=" + row["overlap"] - info += ";consensus_count=" + str(row["consensus_count"]) - info += ";overlap_id=" + row["overlap_id"] - for caller, consensus_variants in variants_in_consensus.items(): - if row["DNAchange"] in consensus_variants: - if 'consensus_callers' not in info: - info += f";consensus_callers={caller}" - else: - info += f",{caller}" - if info.startswith(";"): - info = info[1:] - variants.at[idx, 'INFO'] = info - return variants - - -def add_consensus_info(calls_dict, callers_name, variants_in_consensus, chrom, pos, maf): - # put together info for one caller - caller_dict = {x: {'meta': '', 'variants': []} for x in callers_name} - for idx, caller in enumerate(callers_name): - meta = calls_dict['meta'][idx] # header - meta = add_consensus_info_to_meta(meta) - caller_set = pd.concat([calls_dict["SNVs"][idx], - calls_dict["MNPs"][idx], - calls_dict["INDELs"][idx], - ]).sort_values([chrom, pos]).reset_index() - caller = caller_set["Caller"].unique()[0] # order can get mess up with python dict - if maf: - caller_set["INFO"] = "" - caller_set = add_consensus_info_to_info(caller_set, variants_in_consensus) - caller_dict[caller]['meta'] = meta - caller_dict[caller]['variants'] = caller_set - return caller_dict - - -def write_vcf_and_intervals(caller_dict, caller_names, vcf_list, prefix, headers, chrom, pos, maf): - all_calls = [] - all_calls_txt = [] - for caller, vcf, header in zip(caller_names, vcf_list, headers): - vcf_out = f"{prefix}_{caller}.vcf" - txt_out = f"{prefix}_{caller}.txt" # a txt file with the consensus info - with open(vcf_out, "w") as out: - meta = caller_dict[caller]['meta'] - meta_contigs = [x for x in meta.split("\n") if "##contig" in x] - out.write(meta) - variants = caller_dict[caller]['variants'] - variants["ID"] = variants['DNAchange'] - if "FILTER" not in variants.columns: - variants["FILTER"] = "." - to_txt = variants[["DNAchange", "FILTER", "Caller", "overlap", - "overlap_id", "consensus_count"]] - - to_txt = to_txt.assign(consensus_callers=variants["INFO"].str.extract("consensus_callers=(\S+)")) - to_txt.to_csv(txt_out, mode="w", index=False, header=True, sep="\t") - if not maf: - variants[header].to_csv(vcf_out, mode="a", index=False, header=True, sep="\t") - all_calls += [variants] - all_calls_txt += [to_txt] - all_out = f"{prefix}.vcf" - # consensus txt - all_calls_txt = pd.concat(all_calls_txt).reset_index() - all_calls_txt = all_calls_txt[all_calls_txt.consensus_callers.str.contains(",")] - all_calls_txt.drop(["Caller"], axis=1, inplace=True) - all_calls_txt = all_calls_txt.drop_duplicates("DNAchange") - all_calls_txt.sort_values("DNAchange", inplace=True) - all_calls_txt.to_csv(f"{prefix}_consensus.txt", mode="w", index=False, header=True, sep="\t") - # consensus vcf - if not maf: - all_calls = pd.concat(all_calls).reset_index() - all_calls[pos] = all_calls[pos].astype(int) - all_calls.sort_values([chrom, pos], inplace=True) - # write a basic consensus VCF - consensus_variants = all_calls[all_calls['overlap'] != "NONE"] - # Generate a minimal VCF for forcing variant calling - consensus_variants = consensus_variants[ - ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"]] - consensus_variants['QUAL'] = "." - consensus_variants['FILTER'] = "." - consensus_variants['INFO'] = "." - consensus_variants['FORMAT'] = "." - consensus_variants['SAMPLE'] = "." 
- consensus_variants = consensus_variants.drop_duplicates("ID") - meta = f"##fileformat=VCFv4.2\n##source=Consensus{len(caller_names)}Callers\n" + "\n".join(meta_contigs) + "\n" - with open(all_out, "w") as int_out: - int_out.write(meta) - consensus_variants.to_csv(all_out, mode="a", index=False, header=True, sep="\t") - - -def get_calls_from_callers(vcfs, caller_names, chrom, pos, ref, alt): - # get calls from each caller - original_vcf_headers = [] - calls_dict = {"SNVs": [], "MNPs": [], "INDELs": [], "meta": []} - variants_in_consensus = {} - for caller, vcf in zip(caller_names, vcfs): - meta, header, df = vcf_to_pandas(vcf) - print(f"- {caller} has {len(df.index)}") - original_vcf_headers += [header] - calls = add_varid_and_type(df=df, caller=caller, chrom=chrom, pos=pos, ref=ref, alt=alt) - calls_dict["SNVs"] += [(calls["SNVs"])] - calls_dict["MNPs"] += [calls["MNPs"]] - calls_dict["INDELs"] += [calls["INDELs"]] - calls_dict["meta"] += [meta] - variants_in_consensus[caller] = calls["SNVs"]['DNAchange'].tolist() + \ - calls["INDELs"]['DNAchange'].tolist() + \ - calls["MNPs"]['DNAchange'].tolist() - return original_vcf_headers, calls_dict, variants_in_consensus - - -def do_consensus(indel_window, calls_dict, caller_names, chrom, pos, alt): - # perform consensus - consensus = {} - for var_type in calls_dict.keys(): - if var_type != "meta": # meta is for the VCF headers only - consensus[var_type] = consensus_exact_match(calls_dict[var_type]) - else: - consensus['meta'] = calls_dict['meta'] - # extra consensus check for indels tha might overlap - consensus["INDELs"] = consensus_overlap(list_of_calls=consensus["INDELs"], caller_names=caller_names, - window=indel_window, chrom=chrom, pos=pos, alt=alt) - return consensus - - -def main(): - args = argparser() - if args.maf: - chrom = "Chromosome" - pos = "Start_Position" # can be sometimes Start_position (no capital p) - ref = "Reference_Allele" - alt = "Tumor_Seq_Allele2" - else: - chrom = "#CHROM" - pos = "POS" - ref = "REF" - alt = "ALT" - # get callers names - caller_names = assign_callers_names(caller_names=args.names, vcfs=args.input) - # get calls - original_vcf_headers, calls_dict, variants_in_consensus = get_calls_from_callers(vcfs=args.input, - caller_names=args.names, - chrom=chrom, pos=pos, ref=ref, - alt=alt) - # consensus - consensus = do_consensus(indel_window=args.indel_window, - calls_dict=calls_dict, - caller_names=caller_names, - chrom=chrom, pos=pos, alt=alt) - # add info to output - caller_dict = add_consensus_info(calls_dict=consensus, callers_name=caller_names, - variants_in_consensus=variants_in_consensus, - chrom=chrom, pos=pos, maf=args.maf) - # write to separate vcfs - write_vcf_and_intervals(caller_dict=caller_dict, caller_names=caller_names, vcf_list=args.input, prefix=args.prefix, - headers=original_vcf_headers, chrom=chrom, pos=pos, maf=args.maf) - - -if __name__ == '__main__': - main() diff --git a/modules/local/run_consensus.nf b/modules/local/consensus/main.nf similarity index 72% rename from modules/local/run_consensus.nf rename to modules/local/consensus/main.nf index ab1e1a6..39a277c 100644 --- a/modules/local/run_consensus.nf +++ b/modules/local/consensus/main.nf @@ -10,9 +10,12 @@ process RUN_CONSENSUS { tuple val(meta), path(vcf), val(caller) output: - tuple val(meta), path('*.consensus.maf'), val(['consensus']) , emit: vcf - tuple val(meta), path('*.consensus_*.maf'), val(caller) , emit: vcf_separate - path "versions.yml" , emit: versions + tuple val(meta), path('*.consensus.vcf') , optional:true , 
emit: vcf + tuple val(meta), path('*.consensus_*.vcf'), val(caller) , optional:true , emit: vcf_separate + tuple val(meta), path('*.consensus.maf') , optional:true , emit: maf + tuple val(meta), path('*.consensus_*.maf'), val(caller) , optional:true , emit: maf_separate + path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,13 +27,10 @@ process RUN_CONSENSUS { def caller_list = caller.collect{ "--caller=$it"}.join(' ') """ - run_consensus.R ${input_list} ${caller_list} --out_prefix=${prefix} $args + run_consensus.R ${input_list} ${caller_list} --out_prefix=${prefix}.consensus $args cat <<-END_VERSIONS > versions.yml "${task.process}": R: \$(echo \$(R --version 2>&1) | head -n 1) END_VERSIONS """ - - - } \ No newline at end of file diff --git a/modules/local/maf_filtering/main.nf b/modules/local/maf_filtering/main.nf new file mode 100644 index 0000000..0bc9f78 --- /dev/null +++ b/modules/local/maf_filtering/main.nf @@ -0,0 +1,34 @@ +process MAF_FILTERING { + tag "$meta.id" + label 'process_low' + + conda "anaconda::pandas=1.4.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-629aec3ba267b06a1efc3ec454c0f09e134f6ee2%3A3b083bb5eae6e491b8579589b070fa29afbea2a1-0' : + 'quay.io/biocontainers/mulled-v2-629aec3ba267b06a1efc3ec454c0f09e134f6ee2%3A3b083bb5eae6e491b8579589b070fa29afbea2a1-0' }" + + + input: + tuple val(meta), path(maf) + path fasta + + output: + tuple val(meta), path('*.maf') , emit: maf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/rnadnavar/bin/ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + filter_mutations.py -i $maf --output ${prefix}.maf --ref $fasta $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/^.*Python (//;s/).*//') + END_VERSIONS + """ + +} \ No newline at end of file diff --git a/modules/local/vcf2maf/vcf2maf/main.nf b/modules/local/vcf2maf/vcf2maf/main.nf index 2f2ee4b..b5478b5 100644 --- a/modules/local/vcf2maf/vcf2maf/main.nf +++ b/modules/local/vcf2maf/vcf2maf/main.nf @@ -4,37 +4,37 @@ process VCF2MAF { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::vcf2maf-1.6.21-hdfd78af_0" : null ) + conda "bioconda::vcf2maf-1.6.21-hdfd78af_0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/vcf2maf:1.6.21--hdfd78af_0' : 'quay.io/biocontainers/vcf2maf:1.6.21--hdfd78af_0' }" input: - tuple val(meta), path(vcf) - path(fasta) + tuple val(meta), path(vcf) + path(fasta) output: - tuple val(meta), path("*.maf") , emit: maf - path "versions.yml" , emit: versions + tuple val(meta), path("*.maf") , emit: maf + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when + task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def vcf_decompressed = vcf.baseName.minus(".gz") - """ - vcf2maf.pl \\ - --input-vcf ${vcf} \\ - --output-maf ${prefix}.maf \\ - --ref-fasta $fasta \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - vcf2maf: $VERSION - END_VERSIONS - """ - + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def vcf_decompressed = vcf.baseName.minus(".gz") + """ + gzip -d $vcf -c > ${vcf_decompressed} + vcf2maf.pl \\ + --input-vcf ${vcf_decompressed} \\ + --output-maf ${prefix}.maf \\ + --ref-fasta $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcf2maf: $VERSION + END_VERSIONS + """ } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index bd503e4..188aeb6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -351,7 +351,6 @@ includeConfig 'conf/modules/gatk4_preprocessing/prepare_recalibration.config' includeConfig 'conf/modules/gatk4_preprocessing/recalibrate.config' // variant calling -includeConfig 'conf/modules/variant_calling/freebayes.config' includeConfig 'conf/modules/variant_calling/strelka.config' includeConfig 'conf/modules/variant_calling/manta.config' includeConfig 'conf/modules/variant_calling/mutect2.config' diff --git a/subworkflows/local/bam_variant_calling/main.nf b/subworkflows/local/bam_variant_calling/main.nf index 8ed8986..391c119 100644 --- a/subworkflows/local/bam_variant_calling/main.nf +++ b/subworkflows/local/bam_variant_calling/main.nf @@ -118,8 +118,10 @@ workflow BAM_VARIANT_CALLING { // Gather vcf files for annotation and QC - vcf_to_normalise = Channel.empty() - vcf_to_normalise = vcf_to_normalise.mix(BAM_VARIANT_CALLING_SOMATIC.out.vcf_all) + vcf_to_normalise = Channel.empty().mix(BAM_VARIANT_CALLING_SOMATIC.out.vcf_all) + contamination_table_mutect2 = Channel.empty().mix(BAM_VARIANT_CALLING_SOMATIC.out.contamination_table_mutect2) + segmentation_table_mutect2 = Channel.empty().mix(BAM_VARIANT_CALLING_SOMATIC.out.segmentation_table_mutect2) + artifact_priors_mutect2 = Channel.empty().mix(BAM_VARIANT_CALLING_SOMATIC.out.artifact_priors_mutect2) // QC VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_normalise, intervals_bed_combined) @@ -134,15 +136,23 @@ workflow BAM_VARIANT_CALLING { // Gather used variant calling softwares versions versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC.out.versions) versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) + } else{ + + cram_variant_calling_pair = Channel.empty() + vcf_to_normalise = Channel.empty() + contamination_table_mutect2 = Channel.empty() + segmentation_table_mutect2 = Channel.empty() + artifact_priors_mutect2 = Channel.empty() + } emit: cram_variant_calling_pair = cram_variant_calling_pair vcf_to_normalise = vcf_to_normalise - contamination_table = BAM_VARIANT_CALLING_SOMATIC.out.contamination_table_mutect2 - segmentation_table = BAM_VARIANT_CALLING_SOMATIC.out.segmentation_table_mutect2 - artifact_priors = 
BAM_VARIANT_CALLING_SOMATIC.out.artifact_priors_mutect2 + contamination_table = contamination_table_mutect2 + segmentation_table = segmentation_table_mutect2 + artifact_priors = artifact_priors_mutect2 reports = reports versions = versions diff --git a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf index 924a15e..02f17a5 100644 --- a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf +++ b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf @@ -5,39 +5,41 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { BAM_GATK_PREPROCESSING } from '../bam_gatk_preprocessing/main' // For now only matched supported - include { BAM_VARIANT_CALLING } from '../bam_variant_calling/main' -// // Can we just call normalization here? -// include { VCF_NORMALIZE } from '../normalize_vcf_variants/main' -// // Can we just call the consensus module here? -// include { VCF_CONSENSUS } from '../consensus/main' -// // maybe just call VEP here? -// include { VCF_ANNOTATE } from '../annotate/main' -// include { MAF_BASIC_FILTERING as FILTERING } from '../../../modules/local/filter_variants' +include { BAM_VARIANT_CALLING } from '../bam_variant_calling/main' +// Normalise VCFs +include { VCF_NORMALISE } from '../vcf_normalise/main' +// Annotation +include { VCF_ANNOTATE } from '../vcf_annotate/main' +// Consensus + include { VCF_CONSENSUS } from '../vcf_consensus/main' +// Filtering + include { MAF_FILTERING } from '../maf_filtering/main' workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { take: - step // step to start with - tools - skip_tools - ch_input_sample // input from CSV if applicable - ch_genome_bam // input from mapping - fasta // fasta reference file - fasta_fai // fai for fasta file - dict // - dbsnp - dbsnp_tbi - pon - pon_tbi - germline_resource - germline_resource_tbi - intervals - intervals_for_preprocessing - ch_interval_list_split - intervals_bed_gz_tbi - intervals_bed_combined - vcf_consensus_dna // to repeat rescue consensus - vcfs_status_dna // to repeat rescue consensus + input_sample // input from CSV if applicable + bam_mapped // channel: [mandatory] bam_mapped + cram_mapped // channel: [mandatory] cram_mapped + fasta // fasta reference file + fasta_fai // fai for fasta file + dict // dict for fasta file + dbsnp // channel: [optional] germline_resource + dbsnp_tbi // channel: [optional] germline_resource_tbi + pon // channel: [optional] pon for mutect2 + pon_tbi // channel: [optional] pon_tbi for mutect2 + known_sites_indels // channel: [optional] known_sites + known_sites_indels_tbi // channel: [optional] known_sites + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource + intervals // channel: [mandatory] intervals/target regions + intervals_for_preprocessing // channel: [mandatory] intervals/wes + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_and_num_intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped + dna_consensus_maf // to repeat rescue consensus + dna_varcall_mafs // to repeat rescue consensus main: reports = Channel.empty() @@ -56,7 +58,7 @@ 
workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { germline_resource, // channel: [optional] germline_resource germline_resource_tbi, // channel: [optional] germline_resource_tbi intervals, // channel: [mandatory] intervals/target regions - intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes + intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes intervals_and_num_intervals // channel: [mandatory] intervals_for_preprocessing/wes ) @@ -81,106 +83,64 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { pon_tbi, input_sample ) - cram_variant_calling_pair = BAM_VARIANT_CALLING.out.cram_variant_calling_pair // use same crams for force calling later - vcf_to_normalize = BAM_VARIANT_CALLING.out.vcf_to_normalize - contamination = BAM_VARIANT_CALLING.out.contamination_table - segmentation = BAM_VARIANT_CALLING.out.segmentation_table - orientation = BAM_VARIANT_CALLING.out.artifact_priors - versions = versions.mix(BAM_VARIANT_CALLING.out.versions) - reports = reports.mix(BAM_VARIANT_CALLING.out.reports) - - ch_cram_variant_calling.dump(tag:"[STEP8 RNA_FILTERING] ch_cram_variant_calling") - intervals_bed_gz_tbi.dump(tag:"[STEP8 RNA_FILTERING] intervals_bed_gz_tbi") - pon.dump(tag:"[STEP8 RNA_FILTERING] pon") - // STEP 3: VARIANT CALLING -// VARIANT_CALLING( tools, -// ch_cram_variant_calling, -// fasta, -// fasta_fai, -// dbsnp, -// dbsnp_tbi, -// dict, -// germline_resource, -// germline_resource_tbi, -// intervals, -// intervals_bed_gz_tbi, -// intervals_bed_combined, -// pon, -// pon_tbi, -// ch_input_sample -// ) -// cram_vc_pair = VARIANT_CALLING.out.cram_vc_pair // use same crams for force calling later -// vcf_to_normalize = VARIANT_CALLING.out.vcf -// contamination = VARIANT_CALLING.out.contamination_table -// segmentation = VARIANT_CALLING.out.segmentation_table -// orientation = VARIANT_CALLING.out.artifact_priors -// ch_versions = ch_versions.mix(VARIANT_CALLING.out.versions) -// ch_reports = ch_reports.mix(VARIANT_CALLING.out.reports) -// -// -// // STEP 4: NORMALIZE -// NORMALIZE (tools, -// vcf_to_normalize, -// fasta, -// ch_input_sample) -// ch_versions = ch_versions.mix(NORMALIZE.out.versions) -// vcf_normalized = NORMALIZE.out.vcf -// -// -// // STEP 5: ANNOTATE -// ANNOTATE(tools, -// vcf_normalized, // second pass TODO: make it optional -// fasta, -// ch_input_sample // first pass -// ) -// -// ch_versions = ch_versions.mix(ANNOTATE.out.versions) -// ch_reports = ch_reports.mix(ANNOTATE.out.reports) -// -// // STEP 6: CONSENSUS -// CONSENSUS ( tools, -// ANNOTATE.out.maf_ann, -// cram_vc_pair, // from previous variant calling -// dict, -// fasta, -// fasta_fai, -// germline_resource, -// germline_resource_tbi, -// intervals, -// intervals_bed_gz_tbi, -// intervals_bed_combined, -// pon, -// pon_tbi, -// vcf_consensus_dna, // null when first pass -// vcfs_status_dna, // null when first pass -// ch_input_sample, -// contamination, -// segmentation, -// orientation -// ) -// // STEP 7: FILTERING -// if (tools.split(',').contains('filtering')) { -// FILTERING(CONSENSUS.out.maf, fasta) -// -// FILTERING.out.maf.branch{ -// dna: it[0].status < 2 -// rna: it[0].status == 2 -// }.set{filtered_maf} -// filtered_maf_rna = filtered_maf.rna -// filtered_maf_dna = filtered_maf.dna -// } else{ -// filtered_maf = Channel.empty() -// filtered_maf_rna = Channel.empty() -// filtered_maf_dna = Channel.empty() -// -// } -// + cram_variant_calling_pair = BAM_VARIANT_CALLING.out.cram_variant_calling_pair // use same crams for force 
calling later + vcf_to_normalise = BAM_VARIANT_CALLING.out.vcf_to_normalise + contamination = BAM_VARIANT_CALLING.out.contamination_table + segmentation = BAM_VARIANT_CALLING.out.segmentation_table + orientation = BAM_VARIANT_CALLING.out.artifact_priors + versions = versions.mix(BAM_VARIANT_CALLING.out.versions) + reports = reports.mix(BAM_VARIANT_CALLING.out.reports) + + + // NORMALISE + VCF_NORMALISE ( + vcf_to_normalise, + // Remap channel to match module/subworkflow + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + input_sample + ) + versions = versions.mix(VCF_NORMALISE.out.versions) + vcf_to_annotate = VCF_NORMALISE.out.vcf + + // ANNOTATION + + VCF_ANNOTATE( + vcf_to_annotate.map{meta, vcf -> [ meta + [ file_name: vcf.baseName ], vcf ] }, + fasta, + input_sample + ) + + vcf_to_consensus = VCF_ANNOTATE.out.vcf_ann + versions = versions.mix(VCF_ANNOTATE.out.versions) + reports = reports.mix(VCF_ANNOTATE.out.reports) + + vcf_to_consensus.dump(tag:"vcf_to_consensus0") + // STEP 6: CONSENSUS + VCF_CONSENSUS ( + params.tools, + vcf_to_consensus, + fasta, + dna_consensus_maf, // null when first pass + dna_varcall_mafs, // null when first pass + input_sample + ) + + dna_consensus_maf = VCF_CONSENSUS.out.maf_consensus_dna + dna_varcall_mafs = VCF_CONSENSUS.out.mafs_dna + maf_to_filter = VCF_CONSENSUS.out.maf + versions = versions.mix(VCF_CONSENSUS.out.versions) + + maf_to_filter.dump(tag:"maf_to_filter0") + // STEP 7: FILTERING + MAF_FILTERING(maf_to_filter, fasta, input_sample) + filtered_maf = MAF_FILTERING.out.maf + versions = versions.mix(MAF_FILTERING.out.versions) + + emit: -// vcf_consensus_dna = CONSENSUS.out.vcf_consensus_dna -// vcfs_status_dna = CONSENSUS.out.vcfs_status_dna -// maf = filtered_maf -// maf_rna = filtered_maf_rna -// maf_dna = filtered_maf_dna - versions = versions // channel: [ versions.yml ] - reports = reports + dna_consensus_maf = dna_consensus_maf + dna_varcall_mafs = dna_varcall_mafs + maf = filtered_maf + versions = versions // channel: [ versions.yml ] + reports = reports } diff --git a/subworkflows/local/bam_variant_calling_somatic/main.nf b/subworkflows/local/bam_variant_calling_somatic/main.nf index dca4d9f..c11aa57 100644 --- a/subworkflows/local/bam_variant_calling_somatic/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic/main.nf @@ -1,8 +1,6 @@ // // PAIRED VARIANT CALLING // - -include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' include { BAM_VARIANT_CALLING_SOMATIC_MANTA } from '../bam_variant_calling_somatic_manta/main' include { BAM_VARIANT_CALLING_SOMATIC_MUTECT2 } from '../bam_variant_calling_somatic_mutect2/main' include { BAM_VARIANT_CALLING_SOMATIC_STRELKA } from '../bam_variant_calling_somatic_strelka/main' @@ -28,35 +26,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC { main: versions = Channel.empty() - //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config - vcf_freebayes = Channel.empty() - vcf_manta = Channel.empty() + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config vcf_manta = Channel.empty() vcf_strelka = Channel.empty() vcf_mutect2 = Channel.empty() vcf_sage = Channel.empty() - // TODO: unify fasta/fasta_fai/dict structure - // FREEBAYES - if (tools.split(',').contains('freebayes')) { - BAM_VARIANT_CALLING_FREEBAYES( - cram, - dict, - fasta, - fasta_fai, - intervals - ) - - vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf - versions = 
versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) - } - // SAGE if (tools.split(',').contains('sage')) { BAM_VARIANT_CALLING_SOMATIC_SAGE( cram, // Remap channel to match module/subworkflow - dict.map{ it -> [ [ id:'dict' ], it ] }, + dict, // Remap channel to match module/subworkflow fasta.map{ it -> [ [ id:'fasta' ], it ] }, // Remap channel to match module/subworkflow @@ -64,7 +45,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC { intervals ) - vcf_sage = BAM_VARIANT_CALLING_SOMATIC_SAGE.out.vcf + vcf_sage = BAM_VARIANT_CALLING_SOMATIC_SAGE.out.vcf versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_SAGE.out.versions) } @@ -107,9 +88,9 @@ workflow BAM_VARIANT_CALLING_SOMATIC { // Remap channel to match module/subworkflow cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] }, // Remap channel to match module/subworkflow - fasta, + fasta.map{ it -> [ [ id:'fasta' ], it ] }, // Remap channel to match module/subworkflow - fasta_fai, + fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }, dict, germline_resource, germline_resource_tbi, @@ -119,11 +100,11 @@ workflow BAM_VARIANT_CALLING_SOMATIC { joint_mutect2 ) - vcf_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.vcf_filtered + vcf_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.vcf_filtered contamination_table_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.contamination_table - segmentation_table_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.segmentation_table - artifact_priors_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.artifact_priors - versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.versions) + segmentation_table_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.segmentation_table + artifact_priors_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.artifact_priors + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.versions) } else { contamination_table_mutect2 = Channel.empty() @@ -134,8 +115,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC { } vcf_all = Channel.empty().mix( - vcf_freebayes, - vcf_manta, vcf_mutect2, vcf_strelka, vcf_sage @@ -143,7 +122,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC { emit: vcf_all - vcf_freebayes vcf_manta vcf_mutect2 vcf_strelka diff --git a/subworkflows/local/bam_variant_calling_somatic_sage/main.nf b/subworkflows/local/bam_variant_calling_somatic_sage/main.nf index bf82393..32474ed 100644 --- a/subworkflows/local/bam_variant_calling_somatic_sage/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_sage/main.nf @@ -46,7 +46,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_SAGE { // Mix intervals and no_intervals channels together vcf = MERGE_SAGE.out.vcf.mix(bcftools_vcf_out.no_intervals) // add variantcaller to meta map and remove no longer necessary field: num_intervals - .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'freebayes' ], vcf ] } + .map{ meta, vcf -> [ meta - meta.subMap('normal_id', 'tumor_id','num_intervals') + [ variantcaller:'sage' ], vcf ] } versions = versions.mix(BCFTOOLS_SORT.out.versions) versions = versions.mix(MERGE_SAGE.out.versions) diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf index 8c9c70f..7d7bdc8 100644 --- a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf @@ -49,20 +49,23 @@ workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { 
MERGE_STRELKA_SNVS(vcf_snvs_to_merge, dict) // Mix intervals and no_intervals channels together - vcf = Channel.empty().mix(MERGE_STRELKA_INDELS.out.vcf, MERGE_STRELKA_SNVS.out.vcf, vcf_indels.no_intervals, vcf_snvs.no_intervals) - // add variantcaller to meta map and remove no longer necessary field: num_intervals - .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + vcf = Channel.empty().mix(MERGE_STRELKA_INDELS.out.vcf, MERGE_STRELKA_SNVS.out.vcf, vcf_indels.no_intervals, vcf_snvs.no_intervals).groupTuple() // Merge SNVs and indels MERGE_STRELKA(vcf, dict) + vcf_merged = MERGE_STRELKA.out.vcf + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('normal_id', 'tumor_id','num_intervals') + [ variantcaller:'strelka' ], vcf ] } + + versions = versions.mix(MERGE_STRELKA_SNVS.out.versions) versions = versions.mix(MERGE_STRELKA_INDELS.out.versions) versions = versions.mix(MERGE_STRELKA.out.versions) versions = versions.mix(STRELKA_SOMATIC.out.versions) emit: - vcf = MERGE_STRELKA.out.vcf + vcf = vcf_merged versions } \ No newline at end of file diff --git a/subworkflows/local/maf_filtering/main.nf b/subworkflows/local/maf_filtering/main.nf new file mode 100644 index 0000000..1c7de3e --- /dev/null +++ b/subworkflows/local/maf_filtering/main.nf @@ -0,0 +1,33 @@ +// +// Filtering somatic mutation analysis +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +include { MAF_FILTERING as FILTERING } from '../../../modules/local/maf_filtering/main' + +workflow MAF_FILTERING { + + take: + maf_to_filter + fasta + input_sample + + main: + versions = Channel.empty() + maf = Channel.empty() + if (params.step in ['mapping', 'markduplicates', 'splitncigar', + 'prepare_recalibration', 'recalibrate', 'variant_calling', + 'normalise', 'consensus', 'filtering'] ) { + + if (params.step == 'filtering') maf_to_filter = input_sample + maf_to_filter.dump(tag:"maf_to_filter") + // BASIC FILTERING + FILTERING(maf_to_filter, fasta) + maf = FILTERING.out.maf + versions = versions.mix(FILTERING.out.versions) + } + + emit: + maf = maf + versions = versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 1e8f21b..0a6395d 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -135,7 +135,7 @@ workflow PREPARE_GENOME { // HISAT2 not necessary if second pass skipped - if (!params.skip_tools.split(',').contains("second_pass")){ + if ((params.skip_tools && !params.skip_tools.split(',').contains("second_pass"))){ if (params.splicesites) { ch_splicesites = Channel.fromPath(params.splicesites).collect() } else{ diff --git a/subworkflows/local/vcf_annotate/main.nf b/subworkflows/local/vcf_annotate/main.nf new file mode 100644 index 0000000..28f130a --- /dev/null +++ b/subworkflows/local/vcf_annotate/main.nf @@ -0,0 +1,65 @@ +// +// ANNOTATION +// + +include { VCF_ANNOTATE_ENSEMBLVEP } from '../../nf-core/vcf_annotate_ensemblvep/main' +include { CHANNEL_VARIANT_CALLING_CREATE_CSV as CHANNEL_ANNOTATE_CREATE_CSV } from '../channel_variant_calling_create_csv/main' + +workflow VCF_ANNOTATE { + take: + vcf // channel: [ val(meta), vcf ] + fasta + input_sample + + main: + reports = Channel.empty() + vcf_ann = Channel.empty() + tab_ann = Channel.empty() + json_ann = 
Channel.empty() + versions = Channel.empty() + + if (params.step == 'annotate') vcf_to_annotate = input_sample + + if (params.tools && params.tools.split(',').contains('vep')) { + + if (params.tools.split(',').contains('vep')) { + fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] + vep_cache_version = params.vep_cache_version ?: Channel.empty() + vep_genome = params.vep_genome ?: Channel.empty() + vep_species = params.vep_species ?: Channel.empty() + vep_cache = params.vep_cache ? params.use_annotation_cache_keys ? Channel.fromPath("${params.vep_cache}/${params.vep_cache_version}_${params.vep_genome}").collect() : Channel.fromPath(params.vep_cache).collect() : [] + + vep_extra_files = [] + if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) + } + + if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + } + + vcf_for_vep = vcf.map{ meta, vcf -> [ meta, vcf, [] ] } + vcf_for_vep.dump(tag:"vcf_for_vep") + VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + + reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) + tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) + json_ann = json_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.json) + versions = versions.mix(VCF_ANNOTATE_ENSEMBLVEP.out.versions) + CHANNEL_ANNOTATE_CREATE_CSV(vcf_ann.map{meta, vcf, tbi -> [meta, vcf]}, "annotated") + + } + } + + emit: + vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + tab_ann + json_ann + reports // path: *.html + versions // path: versions.yml +} \ No newline at end of file diff --git a/subworkflows/local/vcf_consensus/main.nf b/subworkflows/local/vcf_consensus/main.nf new file mode 100644 index 0000000..dbb073a --- /dev/null +++ b/subworkflows/local/vcf_consensus/main.nf @@ -0,0 +1,153 @@ +// +// Consensus +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +include { VCF2MAF } from '../../../modules/local/vcf2maf/vcf2maf/main' +include { RUN_CONSENSUS } from '../../../modules/local/consensus/main' +include { RUN_CONSENSUS as RUN_CONSENSUS_RESCUE } from '../../../modules/local/consensus/main' + +workflow VCF_CONSENSUS { + take: + tools + vcf_to_consensus + fasta + previous_maf_consensus_dna // results already done to avoid a second run when rna filterig + previous_mafs_status_dna // results already done to avoid a second run when rna filterig + input_sample + + main: + versions = Channel.empty() + + maf_consensus_dna = Channel.empty() + maf_consensus_rna = Channel.empty() + mafs_status_dna = Channel.empty() + + if (params.step in ['mapping', 'markduplicates', 'splitncigar', + 'prepare_recalibration', 'recalibrate', 'variant_calling', + 'normalise', 'consensus'] ) { + + if (params.step == 'consensus') vcf_to_consensus = input_sample + // First we transform the maf to MAF + VCF2MAF(vcf_to_consensus.map{meta, vcf, tbi -> [meta, vcf]}, + fasta) + maf_to_consensus = 
VCF2MAF.out.maf + versions = versions.mix(VCF2MAF.out.versions) + + maf_to_consensus.dump(tag:"maf_to_consensus") + // count number of callers to generate groupKey + maf_to_consensus = maf_to_consensus.map{ meta, maf -> + def toolsllist = tools.split(',') + def ncallers = toolsllist.count('sage') + + toolsllist.count('strelka') + + toolsllist.count('mutect2') + + toolsllist.count("consensus") + [groupKey([ + id : meta.id, + patient : meta.patient, + status : meta.status, + ncallers : ncallers + ], ncallers), + maf, meta.variantcaller + ]} // groupKey should avoid the groupTuple wait but it does not seem to work atm + .groupTuple() // makes the whole pipeline wait for all processes to finish + maf_to_consensus.dump(tag:"maf_to_consensus1") + // Run consensus on VCF with same id + RUN_CONSENSUS ( maf_to_consensus ) + + consensus_maf = RUN_CONSENSUS.out.maf // 1 consensus_maf from all callers + consensus_maf.dump(tag:'consensus_maf0') + // Separate DNA from RNA + // VCFs from variant calling + mafs_from_varcal = maf_to_consensus.branch{ + dna: it[0].status <= 1 + rna: it[0].status == 2 + } + // VCF from consensus + maf_from_consensus = consensus_maf.branch{ + dna: it[0].status <= 1 + rna: it[0].status == 2 + } + + maf_from_consensus_rna = maf_from_consensus.rna.map{meta, maf -> [meta, maf, ['ConsensusRNA']]} + mafs_from_varcal_rna = mafs_from_varcal.rna + + // Only RNA mafs are processed again if second run + if (previous_maf_consensus_dna && (!(params.skip_tools && params.skip_tools.split(',').contains('second_run')))){ + maf_from_consensus_dna = previous_maf_consensus_dna // VCF with consensus calling + mafs_from_varcal_dna = previous_mafs_status_dna // VCFs with consensus calling + } else { + maf_from_consensus_dna = maf_from_consensus.dna.map{meta, maf -> [meta, maf, ['ConsensusDNA']]} + mafs_from_varcal_dna = mafs_from_varcal.dna + } + + // RESCUE STEP: cross dna / rna for a crossed second consensus + if (!(params.skip_tools && params.skip_tools.split(',').contains('rescue'))) { + // VCF from consensus + maf_consensus_status_dna_to_cross = maf_from_consensus_dna.map{ + meta, maf, caller -> + [meta.patient, meta, [maf], caller] + } + + maf_consensus_status_rna_to_cross = maf_from_consensus_rna.map{ + meta, maf, caller -> + [meta.patient, meta, [maf], caller] + } + // VCFs from variant calling + mafs_status_dna_to_cross = mafs_from_varcal_dna.map{ + meta, mafs, callers -> + [meta.patient, meta, mafs, callers] + } + + mafs_status_rna_to_cross = mafs_from_varcal_rna.map{ + meta, mafs, callers -> + [meta.patient, meta, mafs, callers] + } + + // cross results keeping metadata + mafs_dna_crossed_with_rna_rescue = mafs_status_dna_to_cross + .cross(maf_consensus_status_rna_to_cross) + .map { dna, rna -> + def meta = [:] + meta.patient = dna[0] + meta.dna_id = dna[1].id + meta.rna_id = rna[1].id + meta.status = dna[1].status + meta.id = "${meta.dna_id}_with_${meta.rna_id}".toString() + [meta, dna[2] + rna[2], dna[3] + rna[3]] + } + mafs_rna_crossed_with_dna_rescue = mafs_status_rna_to_cross + .cross(maf_consensus_status_dna_to_cross) + .map { rna, dna -> + def meta = [:] + meta.patient = rna[0] + meta.rna_id = rna[1].id + meta.dna_id = dna[1].id + meta.status = rna[1].status + meta.id = "${meta.rna_id}_with_${meta.dna_id}".toString() + [meta, rna[2] + dna[2], rna[3] + dna[3]] + } + + mafs_dna_crossed_with_rna_rescue.dump(tag:"mafs_dna_crossed_with_rna_rescue") + mafs_rna_crossed_with_dna_rescue.dump(tag:"mafs_rna_crossed_with_dna_rescue") + RUN_CONSENSUS_RESCUE ( 
mafs_dna_crossed_with_rna_rescue.mix(mafs_rna_crossed_with_dna_rescue) ) + + maf_from_rescue = RUN_CONSENSUS_RESCUE.out.maf.branch{ + dna: it[0].status <= 1 + rna: it[0].status == 2 + } + + maf_from_consensus_dna = maf_from_rescue.dna + maf_from_consensus_rna = maf_from_rescue.rna + consensus_maf = maf_from_consensus_dna.mix(maf_from_consensus_rna) + consensus_maf.dump(tag:'consensus_maf1') + + } + } + + emit: + maf_consensus_dna = maf_from_consensus_dna + mafs_dna = mafs_from_varcal_dna + maf = consensus_maf // channel: [ [meta], maf ] + versions = versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/subworkflows/local/vcf_normalise/main.nf b/subworkflows/local/vcf_normalise/main.nf index 0d200f7..c07bb01 100644 --- a/subworkflows/local/vcf_normalise/main.nf +++ b/subworkflows/local/vcf_normalise/main.nf @@ -17,10 +17,10 @@ workflow VCF_NORMALISE { input_sample main: - version = Channel.empty() + version = Channel.empty() vcf_to_consensus = Channel.empty() - if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalise'] ) { + if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalise'] && (!(params.skip_tools && params.skip_tools.split(",").contains("normalise")))) { if (params.step == 'normalise') vcf_to_normalise = input_sample @@ -38,8 +38,7 @@ workflow VCF_NORMALISE { vcf_to_consensus = vcf_to_consensus.mix(VT_NORMALISE.out.vcf) version = version.mix(VT_NORMALISE.out.versions.first()) - CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_normalise, "variantcallednormalised") - + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_consensus, "normalised") } diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 8a08bac..58f5679 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -216,6 +216,14 @@ if(params.tools && params.tools.split(',').contains('mutect2')){ } } +if(params.tools && params.tools.split(',').contains('sage')){ + if(!params.sage_ensembl_dir){ + log.error "SAGE requires ensembl resource file. Please provide `--sage_ensembl_dir`\nYou can skip this step in the workflow by removing sage from `--tools` to the command." + exit 1 + } +} + + // Fails when missing resources for baserecalibrator if(!params.dbsnp && !params.known_indels){ if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('baserecalibrator')))){ From b7a51838becedb66645fba7e49d200bab91ecfa5 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Tue, 5 Sep 2023 22:44:34 +0100 Subject: [PATCH 33/56] Done two fixes in prepare references subworkflows. 
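Note on the two fixes below: the first restores the intended `ch_exon_bed` variable name so both branches of the exon-BED logic assign the same channel, and the second wraps the `dict` channel in a meta map to match the module's expected `[ meta, file ]` input shape. A minimal sketch of that remapping idiom, assuming only a `params.dict` path and the `ch_genome_bed` channel shown in the diff (the `dict_with_meta` name is introduced here purely for illustration):

    // value channel holding the bare sequence-dictionary path
    dict = Channel.value(file(params.dict))

    // wrap it in a minimal meta map so modules declaring
    // `tuple val(meta), path(dict)` as input can consume it
    dict_with_meta = dict.map{ it -> [ [ id:'dict' ], it ] }

    GATK4_BEDTOINTERVALLIST(
        ch_genome_bed,
        dict_with_meta
    )

The same idiom is used for `fasta` and `fasta_fai` elsewhere in this patch series (e.g. the Mutect2 and SAGE calls), so keeping the wrapping next to the call site makes the expected tuple shape explicit.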
--- subworkflows/local/prepare_genome/main.nf | 3 +-- subworkflows/local/prepare_reference_and_intervals/main.nf | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 0a6395d..54fcad9 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -105,7 +105,7 @@ workflow PREPARE_GENOME { ch_gene_bed = GUNZIP_GENE_BED.out.gunzip.map{ meta, bed -> [bed] }.collect() versions = versions.mix(GUNZIP_GENE_BED.out.versions) } else { - ch_gene_bed = Channel.fromPath(params.exon_bed).collect() + ch_exon_bed = Channel.fromPath(params.exon_bed).collect() } } else { ch_exon_bed = GTF2BED ( ch_gtf ).bed.collect() @@ -168,7 +168,6 @@ workflow PREPARE_GENOME { versions = versions.mix(TABIX_KNOWN_INDELS.out.versions) versions = versions.mix(TABIX_PON.out.versions) - emit: bwa = BWAMEM1_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwa/* bwamem2 = BWAMEM2_INDEX.out.index.map{ meta, index -> [index] }.collect() // path: bwamem2/* diff --git a/subworkflows/local/prepare_reference_and_intervals/main.nf b/subworkflows/local/prepare_reference_and_intervals/main.nf index 90aea5a..d97e47e 100644 --- a/subworkflows/local/prepare_reference_and_intervals/main.nf +++ b/subworkflows/local/prepare_reference_and_intervals/main.nf @@ -84,7 +84,7 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { ch_interval_list = Channel.empty() GATK4_BEDTOINTERVALLIST( ch_genome_bed, - dict + dict.map{ it -> [ [id:'dict'], it ] } ) ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list versions = versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) From 16aea303ff99dfed5776eda1a5bd53451c1c6401 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 6 Sep 2023 11:06:47 +0100 Subject: [PATCH 34/56] Initiating vcf_manta channel --- subworkflows/local/bam_variant_calling_somatic/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/bam_variant_calling_somatic/main.nf b/subworkflows/local/bam_variant_calling_somatic/main.nf index c11aa57..bee5b93 100644 --- a/subworkflows/local/bam_variant_calling_somatic/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic/main.nf @@ -27,6 +27,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC { versions = Channel.empty() //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config vcf_manta = Channel.empty() + vcf_manta = Channel.empty() vcf_strelka = Channel.empty() vcf_mutect2 = Channel.empty() vcf_sage = Channel.empty() From 59847f75e281aeb139a108f0c2f0129faf7d7de5 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 6 Sep 2023 11:07:46 +0100 Subject: [PATCH 35/56] Removing unnecessary bits --- conf/modules/variant_calling/manta.config | 3 ++- subworkflows/local/prepare_intervals/main.nf | 2 +- .../local/prepare_reference_and_intervals/main.nf | 11 ----------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/conf/modules/variant_calling/manta.config b/conf/modules/variant_calling/manta.config index 5e96235..c95bc58 100644 --- a/conf/modules/variant_calling/manta.config +++ b/conf/modules/variant_calling/manta.config @@ -21,7 +21,8 @@ process { // manta publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, - pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}", + enabled: false // only used to run strelka ] } 
} diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index b3e5bf5..a7205f9 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -33,7 +33,7 @@ workflow PREPARE_INTERVALS { intervals_bed = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ it, 0 ] } intervals_bed_gz_tbi = Channel.fromPath(file("${params.outdir}/no_intervals.bed.{gz,gz.tbi}")).collect().map{ it -> [ it, 0 ] } intervals_combined = Channel.fromPath(file("${params.outdir}/no_intervals.bed")).map{ it -> [ [ id:it.simpleName ], it ] } - } else if (params.step != 'annotate' && params.step != 'controlfreec') { + } else if (params.step != 'annotate') { // If no interval/target file is provided, then generated intervals from FASTA file if (!intervals) { BUILD_INTERVALS(fasta_fai.map{it -> [ [ id:it.baseName ], it ] }) diff --git a/subworkflows/local/prepare_reference_and_intervals/main.nf b/subworkflows/local/prepare_reference_and_intervals/main.nf index d97e47e..0b67e10 100644 --- a/subworkflows/local/prepare_reference_and_intervals/main.nf +++ b/subworkflows/local/prepare_reference_and_intervals/main.nf @@ -78,16 +78,6 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { else [ intervals[0], intervals[1], num_intervals ] } - // STEP 0.C: Prepare the interval list from the GTF file using GATK4 BedToIntervalList - ch_genome_bed = Channel.from([id:'genome.bed']).combine(PREPARE_GENOME.out.exon_bed) - versions = versions.mix(PREPARE_GENOME.out.versions) - ch_interval_list = Channel.empty() - GATK4_BEDTOINTERVALLIST( - ch_genome_bed, - dict.map{ it -> [ [id:'dict'], it ] } - ) - ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list - versions = versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) emit: fasta = fasta @@ -100,7 +90,6 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { dragmap = dragmap star_index = PREPARE_GENOME.out.star_index gtf = PREPARE_GENOME.out.gtf - ch_interval_list = ch_interval_list intervals = intervals intervals_bed_gz_tbi = intervals_bed_gz_tbi intervals_for_preprocessing = intervals_for_preprocessing From df59be1d37125c7798fd0cf38d18f76132ff9e84 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 6 Sep 2023 11:08:43 +0100 Subject: [PATCH 36/56] Fixing minor bugs --- conf/modules/filtering/maf_filtering.config | 79 ++++++--------------- subworkflows/local/maf_filtering/main.nf | 2 +- subworkflows/local/vcf_consensus/main.nf | 8 +-- 3 files changed, 28 insertions(+), 61 deletions(-) diff --git a/conf/modules/filtering/maf_filtering.config b/conf/modules/filtering/maf_filtering.config index 97a2dde..c4157c7 100644 --- a/conf/modules/filtering/maf_filtering.config +++ b/conf/modules/filtering/maf_filtering.config @@ -14,68 +14,35 @@ // MAF FILTERING process { // maf filtering - if (params.tools && params.tools.split(',').contains('filtering')) { - withName: "VCF2MAF" { - ext.args = { [ - "--inhibit-vep", - "--normal-id ${meta.normal_id}", - "--tumor-id ${meta.tumor_id}", - "--vcf-tumor-id ${meta.tumor_id}", - "--vcf-normal-id ${meta.normal_id}", - "--max-subpop-af 0.0001", - "--retain-ann gnomADg_AF,MAX_AF,MAX_AF_POPS", - "--retain-fmt AD,DP,AF,GT", - params.vep_genome ? "--ncbi-build ${params.vep_genome}" : '', - meta.variantcaller == "strelka"? 
"--vcf-tumor-id TUMOR --vcf-normal-id NORMAL" : '' - ].join(' ').trim() } - ext.prefix = { "${meta.id}.${meta.variantcaller}" } - publishDir = [ + + withName: "MAF_FILTERING" { + ext.prefix = { "${meta.id}.filtered"} + ext.args = { [params.whitelist? "--whitelist ${params.whitelist}": "", + params.blacklist? "--blacklist ${params.blacklist}": ""].join(' ').trim() } + publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variants/annotated/unfiltered/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, pattern: "*{maf,maf.gz}" ] - } - - withName: "FILTERING" { - ext.prefix = { "${meta.id}.filtered"} - ext.args = { [params.whitelist? "--whitelist ${params.whitelist}": "", - params.blacklist? "--blacklist ${params.blacklist}": ""].join(' ').trim() } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, - pattern: "*{maf,maf.gz}" - ] - } - - withName: 'SAMTOOLS_MERGE_SECOND_PASS' { - ext.prefix = {"${meta.id}.merged_2ndpass"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/second_pass/input/${meta.patient}/${meta.id}/" }, - pattern: "*{bam}", - enabled: params.save_align_intermeds - ] - } + } + withName: 'RNA_FILTERING' { + ext.prefix = {"${meta.id}.rna_filt"} + ext.args = { [params.rnaedits? "--rnaedits ${params.rnaedits}": "", + params.rna_pon? "--pon ${params.rna_pon}" : "", + params.chain? "--chain ${params.chain}" : "", + params.fasta19? "--ref19 ${params.fasta19}" : "", + params.rna_pon19? "--pon19 ${params.rna_pon19}" : "" + ].join(' ').trim() } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf}", + enabled: true + ] + } - withName: 'RNA_FILTERING' { - ext.prefix = {"${meta.id}.rna_filt"} - ext.args = { [params.rnaedits? "--rnaedits ${params.rnaedits}": "", - params.rna_pon? "--pon ${params.rna_pon}" : "", - params.chain? "--chain ${params.chain}" : "", - params.fasta19? "--ref19 ${params.fasta19}" : "", - params.rna_pon19? 
"--pon19 ${params.rna_pon19}" : "" - ].join(' ').trim() } - - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, - pattern: "*{maf}", - enabled: true - ] - } - } } \ No newline at end of file diff --git a/subworkflows/local/maf_filtering/main.nf b/subworkflows/local/maf_filtering/main.nf index 1c7de3e..9c38931 100644 --- a/subworkflows/local/maf_filtering/main.nf +++ b/subworkflows/local/maf_filtering/main.nf @@ -17,7 +17,7 @@ workflow MAF_FILTERING { maf = Channel.empty() if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', - 'normalise', 'consensus', 'filtering'] ) { + 'normalise', 'consensus', 'filtering'] && (!(params.skip_tools && params.skip_tools.split(",").contains("filtering")))) { if (params.step == 'filtering') maf_to_filter = input_sample maf_to_filter.dump(tag:"maf_to_filter") diff --git a/subworkflows/local/vcf_consensus/main.nf b/subworkflows/local/vcf_consensus/main.nf index dbb073a..5167d32 100644 --- a/subworkflows/local/vcf_consensus/main.nf +++ b/subworkflows/local/vcf_consensus/main.nf @@ -19,13 +19,13 @@ workflow VCF_CONSENSUS { main: versions = Channel.empty() - maf_consensus_dna = Channel.empty() - maf_consensus_rna = Channel.empty() - mafs_status_dna = Channel.empty() + maf_from_consensus_dna = Channel.empty() + mafs_from_varcal_dna = Channel.empty() + consensus_maf = Channel.empty() if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', - 'normalise', 'consensus'] ) { + 'normalise', 'consensus'] && (!(params.skip_tools && params.skip_tools.split(",").contains("consensus")))) { if (params.step == 'consensus') vcf_to_consensus = input_sample // First we transform the maf to MAF From dd3d0bab076b413f86858168c06a3e0d93fd344f Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Wed, 6 Sep 2023 11:09:17 +0100 Subject: [PATCH 37/56] Adjusted filter_mutations.py to make blacklist and whitelist optional --- bin/filter_mutations.py | 66 ++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/bin/filter_mutations.py b/bin/filter_mutations.py index 3231475..cd17510 100644 --- a/bin/filter_mutations.py +++ b/bin/filter_mutations.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 """ -Date: 13 Feb 2023 Author: @RaqManzano Script: Filter variants from a MAF file producing another MAF file with the new filters added. 
""" @@ -89,7 +88,8 @@ def remove_ig_and_pseudo(maf): """ # fill na values maf[['BIOTYPE', 'SYMBOL']] = maf[['BIOTYPE', 'SYMBOL']].fillna(value="") - maf["ig_pseudo"] = maf["BIOTYPE"].str.contains('IG_C_gene|IG_D_gene|IG_J_gene|IG_V_gene|TR_C_gene|TR_J_gene|TR_V_gene|pseudogene') + maf["ig_pseudo"] = maf["BIOTYPE"].str.contains( + 'IG_C_gene|IG_D_gene|IG_J_gene|IG_V_gene|TR_C_gene|TR_J_gene|TR_V_gene|pseudogene') return maf @@ -132,7 +132,6 @@ def add_context(chrom, pos, ref, genome, flank=10): return context - def remove_homopolymers(maf, ref): """ Check for variants in homopolymer regions (a sequence of 6 consecutive identical bases) @@ -177,16 +176,17 @@ def filtering(maf, gnomad_thr, whitelist, blacklist, filters): """ if "PASS" not in filters: filters += ["PASS"] # a PASS is always allowed - maf["whitelist"] = maf['DNAchange'].isin(whitelist) # whitelist - maf = remove_muts_in_range(df=maf, blacklist=blacklist) # blacklist + if whitelist: + maf["whitelist"] = maf['DNAchange'].isin(whitelist) # whitelist + if blacklist: + maf = remove_muts_in_range(df=maf, blacklist=blacklist) # blacklist maf["ingnomAD"] = maf["MAX_AF"] >= gnomad_thr # gnomad return maf - def add_ravex_filters(maf, filters, noncoding=False, homopolymer=False, ig_pseudo=False, min_alt_reads=2, - consensus=True): + blacklist=False,whitelist=False): maf["RaVeX_FILTER"] = "PASS" maf["Existing_variation"] = maf["Existing_variation"].fillna("") maf["SOMATIC"] = maf["SOMATIC"].fillna("") @@ -196,8 +196,9 @@ def add_ravex_filters(maf, filters, noncoding=False, homopolymer=False, ig_pseud ravex_filter += ["min_alt_reads"] if row["ingnomAD"]: ravex_filter += ["gnomad"] - if row["blacklist"]: - ravex_filter += ["blacklist"] + if blacklist: + if row["blacklist"]: + ravex_filter += ["blacklist"] if not noncoding: if row["noncoding"]: ravex_filter += ["noncoding"] @@ -214,46 +215,43 @@ def add_ravex_filters(maf, filters, noncoding=False, homopolymer=False, ig_pseud else: if not row["FILTER_consensus"] in filters: ravex_filter += ["vc_filter"] - if not ravex_filter or row["whitelist"]: - ravex_filter = ["PASS"] + if whitelist: + if not ravex_filter or row["whitelist"]: + ravex_filter = ["PASS"] + else: + if not ravex_filter: + ravex_filter = ["PASS"] ravex_filter = ";".join(ravex_filter) maf.at[idx, "RaVeX_FILTER"] = ravex_filter return maf def dedup_maf(dnachange, maf): - """If more than one caller, then we need to dedup the entries. We select according to the caller we trust more: - mutect2, strelka, sage, others for SNVs, strelka, mutect2, sage for indels + """If more than one caller, then we need to dedup the entries. 
We select according to this caller list: + mutect2, sage, strelka TODO: this is too slow, need to think on a better implementation """ maf_variant = maf[maf["DNAchange"] == dnachange] - if maf_variant.iloc[0]["Variant_Type"] == "SNP": - maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("mutect")] + maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("mutect")] + if maf_variant_dedup.empty: + maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("sage")] if maf_variant_dedup.empty: maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("strelka")] if maf_variant_dedup.empty: - maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("sage")] - if maf_variant_dedup.empty: - maf_variant_dedup = maf_variant.drop_duplicates(subset='DNAchange', keep="first") - else: - maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("strelka")] - if maf_variant_dedup.empty: - maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("mutect")] - if maf_variant_dedup.empty: - maf_variant_dedup = maf_variant[maf_variant["Caller"].str.contains("sage")] - if maf_variant_dedup.empty: - maf_variant_dedup = maf_variant.drop_duplicates(subset='DNAchange', keep="first") + maf_variant_dedup = maf_variant.drop_duplicates(subset='DNAchange', keep="first") return maf_variant_dedup -def write_maf(maf_df, mafin_file, mafout_file, vc_priority=["mutect2", "strelka", 'sage', 'consensus']): +def write_maf(maf_df, mafin_file, mafout_file, vc_priority=["mutect2", 'sage', "strelka"]): """Write output""" header_lines = subprocess.getoutput(f"zgrep -Eh '#|Hugo_Symbol' {mafin_file} 2>/dev/null") print("Removing duplicated variants from maf (only one entry from a caller will be kept)") maf_dedup = [] + all_callers = maf_df["Caller"].unique() + vc_priority = vc_priority + [x for x in all_callers if x not in vc_priority] for caller in vc_priority: - maf_dedup += [maf_df[maf_df["Caller"]==caller]] + maf_dedup += [maf_df[maf_df["Caller"] == caller]] maf_dedup = pd.concat(maf_dedup).drop_duplicates(subset='DNAchange', keep="first") with open(mafout_file, "w") as mafout: mafout.write(header_lines) @@ -268,9 +266,15 @@ def main(): 'IGR', 'INTRON', 'RNA'] args = argparser() maf = read_maf(args.input) + whitelist = False + blacklist = False + if args.whitelist: + whitelist = read_whitelist_bed(args.whitelist) + if args.blacklist: + blacklist = read_blacklist_bed(args.blacklist) maf = filtering(maf=maf, gnomad_thr=args.gnomad_thr, - whitelist=read_whitelist_bed(args.whitelist), - blacklist=read_blacklist_bed(args.blacklist), + whitelist=whitelist, + blacklist=blacklist, filters=args.filters) # tag noncoding maf = noncoding(maf=maf, noncoding=noncoding_list) @@ -279,7 +283,7 @@ def main(): # tag homopolymers maf = remove_homopolymers(maf=maf, ref=args.ref) # tag consensus - maf = add_ravex_filters(maf=maf, filters=args.filters) + maf = add_ravex_filters(maf=maf, filters=args.filters, blacklist=blacklist, whitelist=whitelist) if not args.output: args.output = args.input.replace(".maf", "filtered.maf") write_maf(maf_df=maf, mafin_file=args.input, mafout_file=args.output) From 5109ce3c4492d5962830216c103d8cb144021e31 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Fri, 8 Sep 2023 23:36:03 +0100 Subject: [PATCH 38/56] Removed old subworkflows --- modules/local/maf2bed.nf | 34 ----------- subworkflows/nf-core/bam_sort_samtools.nf | 55 ----------------- subworkflows/nf-core/bam_stats_samtools.nf | 39 ------------- subworkflows/nf-core/bam_to_cram.nf | 52 
----------------- subworkflows/nf-core/cram_qc.nf | 39 ------------- subworkflows/nf-core/ensemblvep_annotate.nf | 38 ------------ subworkflows/nf-core/merge_index_cram.nf | 47 --------------- subworkflows/nf-core/recalibrate.nf | 65 --------------------- subworkflows/nf-core/run_fastqc.nf | 19 ------ subworkflows/nf-core/vcf_qc.nf | 30 ---------- 10 files changed, 418 deletions(-) delete mode 100644 modules/local/maf2bed.nf delete mode 100644 subworkflows/nf-core/bam_sort_samtools.nf delete mode 100644 subworkflows/nf-core/bam_stats_samtools.nf delete mode 100644 subworkflows/nf-core/bam_to_cram.nf delete mode 100644 subworkflows/nf-core/cram_qc.nf delete mode 100644 subworkflows/nf-core/ensemblvep_annotate.nf delete mode 100644 subworkflows/nf-core/merge_index_cram.nf delete mode 100644 subworkflows/nf-core/recalibrate.nf delete mode 100644 subworkflows/nf-core/run_fastqc.nf delete mode 100644 subworkflows/nf-core/vcf_qc.nf diff --git a/modules/local/maf2bed.nf b/modules/local/maf2bed.nf deleted file mode 100644 index c66ab96..0000000 --- a/modules/local/maf2bed.nf +++ /dev/null @@ -1,34 +0,0 @@ -process MAF2BED { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "anaconda::pandas=1.4.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pandas:1.4.3' : - 'quay.io/biocontainers/pandas:1.4.3' }" - - input: - tuple val(meta), path(maf) - - output: - tuple val(meta), path('*.bed') , emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/rnadnavar/bin/ - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - maf2bed.py --mafin $maf --bedout ${prefix}.bed $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(echo \$(python --version 2>&1) | sed 's/^.*Python (//;s/).*//') - END_VERSIONS - """ - - - -} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf deleted file mode 100644 index c071268..0000000 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ /dev/null @@ -1,55 +0,0 @@ -// -// Sort, index BAM file and run samtools stats, flagstat and idxstats -// - -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools/main' - -workflow BAM_SORT_SAMTOOLS { - take: - ch_bam // channel: [ val(meta), [ bam ] ] - fasta - - main: - - ch_versions = Channel.empty() - - // ch_bam.view() - SAMTOOLS_SORT ( - ch_bam - ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) - - SAMTOOLS_INDEX ( - SAMTOOLS_SORT.out.bam - ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - SAMTOOLS_SORT.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } - .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( - ch_bam_bai, - fasta - ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) - - emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - - stats = 
BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf deleted file mode 100644 index 9ad52b6..0000000 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ /dev/null @@ -1,39 +0,0 @@ -// -// Run SAMtools stats, flagstat and idxstats -// - -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai/csi] ] - fasta - - main: - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( - ch_bam_bai, - fasta - ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - SAMTOOLS_FLAGSTAT ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) - - SAMTOOLS_IDXSTATS ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_to_cram.nf b/subworkflows/nf-core/bam_to_cram.nf deleted file mode 100644 index 665418d..0000000 --- a/subworkflows/nf-core/bam_to_cram.nf +++ /dev/null @@ -1,52 +0,0 @@ -// -// BAM TO CRAM and optionnal QC -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { SAMTOOLS_CONVERT as SAMTOOLS_BAMTOCRAM } from '../../modules/nf-core/modules/samtools/convert/main' -include { SAMTOOLS_STATS as SAMTOOLS_STATS_CRAM } from '../../modules/nf-core/modules/samtools/stats/main' -include { MOSDEPTH } from '../../modules/nf-core/modules/mosdepth/main' - -workflow BAM_TO_CRAM { - take: - bam_indexed // channel: [mandatory] meta, bam, bai - cram_indexed - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fai - intervals_bed_combined // channel: [optional] intervals_bed - - - main: - ch_versions = Channel.empty() - qc_reports = Channel.empty() - - // remap to have channel without bam index - bam_no_index = bam_indexed.map{ meta, bam, bai -> [meta, bam] } - - // Convert bam input to cram - SAMTOOLS_BAMTOCRAM(bam_indexed, fasta, fasta_fai) - - cram_indexed = Channel.empty().mix(cram_indexed,SAMTOOLS_BAMTOCRAM.out.alignment_index) - - // Reports on cram - SAMTOOLS_STATS_CRAM(cram_indexed, fasta) - // TODO: cram_indexed can accept bed file at the end - not implemented yet - MOSDEPTH(cram_indexed, fasta) - - // Gather all reports generated - qc_reports = qc_reports.mix(SAMTOOLS_STATS_CRAM.out.stats) - qc_reports = qc_reports.mix(MOSDEPTH.out.global_txt, - MOSDEPTH.out.regions_txt) - - // Gather versions of all tools used - ch_versions = ch_versions.mix(MOSDEPTH.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_STATS_CRAM.out.versions) - - emit: - cram_converted = 
SAMTOOLS_BAMTOCRAM.out.alignment_index - qc = qc_reports - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/cram_qc.nf b/subworkflows/nf-core/cram_qc.nf deleted file mode 100644 index bd2685b..0000000 --- a/subworkflows/nf-core/cram_qc.nf +++ /dev/null @@ -1,39 +0,0 @@ -// -// QC on CRAM -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { MOSDEPTH } from '../../modules/nf-core/modules/mosdepth/main' - -workflow CRAM_QC { - take: - cram // channel: [mandatory] meta, cram, crai - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals_bed_combined - - main: - ch_versions = Channel.empty() - qc_reports = Channel.empty() - - // Reports run on cram - SAMTOOLS_STATS(cram, fasta) - // TODO: cram_indexed can accept bed file at the end - not implemented yet - MOSDEPTH(cram, fasta) - - // Gather all reports generated - qc_reports = qc_reports.mix(SAMTOOLS_STATS.out.stats) - qc_reports = qc_reports.mix(MOSDEPTH.out.global_txt, - MOSDEPTH.out.regions_txt) - - // Gather versions of all tools used - ch_versions = ch_versions.mix(MOSDEPTH.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - emit: - qc = qc_reports - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/ensemblvep_annotate.nf b/subworkflows/nf-core/ensemblvep_annotate.nf deleted file mode 100644 index 13dffee..0000000 --- a/subworkflows/nf-core/ensemblvep_annotate.nf +++ /dev/null @@ -1,38 +0,0 @@ -// -// Run VEP to annotate VCF files -// - -include { ENSEMBLVEP } from '../../modules/nf-core/modules/ensemblvep/main' -include { TABIX_BGZIPTABIX } from '../../modules/nf-core/modules/tabix/bgziptabix/main' - -workflow ENSEMBLVEP_ANNOTATE { - take: - vcf // channel: [ val(meta), vcf, tbi ] - vep_genome // value: which genome - vep_species // value: which species - vep_cache_version // value: which cache version - vep_cache // path: path_to_vep_cache (optionnal) - - main: - - ch_versions = Channel.empty() - - ENSEMBLVEP ( - vcf, - vep_genome, - vep_species, - vep_cache_version, - vep_cache - ) - ch_versions = ch_versions.mix(ENSEMBLVEP.out.versions.first()) - - TABIX_BGZIPTABIX ( - ENSEMBLVEP.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) - - emit: - vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf, tbi ] - reports = ENSEMBLVEP.out.report // path: *.html - versions = ch_versions // channel: [versions.yml] -} diff --git a/subworkflows/nf-core/merge_index_cram.nf b/subworkflows/nf-core/merge_index_cram.nf deleted file mode 100644 index 3273f5c..0000000 --- a/subworkflows/nf-core/merge_index_cram.nf +++ /dev/null @@ -1,47 +0,0 @@ -// -// MERGE INDEX CRAM -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { SAMTOOLS_INDEX as INDEX_CRAM } from '../../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_MERGE as MERGE_CRAM } from '../../modules/nf-core/modules/samtools/merge/main' - -workflow CRAM_MERGE_INDEX_SAMTOOLS { - take: - cram // channel: [mandatory] meta, cram - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fai for fasta - - main: - versions = Channel.empty() - - // Figuring out if there is one or more cram(s) from the same 
sample - cram_to_merge = cram.branch{ meta, cram -> - // cram is a list, so use cram.size() to asses number of intervals - single: cram.size() <= 1 - return [ meta, cram[0] ] - multiple: cram.size() > 1 - } - - // Only when using intervals - MERGE_CRAM(cram_to_merge.multiple, fasta.map{ it -> [ [ id:'fasta' ], it ] }, fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }) - - // Mix intervals and no_intervals channels together - cram_all = MERGE_CRAM.out.cram.mix(cram_to_merge.single) - - // Index cram - INDEX_CRAM(cram_all) - - // Join with the crai file - cram_crai = cram_all.join(INDEX_CRAM.out.crai, failOnDuplicate: true, failOnMismatch: true) - - // Gather versions of all tools used - versions = versions.mix(INDEX_CRAM.out.versions.first()) - versions = versions.mix(MERGE_CRAM.out.versions.first()) - - emit: - cram_crai - - versions -} \ No newline at end of file diff --git a/subworkflows/nf-core/recalibrate.nf b/subworkflows/nf-core/recalibrate.nf deleted file mode 100644 index 664cf3f..0000000 --- a/subworkflows/nf-core/recalibrate.nf +++ /dev/null @@ -1,65 +0,0 @@ -/* -======================================================================================== - RECALIBRATE -======================================================================================== -*/ - -include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/modules/gatk4/applybqsr/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' - -workflow RECALIBRATE { - take: - skip_samtools // boolean: true/false - bam // channel: [mandatory] bam - dict // channel: [mandatory] dict - fai // channel: [mandatory] fai - fasta // channel: [mandatory] fasta - - main: - - ch_versions = Channel.empty() - - bam_recalibrated_index = Channel.empty() - bam_recalibrated = Channel.empty() - bam_reports = Channel.empty() - - APPLYBQSR ( - bam, - fasta, - fai, - dict - ) - bam_recalibrated = APPLYBQSR.out.bam - ch_versions = ch_versions.mix(APPLYBQSR.out.versions.first()) - - SAMTOOLS_INDEX ( - bam_recalibrated - ) - bam_recalibrated_index = bam_recalibrated - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map{meta, bam, bai, csi -> - if (bai) [meta, bam, bai] - else [meta, bam, csi] - } - - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - samtools_stats = Channel.empty() - - if (!skip_samtools) { - SAMTOOLS_STATS(bam_recalibrated_index, []) - samtools_stats = SAMTOOLS_STATS.out.stats - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - } - bam_reports = samtools_stats - - - emit: - bam = bam_recalibrated_index - qc = bam_reports - - versions = ch_versions - -} diff --git a/subworkflows/nf-core/run_fastqc.nf b/subworkflows/nf-core/run_fastqc.nf deleted file mode 100644 index bc1fd16..0000000 --- a/subworkflows/nf-core/run_fastqc.nf +++ /dev/null @@ -1,19 +0,0 @@ -// -// Read QC -// -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' - -workflow RUN_FASTQC { - take: - reads // channel: [ val(meta), [ reads ] ] - - main: - ch_versions = Channel.empty() - FASTQC(reads) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - emit: - fastqc_html = FASTQC.out.html // channel: [ val(meta), [ html ] ] - fastqc_zip = FASTQC.out.zip // channel: [ val(meta), [ zip ] ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/vcf_qc.nf 
b/subworkflows/nf-core/vcf_qc.nf deleted file mode 100644 index ded3817..0000000 --- a/subworkflows/nf-core/vcf_qc.nf +++ /dev/null @@ -1,30 +0,0 @@ -include { BCFTOOLS_STATS } from '../../modules/nf-core/modules/bcftools/stats/main' -include { VCFTOOLS as VCFTOOLS_SUMMARY } from '../../modules/nf-core/modules/vcftools/main' -include { VCFTOOLS as VCFTOOLS_TSTV_COUNT } from '../../modules/nf-core/modules/vcftools/main' -include { VCFTOOLS as VCFTOOLS_TSTV_QUAL } from '../../modules/nf-core/modules/vcftools/main' - -workflow VCF_QC { - take: - vcf - target_bed - - main: - - ch_versions = Channel.empty() - - BCFTOOLS_STATS(vcf) - VCFTOOLS_TSTV_COUNT(vcf, target_bed, []) - VCFTOOLS_TSTV_QUAL(vcf, target_bed, []) - VCFTOOLS_SUMMARY(vcf, target_bed, []) - - ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) - ch_versions = ch_versions.mix(VCFTOOLS_TSTV_COUNT.out.versions) - - emit: - bcftools_stats = BCFTOOLS_STATS.out.stats - vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count - vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual - vcftools_filter_summary = VCFTOOLS_SUMMARY.out.filter_summary - - versions = ch_versions -} From 0a7a2df29bc690f3ca7584664908a49d47808c1b Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Fri, 8 Sep 2023 23:36:12 +0100 Subject: [PATCH 39/56] Removed old subworkflows --- subworkflows/nf-core/align_hisat2.nf | 43 ---------------------------- 1 file changed, 43 deletions(-) delete mode 100644 subworkflows/nf-core/align_hisat2.nf diff --git a/subworkflows/nf-core/align_hisat2.nf b/subworkflows/nf-core/align_hisat2.nf deleted file mode 100644 index 2e9b7a2..0000000 --- a/subworkflows/nf-core/align_hisat2.nf +++ /dev/null @@ -1,43 +0,0 @@ -// -// Alignment with HISAT2 -// - -include { HISAT2_ALIGN } from '../../modules/nf-core/modules/hisat2/align/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_HISAT2 { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/star/index/ - splicesites // channel: /path/to/genome.splicesites.txt - - main: - - ch_versions = Channel.empty() - - // - // Map reads with HISAT2 - // - HISAT2_ALIGN ( reads, index, splicesites ) - ch_versions = ch_versions.mix(HISAT2_ALIGN.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS ( HISAT2_ALIGN.out.bam ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - - emit: - orig_bam = HISAT2_ALIGN.out.bam // channel: [ val(meta), bam ] - summary = HISAT2_ALIGN.out.summary // channel: [ val(meta), log ] - fastq = HISAT2_ALIGN.out.fastq // channel: [ val(meta), fastq ] - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - csi = BAM_SORT_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} From d71263168260263746bbd9fc7647bcd8ab52889c Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Fri, 8 Sep 2023 23:36:52 +0100 Subject: [PATCH 40/56] Added code for preparing second run --- .../filtering/prepare_second_run.config | 47 ++++++++++ .../extract_reads_id.config | 37 ++++++++ .../prepare_second_run/filtersamreads.config | 28 ++++++ modules/local/maf2bed/main.nf | 33 +++++++ 
subworkflows/local/prepare_second_run/main.nf | 93 +++++++++++++++++++ 5 files changed, 238 insertions(+) create mode 100644 conf/modules/filtering/prepare_second_run.config create mode 100644 conf/modules/prepare_second_run/extract_reads_id.config create mode 100644 conf/modules/prepare_second_run/filtersamreads.config create mode 100644 modules/local/maf2bed/main.nf create mode 100644 subworkflows/local/prepare_second_run/main.nf diff --git a/conf/modules/filtering/prepare_second_run.config b/conf/modules/filtering/prepare_second_run.config new file mode 100644 index 0000000..04acdf4 --- /dev/null +++ b/conf/modules/filtering/prepare_second_run.config @@ -0,0 +1,47 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE SECOND RUN + +process { // second run + + if (params.skip_tools && !params.skip_tools.split(',').contains('second_run')){ + + withName: "GATK4_FILTERSAMREADS" { + ext.prefix = { "${bam.baseName.minus(".bam")}_filtered"} + ext.args = { "--READ_LIST_FILE $read_ids --FILTER includeReadList --VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true" } + publishDir = [ + enabled: params.save_align_intermeds, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/bams/${meta.patient}/${meta.id}/" }, + pattern: "*{.bam,.bai}" + ] + } + + withName: "EXTRACT_READ_IDS" { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/readids/${meta.patient}/${meta.id}/" }, + pattern: "*{.txt}" + ] + } + + withName: "MAF2BED" { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/maf2bed/${meta.patient}/${meta.id}/" }, + pattern: "*{maf,maf.gz}" + ] + } + } +} \ No newline at end of file diff --git a/conf/modules/prepare_second_run/extract_reads_id.config b/conf/modules/prepare_second_run/extract_reads_id.config new file mode 100644 index 0000000..ed4b945 --- /dev/null +++ b/conf/modules/prepare_second_run/extract_reads_id.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// extrad readsid + +process { // extract reads id and maf2bed + + withName: "EXTRACT_READ_IDS" { + ext.prefix = { "${meta.id}"} + ext.args = { "" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/readids/${meta.id}/" }, + pattern: "*.txt" + ] + } + + withName: "MAF2BED" { + ext.prefix = { "${meta.id}"} + ext.args = { "" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/maf2bed/${meta.id}/" }, + pattern: "*bed" + ] + } +} \ No newline at end of file diff --git a/conf/modules/prepare_second_run/filtersamreads.config b/conf/modules/prepare_second_run/filtersamreads.config new file mode 100644 index 0000000..71015d3 --- /dev/null +++ b/conf/modules/prepare_second_run/filtersamreads.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// FILTER SAM READS + +process { // filtersamreads + + withName: "PICARD_FILTERSAMREADS" { + ext.prefix = { "${meta.id}_filtered"} + ext.args = { "--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true -R ${params.fasta}" } + publishDir = [ + enabled: params.save_align_intermeds, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/${meta.id}/" }, + pattern: "*{.bam,.bai}" + ] + } +} \ No newline at end of file diff --git a/modules/local/maf2bed/main.nf b/modules/local/maf2bed/main.nf new file mode 100644 index 0000000..a3bc2d1 --- /dev/null +++ b/modules/local/maf2bed/main.nf @@ -0,0 +1,33 @@ +process MAF2BED { + tag "$meta.id" + label 'process_single' + + conda "anaconda::pandas=1.4.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/pandas:1.4.3' :
+        'quay.io/biocontainers/pandas:1.4.3' }"
+
+    input:
+    tuple val(meta), path(maf)
+
+    output:
+    tuple val(meta), path('*.bed') , emit: bed
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+#!/usr/bin/env python
+import subprocess
+import pandas as pd
+maf = pd.read_csv("${maf}", sep="\\t", comment="#")
+bed = maf[["Chromosome", "Start_Position", "End_Position"]]
+bed.to_csv("${prefix}.bed", sep="\\t", index=False, header=False)
+subprocess.check_call('cat <<-END_VERSIONS > versions.yml\\n\\"${task.process}\\":\\n\tpython: \$(echo \$(python --version 2>&1) | sed \\"s/^.*Python (//;s/).*//\\")\\nEND_VERSIONS', shell=True)
+    """
+}
\ No newline at end of file
diff --git a/subworkflows/local/prepare_second_run/main.nf b/subworkflows/local/prepare_second_run/main.nf
new file mode 100644
index 0000000..d958bf6
--- /dev/null
+++ b/subworkflows/local/prepare_second_run/main.nf
@@ -0,0 +1,93 @@
+//
+// PREPARE SECOND RUN: extract reads from candidate regions for re-alignment (RNA and DNA normal only)
+//
+include { MAF2BED                                     } from '../../../modules/local/maf2bed/main'
+// Extract read ids for selected regions
+include { SAMTOOLS_EXTRACT_READ_IDS                   } from '../../../modules/local/extract_reads_id/main'
+// Filter bam for selected regions
+include { PICARD_FILTERSAMREADS                       } from '../../../modules/nf-core/picard/filtersamreads/main'
+// Convert BAM files to FASTQ files
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../bam_convert_samtools/main'
+// Realignment with HISAT2
+include { FASTQ_ALIGN_HISAT2                          } from '../../nf-core/fastq_align_hisat2/main'
+
+
+workflow BAM_EXTRACT_READS_HISAT2_ALIGN {
+    take:
+        input_sample
+        maf_with_candidates  // MAF with candidate regions to extract
+        reads_to_realign     // CRAM/BAM to extract reads from
+        fasta
+        fasta_fai
+        dict
+        hisat2_index
+        splicesites
+
+    main:
+    versions   = Channel.empty()
+    bam_mapped = Channel.empty()
+
+    if (params.step in ['mapping', 'markduplicates', 'splitncigar',
+        'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalize', 'consensus',
+        'second_run'] && !(params.skip_tools && params.skip_tools.split(",").contains("second_run"))) {
+        if (params.step == 'second_run') {
+            // [meta, cram, crai, maf]
+            cram_to_realign = input_sample
+            // TODO convert to CRAM or/and MERGE if necessary
+            // TODO Merge alignments if applicable
+//            MERGE_ALIGN(previous_alignment.map{meta, cram, crai, maf -> [meta, cram]})
+        } else {
+            // TODO (remember to change id to _realign)
+
+        }
+        // Get candidate regions
+        // Add files to meta to keep them for next processes
+        maf_to_bed = cram_to_realign.map{meta, cram, crai, maf -> [meta + [cram_file:cram, crai_file:crai, maf_file:maf], maf]}
+        maf_to_bed.dump(tag:"maf_to_bed")
+        MAF2BED(maf_to_bed)
+        // Extract read names with regions from bed
+        cram_to_extract = MAF2BED.out.bed.map{meta, bed -> [meta, meta.cram_file, meta.crai_file, bed]}
+        cram_to_extract.dump(tag:"cram_to_extract")
+        SAMTOOLS_EXTRACT_READ_IDS(cram_to_extract)
+        // Extract reads
+        cram_to_filter = SAMTOOLS_EXTRACT_READ_IDS.out.read_ids.map{meta, readsid -> [meta, meta.cram_file, readsid]}
+        cram_to_filter.dump(tag:"cram_to_filter")
+        PICARD_FILTERSAMREADS(cram_to_filter, 'includeReadList') // bam -> filtered_bam
+        // Convert to FASTQ
+        bam_to_fq = PICARD_FILTERSAMREADS.out.bam.join(PICARD_FILTERSAMREADS.out.bai)
+
bam_to_fq.dump(tag:"bam_to_fq") + fasta.dump(tag:"fasta") + fasta_fai.dump(tag:"fasta_fai") + interleave_input = false // Currently don't allow interleaved input + CONVERT_FASTQ_INPUT( + bam_to_fq, + fasta.map{it -> [ [ id:"fasta" ], it ]}, // fasta + fasta_fai.map{it -> [ [ id:"fasta_fai" ], it ]}, // fasta_fai + interleave_input + ) + // Align with HISAT2 + reads_for_realignment = CONVERT_FASTQ_INPUT.out.reads + reads_for_realignment.dump(tag:"reads_for_realignment") + // TODO: add single_end to input check + FASTQ_ALIGN_HISAT2( + reads_for_realignment.map{meta, reads -> [meta + [single_end:false], reads]}, + hisat2_index, + splicesites, + fasta.map{it -> [ [ id:"fasta" ], it ]} + ) + // Mix with index + bam_mapped = FASTQ_ALIGN_HISAT2.out.bam.mix(FASTQ_ALIGN_HISAT2.out.bai) + } + + emit: + bam_mapped = bam_mapped + versions = versions // channel: [ versions.yml ] + + + + + + + + +} \ No newline at end of file From 7a37c752afe3037fd3c5d4e6886ce2d781880f68 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Fri, 8 Sep 2023 23:37:39 +0100 Subject: [PATCH 41/56] Fixed typo --- subworkflows/local/bam_align/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_align/main.nf b/subworkflows/local/bam_align/main.nf index 368195e..8a3fbf7 100644 --- a/subworkflows/local/bam_align/main.nf +++ b/subworkflows/local/bam_align/main.nf @@ -43,7 +43,7 @@ workflow BAM_ALIGN { cram_mapped = Channel.empty() // Gather index for mapping given the chosen aligner for DNA - index_alignement = params.aligner == "bwa-mem" ? bwa : + index_alignment = params.aligner == "bwa-mem" ? bwa : params.aligner == "bwa-mem2" ? bwamem2 : dragmap if (params.step == 'mapping') { @@ -126,7 +126,7 @@ workflow BAM_ALIGN { // STEP 1.D.1: DNA mapping with BWA sort_bam = true - FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP(reads_for_alignment_status.dna, index_alignement, sort_bam) + FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP(reads_for_alignment_status.dna, index_alignment, sort_bam) // Grouping the bams from the same samples not to stall the workflow bam_mapped_dna = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP.out.bam.map{ meta, bam -> From 33be52368894038b95982e363e7c511d54df7c98 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:43:56 +0100 Subject: [PATCH 42/56] Modified picard/filtersamreads to return bai as well --- modules/nf-core/picard/filtersamreads/main.nf | 1 + .../filtersamreads/picard-filtersamreads.diff | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 modules/nf-core/picard/filtersamreads/picard-filtersamreads.diff diff --git a/modules/nf-core/picard/filtersamreads/main.nf b/modules/nf-core/picard/filtersamreads/main.nf index f7bd191..0e9d994 100644 --- a/modules/nf-core/picard/filtersamreads/main.nf +++ b/modules/nf-core/picard/filtersamreads/main.nf @@ -13,6 +13,7 @@ process PICARD_FILTERSAMREADS { output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.bai"), emit: bai path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/picard/filtersamreads/picard-filtersamreads.diff b/modules/nf-core/picard/filtersamreads/picard-filtersamreads.diff new file mode 100644 index 0000000..08bacab --- /dev/null +++ b/modules/nf-core/picard/filtersamreads/picard-filtersamreads.diff @@ -0,0 +1,13 @@ +Changes in module 'nf-core/picard/filtersamreads' +--- modules/nf-core/picard/filtersamreads/main.nf ++++ modules/nf-core/picard/filtersamreads/main.nf +@@ -13,6 +13,7 @@ + + output: + tuple val(meta), path("*.bam"), emit: bam ++ tuple val(meta), 
path("*.bai"), emit: bai + path "versions.yml" , emit: versions + + when: + +************************************************************ From f5f992405293c9bdf10e93f6633396cd0dd62863 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:45:40 +0100 Subject: [PATCH 43/56] Fixed dict fo sncr --- subworkflows/local/bam_splitncigarreads/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_splitncigarreads/main.nf b/subworkflows/local/bam_splitncigarreads/main.nf index 95ffb16..9fa71ab 100644 --- a/subworkflows/local/bam_splitncigarreads/main.nf +++ b/subworkflows/local/bam_splitncigarreads/main.nf @@ -27,7 +27,7 @@ workflow BAM_SPLITNCIGARREADS { cram_intervals, fasta, fasta_fai, - dict.map{ meta, dict -> [ dict ] } + dict.map{ meta, it -> it } ) // Gather the recalibrated cram files From fa9a17e417c3a5c19760152a4ec1d3d7291f4af9 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:49:55 +0100 Subject: [PATCH 44/56] Added second_run boolean for realignment subworkflow --- .../local/bam_convert_samtools/main.nf | 1 - .../local/bam_gatk_preprocessing/main.nf | 21 +++++----- .../local/bam_variant_calling/main.nf | 7 ++-- .../main.nf | 42 +++++++++++-------- .../local/bam_variant_calling_somatic/main.nf | 8 ++-- subworkflows/local/maf_filtering/main.nf | 8 ++-- subworkflows/local/vcf_annotate/main.nf | 6 +-- subworkflows/local/vcf_normalise/main.nf | 7 +++- 8 files changed, 57 insertions(+), 43 deletions(-) diff --git a/subworkflows/local/bam_convert_samtools/main.nf b/subworkflows/local/bam_convert_samtools/main.nf index ed1f659..aef7576 100644 --- a/subworkflows/local/bam_convert_samtools/main.nf +++ b/subworkflows/local/bam_convert_samtools/main.nf @@ -24,7 +24,6 @@ workflow BAM_CONVERT_SAMTOOLS { // Index File if not PROVIDED -> this also requires updates to samtools view possibly URGH // MAP - MAP - input.dump(tag:'input') SAMTOOLS_VIEW_MAP_MAP(input, fasta, []) // UNMAP - UNMAP diff --git a/subworkflows/local/bam_gatk_preprocessing/main.nf b/subworkflows/local/bam_gatk_preprocessing/main.nf index c3f150e..4bec4ad 100644 --- a/subworkflows/local/bam_gatk_preprocessing/main.nf +++ b/subworkflows/local/bam_gatk_preprocessing/main.nf @@ -22,7 +22,7 @@ include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../../ workflow BAM_GATK_PREPROCESSING { take: - input_sample // channel: [optional] input from CSV if applicable + input_sample // channel: [optional] input from CSV if applicable bam_mapped // channel: [mandatory] bam_mapped cram_mapped // channel: [mandatory] cram_mapped fasta // channel: [mandatory] fasta @@ -35,18 +35,19 @@ workflow BAM_GATK_PREPROCESSING { intervals // channel: [mandatory] intervals/target regions intervals_for_preprocessing // channel: [mandatory] intervals/wes intervals_and_num_intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) + second_run // boolean main: reports = Channel.empty() versions = Channel.empty() - + cram_variant_calling = Channel.empty() // check if preprocessing is skipped if (params.skip_tools && !params.skip_tools.split(',').contains('preprocessing')) { // Markduplicates - if (params.step in ['mapping', 'markduplicates'] ) { + if (params.step in ['mapping', 'markduplicates'] || second_run) { cram_markduplicates_no_spark = Channel.empty() @@ -115,7 +116,7 @@ workflow BAM_GATK_PREPROCESSING { // SplitNCigarReads for RNA - if (params.step in ['mapping', 'markduplicates', 'splitncigar']) { + if (params.step in ['mapping', 
'markduplicates', 'splitncigar'] || second_run) { if (params.step == 'mapping') { cram_skip_splitncigar = cram_skip_markduplicates } else { @@ -182,8 +183,8 @@ workflow BAM_GATK_PREPROCESSING { // BQSR if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration']) { - // Run if starting from step "prepare_recalibration" - if (params.step == 'prepare_recalibration') { + // Run if starting from step "prepare_recalibration". This will not run for second pass + if (params.step == 'prepare_recalibration' && !second_run) { // Support if starting from BAM or CRAM files input_prepare_recal_convert = input_sample.branch{ @@ -231,10 +232,10 @@ workflow BAM_GATK_PREPROCESSING { known_sites_indels, known_sites_indels_tbi) - ch_table_bqsr_no_spark = BAM_BASERECALIBRATOR.out.table_bqsr + ch_table_bqsr_no_spark = BAM_BASERECALIBRATOR.out.table_bqsr - // Gather used softwares versions - versions = versions.mix(BAM_BASERECALIBRATOR.out.versions) + // Gather used softwares versions + versions = versions.mix(BAM_BASERECALIBRATOR.out.versions) // ch_table_bqsr contains either: @@ -254,7 +255,7 @@ workflow BAM_GATK_PREPROCESSING { if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { // Run if starting from step "prepare_recalibration" - if (params.step == 'recalibrate') { + if (params.step == 'recalibrate' && !second_run) { // Support if starting from BAM or CRAM files input_recal_convert = input_sample.branch{ diff --git a/subworkflows/local/bam_variant_calling/main.nf b/subworkflows/local/bam_variant_calling/main.nf index 391c119..08bda06 100644 --- a/subworkflows/local/bam_variant_calling/main.nf +++ b/subworkflows/local/bam_variant_calling/main.nf @@ -29,12 +29,12 @@ workflow BAM_VARIANT_CALLING { pon pon_tbi input_sample + second_run main: reports = Channel.empty() versions = Channel.empty() - - if (params.tools) { + if (tools || second_run) { if (params.step == 'annotate') cram_variant_calling = Channel.empty() // @@ -113,7 +113,8 @@ workflow BAM_VARIANT_CALLING { intervals_bed_gz_tbi_combined, pon, pon_tbi, - params.joint_mutect2 + params.joint_mutect2, + second_run ) diff --git a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf index 02f17a5..dc76fc7 100644 --- a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf +++ b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf @@ -39,7 +39,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { intervals_and_num_intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped dna_consensus_maf // to repeat rescue consensus - dna_varcall_mafs // to repeat rescue consensus + dna_varcall_mafs // to repeat rescue consensus + second_run main: reports = Channel.empty() @@ -48,18 +49,19 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { // GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery BAM_GATK_PREPROCESSING( input_sample, - bam_mapped, // channel: [mandatory] [meta, [bam]] - cram_mapped, // channel: [mandatory] [meta, [cram]] - fasta, // channel: [mandatory] fasta - fasta_fai , // channel: [mandatory] fasta_fai - dict, // channel: [mandatory] dict - known_sites_indels, // channel: [optional] known_sites - known_sites_indels_tbi, // channel: [optional] known_sites - 
germline_resource, // channel: [optional] germline_resource - germline_resource_tbi, // channel: [optional] germline_resource_tbi - intervals, // channel: [mandatory] intervals/target regions - intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes - intervals_and_num_intervals // channel: [mandatory] intervals_for_preprocessing/wes + bam_mapped, // channel: [mandatory] [meta, [bam]] + cram_mapped, // channel: [mandatory] [meta, [cram]] + fasta, // channel: [mandatory] fasta + fasta_fai , // channel: [mandatory] fasta_fai + dict.map{ it -> [ [ id:'dict' ], it ] }, // channel: [mandatory] dict + known_sites_indels, // channel: [optional] known_sites + known_sites_indels_tbi, // channel: [optional] known_sites + germline_resource, // channel: [optional] germline_resource + germline_resource_tbi, // channel: [optional] germline_resource_tbi + intervals, // channel: [mandatory] intervals/target regions + intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes + intervals_and_num_intervals, // channel: [mandatory] intervals_for_preprocessing/wes + second_run ) cram_variant_calling = BAM_GATK_PREPROCESSING.out.cram_variant_calling @@ -81,7 +83,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { intervals_bed_gz_tbi_combined, pon, pon_tbi, - input_sample + input_sample, + second_run ) cram_variant_calling_pair = BAM_VARIANT_CALLING.out.cram_variant_calling_pair // use same crams for force calling later vcf_to_normalise = BAM_VARIANT_CALLING.out.vcf_to_normalise @@ -97,7 +100,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { vcf_to_normalise, // Remap channel to match module/subworkflow fasta.map{ it -> [ [ id:'fasta' ], it ] }, - input_sample + input_sample, + second_run ) versions = versions.mix(VCF_NORMALISE.out.versions) vcf_to_annotate = VCF_NORMALISE.out.vcf @@ -107,7 +111,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { VCF_ANNOTATE( vcf_to_annotate.map{meta, vcf -> [ meta + [ file_name: vcf.baseName ], vcf ] }, fasta, - input_sample + input_sample, + second_run ) vcf_to_consensus = VCF_ANNOTATE.out.vcf_ann @@ -122,7 +127,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { fasta, dna_consensus_maf, // null when first pass dna_varcall_mafs, // null when first pass - input_sample + input_sample, + second_run ) dna_consensus_maf = VCF_CONSENSUS.out.maf_consensus_dna @@ -132,7 +138,7 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { maf_to_filter.dump(tag:"maf_to_filter0") // STEP 7: FILTERING - MAF_FILTERING(maf_to_filter, fasta, input_sample) + MAF_FILTERING(maf_to_filter, fasta, input_sample, second_run) filtered_maf = MAF_FILTERING.out.maf versions = versions.mix(MAF_FILTERING.out.versions) diff --git a/subworkflows/local/bam_variant_calling_somatic/main.nf b/subworkflows/local/bam_variant_calling_somatic/main.nf index bee5b93..70541bc 100644 --- a/subworkflows/local/bam_variant_calling_somatic/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic/main.nf @@ -22,6 +22,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC { panel_of_normals // channel: [optional] panel_of_normals panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + second_run main: versions = Channel.empty() @@ -31,9 +32,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC { vcf_strelka = Channel.empty() vcf_mutect2 = Channel.empty() vcf_sage = Channel.empty() - // SAGE - if (tools.split(',').contains('sage')) { + if (tools.split(',').contains('sage') || 
second_run) { BAM_VARIANT_CALLING_SOMATIC_SAGE( cram, @@ -64,7 +64,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC { } // STRELKA - if (tools.split(',').contains('strelka')) { + if (tools.split(',').contains('strelka') || second_run) { // Remap channel to match module/subworkflow cram_strelka = (tools.split(',').contains('manta')) ? cram.join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf, failOnDuplicate: true, failOnMismatch: true).join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf_tbi, failOnDuplicate: true, failOnMismatch: true) : @@ -84,7 +84,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC { } // MUTECT2 - if (tools.split(',').contains('mutect2')) { + if (tools.split(',').contains('mutect2') || second_run) { BAM_VARIANT_CALLING_SOMATIC_MUTECT2( // Remap channel to match module/subworkflow cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] }, diff --git a/subworkflows/local/maf_filtering/main.nf b/subworkflows/local/maf_filtering/main.nf index 9c38931..2ded6e3 100644 --- a/subworkflows/local/maf_filtering/main.nf +++ b/subworkflows/local/maf_filtering/main.nf @@ -11,16 +11,18 @@ workflow MAF_FILTERING { maf_to_filter fasta input_sample + second_run main: versions = Channel.empty() maf = Channel.empty() - if (params.step in ['mapping', 'markduplicates', 'splitncigar', + if ((params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', - 'normalise', 'consensus', 'filtering'] && (!(params.skip_tools && params.skip_tools.split(",").contains("filtering")))) { + 'normalise', 'consensus', 'filtering'] && + (!(params.skip_tools && params.skip_tools.split(",").contains("filtering")))) || + second_run) { if (params.step == 'filtering') maf_to_filter = input_sample - maf_to_filter.dump(tag:"maf_to_filter") // BASIC FILTERING FILTERING(maf_to_filter, fasta) maf = FILTERING.out.maf diff --git a/subworkflows/local/vcf_annotate/main.nf b/subworkflows/local/vcf_annotate/main.nf index 28f130a..3410266 100644 --- a/subworkflows/local/vcf_annotate/main.nf +++ b/subworkflows/local/vcf_annotate/main.nf @@ -10,6 +10,7 @@ workflow VCF_ANNOTATE { vcf // channel: [ val(meta), vcf ] fasta input_sample + second_run main: reports = Channel.empty() @@ -20,7 +21,7 @@ workflow VCF_ANNOTATE { if (params.step == 'annotate') vcf_to_annotate = input_sample - if (params.tools && params.tools.split(',').contains('vep')) { + if (params.tools && params.tools.split(',').contains('vep') || second_run) { if (params.tools.split(',').contains('vep')) { fasta = (params.vep_include_fasta) ? 
fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] @@ -43,11 +44,10 @@ workflow VCF_ANNOTATE { } vcf_for_vep = vcf.map{ meta, vcf -> [ meta, vcf, [] ] } - vcf_for_vep.dump(tag:"vcf_for_vep") VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) - vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi).map{meta, vcf, tbi -> [meta +[data_type:"vcf"], vcf, tbi]} tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) json_ann = json_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.json) versions = versions.mix(VCF_ANNOTATE_ENSEMBLVEP.out.versions) diff --git a/subworkflows/local/vcf_normalise/main.nf b/subworkflows/local/vcf_normalise/main.nf index c07bb01..b28bfca 100644 --- a/subworkflows/local/vcf_normalise/main.nf +++ b/subworkflows/local/vcf_normalise/main.nf @@ -15,12 +15,17 @@ workflow VCF_NORMALISE { vcf_to_normalise fasta input_sample + second_run main: version = Channel.empty() vcf_to_consensus = Channel.empty() - if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalise'] && (!(params.skip_tools && params.skip_tools.split(",").contains("normalise")))) { + if ((params.step in ['mapping', 'markduplicates', 'splitncigar', + 'prepare_recalibration', 'recalibrate', + 'variant_calling', 'normalise'] && + (!(params.skip_tools && params.skip_tools.split(",").contains("normalise")))) || + second_run) { if (params.step == 'normalise') vcf_to_normalise = input_sample From b13f956051e25a538c99a975d5eb747c02f041c4 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:51:44 +0100 Subject: [PATCH 45/56] Fixed config files --- conf/base.config | 36 +- conf/modules/alignment/bam_align.config | 365 +++++++++--------- conf/modules/annotate/annotate.config | 52 ++- conf/modules/consensus/vcf_consensus.config | 66 ++-- conf/modules/normalise/vt.config | 6 +- .../quality_control/quality_control.config | 136 ++++--- conf/modules/variant_calling/sage.config | 7 +- conf/modules/variant_calling/strelka.config | 10 +- 8 files changed, 362 insertions(+), 316 deletions(-) diff --git a/conf/base.config b/conf/base.config index df6e9e9..4ecc28d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -8,7 +8,7 @@ ---------------------------------------------------------------------------------------- */ -process { +process { // base // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } @@ -87,13 +87,13 @@ process { time = { check_max( 30.h * task.attempt, 'time' ) } clusterOptions = "--account caldas-sl2-cpu --partition cclake-himem" } - withName: '.*HISAT2_ALIGN' { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 41.GB * task.attempt, 'memory')} - time = { check_max( 12.h * task.attempt, 'time' ) } + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 14.GB * task.attempt, 'memory')} + time = { check_max( 4.h * task.attempt, 'time' ) } } + withName: 'MUTECT2' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } time = { check_max( 24.h * task.attempt, 'time' ) } @@ -138,25 +138,37 @@ process { withName:'RUN_CONSENSUS.*' { errorStrategy = { task.exitStatus in [140] ? 
'retry' : 'finish' } - cpus = { check_max( 12 * task.attempt, 'cpus' ) } + cpus = { check_max( 8 * task.attempt, 'cpus' ) } memory = { check_max( 41.GB * task.attempt, 'memory')} - time = { check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } } withName:'GATK4_FILTERSAMREADS' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 41.GB * task.attempt, 'memory')} + memory = { check_max( 30.GB * task.attempt, 'memory')} time = { check_max( 12.h * task.attempt, 'time' ) } } - withName: 'SAGE|SAMTOOLS_BAMTOCRAM' { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 41.GB * task.attempt, 'memory')} - time = { check_max( 12.h * task.attempt, 'time' ) } + withName: 'SAGE' { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 14.GB * task.attempt, 'memory')} + time = { check_max( 4.h * task.attempt, 'time' ) } } withName: 'MULTIQC' { cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory')} time = { check_max( 1.h * task.attempt, 'time' ) } } + + withName: 'GATK4_BEDTOINTERVALLIST|CREATE_INTERVALS_BED|TABIX_BGZIPTABIX_INTERVAL.*' { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + time = { check_max( 2.h * task.attempt, 'time' ) } + } + + withName: '.*BAM_SORT_STATS_SAMTOOLS:SAMTOOLS.*' { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 14.GB * task.attempt, 'memory')} + time = { check_max( 4.h * task.attempt, 'time' ) } + } } diff --git a/conf/modules/alignment/bam_align.config b/conf/modules/alignment/bam_align.config index 2a1e390..f55ee77 100644 --- a/conf/modules/alignment/bam_align.config +++ b/conf/modules/alignment/bam_align.config @@ -14,215 +14,206 @@ process { // bam_align - if (params.step == 'mapping'){ - - // DNA aligners - - withName: "BWAMEM1_MEM" { - ext.when = { params.aligner == "bwa-mem" } - } - - withName: "BWAMEM2_MEM" { - ext.when = { params.aligner == "bwa-mem2" } - } - - withName: "DRAGMAP_ALIGN" { - ext.when = { params.aligner == "dragmap" } - ext.args = { "--RGSM ${meta.patient}_${meta.sample} --RGID ${meta.read_group}" } - } - - withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*bam", - // Only save if save_output_as_bam AND - // (save_mapped OR no_markduplicates OR sentieon_dedup) AND - // only a single BAM file per sample - saveAs: { - if (params.save_output_as_bam && - ( - params.save_mapped || - (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) - ) && (meta.size * meta.num_lanes == 1) - ) { "mapped/${meta.id}/${it}" } - else { null } - } - ] - } - - withName: "BWAMEM.*_MEM" { - // Using -B 3 for tumor samples - ext.args = { meta.status == 1 ? 
"-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } - } - - - - withName: 'MERGE_BAM|INDEX_MERGE_BAM' { - publishDir = [ + + // DNA aligners + + withName: "BWAMEM1_MEM" { + ext.when = { params.aligner == "bwa-mem" } + } + + withName: "BWAMEM2_MEM" { + ext.when = { params.aligner == "bwa-mem2" } + } + + withName: "DRAGMAP_ALIGN" { + ext.when = { params.aligner == "dragmap" } + ext.args = { "--RGSM ${meta.patient}_${meta.sample} --RGID ${meta.read_group}" } + } + + withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*bam", + // Only save if save_output_as_bam AND + // (save_mapped OR no_markduplicates OR sentieon_dedup) AND + // only a single BAM file per sample + saveAs: { + if (params.save_output_as_bam && + ( + params.save_mapped || + (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) + ) && (meta.size * meta.num_lanes == 1) + ) { "mapped/${meta.id}/${it}" } + else { null } + } + ] + } + + withName: "BWAMEM.*_MEM" { + // Using -B 3 for tumor samples + ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } + } + + + + withName: 'MERGE_BAM|INDEX_MERGE_BAM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*{bam,bai}", + // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped )) + saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null } + ] + } + + withName: 'MERGE_BAM' { + ext.prefix = { "${meta.id}.sorted" } + } + + + // RNA aligners + withName: 'STAR_GENOMEGENERATE' { + ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '' + } + + withName: 'UNTAR_.*|STAR_GENOMEGENERATE|HISAT2_BUILD|HISAT2_EXTRACTSPLICESITES' { + publishDir = [ + enabled: params.save_reference, mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*{bam,bai}", - // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped )) - saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null } + path: { "${params.outdir}/reference/index" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - } - - withName: 'MERGE_BAM' { - ext.prefix = { "${meta.id}.sorted" } - } - - - // RNA aligners - withName: 'STAR_GENOMEGENERATE' { - ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '' - } - - withName: 'UNTAR_.*|STAR_GENOMEGENERATE|HISAT2_BUILD|HISAT2_EXTRACTSPLICESITES' { - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/index" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: 'STAR_ALIGN' { - ext.args = [ - '--outSAMtype BAM Unsorted', - '--readFilesCommand zcat', - '--outFilterMultimapScoreRange 1', - '--outFilterMultimapNmax 20', - '--outFilterMismatchNmax 10', - '--alignMatesGapMax 1000000', - '--sjdbScore 2', - '--alignSJDBoverhangMin 1', - '--genomeLoad NoSharedMemory', - '--outFilterMatchNminOverLread 0.33', - '--outFilterScoreMinOverLread 0.33', - '--twopass1readsN -1', - '--outSAMattrRGline \'ID:${meta.read_group}\' \'SM:${meta.patient}_${meta.sample}\'', - params.save_unaligned ? '--outReadsUnmapped Fastx' : '', - params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '', - params.star_twopass ? '--twopassMode Basic' : '', - params.star_max_memory_bamsort > 0 ? "--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "", - params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : "" - ].flatten().unique(false).join(' ').trim() - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).baseName.tokenize('.')[0]) : "" } + } + + withName: 'STAR_ALIGN' { + ext.args = {[ + "--outSAMtype BAM Unsorted", + "--readFilesCommand zcat", + "--outFilterMultimapScoreRange 1", + "--outFilterMultimapNmax 20", + "--outFilterMismatchNmax 10", + "--alignMatesGapMax 1000000", + "--sjdbScore 2", + "--alignSJDBoverhangMin 1", + "--genomeLoad NoSharedMemory", + "--outFilterMatchNminOverLread 0.33", + "--outFilterScoreMinOverLread 0.33", + "--twopass1readsN -1", + "--outSAMattrRGline ${meta.read_group}", + params.save_unaligned ? "--outReadsUnmapped Fastx" : "", + params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : "", + params.star_twopass ? "--twopassMode Basic" : "", + params.star_max_memory_bamsort > 0 ? "--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "", + params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : "" + ].flatten().unique(false).join(' ').trim()} + ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(reads.get(0).baseName.tokenize('.')[0]) : "" } + publishDir = [ + [ + path: { "${params.outdir}/reports/star/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.{out,tab}', + enabled: params.save_align_intermeds + ], + [ + path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/mapped/" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ], + [ + path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/unmapped/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_align_intermeds + ] + ] + } + + // HISAT2 for second run + withName: '.*:ALIGN_HISAT2:HISAT2_ALIGN' { + ext.args = '--met-stderr --new-summary' publishDir = [ [ - path: { "${params.outdir}/reports/star/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/report/hisat2/${meta.patient}/${meta.id}" }, mode: params.publish_dir_mode, - pattern: '*.{out,tab}', + pattern: '*.log', enabled: params.save_align_intermeds ], [ - path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/mapped/" }, + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, mode: params.publish_dir_mode, pattern: '*.bam', enabled: params.save_align_intermeds ], [ - path: { "${params.outdir}/preprocessing/star/${meta.patient}/${meta.id}/unmapped/" }, + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/unmapped" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.save_align_intermeds - ] + enabled: params.save_unaligned ] - } - - // HISAT2 for second run - withName: '.*:ALIGN_HISAT2:HISAT2_ALIGN' { - ext.args = '--met-stderr --new-summary' - publishDir = [ - [ - path: { "${params.outdir}/report/hisat2/${meta.patient}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: '*.log', - enabled: params.save_align_intermeds - ], - [ - path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.save_align_intermeds - ], - [ - path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/unmapped" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.save_unaligned - ] - ] - } + ] + } - // POST ALIGNMENT AND PREPROCESSING BAM TODO: check if it follows new pattern - withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('') : "${meta.id}" } - publishDir = [ - path: { "${params.outdir}/preprocessing/" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } + // POST ALIGNMENT AND PREPROCESSING BAM TODO: check if it follows new pattern + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('') : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/preprocessing/" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? 
"mapped/${meta.patient}/${meta.id}/${it}" : null } - ] - } + ] + } - withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { - ext.args = params.bam_csi_index ? '-c' : '' - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned') : "${meta.id}.aligned" } - publishDir = [ - path: { "${params.outdir}/preprocessing/" }, - mode: params.publish_dir_mode, - pattern: "*.{bai,csi}", - saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned') : "${meta.id}.aligned" } + publishDir = [ + path: { "${params.outdir}/preprocessing/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } - ] - } + ] } + // Second run alignment - if (params.skip_tools && !params.skip_tools.split(',').contains('second_run')){ - withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_SORT' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } - publishDir = [ - path: { "${params.outdir}/preprocessing/hisat2/" }, - mode: params.publish_dir_mode, - pattern: "*.bam", - enabled: params.save_align_intermeds - ] - } - - withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { - ext.args = params.bam_csi_index ? '-c' : '' - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } - publishDir = [ - path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: "*.{bai,csi}", - enabled: params.save_align_intermeds - ] - } - - withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } - publishDir = [ - path: { "${params.outdir}/reports/samtools/" }, - mode: params.publish_dir_mode, - pattern: "*.{bai,csi}", - enabled: params.save_align_intermeds - ] - } - - withName: '.*:ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.aligned_hs2')[1]) : "${meta.id}.aligned_hs2" } - publishDir = [ - path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.idxstats", - enabled: params.save_align_intermeds - ] - } + withName: '.*:FASTQ_ALIGN_HISAT2:.*'{ + ext.when = {params.skip_tools && !params.skip_tools.split(',').contains('second_run')} + } + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/preprocessing/hisat2/" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] } + } \ No newline at end of file diff --git a/conf/modules/annotate/annotate.config b/conf/modules/annotate/annotate.config index b6e4992..008d646 100644 --- a/conf/modules/annotate/annotate.config +++ b/conf/modules/annotate/annotate.config @@ -16,37 +16,35 @@ process { // annotate // VEP - if (params.tools && params.tools.split(',').contains('vep')) { - withName: 'ENSEMBLVEP_VEP' { - ext.args = { [ - (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', - (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', - (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', - (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', - (params.vep_spliceregion) ? "--plugin SpliceRegion" : '', - (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf', - (params.vep_custom_args) ?: "" - ].join(' ').trim() } - // If just VEP: _VEP.ann.vcf - ext.prefix = { vcf.baseName - ".vcf" + "_VEP.ann" } - publishDir = [ - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, - pattern: "*html" - ], - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, - pattern: "*{gz}" - ] + withName: 'ENSEMBLVEP_VEP' { + ext.args = { [ + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? 
"--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? "--plugin SpliceRegion" : '', + (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf', + (params.vep_custom_args) ?: "" + ].join(' ').trim() } + // If just VEP: _VEP.ann.vcf + ext.prefix = { vcf.baseName - ".vcf" + "_VEP.ann" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz}" ] - } + ] } // ALL ANNOTATION TOOLS if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { - withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)" { + withName: ".*:VCF_ANNOTATE:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)" { ext.prefix = { input.name - ".vcf" } publishDir = [ mode: params.publish_dir_mode, @@ -57,7 +55,7 @@ process { // annotate } if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { - withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { + withName: '.*:VCF_ANNOTATE:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, diff --git a/conf/modules/consensus/vcf_consensus.config b/conf/modules/consensus/vcf_consensus.config index bc63ea0..6bc603d 100644 --- a/conf/modules/consensus/vcf_consensus.config +++ b/conf/modules/consensus/vcf_consensus.config @@ -15,40 +15,40 @@ process { // consensus - if (params.tools && params.tools.split(',').contains('consensus')) { + withName: "VCF2MAF" { + ext.args = { [ + "--inhibit-vep", + "--normal-id ${meta.id.split('_vs_')[1]}", + "--tumor-id ${meta.id.split('_vs_')[0]}", + "--max-subpop-af 0.0001", + "--retain-ann gnomADg_AF,MAX_AF,MAX_AF_POPS", + "--retain-fmt AD,DP,AF,GT,AU,CU,GU,TU,TAR,TIR,TOR", + params.vep_genome ? "--ncbi-build ${params.vep_genome}" : '', + meta.variantcaller == "strelka"? "--vcf-tumor-id TUMOR --vcf-normal-id NORMAL" : '', + meta.variantcaller == "mutect2"? "--vcf-tumor-id ${meta.patient}_${meta.id.split('_vs_')[0]} --vcf-normal-id ${meta.patient}_${meta.id.split('_vs_')[1]}" : '' + ].join(' ').trim() } + ext.prefix = { "${meta.id}.${meta.variantcaller}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/maf/${meta.id}/" }, + pattern: "*{maf,maf.gz}" + ] + } - withName: 'RUN_CONSENSUS' { - ext.prefix = { "${meta.id}.consensus"} - ext.args = {"--id=${meta.id}"} - publishDir = [ + withName: 'RUN_CONSENSUS.*' { + ext.prefix = { "${meta.id}"} + ext.args = {"--id=${meta.id}"} + publishDir = [ + [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: true - ] - } - - withName: 'RUN_CONSENSUS_RESCUE_DNA' { - ext.prefix = { "${meta.id}.withRNA.consensus"} - ext.args = {"--id=${meta.id}_withRNAConsensus"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: true - ] - } - - withName: 'RUN_CONSENSUS_RESCUE_RNA' { - ext.prefix = { "${meta.id}.withDNA.consensus"} - ext.args = {"--id=${meta.id}_withDNAConsensus"} - publishDir = [ + path: { "${params.outdir}/consensus/${meta.id}/" }, + pattern: "*{vcf.gz,vcf,maf,maf.gz}" + ], + [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: true - ] - } - - } + path: { "${params.outdir}/reports/consensus/" }, + pattern: "*pdf" + ] + ] + } } diff --git a/conf/modules/normalise/vt.config b/conf/modules/normalise/vt.config index b0cf3f5..78ac2da 100644 --- a/conf/modules/normalise/vt.config +++ b/conf/modules/normalise/vt.config @@ -17,7 +17,7 @@ process { // vt withName: 'VT_DECOMPOSE'{ ext.args = "" - ext.prefix = { vcf.baseName - ".vcf.gz" + ".dec" } + ext.prefix = { vcf.baseName - ".vcf" + ".dec" } publishDir = [ [ mode: params.publish_dir_mode, @@ -27,7 +27,7 @@ process { // vt [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.variantcaller}/${meta.id}" }, - pattern: {"*{vcf.gz,vcf.gz.tbi}"} + pattern: {"*{vcf.gz,vcf.gz.tbi}"}, enabled: false ] ] @@ -35,7 +35,7 @@ process { // vt withName: 'VT_NORMALISE'{ ext.args = {"-n"} - ext.prefix = { vcf.baseName - ".vcf.gz" + ".norm" } + ext.prefix = { vcf.baseName - ".vcf" + ".norm" } publishDir = [ [ mode: params.publish_dir_mode, diff --git a/conf/modules/quality_control/quality_control.config b/conf/modules/quality_control/quality_control.config index af9189c..ad7c187 100644 --- a/conf/modules/quality_control/quality_control.config +++ b/conf/modules/quality_control/quality_control.config @@ -64,59 +64,107 @@ process { // quality_control saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } + } + // TODO: check that this is capturing what it should - if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { - withName: '.*:CRAM_QC_RECAL:MOSDEPTH' { - ext.prefix = { "${meta.id}.recal" } - } + if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { + withName: '.*:CRAM_QC_RECAL:MOSDEPTH' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('mosdepth')) } - withName: '.*:CRAM_QC_RECAL:SAMTOOLS_STATS' { - ext.prefix = { "${meta.id}.recal.cram" } - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/samtools/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } + } + withName: '.*:CRAM_QC_RECAL:SAMTOOLS_STATS' { + ext.prefix = { "${meta.id}.recal.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } - if (params.tools && params.tools.split(',').contains('vcf_qc')){ - // VCF - withName: 'BCFTOOLS_STATS' { - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } - ext.prefix = { vcf.baseName - ".vcf" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/bcftools/${meta.variantcaller}/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: 'VCFTOOLS_.*' { - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('vcftools')) } - ext.prefix = { variant_file.baseName - ".vcf" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/vcftools/${meta.variantcaller}/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // VCF + withName: 'BCFTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } + ext.prefix = { vcf.baseName - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/bcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'VCFTOOLS_TSTV_COUNT' { - ext.args = "--TsTv-by-count" - } + withName: 'VCFTOOLS_.*' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('vcftools')) } + ext.prefix = { variant_file.baseName - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'VCFTOOLS_TSTV_QUAL' { - ext.args = "--TsTv-by-qual" - } + withName: 'VCFTOOLS_TSTV_COUNT' { + ext.args = "--TsTv-by-count" + } - withName: 'VCFTOOLS_SUMMARY' { - ext.args = "--FILTER-summary" + withName: 'VCFTOOLS_TSTV_QUAL' { + ext.args = "--TsTv-by-qual" + } + + withName: 'VCFTOOLS_SUMMARY' { + ext.args = "--FILTER-summary" - } } + + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.sorted.bam') : "${meta.id}.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.sorted.bam') : "${meta.id}.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/hisat2/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: '.*:BAM_MARKDUPLICATES:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_.*' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { "${meta.id}.md.cram" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/markduplicates/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: 'MOSDEPTH' { + ext.args = { !params.wes ? 
"-n --fast-mode --by 500" : ""} + ext.prefix = { + if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) { + "${meta.id}.sorted" + } else { + "${meta.id}.md" + } + } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('mosdepth')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/mosdepth/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } } \ No newline at end of file diff --git a/conf/modules/variant_calling/sage.config b/conf/modules/variant_calling/sage.config index e79cc84..c478468 100644 --- a/conf/modules/variant_calling/sage.config +++ b/conf/modules/variant_calling/sage.config @@ -33,14 +33,13 @@ process { // sage publishDir = [ enabled: false ] - } withName: 'MERGE_SAGE' { ext.prefix = {"${meta.id}.sage"} publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/sage/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/variant_calling/sage/${meta.id}/" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } @@ -48,7 +47,7 @@ process { // sage withName : 'TABIX_VC_SAGE' { publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/sage/${meta.patient}/${meta.id}/" }, + path: { "${params.outdir}/variant_calling/sage/${meta.id}/" }, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -59,7 +58,7 @@ process { // sage mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/" }, pattern: "*{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? null : "sage/${meta.patient}/${meta.id}/${it}" }, + saveAs: { meta.num_intervals > 1 ? null : "sage/${meta.id}/${it}" }, enabled: true ] } diff --git a/conf/modules/variant_calling/strelka.config b/conf/modules/variant_calling/strelka.config index 14bfcea..ae9465f 100644 --- a/conf/modules/variant_calling/strelka.config +++ b/conf/modules/variant_calling/strelka.config @@ -24,7 +24,8 @@ process { // strelka mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/" }, pattern: "*{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? null : "strelka/${meta.id}/${it}" } + saveAs: { meta.num_intervals > 1 ? 
null : "strelka/${meta.id}/${it}" }, + enabled: false ] } @@ -32,7 +33,8 @@ process { // strelka publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" + pattern: "*{vcf.gz,vcf.gz.tbi}", + enabled: true ] } @@ -40,10 +42,6 @@ process { // strelka ext.prefix = {"${meta.id}.strelka.variants"} } - withName: 'MERGE_STRELKA_GENOME' { - ext.prefix = {"${meta.id}.strelka.genome"} - } - // PAIR_VARIANT_CALLING withName: 'MERGE_STRELKA_INDELS' { ext.prefix = {"${meta.id}.strelka.somatic_indels"} From c863626420a143f591bd949a175bf12b77dfddb2 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:52:18 +0100 Subject: [PATCH 46/56] Adjusted extract_reads_id for new subworkflow --- .../local/{extract_reads.nf => extract_reads_id/main.nf} | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) rename modules/local/{extract_reads.nf => extract_reads_id/main.nf} (82%) diff --git a/modules/local/extract_reads.nf b/modules/local/extract_reads_id/main.nf similarity index 82% rename from modules/local/extract_reads.nf rename to modules/local/extract_reads_id/main.nf index 8a49248..aec5061 100644 --- a/modules/local/extract_reads.nf +++ b/modules/local/extract_reads_id/main.nf @@ -1,15 +1,14 @@ -process EXTRACT_READ_IDS { +process SAMTOOLS_EXTRACT_READ_IDS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(input), path(index) - tuple val(meta), path(bed) + tuple val(meta), path(input), path(index), path(bed) output: tuple val(meta), path("*_IDs_all.txt") , emit: read_ids From 8c7735655b327f427a95e7f23d62e779f065a46b Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:53:03 +0100 Subject: [PATCH 47/56] Changed variable name --- subworkflows/local/channel_variant_calling_create_csv/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/channel_variant_calling_create_csv/main.nf b/subworkflows/local/channel_variant_calling_create_csv/main.nf index f3a43ce..361ca6e 100644 --- a/subworkflows/local/channel_variant_calling_create_csv/main.nf +++ b/subworkflows/local/channel_variant_calling_create_csv/main.nf @@ -4,12 +4,12 @@ workflow CHANNEL_VARIANT_CALLING_CREATE_CSV { take: - vcf_to_annotate // channel: [mandatory] meta, vcf + vcf_to_csv // channel: [mandatory] meta, vcf csv_name main: // Creating csv files to restart from this step - vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf -> + vcf_to_csv.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, vcf -> patient = meta.patient sample = meta.id variantcaller = meta.variantcaller From 2457f1a385b067c0e50c8eb6b4d6d990f4c42134 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:53:33 +0100 Subject: [PATCH 48/56] Created consensus csv generation subworkflow --- .../channel_consensus_create_csv/main.nf | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 subworkflows/local/channel_consensus_create_csv/main.nf diff --git a/subworkflows/local/channel_consensus_create_csv/main.nf 
b/subworkflows/local/channel_consensus_create_csv/main.nf new file mode 100644 index 0000000..fe2c741 --- /dev/null +++ b/subworkflows/local/channel_consensus_create_csv/main.nf @@ -0,0 +1,19 @@ +// +// CHANNEL_CONSENSUS_CREATE_CSV +// + +workflow CHANNEL_CONSENSUS_CREATE_CSV { + take: + maf_to_csv // channel: [mandatory] meta, maf, variantcaller + csv_name + + main: + // Creating csv files to restart from this step + maf_to_csv.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${params.outdir}/csv"){ meta, maf, variantcaller -> + patient = meta.patient + sample = meta.id + status = meta.status + maf = "${params.outdir}/consensus/${variantcaller}/${meta.id}/${maf.getName()}" + ["${csv_name}.csv", "patient,sample,status,variantcaller,maf\n${patient},${sample},${status},${variantcaller},${maf}\n"] + } +} \ No newline at end of file From 1f6abc9463f30c25336a545140fadd4198629e46 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:54:04 +0100 Subject: [PATCH 49/56] Fixed HISAT2 in prepare_genome --- subworkflows/local/prepare_genome/main.nf | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 54fcad9..0e6631f 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -139,15 +139,20 @@ workflow PREPARE_GENOME { if (params.splicesites) { ch_splicesites = Channel.fromPath(params.splicesites).collect() } else{ - ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf ).txt + HISAT2_EXTRACTSPLICESITES ( ch_gtf.map{ it -> [ [ id:'null' ], it ]} ) + ch_splicesites = HISAT2_EXTRACTSPLICESITES.out.txt versions = versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) - } if (params.hisat2_index) { ch_hisat2_index = Channel.fromPath(params.hisat2_index).collect() } else{ - ch_hisat2_index = HISAT2_BUILD ( fasta, ch_gtf, ch_splicesites ).index + HISAT2_BUILD ( + fasta, + ch_gtf.map{ it -> [ [ id:'null' ], it ]}, + ch_splicesites + ) + ch_hisat2_index = HISAT2_BUILD.out.index versions = versions.mix(HISAT2_BUILD.out.versions) } } else { From 2f3548636fdb1dc8cc7a79fd1d6ef1138ce612a6 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:54:28 +0100 Subject: [PATCH 50/56] Added code to extract dna vcfs from input_sample csv --- subworkflows/local/prepare_second_run/main.nf | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/prepare_second_run/main.nf b/subworkflows/local/prepare_second_run/main.nf index d958bf6..a085bbe 100644 --- a/subworkflows/local/prepare_second_run/main.nf +++ b/subworkflows/local/prepare_second_run/main.nf @@ -22,6 +22,8 @@ workflow BAM_EXTRACT_READS_HISAT2_ALIGN { dict hisat2_index splicesites + dna_consensus_maf + dna_varcall_mafs main: versions = Channel.empty() @@ -31,8 +33,20 @@ workflow BAM_EXTRACT_READS_HISAT2_ALIGN { 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalize', 'consensus', 'second_run'] && !(params.skip_tools && params.skip_tools.split(",").contains("second_run"))) { if (params.step == 'second_run') { - // [meta, cram, crai, maf] - cram_to_realign = input_sample + input_elements_status = input_sample.branch{ + norealign: it[0].status == 1 + realign: it[0].status == 2 || it[0].status == 0 + } + input_elements_status.norealign.dump(tag:"input_elements_status.norealign") + dna_mafs = input_elements_status.norealign.map{meta, vcf -> [meta + [ consensus: meta.variantcaller ==~ 
/(\S+)?(?i)consensus(\S+)/ ], vcf]} + dna_mafs_consensus = dna_mafs.branch{ + isconsensus: it[0].consensus == true + noconsensus: it[0].consensus == false + } + dna_consensus_maf = dna_mafs_consensus.isconsensus + dna_varcall_mafs = dna_mafs_consensus.noconsensus + // [meta, cram, crai, maf] (RNA dna NORMAL) + cram_to_realign = input_elements_status.realign // TODO convert to CRAM or/and MERGE if necessary // TODO Merge alignments if applicable // MERGE_ALIGN(previous_alignment.map{meta, cram, crai, maf -> [meta, cram]}) @@ -80,8 +94,10 @@ workflow BAM_EXTRACT_READS_HISAT2_ALIGN { } emit: - bam_mapped = bam_mapped - versions = versions // channel: [ versions.yml ] + bam_mapped = bam_mapped + dna_consensus_maf = dna_consensus_maf + dna_varcall_mafs = dna_varcall_mafs + versions = versions // channel: [ versions.yml ] From c4e49b8f4d7343dbf3d1346a80db36df066b778a Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:55:35 +0100 Subject: [PATCH 51/56] Added option for vcf/maf input in consensus and csv channel generation --- subworkflows/local/vcf_consensus/main.nf | 57 ++++++++++++++++++------ 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/vcf_consensus/main.nf b/subworkflows/local/vcf_consensus/main.nf index 5167d32..c505de2 100644 --- a/subworkflows/local/vcf_consensus/main.nf +++ b/subworkflows/local/vcf_consensus/main.nf @@ -6,6 +6,9 @@ include { VCF2MAF } from '../../../modules/local/vcf2maf/vcf2maf/main' include { RUN_CONSENSUS } from '../../../modules/local/consensus/main' include { RUN_CONSENSUS as RUN_CONSENSUS_RESCUE } from '../../../modules/local/consensus/main' +// Create samplesheets to restart from consensus +include { CHANNEL_CONSENSUS_CREATE_CSV } from '../channel_consensus_create_csv/main' +include { CHANNEL_CONSENSUS_CREATE_CSV as CHANNEL_RESCUE_CREATE_CSV } from '../channel_consensus_create_csv/main' workflow VCF_CONSENSUS { take: @@ -15,6 +18,7 @@ workflow VCF_CONSENSUS { previous_maf_consensus_dna // results already done to avoid a second run when rna filterig previous_mafs_status_dna // results already done to avoid a second run when rna filterig input_sample + second_run main: versions = Channel.empty() @@ -23,18 +27,25 @@ workflow VCF_CONSENSUS { mafs_from_varcal_dna = Channel.empty() consensus_maf = Channel.empty() - if (params.step in ['mapping', 'markduplicates', 'splitncigar', + if ((params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', - 'normalise', 'consensus'] && (!(params.skip_tools && params.skip_tools.split(",").contains("consensus")))) { + 'normalise', 'consensus'] && + (!(params.skip_tools && params.skip_tools.split(",").contains("consensus")))) || + second_run) { if (params.step == 'consensus') vcf_to_consensus = input_sample + + vcf_to_consensus_type = vcf_to_consensus.branch{ + vcf: it[0].data_type == "vcf" + maf: it[0].data_type == "maf" + } // First we transform the maf to MAF - VCF2MAF(vcf_to_consensus.map{meta, vcf, tbi -> [meta, vcf]}, + VCF2MAF(vcf_to_consensus_type.vcf.map{meta, vcf, tbi -> [meta, vcf]}, fasta) - maf_to_consensus = VCF2MAF.out.maf + maf_to_consensus = VCF2MAF.out.maf.mix(vcf_to_consensus_type.maf) versions = versions.mix(VCF2MAF.out.versions) - maf_to_consensus.dump(tag:"maf_to_consensus") +// maf_to_consensus.dump(tag:"maf_to_consensus") // count number of callers to generate groupKey maf_to_consensus = maf_to_consensus.map{ meta, maf -> def toolsllist = tools.split(',') @@ -51,12 +62,10 @@ workflow 
VCF_CONSENSUS { maf, meta.variantcaller ]} // groupKey should avoid the groupTuple wait but it does not seem to work atm .groupTuple() // makes the whole pipeline wait for all processes to finish - maf_to_consensus.dump(tag:"maf_to_consensus1") // Run consensus on VCF with same id RUN_CONSENSUS ( maf_to_consensus ) consensus_maf = RUN_CONSENSUS.out.maf // 1 consensus_maf from all callers - consensus_maf.dump(tag:'consensus_maf0') // Separate DNA from RNA // VCFs from variant calling mafs_from_varcal = maf_to_consensus.branch{ @@ -80,6 +89,18 @@ workflow VCF_CONSENSUS { maf_from_consensus_dna = maf_from_consensus.dna.map{meta, maf -> [meta, maf, ['ConsensusDNA']]} mafs_from_varcal_dna = mafs_from_varcal.dna } + maf_from_consensus_dna + .mix(maf_from_consensus_rna) + .mix(mafs_from_varcal_dna) + .mix(mafs_from_varcal_rna).transpose().dump(tag:'consensus') + CHANNEL_CONSENSUS_CREATE_CSV( + maf_from_consensus_dna + .mix(maf_from_consensus_rna) + .mix(mafs_from_varcal_dna) + .mix(mafs_from_varcal_rna) + .transpose(), + "consensus" + ) // RESCUE STEP: cross dna / rna for a crossed second consensus if (!(params.skip_tools && params.skip_tools.split(',').contains('rescue'))) { @@ -128,8 +149,8 @@ workflow VCF_CONSENSUS { [meta, rna[2] + dna[2], rna[3] + dna[3]] } - mafs_dna_crossed_with_rna_rescue.dump(tag:"mafs_dna_crossed_with_rna_rescue") - mafs_rna_crossed_with_dna_rescue.dump(tag:"mafs_rna_crossed_with_dna_rescue") +// mafs_dna_crossed_with_rna_rescue.dump(tag:"mafs_dna_crossed_with_rna_rescue") +// mafs_rna_crossed_with_dna_rescue.dump(tag:"mafs_rna_crossed_with_dna_rescue") RUN_CONSENSUS_RESCUE ( mafs_dna_crossed_with_rna_rescue.mix(mafs_rna_crossed_with_dna_rescue) ) maf_from_rescue = RUN_CONSENSUS_RESCUE.out.maf.branch{ @@ -137,11 +158,21 @@ workflow VCF_CONSENSUS { rna: it[0].status == 2 } - maf_from_consensus_dna = maf_from_rescue.dna - maf_from_consensus_rna = maf_from_rescue.rna + maf_from_consensus_dna = maf_from_rescue.dna.map{meta, maf -> [meta, maf, ['ConsensusDNA']]} + maf_from_consensus_rna = maf_from_rescue.rna.map{meta, maf -> [meta, maf, ['ConsensusRNA']]} consensus_maf = maf_from_consensus_dna.mix(maf_from_consensus_rna) - consensus_maf.dump(tag:'consensus_maf1') - + maf_from_consensus_dna + .mix(maf_from_consensus_rna) + .mix(mafs_from_varcal_dna) + .mix(mafs_from_varcal_rna).transpose().dump(tag:'rescued') + CHANNEL_RESCUE_CREATE_CSV( + maf_from_consensus_dna + .mix(maf_from_consensus_rna) + .mix(mafs_from_varcal_dna) + .mix(mafs_from_varcal_rna) + .transpose(), + "rescued" + ) } } From fa408bcb2e04a94da8944c4423c0f84c86b95671 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:56:40 +0100 Subject: [PATCH 52/56] Added code for input processing if step second_run --- workflows/rnadnavar.nf | 43 ++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 58f5679..05a0c9f 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -60,12 +60,11 @@ if (params.input) { } else { ch_from_samplesheet = params.build_only_index ? 
Channel.empty() : Channel.fromSamplesheet("input_restart") } - // Format samplesheet channel input_sample = ch_from_samplesheet - .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> + .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller, maf -> // generate patient_sample key to group lanes together - [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] + [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller, maf] ] } .tap{ ch_with_patient_sample } // save the channel .groupTuple() //group by patient_sample to get all lanes @@ -76,7 +75,7 @@ input_sample = ch_from_samplesheet .combine(ch_with_patient_sample, by: 0) // for each entry add numLanes .map { patient_sample, num_lanes, ch_items -> - (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items + (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller, maf) = ch_items if (meta.lane && fastq_2) { meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' @@ -93,6 +92,26 @@ input_sample = ch_from_samplesheet else { error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") } + // start for second run + } else if ((maf || vcf) && params.step=="second_run"){ + if (meta.lane == null) meta.lane = "LX" + meta = meta + [id: "${meta.sample}-${meta.lane}-realign".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}_realign\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + if (meta.status >= 2) { // STAR does not need '@RG' + read_group = "ID:${meta.sample}_${meta.lane}_realign ${CN}PU:${meta.lane} SM:${meta.patient}_${meta.sample} LB:${meta.sample} DS:${params.fasta} PL:${params.seq_platform}" + } + if (meta.status >= 2 || meta.status==0){ // these are the files that will go through realignment + if (cram) return [ meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'cram', size: 1], cram, crai, maf ] + else if (bam) return [ meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1], bam, bai, maf ] + else { + error("Combination error")} + } else if (meta.status == 1){ + + return [meta + [data_type: 'maf', variantcaller: variantcaller ?: ''], maf] + + } + // start from BAM } else if (meta.lane && bam) { @@ -103,7 +122,7 @@ input_sample = ch_from_samplesheet def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" if (meta.status >= 2) { // STAR does not need '@RG' - read_group = "ID:${meta.sample}.${meta.lane} ${CN}PU:${meta.lane} SM:${meta.patient}_${meta.sample} LB:${meta.sample} DS:${params.fasta} PL:${params.seq_platform}" + read_group = "ID:${meta.sample}_${meta.lane} ${CN}PU:${meta.lane} SM:${meta.patient}_${meta.sample} LB:${meta.sample} DS:${params.fasta} PL:${params.seq_platform}" } meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] @@ -262,7 +281,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // // Build the genome index and other reference files -include { PREPARE_REFERENCE_AND_INTERVALS } from '../subworkflows/local/prepare_reference_and_intervals' +include { PREPARE_REFERENCE_AND_INTERVALS } from '../subworkflows/local/prepare_reference_and_intervals/main' // Download annotation cache if needed include { ENSEMBLVEP_DOWNLOAD } from '../modules/nf-core/ensemblvep/download/main' @@ -270,13 +289,13 @@ include { ENSEMBLVEP_DOWNLOAD } from '../modules/nf-core/ensemblve include { BAM_ALIGN } from '../subworkflows/local/bam_align/main' // Core subworkflows of the pipeline -include { CORE_RUN } from '../subworkflows/local/core_workflow_pass' -include { CORE_RUN as SECOND_RUN } from '../subworkflows/local/core_workflow_pass' - -// Filtering -include { PREPARE_SECOND_RUN } from '../subworkflows/local/prepare_second_run' -include { FILTERING_RNA } from '../subworkflows/local/rna_filtering' +include { BAM_VARIANT_CALLING_PRE_POST_PROCESSING } from '../subworkflows/local/bam_variant_calling_pre_post_processing/main' +// Second run +include { BAM_EXTRACT_READS_HISAT2_ALIGN as PREPARE_SECOND_RUN } from '../subworkflows/local/prepare_second_run/main' +include { BAM_VARIANT_CALLING_PRE_POST_PROCESSING as SECOND_RUN } from '../subworkflows/local/bam_variant_calling_pre_post_processing/main' +//include { FILTERING_RNA } from '../subworkflows/local/rna_filtering' +// // // MODULE: Installed directly from nf-core/modules // From 3fee83a6e68ca943142c4e30035a885d18891322 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:57:32 +0100 Subject: [PATCH 53/56] Fixed subworkflows until second_run --- workflows/rnadnavar.nf | 102 ++++++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 05a0c9f..c6951d0 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -366,40 +366,76 @@ workflow RNADNAVAR { versions = versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions) } -// STEP 0: Build reference and indices if needed - PREPARE_REFERENCE_AND_INTERVALS() - versions = versions.mix(PREPARE_REFERENCE_AND_INTERVALS.out.versions) - - // Reference and intervals variables - fasta = PREPARE_REFERENCE_AND_INTERVALS.out.fasta - fasta_fai = PREPARE_REFERENCE_AND_INTERVALS.out.fasta_fai - dict = PREPARE_REFERENCE_AND_INTERVALS.out.dict - germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource - germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi - intervals = PREPARE_REFERENCE_AND_INTERVALS.out.intervals - intervals_for_preprocessing = 
PREPARE_REFERENCE_AND_INTERVALS.out.intervals_for_preprocessing - // specific for variant calling - intervals_bed_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi - dbsnp = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp - dbsnp_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp_tbi - pon = PREPARE_REFERENCE_AND_INTERVALS.out.pon - pon_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.pon_tbi - germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource - germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi - - + // STEP 0: Build reference and indices if needed + PREPARE_REFERENCE_AND_INTERVALS() + versions = versions.mix(PREPARE_REFERENCE_AND_INTERVALS.out.versions) + + // Reference and intervals variables + fasta = PREPARE_REFERENCE_AND_INTERVALS.out.fasta + fasta_fai = PREPARE_REFERENCE_AND_INTERVALS.out.fasta_fai + dict = PREPARE_REFERENCE_AND_INTERVALS.out.dict + germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource + germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi + intervals = PREPARE_REFERENCE_AND_INTERVALS.out.intervals + intervals_for_preprocessing = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_for_preprocessing + // specific for variant calling + intervals_bed_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi + intervals_bed_gz_tbi_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi_combined + dbsnp = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp + dbsnp_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp_tbi + pon = PREPARE_REFERENCE_AND_INTERVALS.out.pon + pon_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.pon_tbi + known_sites_indels = PREPARE_REFERENCE_AND_INTERVALS.out.known_sites_indels + known_sites_indels_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.known_sites_indels_tbi + known_sites_snps = PREPARE_REFERENCE_AND_INTERVALS.out.known_sites_snps + known_sites_snps_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.known_sites_snps_tbi + + intervals_and_num_intervals = intervals.map{ interval, num_intervals -> + if ( num_intervals < 1 ) [ [], num_intervals ] + else [ interval, num_intervals ] + } // STEP 1: ALIGNMENT PREPROCESSING - BAM_ALIGN( - PREPARE_REFERENCE_AND_INTERVALS.out.bwa, - PREPARE_REFERENCE_AND_INTERVALS.out.bwamem2, - PREPARE_REFERENCE_AND_INTERVALS.out.dragmap, - PREPARE_REFERENCE_AND_INTERVALS.out.star_index, - PREPARE_REFERENCE_AND_INTERVALS.out.gtf, - input_sample - ) - reports = reports.mix(BAM_ALIGN.out.reports) - versions = versions.mix(BAM_ALIGN.out.versions) + BAM_ALIGN( + PREPARE_REFERENCE_AND_INTERVALS.out.bwa, + PREPARE_REFERENCE_AND_INTERVALS.out.bwamem2, + PREPARE_REFERENCE_AND_INTERVALS.out.dragmap, + PREPARE_REFERENCE_AND_INTERVALS.out.star_index, + PREPARE_REFERENCE_AND_INTERVALS.out.gtf, + input_sample + ) + + reports = reports.mix(BAM_ALIGN.out.reports) + versions = versions.mix(BAM_ALIGN.out.versions) + // 5 MAIN STEPS: GATK PREPROCESING - VARIANT CALLING - NORMALIZATION - CONSENSUS - ANNOTATION + BAM_VARIANT_CALLING_PRE_POST_PROCESSING( + input_sample, // input from CSV if applicable + BAM_ALIGN.out.bam_mapped, // input from mapping + BAM_ALIGN.out.cram_mapped, // input from mapping + fasta, // fasta reference file + fasta_fai, // fai for fasta file + dict, // + dbsnp, + dbsnp_tbi, + pon, + pon_tbi, + known_sites_indels, + known_sites_indels_tbi, + 
germline_resource, + germline_resource_tbi, + intervals, + intervals_for_preprocessing, + intervals_bed_gz_tbi, + intervals_bed_combined, + intervals_and_num_intervals, + intervals_bed_gz_tbi_combined, + null, // to repeat rescue consensus TODO: is this the best strategy? + null, // to repeat rescue consensus + false // is second run + ) + + reports = reports.mix(BAM_VARIANT_CALLING_PRE_POST_PROCESSING.out.reports) + versions = versions.mix(BAM_VARIANT_CALLING_PRE_POST_PROCESSING.out.versions) version_yaml = Channel.empty() From 54c233706b8fd7eea9656641f1a01ec7d63c0054 Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:57:38 +0100 Subject: [PATCH 54/56] Fixed subworkflows until second_run --- subworkflows/local/prepare_second_run.nf | 217 ----------------------- 1 file changed, 217 deletions(-) delete mode 100644 subworkflows/local/prepare_second_run.nf diff --git a/subworkflows/local/prepare_second_run.nf b/subworkflows/local/prepare_second_run.nf deleted file mode 100644 index fa1ee31..0000000 --- a/subworkflows/local/prepare_second_run.nf +++ /dev/null @@ -1,217 +0,0 @@ -// -// -// - -include { BAM_MERGE_INDEX_SAMTOOLS as MERGE_ALIGN } from './bam_merge_index_samtools/main' -include { MAF2BED } from '../../modules/local/maf2bed' -include { EXTRACT_READ_IDS } from '../../modules/local/extract_reads' -include { BAM_CONVERT_SAMTOOLS as ALIGN2FQ } from '../nf-core/alignment_to_fastq' -include { ALIGN_HISAT2 } from '../nf-core/align_hisat2' -include { GATK4_FILTERSAMREADS } from '../../modules/nf-core/modules/gatk4/filtersamreads/main' - - -workflow PREPARE_SECOND_RUN { - take: - ch_input_sample - maf // for consensus - bwa_bams // for realigment - star_bams // for realigment - fasta - fasta_fai - dict - hisat2_index - splicesites - - main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - - if (params.step in ['mapping', 'markduplicates', 'splitncigar', 'prepare_recalibration', 'recalibrate', 'variant_calling', 'normalize', 'consensus', 'second_pass'] ) { - - // RNA specific filtering (2nd PASS) - this is fast BUT it increases the length of the pipeline considerably - star_bams.dump(tag:"star_bams") - bwa_bams.dump(tag:"bwa_bams") - // 1 We take the previous aligned reads with star for tumor RNA and DNA normal - bwa_bams.branch{ - normal: it[0].status == 0 - tumor: it[0].status == 1 - }.set{previous_dna_alignment} - // we only need normals - dna tumour will NOT be realigned - previous_normal_alignment = previous_dna_alignment.normal.groupTuple() - // 2. 
Group them and merge if applicable - previous_alignment = star_bams.map{meta, bam, bai -> [meta, [bam], bai]} - .mix(previous_normal_alignment) - previous_alignment.dump(tag: 'previous_alignment') - MERGE_ALIGN(previous_alignment.map{meta, bam, bai -> [meta, bam]}) - ch_versions = ch_versions.mix(MERGE_ALIGN.out.versions) - // TODO make indexing optional if bai already there - MERGE_ALIGN.out.bam_bai.dump(tag:'MERGE_ALIGN.out.bam_bai') - previous_bams = MERGE_ALIGN.out.bam_bai - .map{meta, bam, bai -> [ - [ - id:meta.sample, - data_type:"bam", - patient:meta.patient, - sample:meta.sample, - read_group:meta.read_group, - status:meta.status - ], - bam, bai - ] - } - previous_bams.dump(tag:"[STEP7: FILTERING] bams for realignment") - - // STEP A: Extract allele coordinates from consensus - MAF2BED(maf) - bed = MAF2BED.out.bed - bed.branch{ - dna: it[0].status == 1 // Bed from DNA tumour - rna: it[0].status == 2 // Bed from RNA tumour - }.set{bed_status} - bed.dump(tag:"[STEP8: RNA_FILTERING] bed1") - // Match metadata for tumor and normal samples - this file comes from variant calling - bed_normal = bed_status.rna.map{meta, bed -> - def (tumor_id, normal_id) = meta.rna_id.split( '_vs_' ) - [[ - id: normal_id + "_2pass", - patient: meta.patient, - status: 0 // Had status from tumour because it comes from the variant calling consensus - ], - bed]} - bed_tumor = bed_status.rna.map{meta, bed -> - def (tumor_id, normal_id) = meta.rna_id.split( '_vs_' ) - [[ - id: tumor_id + "_2pass" , - patient: meta.patient, - status: meta.status - ], - bed]} - bed_normal.dump(tag:"bed_normal") - bed_tumor.dump(tag:"bed_tumor") - bed = bed_normal.mix(bed_tumor).map{meta, bed -> [meta.id, meta, bed]} - bed.dump(tag:"[STEP8: RNA_FILTERING] bed2") - - // STEP B: Extract reads from BAMs - read_to_cross = previous_bams.map{meta, bam, bai -> - [ meta.id+ "_2pass", - [id: meta.id + "_2pass", - patient: meta.patient, - status: meta.status, - read_group: meta.read_group], - bam,bai] } - read_to_cross.dump(tag:'read_to_cross') - - crossed_reads_bed = read_to_cross.cross(bed) - // We cross the values to make sure we match per id the bed file with the bams - bed_to_filter = crossed_reads_bed.map{ reads, bed -> - def rg_id = ( reads[1].read_group =~ /ID:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_pu = ( reads[1].read_group =~ /PU:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_sm = ( reads[1].read_group =~ /SM:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_lb = ( reads[1].read_group =~ /LB:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_pl = ( reads[1].read_group =~ /PL:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def meta = [:] - meta.id = bed[1].id - meta.patient = bed[1].patient - meta.status = bed[1].status - meta.sample = rg_sm + "_2pass" - meta.read_group = reads[1].read_group - meta.rg_id = rg_id - meta.rg_pu = rg_pu - meta.rg_sm = rg_sm + "_2pass" - meta.rg_lb = rg_lb - meta.rg_pl = rg_pl - [meta, bed[2]]} - reads_to_filter = crossed_reads_bed.map{ reads, bed -> - def rg_id = ( reads[1].read_group =~ /ID:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_pu = ( reads[1].read_group =~ /PU:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_sm = ( reads[1].read_group =~ /SM:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_lb = ( reads[1].read_group =~ /LB:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def rg_pl = ( reads[1].read_group =~ /PL:(\S+?)(\\t|\s|\"|$)/ )[0][1] - def meta = [:] - meta.id = reads[1].id - meta.patient = reads[1].patient - meta.status = reads[1].status - meta.sample = rg_sm + "_2pass" - meta.read_group = reads[1].read_group - meta.rg_id = rg_id - meta.rg_pu = rg_pu - meta.rg_sm = rg_sm + "_2pass" - 
meta.rg_lb = rg_lb - meta.rg_pl = rg_pl - [meta, reads[2], reads[3]]} - reads_to_filter.dump(tag:"[STEP8: RNA_FILTERING] reads_to_filter") - bed_to_filter.dump(tag:"[STEP8: RNA_FILTERING] bed_to_filter") - EXTRACT_READ_IDS(reads_to_filter, bed_to_filter) - read_ids = EXTRACT_READ_IDS.out.read_ids - read_ids.dump(tag:"[STEP8: RNA_FILTERING] read_ids") - - //STEP C: Extract reads according to selected read ids - bam_read = reads_to_filter.join(read_ids) - GATK4_FILTERSAMREADS(bam_read, fasta) // bam -> filtered_bam - GATK4_FILTERSAMREADS.out.bam.dump(tag:'[STEP8: RNA_FILTERING] filtered_bam') - - //STEP D: Get FQs for re-alignment - // bam -> fq - ALIGN2FQ(GATK4_FILTERSAMREADS.out.bam, - [ [ id:"fasta" ], [] ], // fasta - [ [ id:'null' ], [] ], // fasta_fai - false) - ALIGN2FQ.out.reads.dump(tag:'[STEP8: RNA_FILTERING] fastq_for_realignment') - - //STEP E: HISAT2 re-alignment - reads_to_realign = ALIGN2FQ.out.reads.map{ meta, reads -> - // This can throw a concurrent error when submitting a lot of samples -// read_files = reads.sort{ a,b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2) - [[data_type:"fastq"] + meta, reads] - } - reads_to_realign.dump(tag:'[STEP8: RNA_FILTERING] reads_to_realign') - // HISAT2 realignment - ALIGN_HISAT2 ( - reads_to_realign, - hisat2_index, - splicesites - ) - ch_genome_bam = ALIGN_HISAT2.out.bam - ch_genome_bam_index = ALIGN_HISAT2.out.bai - ch_samtools_stats = ALIGN_HISAT2.out.stats - ch_samtools_flagstat = ALIGN_HISAT2.out.flagstat - ch_samtools_idxstats = ALIGN_HISAT2.out.idxstats - ch_hisat2_multiqc = ALIGN_HISAT2.out.summary - ch_genome_bam.join(ch_genome_bam_index).dump(tag:"HERE") - ch_genome_bam.join(ch_genome_bam_index).map{meta, bam, bai -> - [[data_type:"bam", - id:meta.id, - patient: meta.patient, - sample:meta.sample, - status:meta.status], - bam, bai] - }.dump(tag:"HERE2") - ch_genome_bam_for_md = ch_genome_bam.join(ch_genome_bam_index) - .map{meta, bam, bai -> - [[data_type:"bam", - id:meta.id, - patient: meta.patient, - sample:meta.sample, - status:meta.status], - bam, bai] - } - ch_genome_bam.dump(tag:'[STEP8: RNA_FILTERING] ch_genome_bam') - ch_genome_bam_index.dump(tag:'[STEP8: RNA_FILTERING] ch_genome_bam_index') -// ch_genome_bam_for_md = ch_genome_bam.mix(ch_genome_bam_index) - - ch_genome_bam_for_md.dump(tag:'[STEP8: RNA_FILTERING] ch_genome_bam_for_md') - } - - emit: - ch_bam_mapped = ch_genome_bam_for_md - versions = ch_versions // channel: [ versions.yml ] - reports = ch_reports - - - - - - - - -} \ No newline at end of file From 528b2695a470797af76de9963c32d41808861d4c Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:58:29 +0100 Subject: [PATCH 55/56] Modified picard/filtersamreads to return bai as well --- modules.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules.json b/modules.json index cfa52f9..fc15267 100644 --- a/modules.json +++ b/modules.json @@ -284,7 +284,8 @@ "picard/filtersamreads": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/picard/filtersamreads/picard-filtersamreads.diff" }, "picard/markduplicates": { "branch": "master", From 0dde4a6106c0b762785af46a9669fb30c2c0cc6a Mon Sep 17 00:00:00 2001 From: Raquel Manzano Date: Sun, 10 Sep 2023 22:59:31 +0100 Subject: [PATCH 56/56] Added maf to schema_input.json --- assets/schema_input.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/assets/schema_input.json b/assets/schema_input.json index 6b8708f..bacbdc4 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -155,6 +155,21 @@ }, "variantcaller": { "type": "string" + }, + "maf": { + "errorMessage": "MAF file cannot contain spaces and must have extension '.maf'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.maf$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true } }, "required": ["patient", "sample"]
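
The new `maf` samplesheet column is what ties the restart logic in this patch series together: CHANNEL_CONSENSUS_CREATE_CSV writes one row per MAF into `${params.outdir}/csv`, and that CSV can be passed back as `--input` with `--step second_run`. Below is a minimal, self-contained sketch of that `collectFile` pattern; it is not pipeline code, and the sample IDs, meta maps, file names and `results` output directory are invented for illustration.

// Minimal sketch of the collectFile pattern used by CHANNEL_CONSENSUS_CREATE_CSV.
// All values below (sample IDs, meta maps, file names, outdir) are illustrative only.
nextflow.enable.dsl = 2

params.outdir = 'results'

workflow {

    // Stand-in for the [meta, maf, variantcaller] channel the subworkflow receives
    maf_to_csv = Channel.of(
        [ [ patient: 'P1', id: 'P1_DNA_T_vs_P1_N', status: 1 ], file('P1_DNA_T.consensus.maf'), 'ConsensusDNA' ],
        [ [ patient: 'P1', id: 'P1_RNA_T_vs_P1_N', status: 2 ], file('P1_RNA_T.strelka.maf'),   'strelka'      ]
    )

    // keepHeader/skip keep a single header line when rows from different tasks are
    // concatenated; sort makes the output deterministic; storeDir publishes the
    // collected CSV so it can later be reused as --input for a restart.
    maf_to_csv
        .collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/csv") { meta, maf, variantcaller ->
            def maf_path = "${params.outdir}/consensus/${variantcaller}/${meta.id}/${maf.getName()}"
            [ "consensus.csv",
              "patient,sample,status,variantcaller,maf\n${meta.patient},${meta.id},${meta.status},${variantcaller},${maf_path}\n" ]
        }
        .view { csv -> "restart samplesheet written to: ${csv}" }
}

On a second_run restart, those rows are read back through Channel.fromSamplesheet and branched by status in BAM_EXTRACT_READS_HISAT2_ALIGN (PREPARE_SECOND_RUN): DNA tumour entries (status 1) keep their MAF/VCF and skip realignment, feeding the dna_consensus_maf / dna_varcall_mafs channels used by the rescue consensus, while normals (status 0) and RNA tumours (status 2) go through read-ID extraction and HISAT2 realignment.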