diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a58..b290e09 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -9,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index 43c7138..d8d119f 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,11 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset +# ignore python and markdown +[*.{py,md}] +indent_style = unset + # C++ compiles code [/bin/cutsite_trimming] end_of_line = unset @@ -30,3 +39,6 @@ insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset indent_size = unset + +[*.cpp] +indent_size = 2 diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 3b558e4..1e09f71 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/hic then the best place to ask is on the nf-core Slack [#hic](https://nfcore.slack.com/channels/hic) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes @@ -116,4 +123,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index eb120e6..14b2607 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e2cdecc..ec32a3d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/hic/ - [ ] If necessary, also make a PR on the nf-core/hic _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 2b99a42..463733e 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,24 +8,29 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/hic' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/hic/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/hic/results-${{ github.sha }}" } - profiles: test_full,public_aws_ecr - - uses: actions/upload-artifact@v3 + profiles: test_full + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 23c4973..d8c080b 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,25 +5,29 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/hic' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/hic/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/hic/results-test-${{ github.sha }}" } - profiles: test,public_aws_ecr - - uses: actions/upload-artifact@v3 + profiles: test + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index edf46df..8b0682e 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83b111b..da272e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,17 +24,83 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ 
matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + + nf-test: + name: ${{ matrix.profile }} NF-${{ matrix.NXF_VER }} + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/hic') }}" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + profile: + - "docker" + steps: + - name: Check out pipeline code + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1.3.0 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install pdiff to see diff between nf-test snapshots + run: python -m pip install --upgrade pip pdiff cryptography + + - uses: nf-core/setup-nf-test@v1 + - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + + - name: Run nf-test + env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" + run: | + nf-test test --verbose --tag PIPELINE --profile "+${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap + + - uses: pcolby/tap-summary@v1 + with: + path: >- + test.tap + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/meta/nextflow.log + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: test.xml diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90e..0b6b1f2 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..2d20d64 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 6655005..14aff71 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: 
actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
 
-      - name: Install Prettier
-        run: npm install -g prettier @prettier/plugin-php
+      - name: Install pre-commit
+        run: pip install pre-commit
 
-      # Check that we actually need to fix something
-      - name: Run 'prettier --check'
-        id: prettier_status
-        run: |
-          if prettier --check ${GITHUB_WORKSPACE}; then
-            echo "result=pass" >> $GITHUB_OUTPUT
-          else
-            echo "result=fail" >> $GITHUB_OUTPUT
-          fi
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true
 
-      - name: Run 'prettier --write'
-        if: steps.prettier_status.outputs.result == 'fail'
-        run: prettier --write ${GITHUB_WORKSPACE}
+      # indication that the linting has finished
+      - name: react if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"
 
       - name: Commit & push changes
-        if: steps.prettier_status.outputs.result == 'fail'
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
         run: |
           git config user.email "core@nf-co.re"
           git config user.name "nf-core-bot"
           git config push.default upstream
           git add .
           git status
-          git commit -m "[automated] Fix linting with Prettier"
+          git commit -m "[automated] Fix code linting"
           git push
+
+      - name: react if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/hic/actions/runs/${{ github.run_id }}) for more details.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4b..1fcafe8 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,34 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          allow-repeats: false
+      - name: Run pre-commit
+        run: pre-commit run --all-files
 
   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
 
       - name: Install Nextflow
-        uses: nf-core/setup-nextflow@v1
+        uses: nf-core/setup-nextflow@v2
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
         with:
-          python-version: "3.8"
+          python-version: "3.12"
           architecture: "x64"
 
       - name: Install dependencies
@@ -99,7 +59,7 @@ jobs:
       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4
         with:
           name: linting-logs
           path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 0bbcd30..40acc23 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download lint results
-        uses: dawidd6/action-download-artifact@v2
+        uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3
         with:
           workflow: linting.yml
           workflow_conclusion: completed
@@ -21,7 +21,7 @@
         run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT
 
       - name: Post PR comment
-        uses: marocchino/sticky-pull-request-comment@v2
+        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
         with:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           number: ${{ steps.pr_number.outputs.pr_number }}
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
new file mode 100644
index 0000000..03ecfcf
--- /dev/null
+++ b/.github/workflows/release-announcements.yml
@@ -0,0 +1,75 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - name: get topics and convert to hashtags
+        id: get_topics
+        run: |
+          echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT
+
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitignore b/.gitignore index 5124c9a..3e70ad8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,7 @@ results/ testing/ testing* *.pyc +.nf-test.log +nf-test +.nf-test* +test.xml diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ec..105a182 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,20 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..e0b85a7 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,2 @@ repository_type: pipeline +nf_core_version: "2.14.1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb..4dc0f1d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,13 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier 
- rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e45be4..031a952 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- Parameter `--dnase`is now replaced by `--no_digestion` + +- Add CALDER2 compartments calling (see '--compartments_caller' option) + +- Add new '--balancing_opts' to update `cooler balance` arguments (#193) + - New subworkflow based on `pairtools` to detect valid pairs. The user -can now choose between `--processing hicpro` (default) or `--processing pairtools` + can now choose between `--processing hicpro` (default) or `--processing pairtools` - Default mapping options with `HiC-Pro` has been updated to give closer results in comparison -with `BWA-mem/pairtools` + with `BWA-mem/pairtools` + +- [#194](https://github.com/nf-core/hic/pull/194) - Update pipeline template to [nf-core/tools 2.13.1](https://github.com/nf-core/tools/releases/tag/2.13.1) +- [#196](https://github.com/nf-core/hic/pull/196) - Add nf-test + +### `Removed` + +- Removed public_aws_ecr profile ## v2.1.0 - 2023-06-01 @@ -22,6 +35,8 @@ with `BWA-mem/pairtools` ### `Fixed` +- cooltools version in COOLTOOLS_INSULATION not parsed correctly + ## v2.0.0 - 2023-01-12 ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 0313a1a..d4c280a 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -16,7 +16,10 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools @@ -35,5 +38,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). 
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. 
+ +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 00752d2..d23a1e0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,19 @@ -# ![nf-core/hic](docs/images/nf-core-hic_logo_light.png#gh-light-mode-only) ![nf-core/hic](docs/images/nf-core-hic_logo_dark.png#gh-dark-mode-only) - -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hic/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2669512-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2669512) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +

+ + + nf-core/hic + +

+ +[![GitHub Actions CI Status](https://github.com/nf-core/hic/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/hic/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/hic/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/hic/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hic/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2669512-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2669512) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/hic) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/hic) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23hic-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/hic)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) @@ -28,26 +35,24 @@ On release, automated continuous integration tests run the pipeline on a full-si 2. Detection of valid interaction products 3. Duplicates removal 4. Generate raw and normalized contact maps ([`iced`](https://github.com/hiclib/iced)) - 5. Generate `pairs` files for downstream analysis + 5. Generate `pairs` files for downstream analysis 2. [`Pairtools`](https://github.com/open2c/pairtools) 1. Mapping using [`BWA-mem`](https://github.com/lh3/bwa) - 2. Detection of valid interaction products with [`pairtools`](https://github.com/open2c/pairtools) - 3. Duplicates removal - 4. Generate `pairs` files for downstream analysis + 2. Detection of valid interaction products with [`pairtools`](https://github.com/open2c/pairtools) + 3. Duplicates removal + 4. Generate `pairs` files for downstream analysis 3. Create genome-wide contact maps at various resolutions ([`cooler`](https://github.com/open2c/cooler)) 4. Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler)) 5. Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler)) 6. Quality controls ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`HiCExplorer`](https://github.com/deeptools/HiCExplorer)) -7. Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/)) +7. 
Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/), [`Calder2`](https://github.com/CSOgroup/CALDER2)) 8. TADs calling ([`HiCExplorer`](https://github.com/deeptools/HiCExplorer), [`cooltools`](https://cooltools.readthedocs.io/en/latest/)) 9. Quality control report ([`MultiQC`](https://multiqc.info/)) ## Usage -> **Note** -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -> with `-profile test` before running the workflow on actual data. +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, prepare a samplesheet with your input data that looks as follows: @@ -69,16 +74,15 @@ nextflow run nf-core/hic \ --outdir ``` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/hic/usage) and the [parameter documentation](https://nf-co.re/hic/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/hic/usage) and the [parameter documentation](https://nf-co.re/hic/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/hic/results) tab on the nf-core website pipeline page. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/hic/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/hic/output). @@ -94,7 +98,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/hic for your analysis, please cite it using the following doi: doi: [10.5281/zenodo.2669512](https://doi.org/10.5281/zenodo.2669512) +If you use nf-core/hic for your analysis, please cite it using the following doi: [10.5281/zenodo.2669512](https://doi.org/10.5281/zenodo.2669512) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/email_template.html b/assets/email_template.html index d207f01..f299709 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/hic v${version}

+

nf-core/hic ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 6905d6f..8d266ca 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/hic v${version} + nf-core/hic ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 2f0a308..18d2c25 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,18 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/hic Methods Description" section_href: "https://github.com/nf-core/hic" plot_type: "html" -## nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

- Data was processed using nf-core/hic v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).
+ Data was processed using nf-core/hic v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}
+ ${tool_citations}

References

  • Servant, N., Ewels, P. A., Peltzer, A., Garcia, M. U. (2021) nf-core/hic. Zenodo. https://doi.org/10.5281/zenodo.2669512
  • - Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • - Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • + Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • + Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • + Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • + da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography}
Notes:
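The placeholders in the methods-description template above (for example `${workflow.manifest.version}`, `${workflow.commandLine}`, `${tool_citations}` and `${tool_bibliography}`) are filled in at runtime from the Nextflow `${workflow}` object and, for the tool citations, by the pipeline's own code. As a minimal, illustrative sketch only (not the pipeline's actual implementation, with made-up values), substitution of this kind can be done with Groovy's `GStringTemplateEngine`:

```groovy
// Minimal sketch only, not the pipeline's own implementation: render a template
// containing ${...} placeholders from a map of (made-up) runtime values.
import groovy.text.GStringTemplateEngine

// Single-quoted strings keep the ${...} placeholders literal until rendering.
def methodsText = '<p>Data was processed using nf-core/hic v${version} of the nf-core collection of workflows.</p>\n' +
        '<pre><code>${commandLine}</code></pre>'

// Hypothetical values; in the pipeline they would come from the workflow object.
def binding = [
    version    : '2.1.0',
    commandLine: 'nextflow run nf-core/hic --input samplesheet.csv --outdir results -profile docker'
]

def rendered = new GStringTemplateEngine().createTemplate(methodsText).make(binding)
println rendered.toString()
```

The rendered HTML is what typically ends up in the MultiQC methods-description section of the report.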
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b2cf07d..96e1ac4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,8 @@ report_comment: > - This report has been generated by the nf-core/hic + This report has been generated by the nf-core/hic analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. + report_section_order: "nf-core-hic-methods-description": order: -1000 @@ -11,3 +12,5 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/nf-core-hic_logo_light.png b/assets/nf-core-hic_logo_light.png index 553c19d..7d3fedf 100644 Binary files a/assets/nf-core-hic_logo_light.png and b/assets/nf-core-hic_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index e699919..69f5f75 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,2 +1,2 @@ sample,fastq_1,fastq_2 -SRR4292758,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz +SRR4292758,https://raw.githubusercontent.com/nf-core/test-datasets/hic/data/SRR4292758_00_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/hic/data/SRR4292758_00_R2.fastq.gz diff --git a/assets/schema_input.json b/assets/schema_input.json index fae1a32..2819403 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,27 +10,24 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1", "fastq_2"] } } diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f..225a0b3 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/hic ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index dde3baa..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import 
Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. 
- - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - ##row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/mapped_2hic_fragments.py b/bin/mapped_2hic_fragments.py index a129871..dfeff70 100755 --- a/bin/mapped_2hic_fragments.py +++ b/bin/mapped_2hic_fragments.py @@ -10,7 +10,7 @@ """ Script to keep only valid 3C products - DE and SC are removed -Output is : readname / +Output is : readname / """ import time import getopt diff --git a/bin/mergeSAM.py b/bin/mergeSAM.py index d670ec2..6dc0701 100755 --- a/bin/mergeSAM.py +++ b/bin/mergeSAM.py @@ -10,7 +10,7 @@ """ Script to pair 2 SAM/BAM files into one PE BAM -- On 03/05/16 Ferhat made changes starting from ~/bin/HiC-Pro_2.7.2b/scripts/mergeSAM.py +- On 03/05/16 Ferhat made changes starting from ~/bin/HiC-Pro_2.7.2b/scripts/mergeSAM.py to make singletons possible to be reported """ diff --git a/bin/src/build_matrix.cpp b/bin/src/build_matrix.cpp index e366d5b..b4cd812 100644 --- a/bin/src/build_matrix.cpp +++ b/bin/src/build_matrix.cpp @@ -1,5 +1,5 @@ // HiC-Pro -// Copyright 2015 Institut Curie +// Copyright 2015 Institut Curie // Author(s): Eric Viara // Contact: nicolas.servant@curie.fr // This software is distributed without any guarantee under the terms of the BSD-3 License @@ -62,7 +62,7 @@ struct Interval { Interval(chrsize_t start = 0, chrsize_t end = 0) : start(start), end(end) { } }; - + class ChrRegions { std::vector chr_v; @@ -85,36 +85,36 @@ class ChrRegions { ifs.getline(buffer, sizeof(buffer)-1); line_num++; if (is_empty_line(buffer)) { - continue; + continue; } chrsize_t start = 0; chrsize_t end = 0; char chr[2048]; if (bed_line_parse(buffer, chr, start, end, bedfile, line_num)) { - return 1; + return 1; } if (intervals.find(chr) == intervals.end()) { - intervals[chr] = new std::vector(); - chr_v.push_back(chr); + intervals[chr] = new std::vector(); + chr_v.push_back(chr); } /* - if (lastend != 0 && !strcmp(lastchr, chr) && start != lastend) { - std::cerr << "warning: discontinuous segment for chromosome " << chr << " at position " << start << " " << end << std::endl; - } - */ + if (lastend != 0 && !strcmp(lastchr, chr) && start != lastend) { + std::cerr << "warning: discontinuous segment for chromosome " << chr 
<< " at position " << start << " " << end << std::endl; + } + */ if (*lastchr && strcmp(lastchr, chr)) { - lastend = 0; + lastend = 0; } if (lastend != 0 && start < lastend) { - std::cerr << "error: bedfile not sorted at line #" << line_num << std::endl; - exit(1); + std::cerr << "error: bedfile not sorted at line #" << line_num << std::endl; + exit(1); } strcpy(lastchr, chr); lastend = end; intervals[chr]->push_back(Interval(start, end)); if (progress && (line_num % 100000) == 0) { - std::cerr << '.' << std::flush; + std::cerr << '.' << std::flush; } } if (progress) { @@ -135,13 +135,13 @@ class ChrRegions { std::vector::const_iterator itv_begin = itv_vect->begin(); std::vector::const_iterator itv_end = itv_vect->end(); while (itv_begin != itv_end) { - const Interval& itv = (*itv_begin); - ofs << chrname << '\t' << itv.start << '\t' << itv.end << '\t' << num << '\n'; - if (progress && (num % 100000) == 0) { - std::cerr << '.' << std::flush; - } - num++; - ++itv_begin; + const Interval& itv = (*itv_begin); + ofs << chrname << '\t' << itv.start << '\t' << itv.end << '\t' << num << '\n'; + if (progress && (num % 100000) == 0) { + std::cerr << '.' << std::flush; + } + num++; + ++itv_begin; } ++begin; } @@ -184,15 +184,15 @@ class Dichotomic { n = (l + r) >> 1; const Interval& itv = intervals[n]; if (value >= itv.start+1 && value <= itv.end) { - return n; + return n; } int x = middle(itv) - value; - + if (x < 0) { - l = n + 1; + l = n + 1; } else { - r = n - 1; + r = n - 1; } //std::cout << "l: " << l << '\n'; //std::cout << "r: " << r << '\n'; @@ -260,10 +260,10 @@ class AxisChromosome { } binend = binstart + chr->getBincount(); /* - if (verbose) { + if (verbose) { std::cerr << "AxisChromosome: " << chr->getName() << " " << binstart << " " << binend << " " << chr->getBincount() << std::endl; - } - */ + } + */ } chrsize_t getBinstart() const {return binstart;} @@ -282,15 +282,15 @@ class AxisChromosome { assert(intervals != NULL); if (!NO_DICHO) { - Dichotomic dicho(*intervals); - int where = dicho.find(start); - if (where < 0) { - if (!quiet) { - std::cerr << "warning: no bin at position " << chr->getName() << ":" << start << std::endl; - } - return BIN_NOT_FOUND; - } - return where + getBinstart(); + Dichotomic dicho(*intervals); + int where = dicho.find(start); + if (where < 0) { + if (!quiet) { + std::cerr << "warning: no bin at position " << chr->getName() << ":" << start << std::endl; + } + return BIN_NOT_FOUND; + } + return where + getBinstart(); } std::vector::const_iterator begin = intervals->begin(); @@ -298,14 +298,14 @@ class AxisChromosome { chrsize_t binidx = 1; while (begin != end) { - const Interval& itv = *begin; - if (start >= itv.start+1 && start <= itv.end) { - break; - } - ++binidx; - ++begin; + const Interval& itv = *begin; + if (start >= itv.start+1 && start <= itv.end) { + break; + } + ++binidx; + ++begin; } - + return binidx + getBinstart() - 1; } @@ -318,7 +318,7 @@ class AxisChromosome { int chrsize = getChrsize(); if (cur_binend > chrsize) { cur_binend = chrsize; - } + } return cur_binidx + getBinstart() - 1; } }; @@ -354,13 +354,13 @@ class Matrix { chrsize_t chrsize = axis_chr->getChrsize(); binend -= binstart; for (chrsize_t bin = 0; bin < binend; ++bin) { - // bed are 0-based begin, 1-based end - chrsize_t beg = bin * binsize; - chrsize_t end = beg + binsize - 1; - if (end > chrsize) { - end = chrsize-1; - } - ofs << name << '\t' << beg << '\t' << (end+1) << '\t' << (bin+binstart) << '\n'; + // bed are 0-based begin, 1-based end + chrsize_t beg = bin * binsize; 
+ chrsize_t end = beg + binsize - 1; + if (end > chrsize) { + end = chrsize-1; + } + ofs << name << '\t' << beg << '\t' << (end+1) << '\t' << (bin+binstart) << '\n'; } ++begin; } @@ -398,9 +398,9 @@ class Matrix { size_t line_total = 0; if (progress) { while (begin != end) { - const std::map& line = (*begin).second; - line_total += line.size(); - ++begin; + const std::map& line = (*begin).second; + line_total += line.size(); + ++begin; } begin = mat.begin(); } @@ -418,13 +418,13 @@ class Matrix { std::map::const_iterator bb = line.begin(); std::map::const_iterator ee = line.end(); while (bb != ee) { - if (progress && (line_cnt % modulo) == 0) { - double percent = (double(line_cnt)/line_total)*100; - std::cerr << "" << percent << "% " << line_cnt << " / " << line_total << std::endl; - } - ofs << abs << '\t' << (*bb).first << '\t' << (*bb).second << '\n'; - line_cnt++; - ++bb; + if (progress && (line_cnt % modulo) == 0) { + double percent = (double(line_cnt)/line_total)*100; + std::cerr << "" << percent << "% " << line_cnt << " / " << line_total << std::endl; + } + ofs << abs << '\t' << (*bb).first << '\t' << (*bb).second << '\n'; + line_cnt++; + ++bb; } ++begin; } @@ -490,10 +490,10 @@ void Chromosome::computeSizes(chrsize_t ori_binsize, chrsize_t step, bool binadj assert(intervals != NULL); bincount = intervals->size(); /* - if (verbose) { + if (verbose) { std::cerr << name << " bincount: " << bincount << std::endl; - } - */ + } + */ } else { if (chrsize < ori_binsize) { binsize = chrsize; @@ -509,10 +509,10 @@ void Chromosome::computeSizes(chrsize_t ori_binsize, chrsize_t step, bool binadj bincount = remainder > 0 ? tmp_bincount+1 : tmp_bincount; } /* - if (verbose) { + if (verbose) { std::cerr << name << " sizes: " << chrsize << " " << binsize << " " << stepsize << " " << bincount << std::endl; - } - */ + } + */ } } @@ -559,7 +559,7 @@ enum MatrixFormat { LOWER_MATRIX, COMPLETE_MATRIX }; - + static int get_options(int argc, char* argv[], chrsize_t& binsize, const char*& binfile, const char*& chrsize_file, const char*& ifile, const char*& oprefix, Format& format, std::string& bed_prefix, bool& binadjust, MatrixFormat& matrix_format, chrsize_t& step, bool& whole_genome, int& binoffset, const char*& chrA, const char*& chrB) { prog = argv[0]; @@ -567,85 +567,85 @@ static int get_options(int argc, char* argv[], chrsize_t& binsize, const char*& const char* opt = argv[ac]; if (*opt == '-') { if (!strcmp(opt, "--binadjust")) { - binadjust = true; + binadjust = true; } else if (!strcmp(opt, "--version")) { - std::cout << "build_matrix version " << VERSION << "\n"; - exit(0); + std::cout << "build_matrix version " << VERSION << "\n"; + exit(0); } else if (!strcmp(opt, "--progress")) { - progress = true; + progress = true; } else if (!strcmp(opt, "--quiet")) { - quiet = true; + quiet = true; } else if (!strcmp(opt, "--detail-progress")) { - progress = true; - detail_progress = true; + progress = true; + detail_progress = true; } else if (!strcmp(opt, "--matrix-format")) { - if (ac == argc-1) { - return usage(); - } - std::string matrix_format_str = argv[++ac]; - if (matrix_format_str == "asis") { - matrix_format = ASIS_MATRIX; - } else if (matrix_format_str == "upper") { - matrix_format = UPPER_MATRIX; - } else if (matrix_format_str == "lower") { - matrix_format = LOWER_MATRIX; - } else if (matrix_format_str == "complete") { - matrix_format = COMPLETE_MATRIX; - } else { - return usage(); - } + if (ac == argc-1) { + return usage(); + } + std::string matrix_format_str = argv[++ac]; + if 
(matrix_format_str == "asis") { + matrix_format = ASIS_MATRIX; + } else if (matrix_format_str == "upper") { + matrix_format = UPPER_MATRIX; + } else if (matrix_format_str == "lower") { + matrix_format = LOWER_MATRIX; + } else if (matrix_format_str == "complete") { + matrix_format = COMPLETE_MATRIX; + } else { + return usage(); + } } else if (!strcmp(opt, "--step")) { - if (ac == argc-1) { - return usage(); - } - step = atoi(argv[++ac]); + if (ac == argc-1) { + return usage(); + } + step = atoi(argv[++ac]); } else if (!strcmp(opt, "--binfile")) { - if (ac == argc-1) { - return usage(); - } - binfile = argv[++ac]; + if (ac == argc-1) { + return usage(); + } + binfile = argv[++ac]; } else if (!strcmp(opt, "--binsize")) { - if (ac == argc-1) { - return usage(); - } - binsize = atoi(argv[++ac]); + if (ac == argc-1) { + return usage(); + } + binsize = atoi(argv[++ac]); } else if (!strcmp(opt, "--binoffset")) { - if (ac == argc-1) { - return usage(); - } - binoffset = atoi(argv[++ac]); + if (ac == argc-1) { + return usage(); + } + binoffset = atoi(argv[++ac]); } else if (!strcmp(opt, "--ifile")) { - if (ac == argc-1) { - return usage(); - } - ifile = argv[++ac]; + if (ac == argc-1) { + return usage(); + } + ifile = argv[++ac]; } else if (!strcmp(opt, "--oprefix")) { - if (ac == argc-1) { - return usage(); - } - oprefix = argv[++ac]; + if (ac == argc-1) { + return usage(); + } + oprefix = argv[++ac]; } else if (!strcmp(opt, "--chrsizes")) { - if (ac == argc-1) { - return usage(); - } - chrsize_file = argv[++ac]; + if (ac == argc-1) { + return usage(); + } + chrsize_file = argv[++ac]; } else if (!strcmp(opt, "--chrA")) { - if (ac == argc-1) { - return usage(); - } - chrA = argv[++ac]; - whole_genome = false; + if (ac == argc-1) { + return usage(); + } + chrA = argv[++ac]; + whole_genome = false; } else if (!strcmp(opt, "--chrB")) { - if (ac == argc-1) { - return usage(); - } - chrB = argv[++ac]; - whole_genome = false; + if (ac == argc-1) { + return usage(); + } + chrB = argv[++ac]; + whole_genome = false; } else if (!strcmp(opt, "--help")) { - return help(); + return help(); } else { - std::cerr << '\n' << prog << ": unknown option " << opt << std::endl; - return usage(); + std::cerr << '\n' << prog << ": unknown option " << opt << std::endl; + return usage(); } } } @@ -759,19 +759,19 @@ static int build_matrix_init(Matrix& matrix, const char* ifile, std::ifstream& i while ((nn = read(fd, p_buffer, sizeof(p_buffer))) > 0) { const char *p = p_buffer; while (nn-- > 0) { - if (*p++ == '\n') { - line_total++; - } + if (*p++ == '\n') { + line_total++; + } } if ((cnt % 200) == 0) { - std::cerr << '.' << std::flush; + std::cerr << '.' 
<< std::flush; } cnt++; } std::cerr << std::endl; close(fd); } - + std::ifstream chrsizefs; chrsizefs.open(chrsize_file); if (chrsizefs.bad() || chrsizefs.fail()) { @@ -917,34 +917,34 @@ static int build_matrix(int binoffset, chrsize_t ori_binsize, const char* binfil case UPPER_MATRIX: if (abs_bin < ord_bin) { - matrix.add(abs_bin, ord_bin); + matrix.add(abs_bin, ord_bin); } else { - matrix.add(ord_bin, abs_bin); + matrix.add(ord_bin, abs_bin); } break; case LOWER_MATRIX: if (abs_bin > ord_bin) { - matrix.add(abs_bin, ord_bin); + matrix.add(abs_bin, ord_bin); } else { - matrix.add(ord_bin, abs_bin); + matrix.add(ord_bin, abs_bin); } break; case COMPLETE_MATRIX: matrix.add(abs_bin, ord_bin); if (abs_bin != ord_bin) { - matrix.add(ord_bin, abs_bin); + matrix.add(ord_bin, abs_bin); } break; } line_cnt++; if (progress && (line_cnt % 100000) == 0) { if (detail_progress) { - double percent = (double(line_cnt)/line_total)*100; - std::cerr << "" << percent << "% " << line_cnt << " / " << line_total << std::endl; + double percent = (double(line_cnt)/line_total)*100; + std::cerr << "" << percent << "% " << line_cnt << " / " << line_total << std::endl; } else { - std::cerr << line_cnt << std::endl; + std::cerr << line_cnt << std::endl; } } } diff --git a/conf/base.config b/conf/base.config index 2558cb1..634808c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -57,7 +57,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/igenomes.config b/conf/igenomes.config index f4c32e3..2d00943 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -14,158 +14,191 @@ params { 'GRCh37' { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" } 'CHM13' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + bwa = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" } 'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" } } } diff --git a/conf/modules.config b/conf/modules.config index 70e85c8..f9e5c6e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,17 +1,21 @@ process { - //Default publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + withName: FASTQC { + ext.args = '--quiet' + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: 'copy', - pattern: '*_versions.yml' + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -60,7 +64,7 @@ process { ] ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } ext.args = params.bwt2_opts_end2end ?: '' - ext.args2 = !params.dnase ? "-F 4" :"" + ext.args2 = !params.no_digestion ? "-F 4" :"" } withName: 'TRIM_READS' { @@ -185,7 +189,7 @@ process { // PAIRTOOLS withName: 'BWA_MEM' { - publishDir = [ + publishDir = [ path: { "${params.outdir}/bwa" }, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, mode: 'copy', @@ -213,7 +217,7 @@ process { enabled: false ] ext.prefix = { "${meta.id}_${meta.chunk}_restrict" } - ext.when = !params.dnase + ext.when = !params.no_digestion } withName: 'PAIRTOOLS_SORT' { @@ -242,19 +246,19 @@ process { ] ext.args = { params.save_interaction_bam ? "--output-sam ${meta.id}_pairtools.bam" : '' } } - + withName: 'SAMTOOLS_SORT' { - publishDir = [ + publishDir = [ path: { "${params.outdir}/pairtools" }, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, mode: 'copy', enabled: params.save_pairs_intermediates ] - ext.prefix = { "${meta.id}_pairtools_sorted" } + ext.prefix = { "${meta.id}_pairtools_sorted" } } withName: 'SAMTOOLS_INDEX' { - publishDir = [ + publishDir = [ path: { "${params.outdir}/pairtools" }, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, mode: 'copy', @@ -269,7 +273,7 @@ process { pattern: "*.pairs.stat" ] ext.args = { "--mark-dups" } - ext.prefix = { "${meta.id}_dedup" } + ext.prefix = { "${meta.id}_dedup" } ext.when = !params.keep_dups } @@ -283,15 +287,14 @@ process { ext.args = { [ params.min_mapq > 0 ? "(mapq1>${params.min_mapq} and mapq2>${params.min_mapq})" : '', params.min_cis_dist > 0 ? " and ((chrom1==chrom2 and abs(pos1-pos2) > ${params.min_cis_dist}) or chrom1!=chrom2)" : '', - params.keep_multi ? " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU') or (pair_type.upper()=='MM') or (pair_type.upper()=='MU'))" : - " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU'))", - params.dnase ? '' : " and ((chrom1==chrom2 and abs(int(rfrag1) - int(rfrag2)) > 1) or chrom1!=chrom2)", + params.keep_multi ? " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU') or (pair_type.upper()=='MM') or (pair_type.upper()=='MU'))" : " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU'))", + params.no_digestion ? '' : " and ((chrom1==chrom2 and abs(int(rfrag1) - int(rfrag2)) > 1) or chrom1!=chrom2)", //params.min_insert_size > 0 ? " and ( (rfrag_end1 - r1pos) + (rfrag_end2 - r2pos)) > ${params.min_insert_size}" : '', //params.max_insert_size > 0 ? " and ( (rfrag_end1 - r1pos) + (rfrag_end2 - r2pos)) < ${params.max_insert_size}" : '', //params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '', //params.max_restriction_fragment_size > 0 ? 
" -m ${params.max_restriction_fragment_size}" : '', ].join(' ').trim() } - } + } withName: 'PAIRTOOLS_STATS' { publishDir = [ @@ -351,6 +354,7 @@ process { mode: 'copy' ] ext.args = '--force' + ext.args = params.balancing_opts ?: '' ext.prefix = { "${cool.baseName}_balanced" } } diff --git a/conf/public_aws_ecr.config b/conf/public_aws_ecr.config deleted file mode 100644 index abff277..0000000 --- a/conf/public_aws_ecr.config +++ /dev/null @@ -1,57 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - AWS ECR Config -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config to set public AWS ECR images wherever possible - This improves speed when running on AWS infrastructure. - Use this as an example template when using your own private registry. ----------------------------------------------------------------------------------------- -*/ - -docker.registry = 'public.ecr.aws' -podman.registry = 'public.ecr.aws' - -process { - withName: 'BOWTIE2_ALIGN' { - container = 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' - } - withName: 'BOWTIE2_ALIGN_TRIMMED' { - container = 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' - } - withName: 'BUILD_CONTACT_MAPS' { - container = 'quay.io/nf-core/ubuntu:20.04' - } - withName: 'COMBINE_MATES' { - container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' - } - withName: 'COOLTOOLS_EIGSCIS' { - container = 'quay.io/biocontainers/mulled-v2-c81d8d6b6acf4714ffaae1a274527a41958443f6:cc7ea58b8cefc76bed985dcfe261cb276ed9e0cf-0' - } - withName: 'GET_RESTRICTION_FRAGMENTS' { - container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' - } - withName: 'GET_VALID_INTERACTION' { - container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' - } - withName: 'GET_VALID_INTERACTION_DNASE' { - container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' - } - withName: 'ICE_NORMALIZATION' { - container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' - } - withName: 'MERGE_STATS' { - container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' - } - withName: 'MERGE_VALID_INTERACTION' { - container = 'quay.io/nf-core/ubuntu:20.04' - } - withName: 'SAMPLESHEET_CHECK' { - container = 'quay.io/biocontainers/python:3.8.3' - } - withName: 'SPLIT_COOLER_DUMP' { - container = 'quay.io/nf-core/ubuntu:20.04' - } - withName: 'TRIM_READS' { - container = 'quay.io/nf-core/ubuntu:20.04' - } -} diff --git a/docs/benchmark.md b/docs/benchmark.md index 9172d6c..eb90636 100644 --- a/docs/benchmark.md +++ b/docs/benchmark.md @@ -22,7 +22,7 @@ nextflow run nf-core-hic/main.nf \ --outdir '/tmp/results_test_pairtools/' ``` -The idea here was just to have a look at the final list of selected (and unselected) +The idea here was just to have a look at the final list of selected (and unselected) read pairs classified as valid interactions (or spurious interaction products) Here is a quick summary statistics ; @@ -46,9 +46,9 @@ Here is a quick summary 
statistics ; # Filtered by Pairtools only = 4686 (11.58%) ``` -Overall, we can see that **70%** of valid interactions are called by both `HiC-Pro` and `Pairtools`. +Overall, we can see that **70%** of valid interactions are called by both `HiC-Pro` and `Pairtools`. Regarding the 30% of read pairs which are different between the two tools, we can see that a large -majority (>75%) are due to differences in the read mapping (`bowtie2` versus `bwa-mem`). +majority (>75%) are due to differences in the read mapping (`bowtie2` versus `bwa-mem`). The few other differences can be at least partly explain by differences in the read pairs selection such as how a read is assigned to a restriction fragments, etc. @@ -68,7 +68,7 @@ nextflow run nf-core-hic/main.nf \ --res_compartments '500000,250000' \ --res_tads '40000,20000' \ --outdir './results_SRX2636669_hicpro/' \ - -profile singularity + -profile singularity ``` or `bwa-mem/pairtools` ; @@ -107,20 +107,14 @@ As before, small statistics were computed to compare the list of valid (and not # Filtered by Pairtools only = 6386317 (10.65%) ``` -Almost **80%** of valid interactions are called in common by `HiC-Pro` and `pairtools`. +Almost **80%** of valid interactions are called in common by `HiC-Pro` and `pairtools`. As previously observed, most of the differences observed between the two tools are explained by distinct mapping procedures. - Finally, we generated the contact maps around a specific regions on the X chromosome -using the `cool` files and the TADs calling generated with both tools. +using the `cool` files and the TADs calling generated with both tools. **No difference is observed at the contact map level.** ![X Inactivation Center - HiC-Pro processing](./images/SRX2636669_hicpro_pygentracks.png) ![X Inactivation Center - Bwa-mem / pairtools](./images/SRX2636669_pairtools_pygentracks.png) - - - - - diff --git a/docs/images/nf-core-hic_logo_dark.png b/docs/images/nf-core-hic_logo_dark.png index e245502..67649fd 100644 Binary files a/docs/images/nf-core-hic_logo_dark.png and b/docs/images/nf-core-hic_logo_dark.png differ diff --git a/docs/images/nf-core-hic_logo_light.png b/docs/images/nf-core-hic_logo_light.png index 5601950..9a7ee85 100644 Binary files a/docs/images/nf-core-hic_logo_light.png and b/docs/images/nf-core-hic_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 1086b03..88470cf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -45,7 +45,7 @@ and aligned back on the genome. Aligned reads for both fragment mates are then paired in a single paired-end BAM file. Singletons and low quality mapped reads are filtered (`--min_mapq`). -Note that if the `--dnase` mode is activated, HiC-Pro will skip the second +Note that if the `--no_digestion` mode is activated, HiC-Pro will skip the second mapping step. **Output directory: `results/hicpro/mapping`** @@ -95,8 +95,8 @@ Duplicated valid pairs associated to PCR artefacts are discarded (see `--keep_dup` to not discard them). In case of Hi-C protocols that do not require a restriction enzyme such as -DNase Hi-C or micro Hi-C, the assignment to a restriction is not possible -(see `--dnase`). +DNase Hi-C or micro-C, the assignment to a restriction is not possible +(see `--no_digestion`). Short range interactions that are likely to be spurious ligation products can thus be discarded using the `--min_cis_dist` parameter. @@ -109,7 +109,7 @@ can thus be discarded using the `--min_cis_dist` parameter. 
- `*.FiltPairs` - List of filtered pairs - `*RSstat` - Statitics of number of read pairs falling in each category -Of note, these results are saved only if `--save_pairs_intermediates` is used. +Of note, these results are saved only if `--save_pairs_intermediates` is used. The `validPairs` are stored using a simple tab-delimited text format ; ```bash @@ -195,9 +195,8 @@ is specified on the command line. - `*.matrix` - genome-wide contact maps - `*_iced.matrix` - genome-wide iced contact maps -The contact maps are generated for all specified resolutions -(see `--bin_size` argument). -A contact map is defined by : +The contact maps are generated for all specified resolutions (see `--bin_size` argument). +A contact map is defined by: - A list of genomic intervals related to the specified resolution (BED format). - A matrix, stored as standard triplet sparse format (i.e. list format). @@ -221,7 +220,7 @@ downstream analysis. ## Hi-C contact maps Contact maps are usually stored as simple txt (`HiC-Pro`), .hic (`Juicer/Juicebox`) and .(m)cool (`cooler/Higlass`) formats. -The .cool and .hic format are compressed and indexed and usually much more efficient than the txt format. +The .cool and .hic format are compressed and indexed and usually much more efficient than the txt format. In the current workflow, we propose to use the `cooler` format as a standard to build the raw and normalised maps after valid pairs detection as it is used by several downstream analysis and visualisation tools. @@ -296,6 +295,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. diff --git a/docs/usage.md b/docs/usage.md index 4ad48da..50fe02e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,7 +18,7 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. Below is an example for the same sample sequenced across 3 lanes: -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz @@ -29,7 +29,7 @@ CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz The `nf-core-hic` pipeline is designed to work only with paired-end data. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. 
-```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 SAMPLE_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz SAMPLE_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz @@ -49,7 +49,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/hic --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/hic --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. @@ -68,8 +68,11 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -> The above pipeline run specified with a params file in yaml format: +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: ```bash nextflow run nf-core/hic -profile docker -params-file params.yaml @@ -81,7 +84,6 @@ with `params.yaml` containing: input: './samplesheet.csv' outdir: './results/' genome: 'GRCh37' -input: 'data' <...> ``` @@ -105,12 +107,15 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen -> (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` @@ -119,8 +124,9 @@ configuration presets for different compute environments. Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full -> pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
+::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) @@ -152,6 +158,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. @@ -243,15 +251,15 @@ Note that by default, no filters are applied on DNA and restriction fragment siz nextflow run main.nf --input './*_R{1,2}.fastq.gz' --genome 'mm10' --digestion 'dnpii' ``` -### DNase Hi-C protocol +### DNase Hi-C / Micro-C protocol -Here is an command line example for DNase protocol. +Here is an command line example for DNase or Micro-C protocol. Alignment will be performed on the `mm10` genome with default paramters. Multi-hits will not be considered and duplicates will be removed. Contacts involving fragments separated by less than 1000bp will be discarded. ```bash -nextflow run main.nf --input './*_R{1,2}.fastq.gz' --genome 'mm10' --dnase --min_cis 1000 +nextflow run main.nf --input './*_R{1,2}.fastq.gz' --genome 'mm10' --no_digestion --min_cis 1000 ``` ## Inputs @@ -448,9 +456,9 @@ Default: 'AAGCTAGCTT' Exemple of the ARIMA kit: GATCGATC,GANTGATC,GANTANTC,GATCANTC -### DNAse Hi-C +### DNAse/Micro-C -#### `--dnase` +#### `--no_digestion` In DNAse Hi-C mode, all options related to digestion Hi-C (see previous section) are ignored. @@ -458,7 +466,7 @@ In this case, it is highly recommended to use the `--min_cis_dist` parameter to remove spurious ligation products. ```bash ---dnase +--no_digestion ``` ### HiC-pro processing @@ -502,7 +510,7 @@ Default: '0' - no filter #### `--min_cis_dist` Filter short range contact below the specified distance. -Mainly useful for DNase Hi-C. Default: '0' +Mainly useful for DNAse/Micro-C. Default: '0' ```bash --min_cis_dist '[numeric]' @@ -615,7 +623,12 @@ Several resolutions can be specified (comma-separeted). Default: '250000' ### Compartment calling -Call open/close compartments for each chromosome, using the `cooltools` command. +Call open/close compartments for each chromosome, using the `cooltools` or `calder2` tools. + +#### `--compartments_caller` + +Compartments calling can be performed with either `cooltools` +or `calder2`. Multiple choices are possible (comma-separated). #### `--res_compartments` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 9b34804..0000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,530 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
-// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def 
defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && 
params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 25a0a74..0000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,336 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4..0000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowHic.groovy b/lib/WorkflowHic.groovy deleted file mode 100755 index f14c26c..0000000 --- a/lib/WorkflowHic.groovy +++ /dev/null @@ -1,85 +0,0 @@ -// -// This file holds several functions specific to the workflow/hic.nf in the nf-core/hic pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowHic { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) - - // digestion parameters - if (params.digest && params.digestion && !params.digest.containsKey(params.digestion)) { - Nextflow.error "Unknown digestion protocol. Currently, the available digestion options are ${params.digest.keySet().join(", ")}. Please set manually the '--restriction_site' and '--ligation_site' parameters." - } - - // Check Digestion or DNase Hi-C mode - //if (!params.dnase && !params.ligation_site) { - // Nextflow.error "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase Hi-C, please use '--dnase' option" - //} - - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index e4bce38..0000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,99 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/hic pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.2669513\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch 
settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index d87bb41..556e259 100644 --- a/main.nf +++ b/main.nf @@ -13,49 +13,114 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.bwt2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') -params.bwa_index = WorkflowMain.getGenomeAttribute(params, 'bwamem2') +include { HIC } from './workflows/hic' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_hic_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_hic_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_hic_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +params.fasta = getGenomeAttribute('fasta') +params.bwt2_index = getGenomeAttribute('bowtie2') +params.bwa_index = getGenomeAttribute('bwa') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { HIC } from './workflows/hic' - // -// WORKFLOW: Run main nf-core/hic analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_HIC { - HIC () -} + take: + samplesheet // channel: samplesheet read in from --input + + main: + + ch_versions = Channel.empty() + + // + // SUBWORKFLOW: prepare genome annotation + // + PREPARE_GENOME( + params.fasta, + params.bwt2_index, + params.bwa_index + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // + // WORKFLOW: Run pipeline + // + HIC ( + samplesheet, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.index, + PREPARE_GENOME.out.chromosome_size, + PREPARE_GENOME.out.res_frag, + PREPARE_GENOME.out.restriction_site, + PREPARE_GENOME.out.ligation_site + ) + ch_versions = ch_versions.mix(HIC.out.versions) + + emit: + multiqc_report = HIC.out.multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_HIC () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_HIC ( + PIPELINE_INITIALISATION.out.samplesheet + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_HIC.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 4eb6814..ac76f9f 100644 --- a/modules.json +++ b/modules.json @@ -7,149 +7,145 @@ "nf-core": { "bowtie2/align": { "branch": "master", - "git_sha": "fe54581f8bed20e4c4a51c616c93fd3379d89820", - "installed_by": [ - "modules" - ] + "git_sha": "e4bad511789f16d0df39ee306b2cd50418365048", + "installed_by": ["modules"] }, "bowtie2/build": { "branch": "master", - "git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c", - "installed_by": [ - "modules" - ] + "git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d", + "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", + "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", + "installed_by": ["modules"] + }, + "calder2": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "cooler/balance": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "cooler/dump": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "cooler/makebins": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] - }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", + "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - 
"installed_by": [ - "modules" - ] + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "b80f5fd12ff7c43938f424dd76392a2704fa2396", + "installed_by": ["modules"] }, "pairix": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "pairtools/dedup": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] + }, + "pairtools/merge": { + "branch": "master", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] }, "pairtools/parse": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] }, "pairtools/restrict": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] }, "pairtools/select": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] }, "pairtools/sort": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] + }, + "pairtools/stats": { + "branch": "master", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", - "installed_by": [ - "modules" - ] + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "installed_by": ["modules"] } } }, "subworkflows": { - "nf-core": {} + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } -} \ No newline at end of file +} diff --git a/modules/local/cooltools/eigscis.nf b/modules/local/cooltools/eigscis.nf index 873bd24..dfc2148 100644 --- a/modules/local/cooltools/eigscis.nf +++ b/modules/local/cooltools/eigscis.nf @@ -26,11 +26,11 @@ process COOLTOOLS_EIGSCIS { """ cooltools genome binnify --all-names ${chrsize} ${resolution} > genome_bins.txt cooltools genome gc genome_bins.txt ${fasta} > genome_gc.txt - cooltools eigs-cis ${args} -o ${prefix}_compartments ${cool} + cooltools eigs-cis ${args} --phasing-track genome_gc.txt -o ${prefix}_compartments ${cool} cat 
<<-END_VERSIONS > versions.yml "${task.process}": - cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') + cooltools: \$(cooltools --version | grep 'cooltools, version ' | sed 's/cooltools, version //') END_VERSIONS """ } diff --git a/modules/local/cooltools/insulation.nf b/modules/local/cooltools/insulation.nf index 77a9472..50a9a2b 100644 --- a/modules/local/cooltools/insulation.nf +++ b/modules/local/cooltools/insulation.nf @@ -26,7 +26,7 @@ process COOLTOOLS_INSULATION { cat <<-END_VERSIONS > versions.yml "${task.process}": - cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') + cooltools: \$(cooltools --version | grep 'cooltools, version ' | sed 's/cooltools, version //') END_VERSIONS """ } diff --git a/modules/local/hicexplorer/hicFindTADs.nf b/modules/local/hicexplorer/hicFindTADs.nf index d86dc83..6f62a04 100644 --- a/modules/local/hicexplorer/hicFindTADs.nf +++ b/modules/local/hicexplorer/hicFindTADs.nf @@ -4,6 +4,7 @@ process HIC_FIND_TADS { label 'process_medium' + tag "$meta.id" conda "bioconda::hicexplorer=3.7.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/hicpro/dnase_mapping_stats.nf b/modules/local/hicpro/dnase_mapping_stats.nf index 8e85113..7b5abb3 100644 --- a/modules/local/hicpro/dnase_mapping_stats.nf +++ b/modules/local/hicpro/dnase_mapping_stats.nf @@ -14,6 +14,7 @@ process MAPPING_STATS_DNASE { output: tuple val(meta), path(bam), emit:bam tuple val(meta), path("${prefix}.mapstat"), emit:stats + path("versions.yml"), emit: versions script: prefix = meta.id + "_" + meta.chunk + "_" + meta.mates @@ -27,5 +28,10 @@ process MAPPING_STATS_DNASE { echo -n "global_${tag}\t" >> ${prefix}.mapstat samtools view -c -F 4 ${bam} >> ${prefix}.mapstat echo -n "local_${tag}\t0" >> ${prefix}.mapstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS """ } diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf deleted file mode 100644 index 453513f..0000000 --- a/modules/local/multiqc.nf +++ /dev/null @@ -1,35 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda "bioconda::multiqc=1.14" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" - - input: - path multiqc_config - path (mqc_custom_config) - path workflow_summary - path ('fastqc/*') - path ('input_*/*') - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/local/pairtools/pairtools_merge.nf b/modules/local/pairtools/pairtools_merge.nf index eefe134..663ef9e 100644 --- a/modules/local/pairtools/pairtools_merge.nf +++ b/modules/local/pairtools/pairtools_merge.nf @@ -27,10 +27,10 @@ process PAIRTOOLS_MERGE { def prefix = task.ext.prefix ?: "${meta.id}_merged" """ pairtools merge \ - ${args} \ - --nproc ${task.cpus} \ - -o ${prefix}.pairs.gz \ - ${allpairs} + ${args} \ + --nproc ${task.cpus} \ + -o ${prefix}.pairs.gz \ + ${allpairs} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/pairtools/parse/main.nf~ b/modules/local/pairtools/pairtools_parse.nf similarity index 84% rename from modules/nf-core/pairtools/parse/main.nf~ rename to modules/local/pairtools/pairtools_parse.nf index 354e4b3..d08fd57 100644 --- a/modules/nf-core/pairtools/parse/main.nf~ +++ b/modules/local/pairtools/pairtools_parse.nf @@ -4,14 +4,14 @@ process PAIRTOOLS_PARSE { // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" input: tuple val(meta), path(bam) - path chromsizes + tuple val(meta2), path(chromsizes) output: tuple val(meta), path("*.pairsam.gz") , emit: pairsam @@ -24,11 +24,14 @@ process PAIRTOOLS_PARSE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def assembly = meta2.id ? 
"--assembly ${meta2.id}" : "" """ pairtools \\ parse \\ -c $chromsizes \\ $args \\ + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \\ + $assembly \\ --output-stats ${prefix}.pairsam.stat \\ -o ${prefix}.pairsam.gz \\ $bam diff --git a/modules/local/pairtools/pairtools_split.nf b/modules/local/pairtools/pairtools_split.nf index 672330c..c167b7d 100644 --- a/modules/local/pairtools/pairtools_split.nf +++ b/modules/local/pairtools/pairtools_split.nf @@ -28,10 +28,10 @@ process PAIRTOOLS_SPLIT { def prefix = task.ext.prefix ?: "${meta.id}" """ pairtools split \ - --nproc-in ${task.cpus} --nproc-out ${task.cpus} \ - --output-pairs ${prefix}.split.pairs.gz \ - ${args} \ - ${pairs} + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \ + --output-pairs ${prefix}.split.pairs.gz \ + ${args} \ + ${pairs} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/pairtools/pairtools_stats.nf b/modules/local/pairtools/pairtools_stats.nf index 4bd65a3..2da0c7e 100644 --- a/modules/local/pairtools/pairtools_stats.nf +++ b/modules/local/pairtools/pairtools_stats.nf @@ -26,10 +26,10 @@ process PAIRTOOLS_STATS { def prefix = task.ext.prefix ?: "${meta.id}_stats" """ pairtools stats \ - ${args} \ - --nproc-in ${task.cpus} --nproc-out ${task.cpus} \ - -o ${prefix}.txt \ - ${pairs} + ${args} \ + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \ + -o ${prefix}.txt \ + ${pairs} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 096c80b..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/hic/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 0000000..d279635 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,9 @@ +name: bowtie2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index a77114d..809525a 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -1,22 +1,27 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label "process_high" + label 'process_high' - conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : - 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" input: tuple val(meta) , path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val save_unaligned val sort_bam output: - tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true path "versions.yml" , emit: versions when: @@ -39,7 +44,10 @@ process BOWTIE2_ALIGN { def samtools_command = sort_bam ? 'sort' : 'view' def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ - def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` @@ -52,8 +60,8 @@ process BOWTIE2_ALIGN { --threads $task.cpus \\ $unaligned \\ $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz @@ -76,12 +84,27 @@ process BOWTIE2_ALIGN { def prefix = task.ext.prefix ?: "${meta.id}" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } """ touch ${prefix}.${extension} + ${create_index} touch ${prefix}.bowtie2.log - touch ${prefix}.unmapped_1.fastq.gz - touch ${prefix}.unmapped_2.fastq.gz + ${create_unmapped} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml index 60d04c1..38610e0 100644 --- a/modules/nf-core/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -36,6 +36,15 @@ input: type: file description: Bowtie2 genome index files pattern: "*.ebwt" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" - save_unaligned: type: boolean description: | @@ -46,22 +55,41 @@ input: description: use samtools sort (true) or samtools view (false) pattern: "true or false" output: - - aligned: + - sam: type: file - description: Output BAM/SAM file containing read alignments - pattern: "*.{bam,sam}" - - versions: + description: Output SAM file containing read alignments + pattern: "*.sam" + - bam: type: file - description: File containing software versions - pattern: "versions.yml" - - fastq: + description: Output BAM file containing read alignments + pattern: "*.bam" + - cram: type: file - description: Unaligned FastQ files - pattern: "*.fastq.gz" + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + type: file + description: Output CRAM index + pattern: "*.crai" - log: type: file description: Aligment log pattern: "*.log" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 0000000..03f1d5e --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 0000000..fdc1c59 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 0000000..03aeaf9 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script 
"../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + 
} + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], 
checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + 
input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 0000000..028e7da --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml @@ -34,14 +35,15 @@ process BWA_INDEX { """ stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" """ mkdir bwa - touch bwa/genome.amb - touch bwa/genome.ann - touch bwa/genome.bwt - touch bwa/genome.pac - touch bwa/genome.sa + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml index 2c6cfcd..6bbc87a 100644 --- a/modules/nf-core/bwa/index/meta.yml +++ b/modules/nf-core/bwa/index/meta.yml @@ -11,7 +11,7 @@ tools: BWA is a software package for mapping DNA sequences 
against a large reference genome, such as the human genome. homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html + documentation: https://bio-bwa.sourceforge.net/bwa.shtml arxiv: arXiv:1303.3997 licence: ["GPL-3.0-or-later"] input: @@ -40,3 +40,7 @@ output: authors: - "@drpatelh" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" + - "@gallvp" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test new file mode 100644 index 0000000..af33e73 --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap new file mode 100644 index 0000000..7c8f046 --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + "versions.yml:md5,a64462ac7dfb21f4ade9b02e7f65c5bb" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "versions": [ + "versions.yml:md5,a64462ac7dfb21f4ade9b02e7f65c5bb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-16T11:40:09.925307" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml new file mode 100644 index 0000000..28bb483 --- /dev/null +++ b/modules/nf-core/bwa/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwa/index: + - modules/nf-core/bwa/index/** diff --git a/modules/nf-core/bwa/mem/environment.yml b/modules/nf-core/bwa/mem/environment.yml new file mode 100644 index 0000000..3aa9f0c --- /dev/null +++ b/modules/nf-core/bwa/mem/environment.yml @@ -0,0 +1,10 @@ +name: bwa_mem +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bwa=0.7.18 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.20 + - htslib=1.20.0 diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf index d2f85da..9c815f0 100644 --- a/modules/nf-core/bwa/mem/main.nf +++ b/modules/nf-core/bwa/mem/main.nf @@ -2,19 +2,23 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : - 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:1bd8542a8a0b42e0981337910954371d0230828e-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:1bd8542a8a0b42e0981337910954371d0230828e-0' }" input: - tuple val(meta), path(reads) + tuple val(meta) , path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val sort_bam output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,6 +28,13 @@ process BWA_MEM { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? "cram": + "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` @@ -32,7 +43,29 @@ process BWA_MEM { -t $task.cpus \\ \$INDEX \\ $reads \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + | samtools $samtools_command $args2 ${reference} --threads $task.cpus -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? "cram": + "bam" + """ + touch ${prefix}.${extension} + touch ${prefix}.csi + touch ${prefix}.crai cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml index 62357bf..b126dd8 100644 --- a/modules/nf-core/bwa/mem/meta.yml +++ b/modules/nf-core/bwa/mem/meta.yml @@ -14,7 +14,7 @@ tools: BWA is a software package for mapping DNA sequences against a large reference genome, such as the human genome. 
homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html + documentation: https://bio-bwa.sourceforge.net/bwa.shtml arxiv: arXiv:1303.3997 licence: ["GPL-3.0-or-later"] input: @@ -37,6 +37,10 @@ input: type: file description: BWA genome index files pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fa}" - sort_bam: type: boolean description: use samtools sort (true) or samtools view (false) @@ -46,6 +50,18 @@ output: type: file description: Output BAM file containing read alignments pattern: "*.{bam}" + - cram: + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + - csi: + type: file + description: Optional index file for BAM file + pattern: "*.{csi}" + - crai: + type: file + description: Optional index file for CRAM file + pattern: "*.{crai}" - versions: type: file description: File containing software versions @@ -53,3 +69,8 @@ output: authors: - "@drpatelh" - "@jeremy1805" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@jeremy1805" + - "@matthdsm" diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test b/modules/nf-core/bwa/mem/tests/main.nf.test new file mode 100644 index 0000000..463b76f --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test @@ -0,0 +1,341 @@ +nextflow_process { + + name "Test Process BWA_MEM" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/mem" + tag "bwa/index" + script "../main.nf" + process "BWA_MEM" + + test("Single-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Single-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Paired-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path 
+ 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Paired-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Paired-End - no fasta") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[:],[]] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Single-end - stub") { + options "-stub" + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("Paired-end - stub") { + options "-stub" + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/modules/nf-core/bwa/mem/tests/main.nf.test.snap new file mode 100644 index 0000000..038ee7b --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -0,0 +1,140 @@ +{ + "Single-End": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:32.953673185" + }, + "Single-End Sort": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:45.27066093" + }, + "Paired-End": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:57.706852274" + }, + "Paired-End Sort": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:45:10.376505036" + }, + "Single-end - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:46:07.182072398" + }, + "Paired-End - no fasta": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:45:53.813076501" + }, + "Paired-end - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:46:18.412916364" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/mem/tests/tags.yml b/modules/nf-core/bwa/mem/tests/tags.yml new file mode 100644 index 0000000..82992d1 --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/tags.yml @@ -0,0 +1,3 @@ +bwa/mem: + - modules/nf-core/bwa/index/** + - modules/nf-core/bwa/mem/** diff 
--git a/modules/nf-core/calder2/environment.yml b/modules/nf-core/calder2/environment.yml new file mode 100644 index 0000000..e694b72 --- /dev/null +++ b/modules/nf-core/calder2/environment.yml @@ -0,0 +1,7 @@ +name: calder2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::r-calder2=0.3 diff --git a/modules/nf-core/calder2/main.nf b/modules/nf-core/calder2/main.nf new file mode 100644 index 0000000..cb77dfe --- /dev/null +++ b/modules/nf-core/calder2/main.nf @@ -0,0 +1,45 @@ +process CALDER2 { + tag '$meta.id' + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-calder2:0.3--r41hdfd78af_0' : + 'biocontainers/r-calder2:0.3--r41hdfd78af_0' }" + + + input: + tuple val(meta), path(cool) + val resolution + + output: + tuple val(meta), path("${meta.id}/") , emit: output_folder + tuple val(meta), path("${meta.id}/intermediate_data/") , emit: intermediate_data_folder , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = resolution ? "::/resolutions/$resolution" : "" + def cpus = task.cpus ?: 1 + def VERSION = '0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + # getting binsize as mandatory input for calder + binsize="\$(cooler info --field bin-size $cool$suffix)" + + calder --input $cool$suffix \\ + --outpath ${prefix} \\ + --nproc $cpus \\ + --type cool \\ + --bin_size "\${binsize}" \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calder: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/calder2/meta.yml b/modules/nf-core/calder2/meta.yml new file mode 100644 index 0000000..fef6a2c --- /dev/null +++ b/modules/nf-core/calder2/meta.yml @@ -0,0 +1,46 @@ +name: "calder2" +description: Hierarchical Hi-C compartment computation +keywords: + - calder2 + - genome + - topology + - compartments + - domains + - hi-c +tools: + - "calder2": + description: "Hierarchical Hi-C compartment computation" + homepage: "https://github.com/CSOgroup/CALDER2" + documentation: "https://github.com/CSOgroup/CALDER2" + tool_dev_url: "https://github.com/CSOgroup/CALDER2" + doi: "10.1038/s41467-021-22666-3" + licence: ["MIT"] +input: + - meta: + type: map + description: Groovy Map containing sample information. E.g. [ id:'test', single_end:false ] + - input: + type: file + description: Path to COOL file + pattern: "*.{cool.mcool}" + - resolution: + type: value + description: In case a .mcool file is provided, which resolution level to use for the analysis +output: + - meta: + type: map + description: Groovy Map containing sample information. E.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: directory + description: Output folder containing sub-compartment (.tsv/.bed) and domain boundaries calls (.bed) + - intermediate_data: + type: directory + description: Output folder containing intermediate data produced during the computation +authors: + - "@lucananni93" +maintainers: + - "@lucananni93" diff --git a/modules/nf-core/cooler/balance/environment.yml b/modules/nf-core/cooler/balance/environment.yml new file mode 100644 index 0000000..b39304d --- /dev/null +++ b/modules/nf-core/cooler/balance/environment.yml @@ -0,0 +1,7 @@ +name: cooler_balance +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/balance/main.nf b/modules/nf-core/cooler/balance/main.nf index 6e131d5..8e0f393 100644 --- a/modules/nf-core/cooler/balance/main.nf +++ b/modules/nf-core/cooler/balance/main.nf @@ -2,10 +2,10 @@ process COOLER_BALANCE { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0': - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(cool), val(resolution) diff --git a/modules/nf-core/cooler/balance/meta.yml b/modules/nf-core/cooler/balance/meta.yml index af1a780..9b89063 100644 --- a/modules/nf-core/cooler/balance/meta.yml +++ b/modules/nf-core/cooler/balance/meta.yml @@ -1,7 +1,10 @@ name: "cooler_balance" description: Run matrix balancing on a cool file keywords: - - balance + - cooler/balance + - cooler + - cool + - cooler tools: - "cooler": description: Sparse binary format for genomic interaction matrices @@ -10,7 +13,6 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-Clause"] - input: - meta: type: map @@ -22,9 +24,8 @@ input: description: Path to COOL file pattern: "*.{cool,mcool}" - resolution: - type: value + type: integer description: Resolution - output: - meta: type: map @@ -39,7 +40,9 @@ output: type: file description: Output COOL file balancing weigths pattern: "*.cool" - authors: - "@nservant" - "@muffato" +maintainers: + - "@nservant" + - "@muffato" diff --git a/modules/nf-core/cooler/cload/environment.yml b/modules/nf-core/cooler/cload/environment.yml new file mode 100644 index 0000000..03abee7 --- /dev/null +++ b/modules/nf-core/cooler/cload/environment.yml @@ -0,0 +1,7 @@ +name: cooler_cload +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/cload/main.nf b/modules/nf-core/cooler/cload/main.nf index 80109d4..b170a5d 100644 --- a/modules/nf-core/cooler/cload/main.nf +++ b/modules/nf-core/cooler/cload/main.nf @@ -2,10 +2,10 @@ process COOLER_CLOAD { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(pairs), path(index), val(cool_bin) @@ -36,4 +36,15 @@ process COOLER_CLOAD { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/cload/meta.yml b/modules/nf-core/cooler/cload/meta.yml index 8513aae..fa5474a 100644 --- a/modules/nf-core/cooler/cload/meta.yml +++ b/modules/nf-core/cooler/cload/meta.yml @@ -2,6 +2,9 @@ name: cooler_cload description: Create a cooler from genomic pairs and bins keywords: - cool + - cooler + - cload + - hic tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -10,7 +13,6 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -24,12 +26,11 @@ input: type: file description: Path to index file of the contacts. - cool_bin: - type: value + type: integer description: Bins size in bp - chromsizes: type: file description: Path to a chromsizes file. - output: - meta: type: map @@ -45,9 +46,11 @@ output: description: Output COOL file path pattern: "*.cool" - cool_bin: - type: value + type: integer description: Bins size in bp - authors: - "@jianhong" - "@muffato" +maintainers: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/dump/environment.yml b/modules/nf-core/cooler/dump/environment.yml new file mode 100644 index 0000000..b4d88e9 --- /dev/null +++ b/modules/nf-core/cooler/dump/environment.yml @@ -0,0 +1,7 @@ +name: cooler_dump +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/dump/main.nf b/modules/nf-core/cooler/dump/main.nf index fed7502..3bb6162 100644 --- a/modules/nf-core/cooler/dump/main.nf +++ b/modules/nf-core/cooler/dump/main.nf @@ -2,10 +2,10 @@ process COOLER_DUMP { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(cool), val(resolution) diff --git a/modules/nf-core/cooler/dump/meta.yml b/modules/nf-core/cooler/dump/meta.yml index fe60523..2f882ae 100644 --- a/modules/nf-core/cooler/dump/meta.yml +++ b/modules/nf-core/cooler/dump/meta.yml @@ -2,6 +2,8 @@ name: cooler_dump description: Dump a cooler’s data to a text stream. 
keywords: - dump + - text + - cooler tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -10,7 +12,6 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-Clause"] - input: - meta: type: map @@ -22,9 +23,8 @@ input: description: Path to COOL file pattern: "*.{cool,mcool}" - resolution: - type: value + type: integer description: Resolution - output: - meta: type: map @@ -39,7 +39,9 @@ output: type: file description: Output text file pattern: "*.bedpe" - authors: - "@jianhong" - "@muffato" +maintainers: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/makebins/environment.yml b/modules/nf-core/cooler/makebins/environment.yml new file mode 100644 index 0000000..e48b3a1 --- /dev/null +++ b/modules/nf-core/cooler/makebins/environment.yml @@ -0,0 +1,7 @@ +name: cooler_makebins +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/makebins/main.nf b/modules/nf-core/cooler/makebins/main.nf index 8c555d1..8a41556 100644 --- a/modules/nf-core/cooler/makebins/main.nf +++ b/modules/nf-core/cooler/makebins/main.nf @@ -1,11 +1,11 @@ process COOLER_MAKEBINS { - tag "${meta.id}" + tag "${meta.id}}" label 'process_low' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0': - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(chromsizes), val(cool_bin) diff --git a/modules/nf-core/cooler/makebins/meta.yml b/modules/nf-core/cooler/makebins/meta.yml index 33fd8eb..16e2c59 100644 --- a/modules/nf-core/cooler/makebins/meta.yml +++ b/modules/nf-core/cooler/makebins/meta.yml @@ -2,6 +2,8 @@ name: "cooler_makebins" description: Generate fixed-width genomic bins keywords: - makebins + - cooler + - genomic bins tools: - "cooler": description: Sparse binary format for genomic interaction matrices @@ -10,15 +12,13 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-Clause"] - input: - chromsize: type: file description: Path to chromosome size file - cool_bin: - type: value + type: integer description: Resolution (bin size) in base pairs - output: - versions: type: file @@ -28,7 +28,9 @@ output: type: file description: Genome segmentation at a fixed resolution as a BED file. 
pattern: "*.bed" - authors: - "@nservant" - "@muffato" +maintainers: + - "@nservant" + - "@muffato" diff --git a/modules/nf-core/cooler/zoomify/environment.yml b/modules/nf-core/cooler/zoomify/environment.yml new file mode 100644 index 0000000..2288f37 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/environment.yml @@ -0,0 +1,7 @@ +name: cooler_zoomify +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/zoomify/main.nf b/modules/nf-core/cooler/zoomify/main.nf index 95e7daf..f9933df 100644 --- a/modules/nf-core/cooler/zoomify/main.nf +++ b/modules/nf-core/cooler/zoomify/main.nf @@ -2,10 +2,10 @@ process COOLER_ZOOMIFY { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(cool) @@ -32,4 +32,15 @@ process COOLER_ZOOMIFY { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.mcool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/zoomify/meta.yml b/modules/nf-core/cooler/zoomify/meta.yml index 57f5548..d87aaf2 100644 --- a/modules/nf-core/cooler/zoomify/meta.yml +++ b/modules/nf-core/cooler/zoomify/meta.yml @@ -2,6 +2,8 @@ name: cooler_zoomify description: Generate a multi-resolution cooler file by coarsening keywords: - mcool + - cool + - cooler tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -10,7 +12,6 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -21,7 +22,6 @@ input: type: file description: Path to COOL file pattern: "*.{cool,mcool}" - output: - meta: type: map @@ -36,6 +36,7 @@ output: type: file description: Output mcool file pattern: "*.mcool" - authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9b3272b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index ebc8727..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index c32657d..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da03340..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    <tr> <th> Process Name </th> <th> Software </th> <th> Version </th> </tr>
    <tr> <td><samp>{process if (i == 0) else ''}</samp></td> <td><samp>{tool}</samp></td> <td><samp>{version}</samp></td> </tr> </table>
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + 
" yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/custom/getchromsizes/environment.yml b/modules/nf-core/custom/getchromsizes/environment.yml new file mode 100644 index 0000000..2ecd012 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/environment.yml @@ -0,0 +1,8 @@ +name: custom_getchromsizes +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf index 060a2e8..3edf7c2 100644 --- a/modules/nf-core/custom/getchromsizes/main.nf +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -2,10 +2,10 @@ process CUSTOM_GETCHROMSIZES { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(fasta) @@ -35,6 +35,9 @@ process CUSTOM_GETCHROMSIZES { """ touch ${fasta}.fai touch ${fasta}.sizes + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml index 219ca1d..529be07 100644 --- a/modules/nf-core/custom/getchromsizes/meta.yml +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/samtools/samtools doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] - input: - meta: type: map @@ -23,7 +22,6 @@ input: type: file description: FASTA file pattern: "*.{fa,fasta,fna,fas}" - output: - meta: type: map @@ -46,8 +44,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@tamara-hodgetts" - "@chris-cheshire" - "@muffato" +maintainers: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test b/modules/nf-core/custom/getchromsizes/tests/main.nf.test new file mode 100644 index 0000000..2f741a4 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test @@ -0,0 +1,99 @@ +nextflow_process { + + name "Test Process CUSTOM_GETCHROMSIZES" + script "../main.nf" + process "CUSTOM_GETCHROMSIZES" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/getchromsizes" + + test("test_custom_getchromsizes") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("test_custom_getchromsizes_bgzip") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_custom_getchromsizes - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_custom_getchromsizes_bgzip - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap new file mode 100644 index 0000000..c37b284 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap @@ -0,0 +1,242 @@ +{ + "test_custom_getchromsizes_bgzip - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:38:36.927106" + }, + "test_custom_getchromsizes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T13:22:34.14237" + }, + "test_custom_getchromsizes_bgzip": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test" + }, + 
"genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T13:23:06.241379" + }, + "test_custom_getchromsizes - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:24:05.697845" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/getchromsizes/tests/tags.yml b/modules/nf-core/custom/getchromsizes/tests/tags.yml new file mode 100644 index 0000000..d89a805 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/tags.yml @@ -0,0 +1,2 @@ +custom/getchromsizes: + - modules/nf-core/custom/getchromsizes/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 0000000..1787b38 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 07d5e43..d79f1c8 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -25,15 +25,25 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -45,7 +55,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5..ee5507e 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 0000000..70edae4 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
+ //     Mon 2 Oct 2023
+ //     test.gz
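+ // Because of that embedded date, the assertions below avoid md5-summing the report:
+ // they match the output file name patterns, check the HTML text for a fixed substring,
+ // and snapshot only versions.yml, which is stable across runs.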
    + // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ 
".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 0000000..86f7c31 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 0000000..7834294 --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000..2121492 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.23 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 0000000..459dfea --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,61 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.23--pyhdfd78af_0' : + 'biocontainers/multiqc:1.23--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path(replace_names) + path(sample_names) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + $logo \\ + $replace \\ + $samples \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 0000000..382c08c --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,71 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. 
Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..6aa27f4 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..45e95e5 --- /dev/null +++ 
b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-10T12:41:34.562023" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-10T11:27:11.933869532" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-10T11:26:56.709849369" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 0000000..bea6c0d --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/pairix/environment.yml b/modules/nf-core/pairix/environment.yml new file mode 100644 index 0000000..51ab400 --- /dev/null +++ b/modules/nf-core/pairix/environment.yml @@ -0,0 +1,7 @@ +name: pairix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairix=0.3.7 diff --git a/modules/nf-core/pairix/main.nf b/modules/nf-core/pairix/main.nf index 66bf652..e71ebb8 100644 --- a/modules/nf-core/pairix/main.nf +++ b/modules/nf-core/pairix/main.nf @@ -2,7 +2,7 @@ process PAIRIX { tag "$meta.id" label 'process_medium' - conda "bioconda::pairix=0.3.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairix:0.3.7--py36h30a8e3e_3' : 'biocontainers/pairix:0.3.7--py36h30a8e3e_3' }" diff --git a/modules/nf-core/pairix/meta.yml b/modules/nf-core/pairix/meta.yml index e1318ef..2b837d1 100644 --- a/modules/nf-core/pairix/meta.yml +++ b/modules/nf-core/pairix/meta.yml @@ -10,9 +10,7 @@ tools: homepage: "https://github.com/4dn-dcic/pairix" documentation: "https://github.com/4dn-dcic/pairix" tool_dev_url: "https://github.com/4dn-dcic/pairix" - licence: ["MIT"] - input: - meta: type: map @@ -22,7 +20,6 @@ input: - pair: type: file description: pair file - output: - meta: type: map @@ -37,6 +34,7 @@ output: type: file description: pair index file pattern: "*.px2" - authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/pairtools/dedup/environment.yml b/modules/nf-core/pairtools/dedup/environment.yml new file mode 100644 index 0000000..2f7ccc1 --- /dev/null +++ b/modules/nf-core/pairtools/dedup/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_dedup +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/dedup/main.nf b/modules/nf-core/pairtools/dedup/main.nf index ef7f01f..b5a3f75 100644 --- a/modules/nf-core/pairtools/dedup/main.nf +++ b/modules/nf-core/pairtools/dedup/main.nf @@ -4,7 +4,7 @@ process PAIRTOOLS_DEDUP { // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" @@ -26,14 +26,13 @@ process PAIRTOOLS_DEDUP { """ pairtools dedup \\ $args \\ - --n-proc ${task.cpus} \\ -o ${prefix}.pairs.gz \\ --output-stats ${prefix}.pairs.stat \\ $input cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/dedup/meta.yml b/modules/nf-core/pairtools/dedup/meta.yml index 14c1980..3c67e3f 100644 --- a/modules/nf-core/pairtools/dedup/meta.yml +++ b/modules/nf-core/pairtools/dedup/meta.yml @@ -2,15 +2,16 @@ name: pairtools_dedup description: Find and remove PCR/optical duplicates keywords: - dedup + - deduplication + - PCR/optical duplicates + - pairs tools: - pairtools: description: CLI tools to process mapped Hi-C data homepage: http://pairtools.readthedocs.io/ documentation: http://pairtools.readthedocs.io/ tool_dev_url: https://github.com/mirnylab/pairtools - licence: ["MIT"] - input: - meta: type: map @@ -20,7 +21,6 @@ input: - input: type: file description: pair file - output: - meta: type: map @@ -39,6 +39,7 @@ output: type: file description: stats of the pairs pattern: "*.{pairs.stat}" - authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/pairtools/merge/environment.yml b/modules/nf-core/pairtools/merge/environment.yml new file mode 100644 index 0000000..f85858a --- /dev/null +++ b/modules/nf-core/pairtools/merge/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/sort/main.nf~ b/modules/nf-core/pairtools/merge/main.nf similarity index 51% rename from modules/nf-core/pairtools/sort/main.nf~ rename to modules/nf-core/pairtools/merge/main.nf index 68c48bc..4893347 100644 --- a/modules/nf-core/pairtools/sort/main.nf~ +++ b/modules/nf-core/pairtools/merge/main.nf @@ -1,40 +1,37 @@ -process PAIRTOOLS_SORT { - tag "$meta.id" - label 'process_high' +process PAIRTOOLS_MERGE { + tag "${meta.id}" + label 'process_medium' // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" input: - tuple val(meta), path(input) + tuple val(meta), path(allpairs) output: - tuple val(meta), path("*.pairs.gz"), emit: sorted - path "versions.yml" , emit: versions + tuple val(meta), path("*pairs.gz"), emit:pairs + path("versions.yml"), emit:versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def mem = task.memory.toString().replaceAll(/(\s|\.|B)+/, '') + def prefix = task.ext.prefix ?: "${meta.id}_merged" """ - pairtools \\ - sort \\ - $args \\ - --nproc $task.cpus \\ - --memory "$mem" \\ - -o ${prefix}.pairs.gz \\ - $input + pairtools merge \ + ${args} \ + --nproc ${task.cpus} \ + -o ${prefix}.pairs.gz \ + ${allpairs} cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/merge/meta.yml b/modules/nf-core/pairtools/merge/meta.yml new file mode 100644 index 0000000..b7bccc3 --- /dev/null +++ b/modules/nf-core/pairtools/merge/meta.yml @@ -0,0 +1,40 @@ +name: pairtools_merge +description: Merge multiple pairs/pairsam files +keywords: + - merge + - pairs + - pairsam +tools: + - pairtools: + description: CLI tools to process mapped Hi-C data + homepage: http://pairtools.readthedocs.io/ + documentation: http://pairtools.readthedocs.io/ + tool_dev_url: https://github.com/mirnylab/pairtools + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - allpairs: + type: file + description: All pair files to merge +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pairs: + type: file + description: Merged pairs file + pattern: "*.{pairs.gz}" +authors: + - "@nservant" +maintainers: + - "@nservant" diff --git a/modules/nf-core/pairtools/parse/environment.yml b/modules/nf-core/pairtools/parse/environment.yml new file mode 100644 index 0000000..0bd69ca --- /dev/null +++ b/modules/nf-core/pairtools/parse/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_parse +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/parse/main.nf b/modules/nf-core/pairtools/parse/main.nf index a21f37a..b939492 100644 --- a/modules/nf-core/pairtools/parse/main.nf +++ b/modules/nf-core/pairtools/parse/main.nf @@ -4,14 +4,14 @@ process PAIRTOOLS_PARSE { // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" input: tuple val(meta), path(bam) - tuple val(meta2), path(chromsizes) + path chromsizes output: tuple val(meta), path("*.pairsam.gz") , emit: pairsam @@ -24,20 +24,18 @@ process PAIRTOOLS_PARSE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def assembly = meta2.id ? "--assembly ${meta2.id}" : "" """ - pairtools parse \\ + pairtools \\ + parse \\ -c $chromsizes \\ - --nproc-in ${task.cpus} --nproc-out ${task.cpus} \\ $args \\ - $assembly \\ --output-stats ${prefix}.pairsam.stat \\ -o ${prefix}.pairsam.gz \\ $bam cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/parse/meta.yml b/modules/nf-core/pairtools/parse/meta.yml index 5c0ce4a..e7e448e 100644 --- a/modules/nf-core/pairtools/parse/meta.yml +++ b/modules/nf-core/pairtools/parse/meta.yml @@ -1,16 +1,16 @@ name: pairtools_parse description: Find ligation junctions in .sam, make .pairs keywords: + - ligation junctions - parse + - pairtools tools: - pairtools: description: CLI tools to process mapped Hi-C data homepage: http://pairtools.readthedocs.io/ documentation: http://pairtools.readthedocs.io/ tool_dev_url: https://github.com/mirnylab/pairtools - licence: ["MIT"] - input: - meta: type: map @@ -24,7 +24,6 @@ input: - chromsizes: type: file description: chromosome size file - output: - meta: type: map @@ -43,6 +42,7 @@ output: type: file description: stats of the pairs pattern: "*.{pairsam.stat}" - authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/pairtools/restrict/environment.yml b/modules/nf-core/pairtools/restrict/environment.yml new file mode 100644 index 0000000..4ba65c7 --- /dev/null +++ b/modules/nf-core/pairtools/restrict/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_restrict +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/restrict/main.nf b/modules/nf-core/pairtools/restrict/main.nf index 3adc2f7..dd57ceb 100644 --- a/modules/nf-core/pairtools/restrict/main.nf +++ b/modules/nf-core/pairtools/restrict/main.nf @@ -4,7 +4,7 @@ process PAIRTOOLS_RESTRICT { // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" @@ -33,7 +33,7 @@ process PAIRTOOLS_RESTRICT { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/restrict/meta.yml b/modules/nf-core/pairtools/restrict/meta.yml index fc70b0c..12f6b98 100644 --- a/modules/nf-core/pairtools/restrict/meta.yml +++ b/modules/nf-core/pairtools/restrict/meta.yml @@ -1,16 +1,16 @@ name: pairtools_restrict description: Assign restriction fragments to pairs keywords: - - sort + - pairs + - pairstools + - restriction fragments tools: - pairtools: description: CLI tools to process mapped Hi-C data homepage: http://pairtools.readthedocs.io/ documentation: http://pairtools.readthedocs.io/ tool_dev_url: https://github.com/mirnylab/pairtools - licence: ["MIT"] - input: - meta: type: map @@ -26,7 +26,6 @@ input: a tab-separated BED file with the positions of restriction fragments (chrom, start, end). Can be generated using cooler digest. - output: - meta: type: map @@ -41,6 +40,7 @@ output: type: file description: Filtered pairs file pattern: "*.{pairs.gz}" - authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/pairtools/select/environment.yml b/modules/nf-core/pairtools/select/environment.yml new file mode 100644 index 0000000..ef1b0d4 --- /dev/null +++ b/modules/nf-core/pairtools/select/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_select +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/select/main.nf b/modules/nf-core/pairtools/select/main.nf index fb4a1a3..69e8d47 100644 --- a/modules/nf-core/pairtools/select/main.nf +++ b/modules/nf-core/pairtools/select/main.nf @@ -4,7 +4,7 @@ process PAIRTOOLS_SELECT { // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" @@ -32,7 +32,7 @@ process PAIRTOOLS_SELECT { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/select/meta.yml b/modules/nf-core/pairtools/select/meta.yml index d2008c0..e9aef9a 100644 --- a/modules/nf-core/pairtools/select/meta.yml +++ b/modules/nf-core/pairtools/select/meta.yml @@ -2,15 +2,15 @@ name: pairtools_select description: Select pairs according to given condition by options.args keywords: - select + - pairs + - filter tools: - pairtools: description: CLI tools to process mapped Hi-C data homepage: http://pairtools.readthedocs.io/ documentation: http://pairtools.readthedocs.io/ tool_dev_url: https://github.com/mirnylab/pairtools - licence: ["MIT"] - input: - meta: type: map @@ -20,7 +20,6 @@ input: - input: type: file description: pairs file - output: - meta: type: map @@ -39,6 +38,7 @@ output: type: file description: Rest pairs file. pattern: "*.{unselected.pairs.gz}" - authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/pairtools/sort/environment.yml b/modules/nf-core/pairtools/sort/environment.yml new file mode 100644 index 0000000..21bd011 --- /dev/null +++ b/modules/nf-core/pairtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/sort/main.nf b/modules/nf-core/pairtools/sort/main.nf index 50a11b5..653a8d3 100644 --- a/modules/nf-core/pairtools/sort/main.nf +++ b/modules/nf-core/pairtools/sort/main.nf @@ -4,7 +4,7 @@ process PAIRTOOLS_SORT { // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" @@ -22,19 +22,19 @@ process PAIRTOOLS_SORT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def mem = (task.memory.giga*0.8).intValue() - //def mem = avail_mem.toString().replaceAll(/(\s|\.|B)+/, 'G') + def buffer = task.memory.toGiga().intdiv(2) """ - pairtools sort \\ + pairtools \\ + sort \\ $args \\ --nproc $task.cpus \\ - --memory "${mem}G" \\ + --memory ${buffer}G \\ -o ${prefix}.pairs.gz \\ $input cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/sort/meta.yml b/modules/nf-core/pairtools/sort/meta.yml index 0e068de..987aebb 100644 --- a/modules/nf-core/pairtools/sort/meta.yml +++ b/modules/nf-core/pairtools/sort/meta.yml @@ -2,15 +2,15 @@ name: pairtools_sort description: Sort a .pairs/.pairsam file keywords: - sort + - pairs + - pairsam tools: - pairtools: description: CLI tools to process mapped Hi-C data homepage: http://pairtools.readthedocs.io/ documentation: http://pairtools.readthedocs.io/ tool_dev_url: https://github.com/mirnylab/pairtools - licence: ["MIT"] - input: - meta: type: map @@ -20,7 +20,6 @@ input: - input: type: file description: A pairs file - output: - meta: type: map @@ -35,6 +34,9 @@ output: type: file description: Sorted pairs file pattern: "*.{pairs.gz}" - authors: - "@jianhong" + - "@nservant" +maintainers: + - "@jianhong" + - "@nservant" diff --git a/modules/nf-core/pairtools/stats/environment.yml b/modules/nf-core/pairtools/stats/environment.yml new file mode 100644 index 0000000..c1f95fb --- /dev/null +++ b/modules/nf-core/pairtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: pairtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pairtools=1.0.2 + - conda-forge::numpy=1.23 diff --git a/modules/nf-core/pairtools/dedup/main.nf~ b/modules/nf-core/pairtools/stats/main.nf similarity index 55% rename from modules/nf-core/pairtools/dedup/main.nf~ rename to modules/nf-core/pairtools/stats/main.nf index 44a4ef7..b07d061 100644 --- a/modules/nf-core/pairtools/dedup/main.nf~ +++ b/modules/nf-core/pairtools/stats/main.nf @@ -1,21 +1,20 @@ -process PAIRTOOLS_DEDUP { - tag "$meta.id" - label 'process_high' +process PAIRTOOLS_STATS { + tag "${meta.id}" + label 'process_low' // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved // Not an issue with the biocontainers because they were built prior to numpy 1.24 - conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" input: - tuple val(meta), path(input) + tuple val(meta), path(pairs) output: - tuple val(meta), path("*.pairs.gz") , emit: pairs - tuple val(meta), path("*.pairs.stat"), emit: stat - path "versions.yml" , emit: versions + tuple val(meta), path("*.pairs.stat"), emit:stats + path("versions.yml"), emit:versions when: task.ext.when == null || task.ext.when @@ -24,16 +23,15 @@ process PAIRTOOLS_DEDUP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - pairtools dedup \\ - $args \\ - --n-proc ${task.cpus \\ - -o ${prefix}.pairs.gz \\ - --output-stats ${prefix}.pairs.stat \\ - $input + pairtools stats \\ + ${args} \\ + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \\ + -o ${prefix}.pairs.stat \\ + ${pairs} cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/stats/meta.yml b/modules/nf-core/pairtools/stats/meta.yml new file mode 100644 index 0000000..da2a1fa --- /dev/null +++ b/modules/nf-core/pairtools/stats/meta.yml @@ -0,0 +1,40 @@ +name: pairtools_stats +description: Calculate pairs statistics +keywords: + - stats + - pairs + - pairsam +tools: + - pairtools: + description: CLI tools to process mapped Hi-C data + homepage: http://pairtools.readthedocs.io/ + documentation: http://pairtools.readthedocs.io/ + tool_dev_url: https://github.com/mirnylab/pairtools + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pairs: + type: file + description: pairs file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - stat: + type: file + description: stats of the pairs + pattern: "*.{pairs.stat}" +authors: + - "@nservant" +maintainers: + - "@nservant" diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 0000000..68b8155 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 0000000..754d84b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 0000000..9799135 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 0000000..24c3c04 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 0000000..e9f85ef --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "flagstat": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:41:52.516253882" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 0000000..2d2b725 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 0000000..260d516 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 0b20aa4..b523c21 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 8bd2fa6..01a4ee0 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 0000000..0ed260e --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 0000000..bb7756d --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("bai_versions") } + ) + } + } + + test("crai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert snapshot(process.out.versions).match("crai_versions") } + ) + } + } + + test("csi") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("csi_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 0000000..52756e8 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "crai_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:04.203740976" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:09.57475878" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:41:38.446424" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:41:57.929287369" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 0000000..e0f58a7 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 0000000..36a12ea --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 2b7753f..596c6f7 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,17 +2,20 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: - tuple val(meta), path(bam) + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) output: - tuple val(meta), path("*.bam"), emit: bam - tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -21,14 +24,24 @@ process SAMTOOLS_SORT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + samtools cat \\ + --threads $task.cpus \\ + ${bam} \\ + | \\ samtools sort \\ $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,6 +53,7 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.bam + touch ${prefix}.bam.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 0732843..341a7d0 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -23,8 +23,18 @@ input: e.g. [ id:'test', single_end:false ] - bam: type: file - description: BAM/CRAM/SAM file + description: BAM/CRAM/SAM file(s) pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true output: - meta: type: map @@ -33,16 +43,29 @@ output: e.g. [ id:'test', single_end:false ] - bam: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - versions: + description: Sorted BAM file + pattern: "*.{bam}" + - cram: type: file - description: File containing software versions - pattern: "versions.yml" + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" - csi: type: file description: BAM index file (optional) pattern: "*.csi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 0000000..fb38ed9 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,104 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match("test_bam") + } + ) + } + } + + test("cram") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match("test_cram") + } + ) + } + } + + test("bam_stub") { + + config "./nextflow.config" + options "-stub" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 0000000..5a27de1 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,114 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T08:13:54.512837189" + }, + "bam_stub_bam": { + "content": [ + "test.sorted.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T07:29:00.761845507" + }, + "test_cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,22b2093be34a7637f5fbc84272b89d06" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T09:16:51.924951855" + }, + "test_bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T08:28:12.15952312" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T07:29:00.765038811" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T08:13:48.538030517" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file 
mode 100644 index 0000000..f642771 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 0000000..cd63ea2 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/nextflow.config b/nextflow.config index 0ecd0e3..7d4761b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,8 +1,8 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/hic Nextflow config file +nf-core/hic Nextflow config file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments +Default config options for all compute environments ---------------------------------------------------------------------------------------- */ @@ -10,18 +10,19 @@ params { // Input options - input = null - - + input = null // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + //fasta = null chromosome_size = null restriction_fragments = null save_reference = false // Mapping + //bwt2_index = null + //bwa_index = null split_fastq = false fastq_chunks_size = 20000000 save_interaction_bam = false @@ -41,26 +42,26 @@ params { ligation_site = null restriction_site = null digest { - 'hindiii'{ - restriction_site='A^AGCTT' - ligation_site='AAGCTAGCTT' - } - 'mboi' { - restriction_site='^GATC' - ligation_site='GATCGATC' - } - 'dpnii' { - restriction_site='^GATC' - ligation_site='GATCGATC' - } - 'arima' { - restriction_site='^GATC,G^ANTC' - ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC' - } - 'arimaV2' { - restriction_site='^GATC,G^ANTC,C^TNAG,T^TAA' - ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC,GATCTNAG,GANTTNAG,CTNAGATC,CTNAANTC,TTAGATC,TTAANTC,TTATTNAG,GATCTAA,GANTTAA,CTNATAA,TTATAA,CTNATNAG' - } + 'hindiii'{ + restriction_site='A^AGCTT' + ligation_site='AAGCTAGCTT' + } + 'mboi' { + restriction_site='^GATC' + ligation_site='GATCGATC' + } + 'dpnii' { + restriction_site='^GATC' + ligation_site='GATCGATC' + } + 'arima' { + restriction_site='^GATC,G^ANTC' + ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC' + } + 'arimaV2' { + restriction_site='^GATC,G^ANTC,C^TNAG,T^TAA' + ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC,GATCTNAG,GANTTNAG,CTNAGATC,CTNAANTC,TTAGATC,TTAANTC,TTATTNAG,GATCTAA,GANTTAA,CTNATAA,TTATAA,CTNATNAG' + } } min_restriction_fragment_size = 0 @@ -69,12 +70,13 @@ params { max_insert_size = 0 save_pairs_intermediates = false - // Dnase Hi-C - dnase = false + // Dnase/Micro-C Hi-C + no_digestion = false min_cis_dist = 0 // Pairtools pairtools_parse_opts = "--walks-policy 5unique" + balancing_opts = "--force" // Contact maps save_raw_maps = false @@ -90,6 +92,7 @@ params { res_dist_decay = '250000' tads_caller = 'insulation' res_tads = '40000' + compartments_caller = 'cooltools' res_compartments = '250000' // Workflow @@ -109,28 +112,24 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - 
plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes,digest' + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - // Max resource options // Defaults only, expecting to be overwritten @@ -138,6 +137,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -154,112 +160,126 @@ try { } // Load nf-core/hic custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! -// try { -// includeConfig "${params.custom_config_base}/pipeline/hic.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/hic profiles: ${params.custom_config_base}/pipeline/hic.config") -// } - - +try { + includeConfig "${params.custom_config_base}/pipeline/hic.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/hic profiles: ${params.custom_config_base}/pipeline/hic.config") +} profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.registry = 'quay.io' - docker.userEmulation = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u 
$(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - podman.registry = 'quay.io' - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } - gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } - public_aws_ecr { - includeConfig 'conf/public_aws_ecr.config' + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -267,8 +287,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for 
packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -283,28 +301,25 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -// Set default registry for Docker and Podman independent of -profile -// Will not be used unless Docker / Podman are enabled -// Set to your registry if you have a mirror of containers -docker.registry = 'quay.io' -podman.registry = 'quay.io' +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -313,9 +328,9 @@ manifest { homePage = 'https://github.com/nf-core/hic' description = """Analysis of Chromosome Conformation Capture data (Hi-C)""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '2.2.0dev' - doi = '' + nextflowVersion = '!>=23.04.0' + version = '2.2.0dev' + doi = '10.5281/zenodo.2669512' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 5c3a23d..0981029 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,10 @@ "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/hic/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -57,20 +58,13 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?s?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have an index available this will be generated for you automatically. 
Combine with `--save_reference` to save the mapping index for future runs.", "fa_icon": "far fa-file-code" }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -87,10 +81,10 @@ "type": "string", "description": "Full path to directory containing Bwa-mem index", "fa_icon": "far fa-file-alt" - } + } } }, - "hic_processing":{ + "hic_processing": { "title": "Hi-C processing", "type": "object", "description": "Define the Hi-C processing subworkflow to use to extract the valid pairs from raw data", @@ -100,7 +94,7 @@ "type": "string", "description": "Hi-C processing method", "default": "hicpro", - "enum": ["hicpro", "pairtools"] + "enum": ["hicpro", "pairtools"] } } }, @@ -148,18 +142,18 @@ } }, "no_digestion_hi_c": { - "title": "DNAse Hi-C", + "title": "Micro-C/DNase Hi-C", "type": "object", - "description": "Parameters for protocols based on DNAse digestion", + "description": "Parameters for protocols which are not based on restriction enzymes", "default": "", "properties": { - "dnase": { + "no_digestion": { "type": "boolean", - "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C" + "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C or Micro-C" }, "min_cis_dist": { "type": "integer", - "description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products. Only values > 0 are considered" + "description": "Minimum distance between loci to consider. Useful for --no_digestion mode to remove spurious ligation products.
Only values > 0 are considered" } } }, @@ -187,17 +181,17 @@ }, "bwt2_opts_end2end": { "type": "string", - "default": "'--very-sensitive --end-to-end --reorder'", + "default": "--very-sensitive --end-to-end --reorder", "description": "Option for HiC-Pro end-to-end bowtie mapping" }, "bwt2_opts_trimmed": { "type": "string", - "default": "'--very-sensitive --end-to-end --reorder'", + "default": "--very-sensitive --end-to-end --reorder", "description": "Option for HiC-Pro trimmed reads mapping" }, "bwa_opts": { "type": "string", - "default": "'-5SP -T0'", + "default": "-5SP -T0", "description": "Option for Bwa-mem mapping" }, "save_aligned_intermediates": { @@ -240,7 +234,7 @@ "pairtools_parse_opts": { "type": "string", "description": "Update 'pairtools parse' options", - "default": "--walks-policy 5unique" + "default": "--walks-policy 5unique" }, "save_interaction_bam": { "type": "boolean", @@ -262,7 +256,7 @@ "bin_size": { "type": "string", "pattern": "^(\\d+)(,\\d+)*$", - "default": "1000000,500000", + "default": "1000000", "description": "Resolution to build the maps (comma separated)" }, "hicpro_maps": { @@ -288,9 +282,13 @@ "default": 100, "description": "Maximum number of iteraction for HiC-Pro ICE normalization" }, + "balancing_opts": { + "type": "string", + "description": "Update 'cooler balance' options", + "default": "--force" + }, "res_zoomify": { "type": "string", - "default": "5000", "description": "Maximum resolution to build mcool file" }, "save_raw_maps": { @@ -308,7 +306,7 @@ "res_dist_decay": { "type": "string", "pattern": "^(\\d+)(,\\d+)*$", - "default": "1000000", + "default": "250000", "description": "Resolution to build count/distance plot" }, "tads_caller": { @@ -316,10 +314,15 @@ "default": "insulation", "description": "Define methods for TADs calling" }, + "compartments_caller": { + "type": "string", + "default": "cooltools", + "description": "Define methods for compartments calling" + }, "res_tads": { "type": "string", "pattern": "^(\\d+)(,\\d+)*$", - "default": "40000,20000", + "default": "40000", "description": "Resolution to run TADs callers (comma separated)" }, "res_compartments": { @@ -443,7 +446,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" } @@ -514,6 +517,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, @@ -529,13 +533,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -543,12 +540,33 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..6f5e2c4 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,10 @@ +config { + // Location of nf-tests + testsDir "." + + // nf-test directory used to create temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // Location of an optional nextflow.config file specific for executing pipeline tests + configFile "tests/nextflow.config" +} diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
-[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index fee68a5..4f965d3 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -1,23 +1,36 @@ include { COOLTOOLS_EIGSCIS } from '../../modules/local/cooltools/eigscis' +include { CALDER2 } from '../../modules/nf-core/calder2/main' workflow COMPARTMENTS { - take: - cool - fasta - chrsize + take: + cool + fasta + chrsize - main: - ch_versions = Channel.empty() + main: + ch_versions = Channel.empty() - COOLTOOLS_EIGSCIS( - cool, - fasta.map{it -> it[1]}.collect(), - chrsize.map{it -> it[1]}.collect() - ) - ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) + if (params.compartments_caller =~ 'cooltools'){ + COOLTOOLS_EIGSCIS( + cool, + fasta.map{it -> it[1]}.collect(), + chrsize.map{it -> it[1]}.collect() + ) + ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) + ch_comp = COOLTOOLS_EIGSCIS.out.results + } - emit: - versions = ch_versions - compartments = COOLTOOLS_EIGSCIS.out.results -} \ No newline at end of file + if (params.compartments_caller =~ 'calder2'){ + CALDER2( + cool.map{meta, cool, res -> [meta, cool] }, + Channel.value([]) + ) + ch_versions = ch_versions.mix(CALDER2.out.versions) + ch_comp = CALDER2.out.output_folder + } + + emit: + versions = ch_versions + compartments = ch_comp +} diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf index 1299266..3857033 100644 --- a/subworkflows/local/cooler.nf +++ b/subworkflows/local/cooler.nf @@ -5,8 +5,8 @@ */ include { COOLER_ZOOMIFY } from '../../modules/nf-core/cooler/zoomify/main' -include { COOLER_DUMP } from '../../modules/nf-core/cooler/dump/main' -include { COOLER_CLOAD } from '../../modules/nf-core/cooler/cload/main' +include { COOLER_DUMP } from '../../modules/nf-core/cooler/dump/main' +include { COOLER_CLOAD } from '../../modules/nf-core/cooler/cload/main' include { COOLER_BALANCE } from '../../modules/nf-core/cooler/balance/main' include { COOLER_MAKEBINS } from '../../modules/nf-core/cooler/makebins/main' @@ -14,84 +14,84 @@ include { SPLIT_COOLER_DUMP } from '../../modules/local/split_cooler_dump' // add resolution in meta def addResolution(row) { - def meta = [:] - meta.id = row[0].id - meta.resolution = row[2] - return [meta, row[1], row[2]] + def meta = [:] + meta.id = row[0].id + meta.resolution = row[2] + return [meta, row[1], row[2]] } workflow COOLER { - take: - pairs // [meta, pairs, index] - chromsize // [meta, chromsize] - cool_bins - - main: - ch_versions = Channel.empty() - - //***************************************** - // EXPORT BINS - - COOLER_MAKEBINS( - chromsize.combine(cool_bins) - ) - ch_versions = ch_versions.mix(COOLER_MAKEBINS.out.versions) - - //***************************************** - // BUILD COOL FILE PER RESOLUTION - // [meta, pairs, resolution] - - COOLER_CLOAD( - pairs.combine(cool_bins), - chromsize.map{it -> it[1]}.collect() - ) - ch_versions = ch_versions.mix(COOLER_CLOAD.out.versions) - - // Add resolution in meta - COOLER_CLOAD.out.cool - .map{ it -> addResolution(it) } - .set{ ch_cool } - - COOLER_BALANCE( - ch_cool.map{[it[0], it[1], ""]} - ) - ch_versions = ch_versions.mix(COOLER_BALANCE.out.versions) - - // Zoomify at minimum bin resolution - if (!params.res_zoomify){ - ch_res_zoomify = cool_bins.min() - }else{ - ch_res_zoomify = 
Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger() - } - - ch_cool - .combine(ch_res_zoomify) - .filter{ it[2] == it[3] } - .map{ it->[it[0], it[1]] } - .set{ ch_cool_zoomify } - - COOLER_ZOOMIFY( - ch_cool_zoomify - ) - ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions) - - //***************************************** - // DUMP DATA - // [meta, cool] / resolution - - COOLER_DUMP( - COOLER_BALANCE.out.cool.map{[it[0], it[1], ""]} - ) - ch_versions = ch_versions.mix(COOLER_DUMP.out.versions) - - SPLIT_COOLER_DUMP( - COOLER_DUMP.out.bedpe - ) - ch_versions = ch_versions.mix(SPLIT_COOLER_DUMP.out.versions) - - emit: - versions = ch_versions - cool = COOLER_BALANCE.out.cool - mcool = COOLER_ZOOMIFY.out.mcool -} \ No newline at end of file + take: + pairs // [meta, pairs, index] + chromsize // [meta, chromsize] + cool_bins + + main: + ch_versions = Channel.empty() + + //***************************************** + // EXPORT BINS + + COOLER_MAKEBINS( + chromsize.combine(cool_bins) + ) + ch_versions = ch_versions.mix(COOLER_MAKEBINS.out.versions) + + //***************************************** + // BUILD COOL FILE PER RESOLUTION + // [meta, pairs, resolution] + + COOLER_CLOAD( + pairs.combine(cool_bins), + chromsize.map{it -> it[1]}.collect() + ) + ch_versions = ch_versions.mix(COOLER_CLOAD.out.versions) + + // Add resolution in meta + COOLER_CLOAD.out.cool + .map{ it -> addResolution(it) } + .set{ ch_cool } + + COOLER_BALANCE( + ch_cool.map{[it[0], it[1], ""]} + ) + ch_versions = ch_versions.mix(COOLER_BALANCE.out.versions) + + // Zoomify at minimum bin resolution + if (!params.res_zoomify){ + ch_res_zoomify = cool_bins.min() + }else{ + ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger() + } + + ch_cool + .combine(ch_res_zoomify) + .filter{ it[2] == it[3] } + .map{ it->[it[0], it[1]] } + .set{ ch_cool_zoomify } + + COOLER_ZOOMIFY( + ch_cool_zoomify + ) + ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions) + + //***************************************** + // DUMP DATA + // [meta, cool] / resolution + + COOLER_DUMP( + COOLER_BALANCE.out.cool.map{[it[0], it[1], ""]} + ) + ch_versions = ch_versions.mix(COOLER_DUMP.out.versions) + + SPLIT_COOLER_DUMP( + COOLER_DUMP.out.bedpe + ) + ch_versions = ch_versions.mix(SPLIT_COOLER_DUMP.out.versions) + + emit: + versions = ch_versions + cool = COOLER_BALANCE.out.cool + mcool = COOLER_ZOOMIFY.out.mcool +} diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index d13737c..7730e21 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -3,7 +3,7 @@ * MAIN WORKFLOW * From the raw sequencing reads to the list of valid interactions */ - + include { HICPRO_MAPPING } from './hicpro_mapping' include { GET_VALID_INTERACTION } from '../../modules/local/hicpro/get_valid_interaction' include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/hicpro/get_valid_interaction_dnase' @@ -15,134 +15,136 @@ include { ICE_NORMALIZATION } from '../../modules/local/hicpro/run_ice' // Remove meta.chunks def removeChunks(row){ - meta = row[0].clone() - meta.remove('chunk') - return [meta, row[1]] + meta = row[0].clone() + meta.remove('chunk') + return [meta, row[1]] } workflow HICPRO { - take: - reads // [meta, read1, read2] - index // path - fragments // path - chrsize // path - ligation_site // value - map_res // values - - main: - ch_versions = Channel.empty() - - // Fastq to paired-end bam - HICPRO_MAPPING( - reads, - index, - ligation_site - ) - 
ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions) - - //*************************************** - // DIGESTION PROTOCOLS - - if (!params.dnase){ - GET_VALID_INTERACTION ( - HICPRO_MAPPING.out.bam, - fragments.collect() + take: + reads // [meta, read1, read2] + fasta // [meta, fasta] + index // path + fragments // path + chrsize // path + ligation_site // value + map_res // values + + main: + ch_versions = Channel.empty() + + // Fastq to paired-end bam + HICPRO_MAPPING( + reads, + fasta, + index, + ligation_site + ) + ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions) + + //*************************************** + // DIGESTION PROTOCOLS + + if (!params.no_digestion){ + GET_VALID_INTERACTION ( + HICPRO_MAPPING.out.bam, + fragments.collect() + ) + ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions) + ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs + ch_valid_stats = GET_VALID_INTERACTION.out.stats + + }else{ + + //**************************************** + // DNASE-LIKE PROTOCOLS + + GET_VALID_INTERACTION_DNASE ( + HICPRO_MAPPING.out.bam + ) + ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions) + ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs + ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats + } + + + //************************************** + // MERGE AND REMOVE DUPLICATES + + //if (params.split_fastq){ + ch_valid_pairs = ch_valid_pairs + .map{ meta, valid_pairs -> + def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] + [ groupKey(newMeta, meta.part), valid_pairs ] + }.groupTuple() + + MERGE_VALID_INTERACTION ( + ch_valid_pairs ) - ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions) - ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs - ch_valid_stats = GET_VALID_INTERACTION.out.stats + ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions) - }else{ - //**************************************** - // DNASE-LIKE PROTOCOLS + ch_hicpro_mappingstats = HICPRO_MAPPING.out.mapstats + .map{ meta, stats -> + def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] + [ groupKey(newMeta, meta.part), stats ] + }.groupTuple() - GET_VALID_INTERACTION_DNASE ( - HICPRO_MAPPING.out.bam - ) - ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions) - ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs - ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats - } - - - //************************************** - // MERGE AND REMOVE DUPLICATES - - //if (params.split_fastq){ - ch_valid_pairs = ch_valid_pairs - .map{ meta, valid_pairs -> - def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] - [ groupKey(newMeta, meta.part), valid_pairs ] - }.groupTuple() - - MERGE_VALID_INTERACTION ( - ch_valid_pairs - ) - ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions) - - - ch_hicpro_mappingstats = HICPRO_MAPPING.out.mapstats - .map{ meta, stats -> - def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] - [ groupKey(newMeta, meta.part), stats ] - }.groupTuple() - - ch_hicpro_pairstats = HICPRO_MAPPING.out.pairstats - .map{ meta, stats -> - def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] - [ groupKey(newMeta, meta.part), stats ] - }.groupTuple() - - ch_hicpro_validstats = ch_valid_stats - .map{ meta, stats -> - def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] - [ groupKey(newMeta, meta.part), stats ] - }.groupTuple() - - MERGE_STATS( - 
ch_hicpro_mappingstats.concat(ch_hicpro_pairstats, ch_hicpro_validstats) - ) - ch_versions = ch_versions.mix(MERGE_STATS.out.versions) - - //*************************************** - // CONVERTS TO PAIRS - - HICPRO2PAIRS ( - MERGE_VALID_INTERACTION.out.valid_pairs, - chrsize.collect() - ) - ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions) - - //*************************************** - // CONTACT MAPS - - if (params.hicpro_maps){ - - //build_contact_maps - BUILD_CONTACT_MAPS( - MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), - chrsize.collect() + ch_hicpro_pairstats = HICPRO_MAPPING.out.pairstats + .map{ meta, stats -> + def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] + [ groupKey(newMeta, meta.part), stats ] + }.groupTuple() + + ch_hicpro_validstats = ch_valid_stats + .map{ meta, stats -> + def newMeta = [ id: meta.id, single_end: meta.singleEnd, part:meta.part ] + [ groupKey(newMeta, meta.part), stats ] + }.groupTuple() + + MERGE_STATS( + ch_hicpro_mappingstats.concat(ch_hicpro_pairstats, ch_hicpro_validstats) ) - ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps - - // run_ice - ICE_NORMALIZATION( - BUILD_CONTACT_MAPS.out.maps + ch_versions = ch_versions.mix(MERGE_STATS.out.versions) + + //*************************************** + // CONVERTS TO PAIRS + + HICPRO2PAIRS ( + MERGE_VALID_INTERACTION.out.valid_pairs, + chrsize.collect() ) - ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps - ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions) - - }else{ - ch_hicpro_raw_maps = Channel.empty() - ch_hicpro_iced_maps = Channel.empty() - } - - emit: - versions = ch_versions - pairs = HICPRO2PAIRS.out.pairs - mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc) - raw_maps = ch_hicpro_raw_maps - iced_maps = ch_hicpro_iced_maps + ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions) + + //*************************************** + // CONTACT MAPS + + if (params.hicpro_maps){ + + //build_contact_maps + BUILD_CONTACT_MAPS( + MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res), + chrsize.collect() + ) + ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps + + // run_ice + ICE_NORMALIZATION( + BUILD_CONTACT_MAPS.out.maps + ) + ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps + ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions) + + }else{ + ch_hicpro_raw_maps = Channel.empty() + ch_hicpro_iced_maps = Channel.empty() + } + + emit: + versions = ch_versions + pairs = HICPRO2PAIRS.out.pairs + mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc) + raw_maps = ch_hicpro_raw_maps + iced_maps = ch_hicpro_iced_maps } diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 4f6b9e9..05ccd98 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -13,95 +13,99 @@ include { MAPPING_STATS_DNASE } from '../../modules/local/hicpro/dnase_mapping_s workflow HICPRO_MAPPING { - take: - reads // [meta, read1, read2] - index // [meta, path] - ligation_site // value - - main: - ch_versions = Channel.empty() - - // Align each mates separetly and add mates information in [meta] - ch_reads_r1 = reads - .map{ meta, fastq -> - def newMeta = [ id: meta.id, single_end:true, chunk:meta.chunk, part:meta.part, mates:'R1' ] - [ newMeta, fastq[0] ] - } - - ch_reads_r2 = reads - .map{ meta, fastq -> - def newMeta = [ id: meta.id, single_end:true, chunk:meta.chunk, part:meta.part, mates:'R2' ] - [ newMeta, fastq[1] ] + take: + reads // [meta, read1, read2] + fasta // [meta, 
fasta] + index // [meta, path] + ligation_site // value + + main: + ch_versions = Channel.empty() + + // Align each mates separetly and add mates information in [meta] + ch_reads_r1 = reads + .map{ meta, fastq -> + def newMeta = [ id: meta.id, single_end:true, chunk:meta.chunk, part:meta.part, mates:'R1' ] + [ newMeta, fastq[0] ] + } + + ch_reads_r2 = reads + .map{ meta, fastq -> + def newMeta = [ id: meta.id, single_end:true, chunk:meta.chunk, part:meta.part, mates:'R2' ] + [ newMeta, fastq[1] ] + } + + ch_reads = ch_reads_r1.concat(ch_reads_r2) + + // bowtie2 - save_unaligned=true - sort_bam=false + BOWTIE2_ALIGN( + ch_reads, + index.collect(), + fasta.collect(), + true, + false + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + if (!params.no_digestion){ + + // trim reads + TRIM_READS( + BOWTIE2_ALIGN.out.fastq, + ligation_site.collect() + ) + ch_versions = ch_versions.mix(TRIM_READS.out.versions) + + // bowtie2 on trimmed reads - save_unaligned=false - sort_bam=false + BOWTIE2_ALIGN_TRIMMED( + TRIM_READS.out.fastq, + index.collect(), + fasta.collect(), + false, + false + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) + + // Merge the two mapping steps + ch_bowtie2_align = BOWTIE2_ALIGN.out.bam + .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0]) + + MERGE_BOWTIE2( + ch_bowtie2_align + ) + ch_versions = ch_versions.mix(MERGE_BOWTIE2.out.versions) + ch_mapping_stats = MERGE_BOWTIE2.out.stats + + ch_bams = MERGE_BOWTIE2.out.bam + .map{ meta, bam -> + def newMeta = [ id: meta.id, single_end:false, chunk:meta.chunk, part:meta.part ] + [ newMeta, bam ] + }.groupTuple() + + + }else{ + + MAPPING_STATS_DNASE( + BOWTIE2_ALIGN.out.aligned + ) + ch_versions = ch_versions.mix(MAPPING_STATS_DNASE.out.versions) + ch_mapping_stats = MAPPING_STATS_DNASE.out.stats + + ch_bams = BOWTIE2_ALIGN.out.aligned + .map{ meta, bam -> + def newMeta = [ id: meta.id, single_end: false, chunk:meta.chunk, part:meta.part ] + [ newMeta, bam ] + }.groupTuple() } - ch_reads = ch_reads_r1.concat(ch_reads_r2) - - // bowtie2 - save_unaligned=true - sort_bam=false - BOWTIE2_ALIGN( - ch_reads, - index.collect(), - true, - false - ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) - - if (!params.dnase){ - - // trim reads - TRIM_READS( - BOWTIE2_ALIGN.out.fastq, - ligation_site.collect() - ) - ch_versions = ch_versions.mix(TRIM_READS.out.versions) - - // bowtie2 on trimmed reads - save_unaligned=false - sort_bam=false - BOWTIE2_ALIGN_TRIMMED( - TRIM_READS.out.fastq, - index.collect(), - false, - false + COMBINE_MATES ( + ch_bams ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) - - // Merge the two mapping steps - ch_bowtie2_align = BOWTIE2_ALIGN.out.aligned - .combine(BOWTIE2_ALIGN_TRIMMED.out.aligned, by:[0]) + ch_versions = ch_versions.mix(COMBINE_MATES.out.versions) - MERGE_BOWTIE2( - ch_bowtie2_align - ) - ch_versions = ch_versions.mix(MERGE_BOWTIE2.out.versions) - ch_mapping_stats = MERGE_BOWTIE2.out.stats - - ch_bams = MERGE_BOWTIE2.out.bam - .map{ meta, bam -> - def newMeta = [ id: meta.id, single_end:false, chunk:meta.chunk, part:meta.part ] - [ newMeta, bam ] - }.groupTuple() - - - }else{ - - MAPPING_STATS_DNASE( - BOWTIE2_ALIGN.out.aligned - ) - ch_mapping_stats = MAPPING_STATS_DNASE.out.stats - - ch_bams = BOWTIE2_ALIGN.out.aligned - .map{ meta, bam -> - def newMeta = [ id: meta.id, single_end: false, chunk:meta.chunk, part:meta.part ] - [ newMeta, bam ] - }.groupTuple() - } - - COMBINE_MATES ( - ch_bams - ) - ch_versions = 
ch_versions.mix(COMBINE_MATES.out.versions) - - emit: - versions = ch_versions - bam = COMBINE_MATES.out.bam - mapstats = ch_mapping_stats - pairstats = COMBINE_MATES.out.stats + emit: + versions = ch_versions + bam = COMBINE_MATES.out.bam + mapstats = ch_mapping_stats + pairstats = COMBINE_MATES.out.stats } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 37181fc..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - if (params.split_fastq){ - - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } - .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) - .map { it -> [it[0], [it[1], it[2]]]} - .groupTuple(by: [0]) - .flatMap { it -> setMetaChunk(it) } - .collate(2) - .set { reads } - - }else{ - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } - .map { it -> [it[0], [it[1], it[2]]]} - .groupTuple(by: [0]) - .flatMap { it -> setMetaChunk(it) } - .collate(2) - .set { reads } - } - - emit: - reads // channel: [ val(meta), [ reads ] ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channels(LinkedHashMap row) { - def meta = [:] - meta.id = row.sample - meta.single_end = false - - def array = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - array = [ meta, file(row.fastq_1), file(row.fastq_2) ] - return array -} - -// Set the meta.chunk value in case of technical replicates -def setMetaChunk(row){ - def map = [] - row[1].eachWithIndex() { file,i -> - meta = row[0].clone() - meta.chunk = i - meta.part = row[1].size() - map += [meta, file] - } - return map -} \ No newline at end of file diff --git a/subworkflows/local/pairtools.nf b/subworkflows/local/pairtools.nf index dbb7004..7f88993 100644 --- a/subworkflows/local/pairtools.nf +++ b/subworkflows/local/pairtools.nf @@ -3,107 +3,128 @@ * MAIN WORKFLOW * From the raw sequencing reads to the list of valid interactions */ - + //include { BWAMEM2_MEM } from '../../modules/nf-core/bwamem2/mem/main' include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' include { PAIRTOOLS_DEDUP } from '../../modules/nf-core/pairtools/dedup/main' -include { PAIRTOOLS_PARSE } from '../../modules/nf-core/pairtools/parse/main' +//include { PAIRTOOLS_PARSE } from '../../modules/nf-core/pairtools/parse/main' include { PAIRTOOLS_RESTRICT } from '../../modules/nf-core/pairtools/restrict/main' include { PAIRTOOLS_SELECT } from '../../modules/nf-core/pairtools/select/main' include { PAIRTOOLS_SORT } from '../../modules/nf-core/pairtools/sort/main' +include { PAIRTOOLS_MERGE } from '../../modules/nf-core/pairtools/merge/main' +include { PAIRTOOLS_STATS } from '../../modules/nf-core/pairtools/stats/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' 
include { PAIRIX } from '../../modules/nf-core/pairix/main' -include { PAIRTOOLS_MERGE } from '../../modules/local/pairtools/pairtools_merge' +//include { PAIRTOOLS_MERGE } from '../../modules/local/pairtools/pairtools_merge' include { PAIRTOOLS_SPLIT } from '../../modules/local/pairtools/pairtools_split' -include { PAIRTOOLS_STATS } from '../../modules/local/pairtools/pairtools_stats' - +//include { PAIRTOOLS_STATS } from '../../modules/local/pairtools/pairtools_stats' +include { PAIRTOOLS_PARSE } from '../../modules/local/pairtools/pairtools_parse' workflow PAIRTOOLS { - take: - reads // [meta, read1, read2] - index // [meta2, path] - frag // path - chrsize // path - - main: - ch_versions = Channel.empty() - - BWA_MEM( - reads, - index.collect(), - Channel.value([]) - ) - - PAIRTOOLS_PARSE( - BWA_MEM.out.bam, - chrsize.collect() - ) - - PAIRTOOLS_RESTRICT( - PAIRTOOLS_PARSE.out.pairsam, - frag.map{it->it[1]}.collect() - ) - - ch_pairsam = params.dnase ? PAIRTOOLS_PARSE.out.pairsam : PAIRTOOLS_RESTRICT.out.restrict - PAIRTOOLS_SORT( - ch_pairsam - ) - - ch_valid_pairs = PAIRTOOLS_SORT.out.sorted - .map{ meta, pairs -> - def newMeta = [ id: meta.id, single_end: meta.single_end, part:meta.part ] - [ groupKey(newMeta, meta.part), pairs ] - } - .groupTuple() - .view() - .branch { - single: it[0].part <=1 - multiple: it[0].part > 1 - } - - PAIRTOOLS_MERGE( - ch_valid_pairs.multiple - ) - - // Separate pairs/bam files - PAIRTOOLS_SPLIT( - PAIRTOOLS_MERGE.out.pairs.mix(ch_valid_pairs.single) - ) - - // Manage BAM files - SAMTOOLS_SORT( - PAIRTOOLS_SPLIT.out.bam - ) - - SAMTOOLS_INDEX( - SAMTOOLS_SORT.out.bam - ) - - // TODO - add samtools flagstat - - PAIRTOOLS_DEDUP( - PAIRTOOLS_SPLIT.out.pairs - ) - - ch_pairselect = params.keep_dups ? PAIRTOOLS_SPLIT.out.pairs : PAIRTOOLS_DEDUP.out.pairs - PAIRTOOLS_SELECT( - ch_pairselect - ) - - PAIRTOOLS_STATS( - PAIRTOOLS_SELECT.out.selected - ) - - PAIRIX( - PAIRTOOLS_SELECT.out.selected - ) - - emit: - versions = ch_versions - pairs = PAIRIX.out.index - bam = PAIRTOOLS_SPLIT.out.bam.join(SAMTOOLS_INDEX.out.bai) - stats = PAIRTOOLS_STATS.out.stats.map{it->it[1]} + take: + reads // [meta, read1, read2] + fasta // [meta, fasta] + index // [meta2, path] + frag // path + chrsize // path + + main: + ch_versions = Channel.empty() + + BWA_MEM( + reads, + index.collect(), + fasta.collect(), + Channel.value([]) + ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions) + + PAIRTOOLS_PARSE( + BWA_MEM.out.bam, + chrsize.collect() + ) + ch_versions = ch_versions.mix(PAIRTOOLS_PARSE.out.versions) + + PAIRTOOLS_RESTRICT( + PAIRTOOLS_PARSE.out.pairsam, + frag.map{it->it[1]}.collect() + ) + ch_versions = ch_versions.mix(PAIRTOOLS_RESTRICT.out.versions) + + ch_pairsam = params.no_digestion ? 
PAIRTOOLS_PARSE.out.pairsam : PAIRTOOLS_RESTRICT.out.restrict + PAIRTOOLS_SORT( + ch_pairsam + ) + ch_versions = ch_versions.mix(PAIRTOOLS_SORT.out.versions) + + ch_valid_pairs = PAIRTOOLS_SORT.out.sorted + .map{ meta, pairs -> + def newMeta = [ id: meta.id, single_end: meta.single_end, part:meta.part ] + [ groupKey(newMeta, meta.part), pairs ] + } + .groupTuple() + .branch { + single: it[0].part <=1 + multiple: it[0].part > 1 + } + + PAIRTOOLS_MERGE( + ch_valid_pairs.multiple + ) + ch_versions = ch_versions.mix(PAIRTOOLS_MERGE.out.versions) + + // Separate pairs/bam files + PAIRTOOLS_SPLIT( + PAIRTOOLS_MERGE.out.pairs.mix(ch_valid_pairs.single) + ) + ch_versions = ch_versions.mix(PAIRTOOLS_SPLIT.out.versions) + + // Manage BAM files + SAMTOOLS_SORT( + PAIRTOOLS_SPLIT.out.bam, + fasta + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + SAMTOOLS_INDEX( + SAMTOOLS_SORT.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + + SAMTOOLS_FLAGSTAT( + SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai) + ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + PAIRTOOLS_DEDUP( + PAIRTOOLS_SPLIT.out.pairs + ) + ch_versions = ch_versions.mix(PAIRTOOLS_DEDUP.out.versions) + + ch_pairselect = params.keep_dups ? PAIRTOOLS_SPLIT.out.pairs : PAIRTOOLS_DEDUP.out.pairs + PAIRTOOLS_SELECT( + ch_pairselect + ) + ch_versions = ch_versions.mix(PAIRTOOLS_SELECT.out.versions) + + PAIRTOOLS_STATS( + PAIRTOOLS_SELECT.out.selected + ) + ch_versions = ch_versions.mix(PAIRTOOLS_STATS.out.versions) + + PAIRIX( + PAIRTOOLS_SELECT.out.selected + ) + ch_versions = ch_versions.mix(PAIRIX.out.versions) + + emit: + versions = ch_versions + pairs = PAIRIX.out.index + bam = PAIRTOOLS_SPLIT.out.bam.join(SAMTOOLS_INDEX.out.bai) + stats = PAIRTOOLS_STATS.out.stats.map{it->it[1]} + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat } diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 6a55e15..af2b9d0 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -9,81 +9,114 @@ include { GET_RESTRICTION_FRAGMENTS } from '../../modules/local/hicpro/get_restr workflow PREPARE_GENOME { - take: - fasta - restriction_site + take: + fasta + bwt2_index + bwa_index - main: - ch_versions = Channel.empty() + main: + ch_versions = Channel.empty() - //*************************************** - // Bowtie index - if (params.processing == "hicpro"){ - if(!params.bwt2_index){ - BOWTIE2_BUILD ( - fasta - ) - ch_index = BOWTIE2_BUILD.out.index - ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + // + // Fasta reference genome + // + def genomeName = params.genome ?: fasta.substring(fasta.lastIndexOf(File.separator)+1) + ch_fasta = Channel.fromPath( fasta ) + .ifEmpty { exit 1, "Genome index: Fasta file not found: ${fasta}" } + .map{it->[[id:genomeName],it]} + + // + // Bowtie index + // + if (params.processing == "hicpro"){ + if(!bwt2_index){ + BOWTIE2_BUILD ( + ch_fasta + ) + ch_index = BOWTIE2_BUILD.out.index + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + }else{ + ch_index = Channel.fromPath( bwt2_index , checkIfExists: true) + .map { it -> [[:], it]} + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } + } + } + + // + // Bwa-mem index + // + if (params.processing == "pairtools"){ + if(!bwa_index){ + BWA_INDEX ( + ch_fasta + ) + ch_index = BWA_INDEX.out.index + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + }else{ + ch_index = Channel.fromPath( bwa_index , 
checkIfExists: true) + .map { it -> [[:], it]} + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwa_index}" } + } + } + + // + // Chromosome size + // + if(!params.chromosome_size){ + CUSTOM_GETCHROMSIZES( + ch_fasta + ) + ch_chromsize = CUSTOM_GETCHROMSIZES.out.sizes + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) }else{ - Channel.fromPath( params.bwt2_index , checkIfExists: true) - .map { it -> [[:], it]} - .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } - .set { ch_index } + ch_chromsize = Channel.fromPath( params.chromosome_size , checkIfExists: true) + .map { it -> [[:], it]} } - } - //*************************************** - // Bwa-mem index - if (params.processing == "pairtools"){ - if(!params.bwa_index){ - BWA_INDEX ( - fasta - ) - ch_index = BWA_INDEX.out.index - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + + // + // Digestion parameters + // + if (params.digestion){ + restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false + ch_restriction_site = Channel.value(restriction_site) + ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false + ch_ligation_site = Channel.value(ligation_site) + }else if (params.restriction_site && params.ligation_site){ + ch_restriction_site = Channel.value(params.restriction_site) + ch_ligation_site = Channel.value(params.ligation_site) + }else if (params.no_digestion){ + ch_restriction_site = Channel.empty() + ch_ligation_site = Channel.empty() }else{ - Channel.fromPath( params.bwa_index , checkIfExists: true) - .map { it -> [[:], it]} - .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwa_index}" } - .set { ch_index } + exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase/Micro-C Hi-C, please use '--no_digestion' option" } - } - //*************************************** - // Chromosome size - if(!params.chromosome_size){ - CUSTOM_GETCHROMSIZES( - fasta - ) - ch_chromsize = CUSTOM_GETCHROMSIZES.out.sizes - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) - }else{ - Channel.fromPath( params.chromosome_size , checkIfExists: true) - .map { it -> [[:], it]} - .set {ch_chromsize} - } - //*************************************** - // Restriction fragments - if(!params.restriction_fragments && !params.dnase){ - GET_RESTRICTION_FRAGMENTS( - fasta, - restriction_site - ) - ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results - ch_versions = ch_versions.mix(GET_RESTRICTION_FRAGMENTS.out.versions) - }else if (!params.dnase){ - Channel.fromPath( params.restriction_fragments, checkIfExists: true ) - .map{ it -> [[:], it] } + // + // Restriction fragments + // + if(!params.restriction_fragments && !params.no_digestion){ + GET_RESTRICTION_FRAGMENTS( + ch_fasta, + restriction_site + ) + ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results + ch_versions = ch_versions.mix(GET_RESTRICTION_FRAGMENTS.out.versions) + }else if (!params.no_digestion){ + Channel.fromPath( params.restriction_fragments, checkIfExists: true ) + .map { it -> [[:], it] } .set {ch_resfrag} - }else{ - ch_resfrag = Channel.empty() - } + }else{ + ch_resfrag = Channel.empty() + } - emit: - index = ch_index - chromosome_size = ch_chromsize - res_frag = ch_resfrag - versions = ch_versions + emit: + fasta = ch_fasta + index = ch_index + chromosome_size = ch_chromsize + res_frag = ch_resfrag + restriction_site = ch_restriction_site + ligation_site = ch_ligation_site + versions = ch_versions } diff --git a/subworkflows/local/tads.nf b/subworkflows/local/tads.nf index 31c1e38..f70a01f 100644 --- a/subworkflows/local/tads.nf +++ b/subworkflows/local/tads.nf @@ -3,26 +3,26 @@ include { HIC_FIND_TADS } from '../../modules/local/hicexplorer/hicFindTADs' workflow TADS { - take: - cool + take: + cool - main: - ch_versions = Channel.empty() - ch_tads = Channel.empty() + main: + ch_versions = Channel.empty() + ch_tads = Channel.empty() - if (params.tads_caller =~ 'insulation'){ - COOLTOOLS_INSULATION(cool) - ch_versions = ch_versions.mix(COOLTOOLS_INSULATION.out.versions) - ch_tads = ch_tads.mix(COOLTOOLS_INSULATION.out.tsv) - } - - if (params.tads_caller =~ 'hicexplorer'){ - HIC_FIND_TADS(cool) - ch_versions = ch_versions.mix(HIC_FIND_TADS.out.versions) - ch_tads = ch_tads.mix(HIC_FIND_TADS.out.results) - } + if (params.tads_caller =~ 'insulation'){ + COOLTOOLS_INSULATION(cool) + ch_versions = ch_versions.mix(COOLTOOLS_INSULATION.out.versions) + ch_tads = ch_tads.mix(COOLTOOLS_INSULATION.out.tsv) + } - emit: - tads = ch_tads - versions = ch_versions -} \ No newline at end of file + if (params.tads_caller =~ 'hicexplorer'){ + HIC_FIND_TADS(cool) + ch_versions = ch_versions.mix(HIC_FIND_TADS.out.versions) + ch_tads = ch_tads.mix(HIC_FIND_TADS.out.results) + } + + emit: + tads = ch_tads + versions = ch_versions +} diff --git a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf new file mode 100644 index 0000000..fd92f40 --- /dev/null +++ b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf @@ -0,0 +1,294 @@ +// +// Subworkflow with functionality specific to the nf-core/hic pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .set { ch_input } + if (params.split_fastq) { + ch_input + .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) + .set { ch_input } + } + + ch_input + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .flatMap { it -> setMetaChunk(it) } + .collate(2) + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + 
take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "Bowtie2 (Langmead 2012),", + "BWA-MEM (Li 2013),", + "HiC-Pro (Servant 2015),", + "Pairtools (Open2C 2023),", + "Cooltools (Open2C 2024),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+ // Uncomment function in methodsDescriptionText to render in MultiQC report
+ def reference_text = [
+ "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.</li>",
+ "<li>Langmead, B., Salzberg, S. (2012) Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359. https://doi.org/10.1038/nmeth.1923</li>",
+ "<li>Li, H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v2</li>",
+ "<li>Servant, N., Varoquaux, N., Lajoie, B.R., Viara, E., Chen, CJ., Vert, JP., Heard E., Dekker J., Barillot, E. (2015) HiC-Pro: an optimized and flexible pipeline for Hi-C data processing. Genome Biol 16, 259. https://doi.org/10.1186/s13059-015-0831-x</li>",
+ "<li>Open2C, Abdennur, N., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Venev, SV. (2023). Pairtools: from sequencing data to chromosome contacts. PLoS Comput Biol. 20(5):e1012164. doi: 10.1371/journal.pcbi.1012164</li>",
+ "<li>Open2C, Abdennur, N., Abraham, S., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Oksuz, BA., & Venev, SV. (2024). Cooltools: Enabling High-Resolution Hi-C Analysis in Python. PLoS Comput Biol. 20(5):e1012067. doi: 10.1371/journal.pcbi.1012067</li>",
+ "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +// Set the meta.chunk value in case of technical replicates +def setMetaChunk(row){ + def map = [] + row[1].eachWithIndex() { file, i -> + println row[0] + meta = row[0].clone() + meta.chunk = i + meta.part = row[1].size() + map += [meta, file] + } + return map +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..ac31f28 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..14558c3 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n"
+ summary_section += "    <dl class=\"dl-horizontal\">\n"
+ for (param in group_params.keySet()) {
+ summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n"
+ }
+ summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 0000000..2585b65 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + 
+/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 0000000..3d4a6b0 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 0000000..5784a33 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..7626c1c --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 0000000..60b1cff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy new file mode 100644 index 0000000..1bd58a4 --- /dev/null +++ b/tests/lib/UTILS.groovy @@ -0,0 +1,11 @@ +// Function to remove Nextflow version from pipeline_software_mqc_versions.yml + +class UTILS { + public static String removeNextflowVersion(pipeline_software_mqc_versions) { + def softwareVersions = path(pipeline_software_mqc_versions).yaml + if (softwareVersions.containsKey("Workflow")) { + softwareVersions.Workflow.remove("Nextflow") + } + return softwareVersions + } +} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..5f229ce --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,99 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "PIPELINE" + config "conf/test.config" + + test("Run with profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml")).match("software_versions") }, + { assert snapshot( + workflow.trace.tasks().size(), + // FIXME Not deterministic on CI but works locally + // path("$outputDir/compartments").list(), + path("$outputDir/distance_decay").list(), + path("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mpairstat"), + path("$outputDir/hicpro/stats/SRR4292758/SRR4292758_allValidPairs.mergestat"), + path("$outputDir/hicpro/valid_pairs").list(), + // path("$outputDir/multiqc/multiqc_data").list(), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.gff"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_domains.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_score.bedgraph"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_tad_score.bm"), + path("$outputDir/tads/insulation").list(), + ).match()}, + + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.bw").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.lam.txt").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.vecs.tsv").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_2_fastqc.html").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_0_bwt2pairs.bam").exists() }, + { assert new 
File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mRSstat").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.null.mmapstat").exists() }, + { assert new File("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool").exists() }, + ) + } + } + + test("Should split fastqs") { + + when { + params { + outdir = "$outputDir" + split_fastq = true + fastq_chunks_size = 125000 + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml")).match("split_software_versions") }, + { assert snapshot( + workflow.trace.tasks().size(), + // FIXME Not deterministic on CI but works locally + // path("$outputDir/compartments").list(), + path("$outputDir/distance_decay").list(), + path("$outputDir/hicpro/stats/SRR4292758/SRR4292758_allValidPairs.mergestat"), + // FIXME path("$outputDir/hicpro/valid_pairs").list(), + // path("$outputDir/multiqc/multiqc_data").list(), + // path("$outputDir/multiqc/multiqc_data").list(), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_boundaries.gff"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_domains.bed"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_score.bedgraph"), + path("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_tad_score.bm"), + path("$outputDir/tads/insulation").list(), + ).match()}, + + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.bw").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.lam.txt").exists() }, + { assert new File("$outputDir/compartments/SRR4292758.2000_compartments.cis.vecs.tsv").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/SRR4292758_2_fastqc.html").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_0_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_1_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_2_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/mapping/SRR4292758_3_bwt2pairs.bam").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mpairstat").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.mRSstat").exists() }, + { assert new File("$outputDir/hicpro/stats/SRR4292758/SRR4292758.null.mmapstat").exists() }, + { assert new File("$outputDir/tads/hicExplorer/SRR4292758.1000_balanced_hicfindtads_zscore_matrix.cool").exists() }, + ) + } + } +} diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 0000000..3e7c079 --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "split_software_versions": { + "content": [ + "{BOWTIE2_ALIGN={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRIMMED={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_BUILD={bowtie2=2.5.2}, COMBINE_MATES={python=3.9.12}, COOLER_BALANCE={cooler=0.9.2}, COOLER_CLOAD={cooler=0.9.2}, COOLER_DUMP={cooler=0.9.2}, COOLER_MAKEBINS={cooler=0.9.2}, COOLER_ZOOMIFY={cooler=0.9.2}, COOLTOOLS_EIGSCIS={cooltools=0.5.1}, COOLTOOLS_INSULATION={cooltools=0.5.1}, 
CUSTOM_GETCHROMSIZES={getchromsizes=1.16.1}, FASTQC={fastqc=0.12.1}, GET_RESTRICTION_FRAGMENTS={python=3.9.12}, GET_VALID_INTERACTION={python=3.9.12}, HICPRO2PAIRS={pairix=0.3.7}, HIC_FIND_TADS={hicexplorer=3.7.2}, HIC_PLOT_DIST_VS_COUNTS={hicexplorer=3.7.2}, MERGE_BOWTIE2={samtools=1.15.1}, MERGE_STATS={python=3.9.12}, MERGE_VALID_INTERACTION={sort=8.3}, SPLIT_COOLER_DUMP={cooler=null}, TRIM_READS={gzip=1.1}, Workflow={nf-core/hic=v2.2.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T18:39:34.552071308" + }, + "software_versions": { + "content": [ + "{BOWTIE2_ALIGN={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRIMMED={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_BUILD={bowtie2=2.5.2}, COMBINE_MATES={python=3.9.12}, COOLER_BALANCE={cooler=0.9.2}, COOLER_CLOAD={cooler=0.9.2}, COOLER_DUMP={cooler=0.9.2}, COOLER_MAKEBINS={cooler=0.9.2}, COOLER_ZOOMIFY={cooler=0.9.2}, COOLTOOLS_EIGSCIS={cooltools=0.5.1}, COOLTOOLS_INSULATION={cooltools=0.5.1}, CUSTOM_GETCHROMSIZES={getchromsizes=1.16.1}, FASTQC={fastqc=0.12.1}, GET_RESTRICTION_FRAGMENTS={python=3.9.12}, GET_VALID_INTERACTION={python=3.9.12}, HICPRO2PAIRS={pairix=0.3.7}, HIC_FIND_TADS={hicexplorer=3.7.2}, HIC_PLOT_DIST_VS_COUNTS={hicexplorer=3.7.2}, MERGE_BOWTIE2={samtools=1.15.1}, MERGE_STATS={python=3.9.12}, MERGE_VALID_INTERACTION={sort=8.3}, SPLIT_COOLER_DUMP={cooler=null}, TRIM_READS={gzip=1.1}, Workflow={nf-core/hic=v2.2.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T18:36:56.07112172" + }, + "Run with profile test": { + "content": [ + 36, + [ + "SRR4292758_distcount.png:md5,5314c9ad16a1e857682fde4e584103bc", + "SRR4292758_distcount.txt:md5,bac196d56bcb489c2ef6fe2ea26e07c9" + ], + "SRR4292758.mpairstat:md5,281aa85283e57308187c33a5bad7dc82", + "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8", + [ + "SRR4292758.allValidPairs:md5,55e00a772766779f5d5c2bf2b6fcef6d", + [ + "SRR4292758_contacts.pairs.gz:md5,201d59212cf443fe5da09dbddeef8677", + "SRR4292758_contacts.pairs.gz.px2:md5,6275247d930753121aa026870864aa25" + ] + ], + "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", + "SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", + [ + "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T18:36:56.085219308" + }, + "Should split fastqs": { + "content": [ + 69, + [ + "SRR4292758_distcount.png:md5,5314c9ad16a1e857682fde4e584103bc", + "SRR4292758_distcount.txt:md5,bac196d56bcb489c2ef6fe2ea26e07c9" + ], + "SRR4292758_allValidPairs.mergestat:md5,b0a81a4d69ea8e44495a714ac87701e8", + "SRR4292758.1000_balanced_hicfindtads_boundaries.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_boundaries.gff:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_domains.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "SRR4292758.1000_balanced_hicfindtads_score.bedgraph:md5,da38c91d9b23820e290d2c987f27ab70", + 
"SRR4292758.1000_balanced_hicfindtads_tad_score.bm:md5,81b33e5cd3ff14efac94c2065ac68fe1", + [ + "SRR4292758.1000_balanced_insulation.tsv:md5,fd0a37e94600e9da4c8de0c58f07e3ed" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T18:39:34.716731734" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..d75ad44 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,27 @@ +params { + // Base directory for nf-core/modules test data + modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' + + // Base directory for nf-core/fetchngs test data + pipelines_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/pipelines/fetchngs/1.15.0/' +} + +// Impose sensible resource limits for testing +process { + withName: '.*' { + cpus = 2 + memory = 3.GB + time = 2.h + } +} + +// Impose same minimum Nextflow version as the pipeline for testing +manifest { + nextflowVersion = '!>=23.04.0' +} + +// Disable all Nextflow reporting options +timeline { enabled = false } +report { enabled = false } +trace { enabled = false } +dag { enabled = false } diff --git a/workflows/hic.nf b/workflows/hic.nf index 6c8c252..7ec3e83 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -1,42 +1,25 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowHic.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input ] -checkPathParamList = [ - params.input, params.multiqc_config, - params.fasta, params.bwt2_index -] - -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -//***************************************** -// Digestion parameters -if (params.digestion){ - restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false - ch_restriction_site = Channel.value(restriction_site) - ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false - ch_ligation_site = Channel.value(ligation_site) -}else if (params.restriction_site && params.ligation_site){ - ch_restriction_site = Channel.value(params.restriction_site) - ch_ligation_site = Channel.value(params.ligation_site) -}else if (params.dnase){ - ch_restriction_site = Channel.empty() - ch_ligation_site = Channel.empty() -}else{ - exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase Hi-C, please use '--dnase' option" -} +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_hic_pipeline' + +// MODULE: Local to the pipeline +include { HIC_PLOT_DIST_VS_COUNTS } from '../modules/local/hicexplorer/hicPlotDistVsCounts' + +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +include { HICPRO } from '../subworkflows/local/hicpro' +include { PAIRTOOLS } from '../subworkflows/local/pairtools' +include { COOLER } from '../subworkflows/local/cooler' +include { COMPARTMENTS } from '../subworkflows/local/compartments' +include { TADS } from '../subworkflows/local/tads' //**************************************** // Combine all maps resolution for downstream analysis @@ -44,267 +27,221 @@ if (params.digestion){ ch_map_res = Channel.from( params.bin_size ).splitCsv().flatten().toInteger() if (params.res_zoomify){ - ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger() - ch_map_res = ch_map_res.concat(ch_zoom_res) + ch_zoom_res = Channel.from( params.res_zoomify ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_zoom_res) } if (params.res_tads && !params.skip_tads){ - ch_tads_res = Channel.from( "${params.res_tads}" ).splitCsv().flatten().toInteger() - ch_map_res = ch_map_res.concat(ch_tads_res) + ch_tads_res = Channel.from( "${params.res_tads}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_tads_res) }else{ - ch_tads_res=Channel.empty() - if (!params.skip_tads){ - log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" - } + ch_tads_res=Channel.empty() + if (!params.skip_tads){ + log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" + } } if (params.res_dist_decay && !params.skip_dist_decay){ - ch_ddecay_res = Channel.from( "${params.res_dist_decay}" ).splitCsv().flatten().toInteger() - ch_map_res = ch_map_res.concat(ch_ddecay_res) + ch_ddecay_res = Channel.from( "${params.res_dist_decay}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_ddecay_res) }else{ - ch_ddecay_res = Channel.empty() - if (!params.skip_dist_decay){ - log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" - } + ch_ddecay_res = Channel.empty() + if (!params.skip_dist_decay){ + log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" + } } if (params.res_compartments && !params.skip_compartments){ - ch_comp_res = Channel.from( "${params.res_compartments}" ).splitCsv().flatten().toInteger() - ch_map_res = ch_map_res.concat(ch_comp_res) + ch_comp_res = Channel.from( "${params.res_compartments}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_comp_res) }else{ - ch_comp_res = Channel.empty() - if (!params.skip_compartments){ - log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. See --res_compartments" - } + ch_comp_res = Channel.empty() + if (!params.skip_compartments){ + log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. 
See --res_compartments" + } } ch_map_res = ch_map_res.unique() -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Local to the pipeline -// -include { HIC_PLOT_DIST_VS_COUNTS } from '../modules/local/hicexplorer/hicPlotDistVsCounts' -include { MULTIQC } from '../modules/local/multiqc' - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { HICPRO } from '../subworkflows/local/hicpro' -include { PAIRTOOLS } from '../subworkflows/local/pairtools' -include { COOLER } from '../subworkflows/local/cooler' -include { COMPARTMENTS } from '../subworkflows/local/compartments' -include { TADS } from '../subworkflows/local/tads' +workflow HIC { -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + take: + ch_samplesheet // channel: samplesheet read in from --input + ch_fasta + ch_index + ch_chromosome_size + ch_res_frag + ch_restriction_site + ch_ligation_site -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + main: -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CHANNELS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() -def genomeName = params.genome ?: params.fasta.substring(params.fasta.lastIndexOf(File.separator)+1) -Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } - .map{it->[[id:genomeName],it]} - .set { ch_fasta } + // + // MODULE: Run FastQC + // + FASTQC ( + ch_samplesheet + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + // + // SUB-WORFLOW: HiC-Pro + // + if (params.processing == 'hicpro'){ + HICPRO ( + ch_samplesheet, + ch_fasta, + ch_index, + ch_res_frag, + ch_chromosome_size, + ch_ligation_site, + ch_map_res + ) + ch_versions = ch_versions.mix(HICPRO.out.versions) + ch_pairs = HICPRO.out.pairs + ch_process_mqc = HICPRO.out.mqc + }else if (params.processing == 'pairtools'){ + PAIRTOOLS( + ch_samplesheet, + 
ch_fasta, + ch_index, + ch_res_frag, + ch_chromosome_size + ) + ch_versions = ch_versions.mix(PAIRTOOLS.out.versions) + ch_pairs = PAIRTOOLS.out.pairs + ch_process_mqc = PAIRTOOLS.out.stats + } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + // + // SUB-WORKFLOW: COOLER + // + COOLER ( + ch_pairs, + ch_chromosome_size, + ch_map_res + ) + ch_versions = ch_versions.mix(COOLER.out.versions) + + // + // MODULE: HICEXPLORER/HIC_PLOT_DIST_VS_COUNTS + // + if (!params.skip_dist_decay){ + COOLER.out.cool + .combine(ch_ddecay_res) + .filter{ it[0].resolution == it[2] } + .map { it -> [it[0], it[1]]} + .set{ ch_distdecay } + + HIC_PLOT_DIST_VS_COUNTS( + ch_distdecay + ) + ch_versions = ch_versions.mix(HIC_PLOT_DIST_VS_COUNTS.out.versions) + } -// Info required for completion email and summary -def multiqc_report = [] + // + // SUB-WORKFLOW: COMPARTMENT CALLING + // + if (!params.skip_compartments){ + COOLER.out.cool + .combine(ch_comp_res) + .filter{ it[0].resolution == it[2] } + .map { it -> [it[0], it[1], it[2]]} + .set{ ch_cool_compartments } + + COMPARTMENTS ( + ch_cool_compartments, + ch_fasta, + ch_chromosome_size + ) + ch_versions = ch_versions.mix(COMPARTMENTS.out.versions) + } -workflow HIC { + // + // SUB-WORKFLOW : TADS CALLING + // + if (!params.skip_tads){ + COOLER.out.cool + .combine(ch_tads_res) + .filter{ it[0].resolution == it[2] } + .map { it -> [it[0], it[1]]} + .set{ ch_cool_tads } + + TADS( + ch_cool_tads + ) + ch_versions = ch_versions.mix(TADS.out.versions) + } - ch_versions = Channel.empty() + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
+ file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) + if (params.processing == 'hicpro'){ + ch_multiqc_files = ch_multiqc_files.mix(HICPRO.out.mqc) + } - // - // SUBWORKFLOW: Prepare genome annotation - // - PREPARE_GENOME( - ch_fasta, - ch_restriction_site - ) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions) - - // - // SUB-WORFLOW: HiC-Pro - // - if (params.processing == 'hicpro'){ - HICPRO ( - INPUT_CHECK.out.reads, - PREPARE_GENOME.out.index, - PREPARE_GENOME.out.res_frag, - PREPARE_GENOME.out.chromosome_size, - ch_ligation_site, - ch_map_res - ) - ch_versions = ch_versions.mix(HICPRO.out.versions) - ch_pairs = HICPRO.out.pairs - ch_process_mqc = HICPRO.out.mqc - }else if (params.processing == 'pairtools'){ - PAIRTOOLS( - INPUT_CHECK.out.reads, - PREPARE_GENOME.out.index, - PREPARE_GENOME.out.res_frag, - PREPARE_GENOME.out.chromosome_size - ) - ch_versions = ch_versions.mix(PAIRTOOLS.out.versions) - ch_pairs = PAIRTOOLS.out.pairs - ch_process_mqc = PAIRTOOLS.out.stats - } - - // - // SUB-WORKFLOW: COOLER - // - COOLER ( - ch_pairs, - PREPARE_GENOME.out.chromosome_size, - ch_map_res - ) - ch_versions = ch_versions.mix(COOLER.out.versions) - - // - // MODULE: HICEXPLORER/HIC_PLOT_DIST_VS_COUNTS - // - if (!params.skip_dist_decay){ - COOLER.out.cool - .combine(ch_ddecay_res) - .filter{ it[0].resolution == it[2] } - .map { it -> [it[0], it[1]]} - .set{ ch_distdecay } - - HIC_PLOT_DIST_VS_COUNTS( - ch_distdecay - ) - ch_versions = ch_versions.mix(HIC_PLOT_DIST_VS_COUNTS.out.versions) - } - - // - // SUB-WORKFLOW: COMPARTMENT CALLING - // - if (!params.skip_compartments){ - COOLER.out.cool - .combine(ch_comp_res) - .filter{ it[0].resolution == it[2] } - .map { it -> [it[0], it[1], it[2]]} - .set{ ch_cool_compartments } - - COMPARTMENTS ( - ch_cool_compartments, - ch_fasta, - PREPARE_GENOME.out.chromosome_size - ) - ch_versions = ch_versions.mix(COMPARTMENTS.out.versions) - } - - // - // SUB-WORKFLOW : TADS CALLING - // - if (!params.skip_tads){ - COOLER.out.cool - .combine(ch_tads_res) - .filter{ it[0].resolution == it[2] } - .map { it -> [it[0], it[1]]} - .set{ ch_cool_tads } - - TADS( - ch_cool_tads + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + Channel.value([]), + Channel.value([]) ) - ch_versions = ch_versions.mix(TADS.out.versions) - } - - // - // SOFTWARE VERSION - // - CUSTOM_DUMPSOFTWAREVERSIONS( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowHic.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) - 
ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.map{it->it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(ch_process_mqc) - - MULTIQC ( - ch_multiqc_config, - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), - FASTQC.out.zip.map{it->it[1]}, - ch_process_mqc.collect() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /*
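The refactored `workflows/hic.nf` above pairs each cooler matrix with the analysis resolutions requested on the command line using a `combine`/`filter` idiom. A self-contained sketch of that pattern, with made-up sample names, resolutions, and file names purely for illustration:

```groovy
// Standalone sketch of the resolution-matching idiom used in the HIC workflow.
// Sample IDs, file names and resolutions are invented for illustration only.
workflow {
    ch_ddecay_res = Channel.of( 1000 )    // e.g. --res_dist_decay 1000

    // COOLER.out.cool emits tuples shaped like [ meta, cool ],
    // where meta.resolution records the bin size of the matrix
    ch_cool = Channel.of(
        [ [id:'sample1', resolution:1000], file('sample1.1000.cool') ],
        [ [id:'sample1', resolution:2000], file('sample1.2000.cool') ]
    )

    ch_cool
        .combine(ch_ddecay_res)                  // -> [ meta, cool, res ] for every pairing
        .filter { it[0].resolution == it[2] }    // keep matrices built at a requested resolution
        .map    { it -> [ it[0], it[1] ] }       // back to [ meta, cool ]
        .view()                                  // [ [id:sample1, resolution:1000], sample1.1000.cool ]
}
```

The same pattern is reused for compartment calling (`ch_comp_res`) and TAD calling (`ch_tads_res`), so each downstream step only receives matrices at the resolutions it was asked to analyse.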
[Truncated snapshot residue: a flattened software-versions table (CUSTOM_DUMPSOFTWAREVERSIONS: python 3.11.7, yaml 5.4.1; TOOL1: tool1 0.11.9; TOOL2: tool2 1.9; Workflow: Nextflow) followed by repeated "File type: Conventional base calls" rows.]