Skip to content

Commit

Permalink
Merge pull request #102 from nextstrain/case-counts-workflow
Browse files Browse the repository at this point in the history
Refactor case counts workflow
  • Loading branch information
joverlee521 authored Jun 7, 2024
2 parents 7c402ee + e9c5772 commit 56f7740
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 37 deletions.
64 changes: 27 additions & 37 deletions .github/workflows/update-ncov-case-counts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,36 @@ on:
required: false

jobs:
case_counts:
set_s3_dst:
runs-on: ubuntu-latest
env:
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
SLACK_CHANNELS: ${{ github.event.inputs.slack_channel || 'nextstrain-counts-updates' }}
defaults:
run:
# Login shell is required to include changes by conda init bash.
shell: bash -l -eo pipefail {0}
steps:
- uses: actions/checkout@v4
- uses: conda-incubator/setup-miniconda@v3
with:
python-version: "3.9"
miniforge-variant: Mambaforge
channels: conda-forge,bioconda
- id: s3_dst
run: |
S3_DST=s3://nextstrain-data/files/workflows/forecasts-ncov
- name: setup
run: mamba install "csvtk>=0.23.0"
if [[ "$TRIAL_NAME" ]]; then
S3_DST+=/trial/"$TRIAL_NAME"
fi
- name: download case counts
run: |
./ingest/bin/fetch-ncov-global-case-counts > global_case_counts.tsv
echo "s3_dst=$S3_DST" >> "$GITHUB_OUTPUT"
env:
TRIAL_NAME: ${{ inputs.trial_name }}
outputs:
s3_dst: ${{ steps.s3_dst.outputs.s3_dst }}

- name: upload to S3
case_counts:
needs: [set_s3_dst]
permissions:
id-token: write
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
secrets: inherit
with:
runtime: docker
run: |
S3_DST=s3://nextstrain-data/files/workflows/forecasts-ncov/cases
CLOUDFRONT_DOMAIN="data.nextstrain.org"
if [[ "$TRIAL_NAME" ]]; then
S3_DST+=/trial/"$TRIAL_NAME"
fi
./ingest/vendored/upload-to-s3 global_case_counts.tsv "$S3_DST"/global.tsv.gz $CLOUDFRONT_DOMAIN
env:
AWS_DEFAULT_REGION: ${{ vars.AWS_DEFAULT_REGION }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
TRIAL_NAME: ${{ github.event.inputs.trial_name }}

- name: notify_pipeline_failed
if: ${{ failure() }}
run: ./ingest/vendored/notify-on-job-fail "Case counts ingest" "nextstrain/forecasts-ncov"
nextstrain build \
ingest \
upload_all_case_counts \
--config s3_dst="$S3_DST"
env: |
SLACK_CHANNELS: ${{ inputs.slack_channel || vars.SLACK_CHANNELS }}
S3_DST: ${{ needs.set_s3_dst.outputs.s3_dst }}
6 changes: 6 additions & 0 deletions ingest/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,10 @@ rule upload_all_sequence_counts:
)


# Aggregate target: succeeds once the global case counts have been uploaded.
# The flag file it depends on is produced by rule upload_global_case_counts
# (included from rules/case_counts.smk).
rule upload_all_case_counts:
    input:
        "results/upload_global_case_counts.done"


include: "rules/sequence_counts.smk"
include: "rules/case_counts.smk"
31 changes: 31 additions & 0 deletions ingest/rules/case_counts.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
This part of the workflow summarizes SARS-CoV-2 case counts from public
external data sources (e.g. Our World in Data) and uploads them to AWS S3 for
downstream use by the modeling workflow.
"""


# Fetch global SARS-CoV-2 case counts and write them to a local TSV.
# NOTE(review): assumes ./bin/fetch-ncov-global-case-counts is executable
# relative to the workflow's working directory (the ingest/ dir) — confirm.
rule fetch_global_case_counts:
    output:
        global_case_counts = "data/global_case_counts.tsv",
    shell:
        """
        ./bin/fetch-ncov-global-case-counts > {output.global_case_counts}
        """


# Upload the fetched global case counts to S3 via the vendored upload-to-s3
# script, teeing its output into a flag file that downstream targets depend on.
# Fix: quote ALL interpolated values with Snakemake's `:q` modifier — the
# original quoted only cloudfront_domain, leaving {params.s3_dst} and the
# input path unquoted, which breaks if either contains spaces or other
# shell-special characters.
# NOTE(review): the `2>&1 | tee` relies on Snakemake's strict-mode shell
# (`set -euo pipefail`) to propagate upload-to-s3 failures — confirm the
# Snakemake version in use enables pipefail by default.
rule upload_global_case_counts:
    input:
        global_case_counts = "data/global_case_counts.tsv",
    output:
        upload_flag = "results/upload_global_case_counts.done",
    params:
        # Destination prefix and CDN domain come from workflow config
        # (s3_dst is overridable, e.g. for trial runs; see the caller).
        s3_dst = config["s3_dst"],
        cloudfront_domain = config["cloudfront_domain"],
    shell:
        """
        ./vendored/upload-to-s3 \
            {input.global_case_counts:q} \
            {params.s3_dst:q}/cases/global.tsv.gz \
            {params.cloudfront_domain:q} 2>&1 | tee {output.upload_flag}
        """

0 comments on commit 56f7740

Please sign in to comment.