Skip to content

Stream 957/adhoc gha #1

Stream 957/adhoc gha

Stream 957/adhoc gha #1

# Manually triggered backfill of the onchain score models over a date window.
name: dbt_run_scores_backfill
run-name: dbt_run_scores_backfill
on:
  workflow_dispatch:
    inputs:
      # Selects the deployment environment used by the dbt job
      # (referenced as workflow_<environment>).
      environment:
        type: choice
        description: DBT Run Environment
        required: true
        options:
          - dev
          - prod
        default: dev
      # Exposed to the run as the WAREHOUSE environment variable.
      warehouse:
        type: choice
        description: Snowflake warehouse
        required: true
        options:
          - DBT
          - DATA_SCIENCE
        default: DATA_SCIENCE
      # Suffix of the model to run: onchain_scores__<chain>.
      chain:
        type: string
        description: 'Chain to run the score backfill on'
        required: true
      # First day of the backfill window (inclusive).
      start_date:
        type: string
        description: 'Start date (YYYY-MM-DD)'
        required: true
      # Last day of the backfill window (inclusive).
      end_date:
        type: string
        description: 'End date (YYYY-MM-DD)'
        required: true
      # The window is cut into chunks of this many days; each chunk becomes
      # one matrix job.
      days_per_range:
        type: number
        description: 'Number of days per range'
        required: true
        default: 5
      # When true, the per-day dbt runs inside one chunk are started in the
      # background instead of one after another.
      enable_concurrency:
        type: boolean
        description: 'Enable intra range concurrent dbt runs'
        required: true
        default: false
# Workflow-wide environment shared by every job and step.
env:
  # dbt tooling configuration
  DBT_VERSION: "${{ vars.DBT_VERSION }}"
  DBT_PROFILES_DIR: "${{ vars.DBT_PROFILES_DIR }}"
  USE_VARS: "${{ vars.USE_VARS }}"
  # Connection settings (the warehouse comes from the dispatch input, the
  # password from secrets, everything else from configuration variables)
  ACCOUNT: "${{ vars.ACCOUNT }}"
  REGION: "${{ vars.REGION }}"
  USER: "${{ vars.USER }}"
  PASSWORD: "${{ secrets.PASSWORD }}"
  ROLE: "${{ vars.ROLE }}"
  WAREHOUSE: "${{ inputs.warehouse }}"
  DATABASE: "${{ vars.DATABASE }}"
  SCHEMA: "${{ vars.SCHEMA }}"
# All runs of this workflow share a single concurrency group keyed on the
# workflow name.
concurrency:
  group: "${{ github.workflow }}"
jobs:
  # Cuts the requested [start_date, end_date] window into consecutive chunks
  # of at most days_per_range days and publishes them as a JSON array of
  # "start:end" strings, consumed as the matrix of run_dbt_jobs.
  generate_date_ranges:
    runs-on: ubuntu-latest
    outputs:
      date_ranges: ${{ steps.set-matrix.outputs.date_ranges }}
    steps:
      - name: Generate date ranges
        id: set-matrix
        shell: bash
        # Inputs reach the script through the environment instead of being
        # pasted into the script text, so their content cannot change the
        # commands that are executed.
        env:
          START_DATE: "${{ inputs.start_date }}"
          END_DATE: "${{ inputs.end_date }}"
          DAYS_PER_RANGE: "${{ inputs.days_per_range }}"
        run: |
          # A chunk size below 1 would keep the loop below from ever
          # advancing, so reject it before doing anything else.
          if ! [[ "$DAYS_PER_RANGE" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::days_per_range must be a positive integer, got '$DAYS_PER_RANGE'"
            exit 1
          fi

          # Normalise both bounds to YYYY-MM-DD; fail loudly on bad dates
          # instead of emitting an empty matrix.
          if ! first_day=$(date -I -d "$START_DATE"); then
            echo "::error::start_date '$START_DATE' is not a valid date"
            exit 1
          fi
          if ! last_day=$(date -I -d "$END_DATE"); then
            echo "::error::end_date '$END_DATE' is not a valid date"
            exit 1
          fi
          end_ts=$(date -d "$last_day" +%s)
          if [[ $(date -d "$first_day" +%s) -gt $end_ts ]]; then
            echo "::error::start_date ($first_day) is after end_date ($last_day)"
            exit 1
          fi

          # Walk the window chunk by chunk; the final chunk is clamped to
          # the end date.
          ranges=()
          current_start=$first_day
          while [[ $(date -d "$current_start" +%s) -le $end_ts ]]; do
            range_end=$(date -I -d "$current_start + $((DAYS_PER_RANGE - 1)) days")
            if [[ $(date -d "$range_end" +%s) -gt $end_ts ]]; then
              range_end=$last_day
            fi
            ranges+=("$current_start:$range_end")
            current_start=$(date -I -d "$range_end + 1 day")
          done

          # A matrix may expand to at most 256 jobs per workflow run.
          if (( ${#ranges[@]} > 256 )); then
            echo "::error::${#ranges[@]} date ranges exceed the 256-job matrix limit; increase days_per_range"
            exit 1
          fi

          # Serialise the array as JSON: ["start:end","start:end",...]
          json_ranges=$(printf '"%s",' "${ranges[@]}")
          json_ranges="[${json_ranges%,}]"
          echo "date_ranges=$json_ranges" >> "$GITHUB_OUTPUT"

  # Runs the score model once per day of each date chunk; one matrix job is
  # created per chunk produced by generate_date_ranges.
  run_dbt_jobs:
    needs: generate_date_ranges
    runs-on: ubuntu-latest
    environment:
      name: workflow_${{ inputs.environment }}
    strategy:
      matrix:
        date_range: ${{ fromJson(needs.generate_date_ranges.outputs.date_ranges) }}
    steps:
      - uses: actions/checkout@v3
      # NOTE(review): setup-python@v1 and Python 3.7 look outdated; confirm
      # they still resolve on ubuntu-latest before relying on this job.
      - uses: actions/setup-python@v1
        with:
          python-version: "3.7.x"
      - name: install dependencies
        run: |
          pip3 install dbt-snowflake==${{ vars.DBT_VERSION }} cli_passthrough requests click
          dbt deps
      - name: Run DBT Jobs
        # Values are handed over via the environment rather than expanded
        # into the script text.
        env:
          DATE_RANGE: "${{ matrix.date_range }}"
          CHAIN: "${{ inputs.chain }}"
          ENABLE_CONCURRENCY: "${{ inputs.enable_concurrency }}"
        run: |
          # Split the "start:end" matrix entry into its two dates
          IFS=':' read -r start_date end_date <<< "$DATE_RANGE"
          model_selector="onchain_scores__${CHAIN},version:latest"

          # PIDs of background dbt runs (only filled in concurrent mode)
          pids=()

          # Loop over each date in the range
          while [[ $(date -d "$start_date" +%s) -le $(date -d "$end_date" +%s) ]]; do
            echo "Running dbt job for $start_date"
            dbt_vars="{\"current_date_var\": \"$start_date\"}"
            if [[ "$ENABLE_CONCURRENCY" == "true" ]]; then
              dbt run -s "$model_selector" --vars "$dbt_vars" &
              pids+=("$!")
            else
              dbt run -s "$model_selector" --vars "$dbt_vars"
            fi
            start_date=$(date -I -d "$start_date + 1 day")
          done

          # A bare `wait` always reports success, which would hide failed
          # background runs; wait on each PID and remember any failure.
          failed=0
          for pid in "${pids[@]}"; do
            if ! wait "$pid"; then
              failed=1
            fi
          done
          if (( failed )); then
            echo "::error::one or more concurrent dbt runs failed"
            exit 1
          fi