# Stream 957/adhoc gha #1
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name, and the title shown for each individual run.
# Both are the same static string; the run title does not vary with inputs.
name: dbt_run_scores_backfill
run-name: dbt_run_scores_backfill
# Manual trigger only: every parameter of the backfill is supplied by the
# person dispatching the workflow.
on:
  workflow_dispatch:
    inputs:
      # Selects the GitHub environment `workflow_<environment>` that the dbt
      # job runs in.
      environment:
        type: choice
        description: DBT Run Environment
        required: true
        options:
          - dev
          - prod
        default: dev
      # Exported to the jobs as the WAREHOUSE environment variable.
      warehouse:
        type: choice
        description: Snowflake warehouse
        required: true
        options:
          - DBT
          - DATA_SCIENCE
        default: DATA_SCIENCE
      # Used as the suffix of the selected dbt model:
      # onchain_scores__<chain>.
      chain:
        type: string
        description: 'Chain to run the score backfill on'
        required: true
      # Inclusive first day of the backfill window.
      start_date:
        type: string
        description: 'Start date (YYYY-MM-DD)'
        required: true
      # Inclusive last day of the backfill window.
      end_date:
        type: string
        description: 'End date (YYYY-MM-DD)'
        required: true
      # Size, in days, of each chunk of the window; every chunk becomes one
      # matrix job.
      days_per_range:
        type: number
        description: 'Number of days per range'
        required: true
        default: 5
      # When true, the per-day dbt runs inside one chunk are started in the
      # background instead of one after another.
      enable_concurrency:
        type: boolean
        description: 'Enable intra range concurrent dbt runs'
        required: true
        default: false
# Workflow-level environment shared by every job and step.
# Values come from repository/environment variables, except PASSWORD (a
# secret) and WAREHOUSE (chosen at dispatch time).
# NOTE(review): presumably these are read by the dbt profile to build the
# Snowflake connection - confirm against the profiles.yml in DBT_PROFILES_DIR.
env:
  USE_VARS: "${{ vars.USE_VARS }}"
  DBT_PROFILES_DIR: "${{ vars.DBT_PROFILES_DIR }}"
  DBT_VERSION: "${{ vars.DBT_VERSION }}"
  ACCOUNT: "${{ vars.ACCOUNT }}"
  ROLE: "${{ vars.ROLE }}"
  USER: "${{ vars.USER }}"
  PASSWORD: "${{ secrets.PASSWORD }}"
  REGION: "${{ vars.REGION }}"
  DATABASE: "${{ vars.DATABASE }}"
  WAREHOUSE: "${{ inputs.warehouse }}"
  SCHEMA: "${{ vars.SCHEMA }}"
# All runs of this workflow share one concurrency group (keyed on the workflow
# name), so GitHub does not execute two backfill runs at the same time.
concurrency:
  group: "${{ github.workflow }}"
jobs:
  # Splits the inclusive window [start_date, end_date] into consecutive chunks
  # of at most `days_per_range` days and publishes them as a JSON array of
  # "YYYY-MM-DD:YYYY-MM-DD" strings for the matrix of the next job.
  generate_date_ranges:
    runs-on: ubuntu-latest
    outputs:
      date_ranges: ${{ steps.set-matrix.outputs.date_ranges }}
    steps:
      - name: Generate date ranges
        id: set-matrix
        shell: bash
        # User-supplied inputs are handed to the script through environment
        # variables instead of being spliced into the script text, so they
        # cannot be interpreted as shell code.
        env:
          START_DATE: "${{ inputs.start_date }}"
          END_DATE: "${{ inputs.end_date }}"
          DAYS_PER_RANGE: "${{ inputs.days_per_range }}"
        run: |
          # Reject anything that is not a real calendar date in YYYY-MM-DD form.
          date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'
          for value in "$START_DATE" "$END_DATE"; do
            if [[ ! "$value" =~ $date_re ]] || ! date -d "$value" > /dev/null 2>&1; then
              echo "::error::Invalid date '$value'; expected YYYY-MM-DD"
              exit 1
            fi
          done

          # A chunk size below 1 would never advance the loop below.
          if [[ ! "$DAYS_PER_RANGE" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::days_per_range must be a positive integer, got '$DAYS_PER_RANGE'"
            exit 1
          fi

          # An inverted window would produce an empty matrix, which fails
          # later with a much less helpful message.
          end_epoch=$(date -d "$END_DATE" +%s)
          if [[ $(date -d "$START_DATE" +%s) -gt $end_epoch ]]; then
            echo "::error::start_date ($START_DATE) is after end_date ($END_DATE)"
            exit 1
          fi

          ranges=()
          current_start="$START_DATE"
          while [[ $(date -d "$current_start" +%s) -le $end_epoch ]]; do
            range_end=$(date -I -d "$current_start + $((DAYS_PER_RANGE - 1)) days")
            # Clamp the final chunk so it never runs past end_date.
            if [[ $(date -d "$range_end" +%s) -gt $end_epoch ]]; then
              range_end="$END_DATE"
            fi
            ranges+=("$current_start:$range_end")
            current_start=$(date -I -d "$range_end + 1 day")
          done

          # GitHub caps a matrix at 256 jobs per workflow run; fail early with
          # a clear message instead of at matrix expansion time.
          if (( ${#ranges[@]} > 256 )); then
            echo "::error::${#ranges[@]} date ranges exceed the 256-job matrix limit; increase days_per_range"
            exit 1
          fi

          # Build the JSON array by hand. This is safe because every element
          # consists only of validated dates and a colon.
          json_ranges=$(printf '"%s",' "${ranges[@]}")
          json_ranges="[${json_ranges%,}]"

          # Values written to $GITHUB_OUTPUT are taken verbatim, so no
          # percent-escaping is applied.
          echo "date_ranges=$json_ranges" >> "$GITHUB_OUTPUT"

  # Runs the score model once per day for each date range, one matrix job per
  # range.
  run_dbt_jobs:
    needs: generate_date_ranges
    runs-on: ubuntu-latest
    environment:
      name: workflow_${{ inputs.environment }}
    strategy:
      matrix:
        date_range: ${{ fromJson(needs.generate_date_ranges.outputs.date_ranges) }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # NOTE(review): Python 3.7 is end-of-life and may not be available
          # for the image behind ubuntu-latest - confirm, and pick a version
          # supported by the dbt release in vars.DBT_VERSION.
          python-version: "3.7.x"
      - name: install dependencies
        run: |
          pip3 install dbt-snowflake==${{ vars.DBT_VERSION }} cli_passthrough requests click
          dbt deps
      - name: Run DBT Jobs
        # Inputs reach the script as environment variables, never as script
        # text, so a crafted `chain` value cannot inject shell commands.
        env:
          DATE_RANGE: "${{ matrix.date_range }}"
          CHAIN: "${{ inputs.chain }}"
          ENABLE_CONCURRENCY: "${{ inputs.enable_concurrency }}"
        run: |
          # Split "start:end" into its two dates.
          IFS=':' read -r range_start range_end <<< "$DATE_RANGE"

          background_pids=()
          current_date="$range_start"

          # One dbt invocation per calendar day in the range (inclusive).
          while [[ $(date -d "$current_date" +%s) -le $(date -d "$range_end" +%s) ]]; do
            echo "Running dbt job for $current_date"
            dbt_vars="{\"current_date_var\": \"$current_date\"}"
            if [[ "$ENABLE_CONCURRENCY" == "true" ]]; then
              dbt run -s "onchain_scores__${CHAIN},version:latest" --vars "$dbt_vars" &
              background_pids+=("$!")
            else
              dbt run -s "onchain_scores__${CHAIN},version:latest" --vars "$dbt_vars"
            fi
            current_date=$(date -I -d "$current_date + 1 day")
          done

          # A bare `wait` always returns 0 and would hide failed runs, so each
          # background run is waited on individually and any failure fails
          # the step.
          failed=0
          for pid in "${background_pids[@]}"; do
            if ! wait "$pid"; then
              echo "::error::Background dbt run (pid $pid) failed"
              failed=1
            fi
          done
          exit "$failed"