# Feathr Scala Tests And Azure E2E Integration #355
# Viewer notice: this file contains bidirectional Unicode text that may be interpreted or compiled differently than it appears.
# To review it, open the file in an editor that reveals hidden Unicode characters.
name: Feathr Scala Tests And Azure E2E Integration

# Triggers. Documentation, UI and README-only changes never start the workflow.
on:
  # Direct pushes to main.
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - 'ui/**'
      - '**/README.md'
  # Pull requests targeting main.
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - 'ui/**'
      - '**/README.md'
  # Same pull requests through pull_request_target; each job's `if` only lets
  # this event run when the PR carries the 'safe to test' label.
  pull_request_target:
    branches:
      - main
    types:
      - labeled
      - edited
      - opened
      - synchronize
    paths-ignore:
      - 'docs/**'
      - 'ui/**'
      - '**/README.md'
  schedule:
    # Runs daily at 1 PM UTC (9 PM CST), will send notification to TEAMS_WEBHOOK
    - cron: '00 13 * * *'

jobs:
sbt_test:
  # Runs the Scala test suite with sbt on JDK 8.
  runs-on: ubuntu-latest
  # Always run for schedule / push / pull_request events; a pull_request_target
  # event runs only when the PR is labeled 'safe to test'.
  if: >-
    github.event_name == 'schedule' ||
    github.event_name == 'push' ||
    github.event_name == 'pull_request' ||
    (github.event_name == 'pull_request_target' &&
    contains(github.event.pull_request.labels.*.name, 'safe to test'))
  steps:
    # Check out the PR head commit; the expression is empty for non-PR events.
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    - name: Set up JDK 8
      uses: actions/setup-java@v2
      with:
        java-version: '8'
        distribution: temurin
    - name: Run tests
      run: |
        sbt clean && sbt test
python_lint:
  # Lints the Python sources with flake8.
  runs-on: ubuntu-latest
  # Always run for schedule / push / pull_request events; a pull_request_target
  # event runs only when the PR is labeled 'safe to test'.
  if: >-
    github.event_name == 'schedule' ||
    github.event_name == 'push' ||
    github.event_name == 'pull_request' ||
    (github.event_name == 'pull_request_target' &&
    contains(github.event.pull_request.labels.*.name, 'safe to test'))
  steps:
    # The repository must be checked out first: without it `flake8 .` scans an
    # empty workspace and requirements.txt is never found. Uses the same ref
    # as the other jobs in this workflow.
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is always read as a string, never a float.
        python-version: "3.8"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install flake8
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
databricks_test:
  # Builds the Feathr assembly jar, uploads it to DBFS and runs the Python
  # test suite with the Spark cluster set to Databricks.
  runs-on: ubuntu-latest
  # Always run for schedule / push / pull_request events; a pull_request_target
  # event runs only when the PR is labeled 'safe to test'.
  if: >-
    github.event_name == 'schedule' ||
    github.event_name == 'push' ||
    github.event_name == 'pull_request' ||
    (github.event_name == 'pull_request_target' &&
    contains(github.event.pull_request.labels.*.name, 'safe to test'))
  steps:
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    - name: Set up JDK 8
      uses: actions/setup-java@v2
      with:
        java-version: "8"
        distribution: "temurin"
    - name: Build JAR
      run: |
        sbt assembly
        # remote folder for CI upload; the run id keeps concurrent workflow runs
        # from writing to the same folder (time of day alone is not unique)
        echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_${GITHUB_RUN_ID}_$(date +"%H_%M_%S")" >> "$GITHUB_ENV"
        # get local jar name without paths so version change won't affect it
        echo "FEATHR_LOCAL_JAR_NAME=$(ls target/scala-2.12/*.jar| xargs -n 1 basename)" >> "$GITHUB_ENV"
        # get local jar name with its full path
        echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls target/scala-2.12/*.jar)" >> "$GITHUB_ENV"
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is always read as a string, never a float.
        python-version: "3.8"
    - name: Install Feathr Package
      run: |
        python -m pip install --upgrade pip
        python -m pip install -e ./feathr_project/[all]
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Set env variable and upload jars
      env:
        # set this for databricks CLI
        DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
        DATABRICKS_TOKEN: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
      run: |
        # upload the jar built above; assuming there will be only one jar in the target folder
        # the variables were exported through GITHUB_ENV by the "Build JAR" step and are
        # quoted so an unexpected character in a path cannot split the command
        databricks fs cp "$FEATHR_LOCAL_JAR_FULL_NAME_PATH" "dbfs:/$CI_SPARK_REMOTE_JAR_FOLDER/$FEATHR_LOCAL_JAR_NAME" --overwrite
    - name: Run Feathr with Databricks
      env:
        PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_databricks"
        SPARK_CONFIG__SPARK_CLUSTER: databricks
        SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}}
        DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
        SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
        REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
        AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
        AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
        AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
        S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
        S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
        ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
        ADLS_KEY: ${{secrets.ADLS_KEY}}
        BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
        BLOB_KEY: ${{secrets.BLOB_KEY}}
        JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
        KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
        MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
        # must point at the jar uploaded by the previous step
        SPARK_CONFIG__DATABRICKS__FEATHR_RUNTIME_LOCATION: dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}
        COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
        SQL1_USER: ${{secrets.SQL1_USER}}
        SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}}
        AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}}
      run: |
        # run the test directory against Databricks with 6 parallel pytest workers
        pytest -n 6 feathr_project/test/
azure_synapse_test:
  # Builds the Feathr assembly jar, uploads it to Azure Blob Storage and runs
  # the Python test suite with the Spark cluster set to Azure Synapse.
  # might be a bit duplication to setup both the azure_synapse test and databricks test, but for now we will keep those to accelerate the test speed
  runs-on: ubuntu-latest
  # Always run for schedule / push / pull_request events; a pull_request_target
  # event runs only when the PR is labeled 'safe to test'.
  if: >-
    github.event_name == 'schedule' ||
    github.event_name == 'push' ||
    github.event_name == 'pull_request' ||
    (github.event_name == 'pull_request_target' &&
    contains(github.event.pull_request.labels.*.name, 'safe to test'))
  steps:
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    - name: Set up JDK 8
      uses: actions/setup-java@v2
      with:
        java-version: "8"
        distribution: "temurin"
    - name: Build JAR
      run: |
        sbt assembly
        # remote folder for CI upload; the run id keeps concurrent workflow runs
        # from writing to the same folder (time of day alone is not unique)
        echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_${GITHUB_RUN_ID}_$(date +"%H_%M_%S")" >> "$GITHUB_ENV"
        # get local jar name without paths so version change won't affect it
        echo "FEATHR_LOCAL_JAR_NAME=$(ls target/scala-2.12/*.jar| xargs -n 1 basename)" >> "$GITHUB_ENV"
        # get local jar name with its full path
        echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls target/scala-2.12/*.jar)" >> "$GITHUB_ENV"
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is always read as a string, never a float.
        python-version: "3.8"
    - name: Azure Blob Storage Upload (Overwrite)
      uses: fixpoint/azblob-upload-artifact@v4
      with:
        connection-string: ${{secrets.SPARK_JAR_BLOB_CONNECTION_STRING}}
        name: ${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}
        path: ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}}
        container: ${{secrets.SPARK_JAR_BLOB_CONTAINER}}
        cleanup: "true"
    - name: Install Feathr Package
      run: |
        python -m pip install --upgrade pip
        python -m pip install -e ./feathr_project/[all]
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Run Feathr with Azure Synapse
      env:
        PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_synapse"
        SPARK_CONFIG__SPARK_CLUSTER: azure_synapse
        REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
        AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
        AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
        AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
        S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
        S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
        ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
        ADLS_KEY: ${{secrets.ADLS_KEY}}
        BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
        BLOB_KEY: ${{secrets.BLOB_KEY}}
        JDBC_TABLE: ${{secrets.JDBC_TABLE}}
        JDBC_USER: ${{secrets.JDBC_USER}}
        JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}}
        JDBC_DRIVER: ${{secrets.JDBC_DRIVER}}
        JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
        KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
        MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
        # must point at the jar uploaded by the blob upload step above
        SPARK_CONFIG__AZURE_SYNAPSE__FEATHR_RUNTIME_LOCATION: "abfss://${{secrets.SPARK_JAR_BLOB_CONTAINER}}@feathrazuretest3storage.dfs.core.windows.net/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}"
        COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
        SQL1_USER: ${{secrets.SQL1_USER}}
        SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}}
        AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}}
      run: |
        # databricks and synapse tests are kept in separate jobs to make sure there's no write conflict
        # run the test directory with 6 parallel pytest workers to make the time shorter
        pytest -n 6 feathr_project/test/
local_spark_test:
  # Builds the Feathr assembly jar and runs the local-Spark end-to-end test file.
  runs-on: ubuntu-latest
  # Always run for schedule / push / pull_request events; a pull_request_target
  # event runs only when the PR is labeled 'safe to test'.
  if: >-
    github.event_name == 'schedule' ||
    github.event_name == 'push' ||
    github.event_name == 'pull_request' ||
    (github.event_name == 'pull_request_target' &&
    contains(github.event.pull_request.labels.*.name, 'safe to test'))
  steps:
    # Check out the PR head commit; the expression is empty for non-PR events.
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}

    - name: Set up JDK 8
      uses: actions/setup-java@v2
      with:
        java-version: '8'
        distribution: temurin

    - name: Build JAR
      run: |
        sbt assembly
        # remote folder for CI upload
        echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
        # get local jar name without paths so version change won't affect it
        echo "FEATHR_LOCAL_JAR_NAME=$(ls target/scala-2.12/*.jar| xargs -n 1 basename)" >> $GITHUB_ENV
        # get local jar name with its full path
        echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls target/scala-2.12/*.jar)" >> $GITHUB_ENV

    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        python-version: '3.8'

    - name: Install Feathr Package
      run: |
        python -m pip install --upgrade pip
        python -m pip install -e ./feathr_project/[all]
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

    - name: Run Feathr with Local Spark
      env:
        # Project and cluster selection.
        PROJECT_CONFIG__PROJECT_NAME: feathr_github_ci_local
        SPARK_CONFIG__SPARK_CLUSTER: local
        # Credentials taken from repository secrets.
        REDIS_PASSWORD: ${{ secrets.REDIS_PASSWORD }}
        AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
        AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
        AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
        S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
        S3_SECRET_KEY: ${{ secrets.S3_SECRET_KEY }}
        ADLS_ACCOUNT: ${{ secrets.ADLS_ACCOUNT }}
        ADLS_KEY: ${{ secrets.ADLS_KEY }}
        BLOB_ACCOUNT: ${{ secrets.BLOB_ACCOUNT }}
        BLOB_KEY: ${{ secrets.BLOB_KEY }}
        JDBC_TABLE: ${{ secrets.JDBC_TABLE }}
        JDBC_USER: ${{ secrets.JDBC_USER }}
        JDBC_PASSWORD: ${{ secrets.JDBC_PASSWORD }}
        JDBC_DRIVER: ${{ secrets.JDBC_DRIVER }}
        JDBC_SF_PASSWORD: ${{ secrets.JDBC_SF_PASSWORD }}
        KAFKA_SASL_JAAS_CONFIG: ${{ secrets.KAFKA_SASL_JAAS_CONFIG }}
        MONITORING_DATABASE_SQL_PASSWORD: ${{ secrets.MONITORING_DATABASE_SQL_PASSWORD }}
        COSMOS1_KEY: ${{ secrets.COSMOS1_KEY }}
        SQL1_USER: ${{ secrets.SQL1_USER }}
        SQL1_PASSWORD: ${{ secrets.SQL1_PASSWORD }}
      run: |
        # skip cloud related tests
        pytest feathr_project/test/test_local_spark_e2e.py
failure_notification:
  # If any failure, warning message will be sent.
  # Only runs for the scheduled daily run, after all test jobs have finished.
  needs: [sbt_test, python_lint, databricks_test, azure_synapse_test, local_spark_test]
  runs-on: ubuntu-latest
  if: failure() && github.event_name == 'schedule'
  steps:
    - name: Warning
      env:
        # Pass the secret through the environment instead of expanding it into
        # the script text, so the URL is used as a single quoted argument.
        TEAMS_WEBHOOK: ${{ secrets.TEAMS_WEBHOOK }}
      run: |
        curl -H 'Content-Type: application/json' -d '{"text": "[WARNING] Daily CI has failure, please check: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$TEAMS_WEBHOOK"
notification:
  # Final Daily Report with all job status.
  # `always()` makes this run whether the test jobs passed or failed; it is
  # limited to the scheduled daily run.
  needs: [sbt_test, python_lint, databricks_test, azure_synapse_test, local_spark_test]
  runs-on: ubuntu-latest
  if: always() && github.event_name == 'schedule'
  steps:
    - name: Get Date
      # Export today's date as NOW for the report text in the next step.
      run: echo "NOW=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
    - name: Notification
      env:
        # Pass the secret through the environment instead of expanding it into
        # the script text, so the URL is used as a single quoted argument.
        TEAMS_WEBHOOK: ${{ secrets.TEAMS_WEBHOOK }}
      run: |
        curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. SBT Test ${{needs.sbt_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$TEAMS_WEBHOOK"