Skip to content

Make tests safer to run in parallel by changing the Redis key namespace #5557

Make tests safer to run in parallel by changing the Redis key namespace

Make tests safer to run in parallel by changing the Redis key namespace #5557

Workflow file for this run

name: CI
on:
push:
branches:
# only on merges to master branch
- master
# and version branches, which only include minor versions (eg: v3.4)
- v[0-9]+.[0-9]+
tags:
# also version tags, which include bugfix releases (eg: v3.4.0)
- v[0-9]+.[0-9]+.[0-9]+
pull_request:
type: [opened, reopened, edited]
branches:
# Only for PRs targeting those branches
- master
- v[0-9]+.[0-9]+
schedule:
# run every night at midnight
- cron: '0 0 * * *'
jobs:
# TODO: Fix the required checks!
# When the pre_job triggers and skips builds, it prevents merging the PR because
# the required checks are reported as skipped instead of passed.
# Special job which automatically cancels old runs for the same branch, prevents runs for the
# same file set which has already passed, etc.
pre_job:
name: Skip Duplicate Jobs Pre Job
runs-on: ubuntu-20.04
outputs:
should_skip: ${{ steps.skip_check.outputs.should_skip }}
steps:
- id: skip_check
uses: fkirc/skip-duplicate-actions@4c656bbdb6906310fa6213604828008bc28fe55d # v3.3.0
with:
cancel_others: 'true'
github_token: ${{ github.token }}
# Lint checks which don't depend on any service containes, etc. to be running.
lint-checks:
needs: pre_job
# NOTE: We always want to run job on master since we run some additional checks there (code
# coverage, etc)
# if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }}
name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}'
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
# NOTE: To speed the CI run, we split unit and integration tests into multiple jobs where
# each job runs subset of tests.
# NOTE: We need to use full Python version as part of Python deps cache key otherwise
# setup virtualenv step will fail.
include:
- name: 'Lint Checks (black, flake8, etc.)'
task: 'ci-checks'
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Compile (pip deps, pylint, etc.)'
task: 'ci-compile'
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Lint Checks (black, flake8, etc.)'
task: 'ci-checks'
python-version-short: '3.9'
python-version: '3.9.14'
- name: 'Compile (pip deps, pylint, etc.)'
task: 'ci-compile'
python-version-short: '3.9'
python-version: '3.9.14'
env:
TASK: '${{ matrix.task }}'
COLUMNS: '120'
PYLINT_CONCURRENCY: '6'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Custom Environment Setup
run: |
./scripts/github/setup-environment.sh
- name: 'Set up Python (${{ matrix.python-version }}) and Cache Deps'
uses: ./.github/actions/setup-python
with:
python-version: '${{ matrix.python-version }}'
- name: Cache and Install APT Dependencies
uses: ./.github/actions/apt-packages
- name: Install virtualenv
run: |
./scripts/github/install-virtualenv.sh
- name: Install requirements
run: |
./scripts/ci/install-requirements.sh
- name: Print versions
run: |
./scripts/ci/print-versions.sh
- name: make
# use: script -e -c to print colors
timeout-minutes: 7
run: |
script -e -c "make ${TASK}"
- name: Nightly
# Run any additional nightly checks only as part of a nightly (cron) build
if: "${{ env.IS_NIGHTLY_BUILD == 'yes' }}"
run: |
./scripts/ci/run-nightly-make-task-if-exists.sh "${TASK}"
self-check:
needs: pre_job
name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}'
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
include:
- name: 'Self-check on Python 3.8'
python-version-short: '3.8'
python-version: '3.8.14'
services:
mongo:
image: mongo:7.0
ports:
- 27017:27017
rabbitmq:
image: rabbitmq:3.8-management
options: >-
--name rabbitmq
ports:
- 5671:5671/tcp # AMQP SSL port
- 5672:5672/tcp # AMQP standard port
- 15672:15672/tcp # Management: HTTP, CLI
redis:
# Docker Hub image
image: redis
# Set health checks to wait until redis has started
options: >-
--name "redis"
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 6379:6379/tcp
env:
# CI st2.conf (with ST2_CI_USER user instead of stanley)
ST2_CONF: 'conf/st2.ci.conf'
# Name of the user who is running the CI (on GitHub Actions this is 'runner')
ST2_CI_USER: 'runner'
# GitHub is juggling how to set vars for multiple shells. Protect our PATH assumptions.
PATH: /home/runner/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Space separated list of tests to be skipped if the self-check is running in GitHub Actions
TESTS_TO_SKIP: "tests.test_quickstart_rules tests.test_run_pack_tests_tool"
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Custom Environment Setup
run: |
./scripts/github/setup-environment.sh
- name: 'Set up Python (${{ matrix.python-version }}) and Cache Deps'
uses: ./.github/actions/setup-python
with:
python-version: '${{ matrix.python-version }}'
- name: Cache and Install APT Dependencies
uses: ./.github/actions/apt-packages
- name: Install virtualenv
run: |
./scripts/github/install-virtualenv.sh
- name: Install requirements
run: |
./scripts/ci/install-requirements.sh
- name: Setup Tests
run: |
# prep a ci-specific dev conf file that uses runner instead of stanley
# this user is the username of the user in GitHub actions, used for SSH, etc during
# integration tests (important)
cp conf/st2.dev.conf "${ST2_CONF}"
sed -i -e "s,/home/vagrant/.ssh/stanley_rsa,/home/stanley/.ssh/stanley_rsa," "${ST2_CONF}"
sudo -E ./scripts/ci/add-itest-user-key.sh
- name: Permissions Workaround
run: |
sudo ST2_CI_REPO_PATH="${ST2_CI_REPO_PATH}" scripts/ci/permissions-workaround.sh
- name: Reconfigure RabbitMQ
# bitnami image allows (see bitnami/rabbitmq readme):
# Here we're copying a rabbitmq.config file which won't do anything.
# We need to switch to custom.conf or advanced.config.
timeout-minutes: 2 # may die if rabbitmq fails to start
run: |
./scripts/github/configure-rabbitmq.sh
- name: Print versions
run: |
./scripts/ci/print-versions.sh
- name: make
timeout-minutes: 14 # may die if rabbitmq fails to start
# use: script -e -c to print colors
run: |
script -e -c "make .ci-prepare-integration" && exit 0
- name: Extend the path for upcoming tasks
# pants uses PEP 660 editable wheels to add our code to the virtualenv.
# But PEP 660 editable wheels do not include 'scripts'.
# https://peps.python.org/pep-0660/#limitations
# So, we need to include each bin dir in PATH instead of virtualenv/bin.
run: |
for component_bin in ${GITHUB_WORKSPACE}/st2*/bin; do
echo ${component_bin} | tee -a $GITHUB_PATH
done
echo ${GITHUB_WORKSPACE}/virtualenv/bin | tee -a $GITHUB_PATH
- name: Create symlinks to find the binaries when running st2 actions
# st2 is actually a console_script entry point, not just a 'script'
# so it IS included in the virtualenv. But, st2-run-pack-tests might not be included.
run: |
ln -s ${GITHUB_WORKSPACE}/virtualenv/bin/st2 /usr/local/bin/st2
ln -s ${GITHUB_WORKSPACE}/st2common/bin/st2-run-pack-tests /usr/local/bin/st2-run-pack-tests
- name: Install st2client
timeout-minutes: 5
run: |
cd ./st2client
pip3 install --upgrade pip
python3 setup.py develop
- name: Run self-verification script
env:
ST2_CONF: /home/runner/work/st2/st2/conf/st2.ci.conf
run: |
sudo -E ST2_AUTH_TOKEN=$(st2 auth testu -p 'testp' -t) PATH=${PATH} st2common/bin/st2-self-check
- name: Compress Service Logs Before upload
if: ${{ failure() }}
run: |
./tools/launchdev.sh stop # stop st2 before collecting logs
tar cvzpf logs.tar.gz logs/*
- name: Upload StackStorm services Logs
if: ${{ failure() }}
uses: actions/upload-artifact@v4
with:
name: logs-py${{ matrix.python-version }}
path: logs.tar.gz
retention-days: 7
unit-tests:
needs: pre_job
# NOTE: We always want to run job on master since we run some additional checks there (code
# coverage, etc)
# NB: disabled. See TODO above pre_job
# if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }}
name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}'
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
# NOTE: To speed the CI run, we split unit and integration tests into multiple jobs where
# each job runs subset of tests.
include:
- name: 'Unit Tests (chunk 1)'
task: 'ci-unit'
nosetests_node_total: 2
nosetests_node_index: 0
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Unit Tests (chunk 2)'
task: 'ci-unit'
nosetests_node_total: 2
nosetests_node_index: 1
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Unit Tests (chunk 1)'
task: 'ci-unit'
nosetests_node_total: 2
nosetests_node_index: 0
python-version-short: '3.9'
python-version: '3.9.14'
- name: 'Unit Tests (chunk 2)'
task: 'ci-unit'
nosetests_node_total: 2
nosetests_node_index: 1
python-version-short: '3.9'
python-version: '3.9.14'
# This job is slow so we only run in on a daily basis
# - name: 'Micro Benchmarks'
# task: 'micro-benchmarks'
# python-version: '3.6.13'
# nosetests_node_total: 1
# nosetests_node_ index: 0
services:
mongo:
image: mongo:7.0
ports:
- 27017:27017
redis:
# Docker Hub image
image: redis
# Set health checks to wait until redis has started
options: >-
--name "redis"
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 6379:6379/tcp
rabbitmq:
image: rabbitmq:3.8-management
options: >-
--name rabbitmq
ports:
- 5671:5671/tcp # AMQP SSL port
- 5672:5672/tcp # AMQP standard port
- 15672:15672/tcp # Management: HTTP, CLI
env:
TASK: '${{ matrix.task }}'
PYTHON_VERSION_SHORT: '${{ matrix.python-version-short }}'
NODE_TOTAL: '${{ matrix.nosetests_node_total }}'
NODE_INDEX: '${{ matrix.nosetests_node_index }}'
# We need to explicitly specify terminal width otherwise some CLI tests fail on container
# environments where small terminal size is used.
COLUMNS: '120'
# CI st2.conf (with ST2_CI_USER user instead of stanley)
ST2_CONF: 'conf/st2.ci.conf'
# Tell StackStorm that we are indeed in CI mode, previously we hard coded a Travis specific
# environment variable in our test code, making it a PITA when we switch CI providers.
# Now, we simply set this environment varible here in the CI portion of our testing and
# it avoids any CI provider type lock-in.
ST2_CI: 'true'
# Name of the user who is running the CI (on GitHub Actions this is 'runner')
ST2_CI_USER: 'runner'
# GitHub is juggling how to set vars for multiple shells. Protect our PATH assumptions.
PATH: /home/runner/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Custom Environment Setup
run: |
./scripts/github/setup-environment.sh
- name: 'Set up Python (${{ matrix.python-version }}) and Cache Deps'
uses: ./.github/actions/setup-python
with:
python-version: '${{ matrix.python-version }}'
- name: Cache and Install APT Dependencies
uses: ./.github/actions/apt-packages
- name: Install virtualenv
run: |
./scripts/github/install-virtualenv.sh
- name: Install requirements
run: |
./scripts/ci/install-requirements.sh
- name: Setup Tests
run: |
# prep a ci-specific dev conf file that uses runner instead of stanley
# this user is the username of the user in GitHub actions, used for SSH, etc during
# integration tests (important)
cp conf/st2.dev.conf "${ST2_CONF}" ; sed -i -e "s/stanley/${ST2_CI_USER}/" "${ST2_CONF}"
sudo -E ./scripts/ci/add-itest-user-key.sh
- name: Permissions Workaround
run: |
echo "$ST2_CI_REPO_PATH"
sudo ST2_CI_REPO_PATH="${ST2_CI_REPO_PATH}" scripts/ci/permissions-workaround.sh
- name: Reconfigure RabbitMQ
if: "${{ env.TASK == 'ci-unit' || env.TASK == 'ci-integration' }}"
# bitnami image allows (see bitnami/rabbitmq readme):
# Here we're copying a rabbitmq.config file which won't do anything.
# We need to switch to custom.conf or advanced.config.
timeout-minutes: 2 # may die if rabbitmq fails to start
run: |
./scripts/github/configure-rabbitmq.sh
- name: Print versions
run: |
./scripts/ci/print-versions.sh
- name: make
# use: script -e -c to print colors
# TODO: Use dynamic timeout value based on the branch - for master we
# need to use timeout x2 due to coverage overhead
# timeout-minutes: 8
timeout-minutes: 19
run: |
script -e -c "make ${TASK}"
- name: Nightly
# Run any additional nightly checks only as part of a nightly (cron) build
if: "${{ env.IS_NIGHTLY_BUILD == 'yes' }}"
run: |
./scripts/ci/run-nightly-make-task-if-exists.sh "${TASK}"
- name: Codecov
# NOTE: We only generate and submit coverage report for master and version branches and only when the build succeeds (default on GitHub Actions, this was not the case on Travis so we had to explicitly check success)
if: "${{ success() && (env.ENABLE_COVERAGE == 'yes') && (env.PYTHON_VERSION_SHORT == '3.8')}}"
run: |
./scripts/ci/submit-codecov-coverage.sh
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
integration-tests:
needs: pre_job
# NOTE: We always want to run job on master since we run some additional checks there (code
# coverage, etc)
# if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }}
name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}'
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
# NOTE: To speed the CI run, we split unit and integration tests into multiple jobs where
# each job runs subset of tests.
include:
# We run pack tests here since they rely on some integration tests set
# up (aka stanley user being present, etc.)
- name: 'Pack Tests'
task: 'ci-packs-tests'
nosetests_node_total: 1
nosetests_node_index: 0
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Integration Tests (chunk 1)'
task: 'ci-integration'
nosetests_node_total: 2
nosetests_node_index: 0
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Integration Tests (chunk 2)'
task: 'ci-integration'
nosetests_node_total: 2
nosetests_node_index: 1
python-version-short: '3.8'
python-version: '3.8.10'
- name: 'Pack Tests'
task: 'ci-packs-tests'
nosetests_node_total: 1
nosetests_node_index: 0
python-version-short: '3.9'
python-version: '3.9.14'
- name: 'Integration Tests (chunk 1)'
task: 'ci-integration'
nosetests_node_total: 2
nosetests_node_index: 0
python-version-short: '3.9'
python-version: '3.9.14'
- name: 'Integration Tests (chunk 2)'
task: 'ci-integration'
nosetests_node_total: 2
nosetests_node_index: 1
python-version-short: '3.9'
python-version: '3.9.14'
services:
mongo:
image: mongo:7.0
ports:
- 27017:27017
# In GHA, these services are started first before the code is checked out.
# We use bitnami images to facilitate reconfiguring RabbitMQ during ci-integration tests.
# We rely on custom config and SSL certs that are in the repo.
# Many images require config in env vars (which we can't change during the test job)
# or they require config in entrypoint args (which we can't override for GHA services)
# bitnami builds ways to get config files from mounted volumes.
rabbitmq:
image: bitnami/rabbitmq:3.8
volumes:
- /home/runner/rabbitmq_conf:/bitnami/conf # RABBITMQ_MOUNTED_CONF_DIR
env:
# tell bitnami/rabbitmq to enable this by default
RABBITMQ_PLUGINS: rabbitmq_management
RABBITMQ_USERNAME: guest
RABBITMQ_PASSWORD: guest
# These are strictly docker options, not entrypoint args (GHA restriction)
options: >-
--name rabbitmq
ports:
# These 6 ports are exposed by bitnami/rabbitmq (see https://www.rabbitmq.com/networking.html#ports)
# host_port:container_port/protocol
- 5671:5671/tcp # AMQP SSL port
- 5672:5672/tcp # AMQP standard port
- 15672:15672/tcp # Management: HTTP, CLI
#- 15671:15671/tcp # Management: SSL port
#- 25672:25672/tcp # inter-node or CLI
#- 4369:4369/tcp # epmd
#
redis:
# Docker Hub image
image: redis
# Set health checks to wait until redis has started
options: >-
--name "redis"
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 6379:6379/tcp
env:
TASK: '${{ matrix.task }}'
PYTHON_VERSION_SHORT: '${{ matrix.python-version-short }}'
NODE_TOTAL: '${{ matrix.nosetests_node_total }}'
NODE_INDEX: '${{ matrix.nosetests_node_index }}'
# We need to explicitly specify terminal width otherwise some CLI tests fail on container
# environments where small terminal size is used.
COLUMNS: '120'
# CI st2.conf (with ST2_CI_USER user instead of stanley)
ST2_CONF: 'conf/st2.ci.conf'
# Tell StackStorm that we are indeed in CI mode, previously we hard coded a Travis specific
# environment variable in our test code, making it a PITA when we switch CI providers.
# Now, we simply set this environment varible here in the CI portion of our testing and
# it avoids any CI provider type lock-in.
ST2_CI: 'true'
# Name of the user who is running the CI (on GitHub Actions this is 'runner')
ST2_CI_USER: 'runner'
# GitHub is juggling how to set vars for multiple shells. Protect our PATH assumptions.
PATH: /home/runner/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Custom Environment Setup
run: |
./scripts/github/setup-environment.sh
- name: 'Set up Python (${{ matrix.python-version }}) and Cache Deps'
uses: ./.github/actions/setup-python
with:
python-version: '${{ matrix.python-version }}'
- name: Cache and Install APT Dependencies
uses: ./.github/actions/apt-packages
- name: Install virtualenv
run: |
./scripts/github/install-virtualenv.sh
- name: Install requirements
run: |
./scripts/ci/install-requirements.sh
- name: Setup Integration Tests
run: |
# prep a ci-specific dev conf file that uses runner instead of stanley
# this user is the username of the user in GitHub actions, used for SSH, etc during
# integration tests (important)
cp conf/st2.dev.conf "${ST2_CONF}" ; sed -i -e "s/stanley/${ST2_CI_USER}/" "${ST2_CONF}"
sudo -E ./scripts/ci/add-itest-user-key.sh
- name: Permissions Workaround
run: |
echo "$ST2_CI_REPO_PATH"
sudo ST2_CI_REPO_PATH="${ST2_CI_REPO_PATH}" scripts/ci/permissions-workaround.sh
- name: Reconfigure RabbitMQ
# bitnami image allows (see bitnami/rabbitmq readme):
# Here we're copying a rabbitmq.config file which won't do anything.
# We need to switch to custom.conf or advanced.config.
timeout-minutes: 2 # may die if rabbitmq fails to start
run: |
./scripts/github/configure-rabbitmq.sh
- name: Print versions
run: |
./scripts/ci/print-versions.sh
- name: make
#timeout-minutes: 7
# TODO: Use dynamic timeout value based on the branch - for master we
# need to use timeout x2 due to coverage overhead
timeout-minutes: 14 # may die if rabbitmq fails to start
# use: script -e -c to print colors
run: |
script -e -c "make ${TASK}" && exit 0
- name: Codecov
# NOTE: We only generate and submit coverage report for master and version branches and only when the build succeeds (default on GitHub Actions, this was not the case on Travis so we had to explicitly check success)
if: "${{ success() && (env.ENABLE_COVERAGE == 'yes') && (env.TASK == 'ci-integration') && (env.PYTHON_VERSION_SHORT == '3.8')}}"
run: |
./scripts/ci/submit-codecov-coverage.sh
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
- name: Compress Service Logs Before upload
if: ${{ failure() && env.TASK == 'ci-integration' }}
run: |
./tools/launchdev.sh stop # stop st2 before collecting logs
tar cvzpf logs.tar.gz logs/*
- name: Upload StackStorm services Logs
if: ${{ failure() && env.TASK == 'ci-integration' }}
uses: actions/upload-artifact@v4
with:
name: logs-py${{ matrix.python-version }}-nose-${{ matrix.nosetests_node_index }}
path: logs.tar.gz
retention-days: 7
slack-notification:
name: Slack notification for failed master builds
if: always()
needs:
- lint-checks
- self-check
- unit-tests
- integration-tests
runs-on: ubuntu-20.04
steps:
- name: Workflow conclusion
# this step creates an environment variable WORKFLOW_CONCLUSION and is the most reliable way to check the status of previous jobs
uses: technote-space/workflow-conclusion-action@v2
- name: CI Run Failure Slack Notification
if: ${{ env.WORKFLOW_CONCLUSION == 'failure' && github.ref == 'refs/heads/master' }}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
uses: voxmedia/github-action-slack-notify-build@v1
with:
channel: development
status: FAILED
color: danger
# HELPER FOR FUTURE DEVELOPERS:
# If your GitHub Actions job is failing and you need to debug it, by default there is
# no way to SSH into the container.
# The step below can be uncommeted and will stop here and allow you to SSH in.
# When this step is reached, simply refresh the GitHub Actions output for this build
# and this SSH command will be printed every 5 seconds to the output.
# Once you are done debugging in your SSH session, simply: touch /continue
# and this will continue the build.
#
# - name: Setup tmate session for debugging failed jobs (allows SSH into the container)
# uses: mxschmitt/action-tmate@v3
# if: "${{ failure() }}"
#