Skip to content

Commit

Permalink
adds reusable unit test gh/action workflows
Browse files Browse the repository at this point in the history
Signed-off-by: James Kunstle <[email protected]>
  • Loading branch information
JamesKunstle committed Jan 7, 2025
1 parent eae2ec8 commit 343b70f
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 2 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/unit-nvidia-fast-l40s-x4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Entry-point workflow: runs the tests carrying the `fast` pytest mark on a
# g6e.12xlarge EC2 runner by delegating to the reusable unit-test workflow.
name: "Run FAST unit tests on specified EC2"

on:
  pull_request:
    types: [opened, reopened, synchronize]
  push:
    branches:
      - "main"
      - "release-**"

jobs:
  unit-fast:
    uses: "./.github/workflows/unit-nvidia-reusable.yaml"
    with:
      ec2_runner_variant: "g6e.12xlarge"
      pytest_mark: "fast"
    # A called workflow only sees the secrets its caller hands over. The
    # reusable workflow reads AWS credentials and a GitHub token from
    # `secrets.*`, so forward this repository's secrets to it.
    secrets: inherit
135 changes: 135 additions & 0 deletions .github/workflows/unit-nvidia-reusable.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# SPDX-License-Identifier: Apache-2.0

name: "Run unit tests on specified EC2"

# Reusable workflow: it is only triggered when another workflow calls it
# through `jobs.<job_id>.uses`.
on:
  workflow_call:
    inputs:
      ec2_runner_variant:
        description: "EC2 instance type of runner"
        type: string
        # Optional on purpose: the default of a `required: true` input is
        # never used (actionlint rejects that combination).
        required: false
        default: "g6e.12xlarge" # TODO: consider smaller default instance.

Check failure on line 12 in .github/workflows/unit-nvidia-reusable.yaml

View workflow job for this annotation

GitHub Actions / actionlint

input "ec2_runner_variant" of workflow_call event has the default value "g6e.12xlarge", but it is also required. if an input is marked as required, its default value will never be used
pytest_mark:
  type: string
  description: "pytest.mark of tests that will be run"
  # Optional on purpose: the default of a `required: true` input is never
  # used (actionlint rejects that combination).
  required: false
  default: "fast"

Check failure on line 17 in .github/workflows/unit-nvidia-reusable.yaml

View workflow job for this annotation

GitHub Actions / actionlint

input "pytest_mark" of workflow_call event has the default value "fast", but it is also required. if an input is marked as required, its default value will never be used

jobs:
# TODO: this startup step could definitely be reused by our workflows elsewhere.
# Launches an EC2 instance of the requested type and registers it as a
# self-hosted runner. Exposes the runner label and the instance id as job
# outputs for the jobs that depend on this one.
start-ec2-runner:
  runs-on: ubuntu-latest
  outputs:
    label: ${{ steps.start-ec2-runner.outputs.label }}
    # Must map to the step's `ec2-instance-id` output (not `label`), otherwise
    # consumers receive the runner label where an instance id is expected.
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}

  steps:
    - name: "Harden runner"
      # v2.10.1
      uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
      with:
        egress-policy: audit

    - name: "Configure AWS credentials"
      uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ vars.AWS_REGION }}

    - name: "Start EC2 runner"
      id: start-ec2-runner
      uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
      with:
        mode: start
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        ec2-image-id: ${{ vars.AWS_EC2_AMI }}
        ec2-instance-type: ${{ inputs.ec2_runner_variant }}
        subnet-id: subnet-024298cefa3bedd61
        security-group-id: sg-06300447c4a5fbef3
        iam-role-name: instructlab-ci-runner
        # JSON list of tags attached to the launched instance.
        aws-resource-tags: >
          [
            {"Key": "Name", "Value": "instructlab-training-ci-github-runner"},
            {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
            {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
            {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
          ]
# Runs the selected unit tests on the EC2 runner started by `start-ec2-runner`.
run-unit-tests:
  needs:
    - start-ec2-runner
  runs-on: ${{ needs.start-ec2-runner.outputs.label }}
  permissions:
    pull-requests: write
  steps:
    - name: "Harden runner"
      # v2.10.1
      uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
      with:
        egress-policy: audit

    - name: "Install packages"
      run: |
        cat /etc/os-release
        sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

    - name: "Checkout code"
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        fetch-depth: 0

    # NOTE: every `run` step executes in its own shell, so these exports only
    # affect the `nvidia-smi` call below and are not visible to later steps.
    - name: "Verify environment variables are setup correctly"
      run: |
        export CUDA_HOME="/usr/local/cuda"
        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
        export PATH="$PATH:$CUDA_HOME/bin"
        nvidia-smi

    # installs in $GITHUB_WORKSPACE/venv.
    # only has to install Tox because Tox will do the other virtual environment management.
    - name: "Setup Python virtual environment"
      run: |
        python3.11 -m venv --upgrade-deps venv
        . venv/bin/activate
        pip install tox

    - name: "Show disk utilization BEFORE tests"
      run: |
        df -h

    - name: "Run unit tests with Tox and Pytest"
      env:
        # Hand the input over as an environment variable instead of expanding
        # `${{ }}` inside the script, so its value is never parsed as shell code.
        PYTEST_MARK: ${{ inputs.pytest_mark }}
      run: |
        # The activation from the setup step does not carry over to this
        # step's shell; re-activate so the `tox` installed in venv is on PATH.
        . venv/bin/activate
        tox -e py3-unit -- -m "${PYTEST_MARK}"

    - name: "Show disk utilization AFTER tests"
      run: |
        df -h
# Terminates the EC2 runner once the test job has finished.
stop-ec2-runner:
  needs:
    - start-ec2-runner
    - run-unit-tests
  runs-on: ubuntu-latest
  # Run even when an earlier job failed or was cancelled; otherwise a failing
  # test run would skip this job and leave the EC2 instance running.
  if: ${{ always() }}
  steps:
    - name: "Harden runner"
      # v2.10.1
      uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
      with:
        egress-policy: audit

    - name: "Configure AWS credentials"
      uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ vars.AWS_REGION }}

    - name: "Stop EC2 runner"
      id: stop-ec2-runner
      uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
      with:
        mode: stop
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        label: ${{ needs.start-ec2-runner.outputs.label }}
        # Stop mode identifies the instance to terminate by its id; the
        # instance type is only meaningful when starting a runner.
        ec2-instance-id: ${{ needs.start-ec2-runner.outputs.ec2-instance-id }}
4 changes: 2 additions & 2 deletions src/instructlab/training/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class DeepSpeedOffloadStrategy(Enum):

# public API
class DistributedBackend(Enum):
    """Distributed training backends, keyed by their lowercase string value."""

    # Members are plain assignments without a `: str` annotation; each name is
    # defined exactly once, since Enum rejects a re-used member name.
    FSDP = "fsdp"
    DEEPSPEED = "deepspeed"


# public API
Expand Down
7 changes: 7 additions & 0 deletions tests/test_init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Third Party
import pytest


@pytest.mark.fast
def test_fake():
    """Placeholder test carrying the `fast` mark; it always passes."""
    assert True

0 comments on commit 343b70f

Please sign in to comment.