From d30978b4832b429d9528ef7b16feaf48fd550c96 Mon Sep 17 00:00:00 2001 From: James Kunstle Date: Mon, 6 Jan 2025 14:35:04 -0800 Subject: [PATCH] adds reusable unit test gh/action workflows Signed-off-by: James Kunstle --- .github/workflows/unittesting-ci-nvidia.yaml | 137 +++++++++++++++++++ src/instructlab/training/config.py | 4 +- tests/test_init.py | 7 + 3 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/unittesting-ci-nvidia.yaml create mode 100644 tests/test_init.py diff --git a/.github/workflows/unittesting-ci-nvidia.yaml b/.github/workflows/unittesting-ci-nvidia.yaml new file mode 100644 index 00000000..a54c29c4 --- /dev/null +++ b/.github/workflows/unittesting-ci-nvidia.yaml @@ -0,0 +1,137 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: "Run 'fast' marked unit tests via Tox::pytest" +# This workflow should run only those tests that are marked as 'fast.' +# The opposite are those that would require the mark 'slow,' which would +# include longer-running integration and smoke tests. +# +# Essentially, this workflow should be used frequently for cheap tests, +# and a 'slow' marked workflow will be used later in review, manually triggered, +# to verify integration correctness. 
+ +on: + pull_request: + types: [opened, reopened, synchronize] + push: + branches: + - "main" + - "release-**" + +env: + pytest_mark: "fast" + ec2_runner_variant: "m8g.xlarge" # 4 Graviton CPU, 16GB RAM + +jobs: + start-ec2-runner: + runs-on: ubuntu-latest + outputs: + label: ${{ steps.start-ec2-runner.outputs.label }} + ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} # instance id (not the label) so the runner can be terminated later + + steps: + - name: "Harden runner" + uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.1 + with: + egress-policy: audit + + - name: "Configure AWS credentials" + uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ vars.AWS_REGION }} + + - name: "Start EC2 runner" + id: start-ec2-runner + uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7 + with: + mode: start + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + ec2-image-id: ${{ vars.AWS_EC2_AMI }} + ec2-instance-type: ${{ env.ec2_runner_variant }} # instance size from the workflow-level env, not the AWS region + subnet-id: subnet-024298cefa3bedd61 + security-group-id: sg-06300447c4a5fbef3 + iam-role-name: instructlab-ci-runner + aws-resource-tags: > + [ + {"Key": "Name", "Value": "instructlab-ci-github-large-runner"}, + {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}, + {"Key": "GitHubRef", "Value": "${{ github.ref }}"}, + {"Key": "GitHubPR", "Value": "${{ github.event.number }}"} + ] + + run-unit-tests: + needs: + - start-ec2-runner + runs-on: ${{needs.start-ec2-runner.outputs.label}} + # This job MUST HAVE NO PERMISSIONS and no access to any secrets + # because it'll run incoming user code without discretion. + permissions: {} # this syntax disables permissions for all available options. 
+ steps: + - name: "Harden runner" + uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.1 + with: + egress-policy: audit + + - name: "Install packages" + run: | + cat /etc/os-release + sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel + + - name: "Checkout code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: "Verify environment variables are setup correctly" + run: | + export CUDA_HOME="/usr/local/cuda" + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" + export PATH="$PATH:$CUDA_HOME/bin" + nvidia-smi + + # installs in $GITHUB_WORKSPACE/venv. + # only has to install Tox because Tox will do the other virtual environment management. + - name: "Setup Python virtual environment" + run: | + python3.11 -m venv --upgrade-deps venv + . venv/bin/activate + pip install tox + + - name: "Show disk utilization BEFORE tests" + run: | + df -h + + - name: "Run unit tests with Tox and Pytest" + run: | + venv/bin/tox -e py3-unit -- -m ${{env.pytest_mark}} # venv activation from the setup step does not carry over to this shell + + - name: "Show disk utilization AFTER tests" + run: | + df -h + + stop-ec2-runner: + # Always tear the runner down, even when an earlier job fails or is cancelled. + needs: [start-ec2-runner, run-unit-tests] + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: "Harden runner" + uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.1 + with: + egress-policy: audit + - name: "Configure AWS credentials" + uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ vars.AWS_REGION }} + + - name: "Stop EC2 runner" + id: stop-ec2-runner + uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7 + with: + mode: stop + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + label: ${{ needs.start-ec2-runner.outputs.label }} + 
ec2-instance-id: ${{ needs.start-ec2-runner.outputs.ec2-instance-id }} diff --git a/src/instructlab/training/config.py b/src/instructlab/training/config.py index bf43f2eb..5cdce064 100644 --- a/src/instructlab/training/config.py +++ b/src/instructlab/training/config.py @@ -30,8 +30,8 @@ class DeepSpeedOffloadStrategy(Enum): # public API class DistributedBackend(Enum): - FSDP: str = "fsdp" - DEEPSPEED: str = "deepspeed" + FSDP = "fsdp" + DEEPSPEED = "deepspeed" # public API diff --git a/tests/test_init.py b/tests/test_init.py new file mode 100644 index 00000000..b361b9ea --- /dev/null +++ b/tests/test_init.py @@ -0,0 +1,7 @@ +# Third Party +import pytest + + +@pytest.mark.fast +def test_fake(): + assert True