From 823eca5be274f2ef609cd03889bf4763a803b1e5 Mon Sep 17 00:00:00 2001
From: James Kunstle
Date: Mon, 6 Jan 2025 14:35:04 -0800
Subject: [PATCH] adds reusable unit test gh/action workflows

Signed-off-by: James Kunstle
---
Note: the two workflow hunks were amended after export (see the inline
comments); the blob ids on their `index` lines predate that amendment.

 .../workflows/unit-nvidia-fast-l40s-x4.yaml   |  18 +++
 .github/workflows/unit-nvidia-reusable.yaml   | 136 ++++++++++++++++++
 src/instructlab/training/config.py            |   4 +-
 tests/test_init.py                            |   7 +
 4 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/unit-nvidia-fast-l40s-x4.yaml
 create mode 100644 .github/workflows/unit-nvidia-reusable.yaml
 create mode 100644 tests/test_init.py

diff --git a/.github/workflows/unit-nvidia-fast-l40s-x4.yaml b/.github/workflows/unit-nvidia-fast-l40s-x4.yaml
new file mode 100644
index 00000000..e97311a1
--- /dev/null
+++ b/.github/workflows/unit-nvidia-fast-l40s-x4.yaml
@@ -0,0 +1,18 @@
+name: "Run FAST unit tests on specified EC2"
+
+on:
+  pull_request:
+    types: [opened, reopened, synchronize]
+  push:
+    branches:
+      - "main"
+      - "release-**"
+
+jobs:
+  unit-fast:
+    uses: "./.github/workflows/unit-nvidia-reusable.yaml"
+    with:
+      ec2_runner_variant: "g6e.12xlarge"
+      pytest_mark: "fast"
+    # a called workflow only sees the secrets its caller passes down.
+    secrets: inherit
diff --git a/.github/workflows/unit-nvidia-reusable.yaml b/.github/workflows/unit-nvidia-reusable.yaml
new file mode 100644
index 00000000..0f9246c5
--- /dev/null
+++ b/.github/workflows/unit-nvidia-reusable.yaml
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: Apache-2.0
+
+name: "Run unit tests on specified EC2"
+
+on:
+  workflow_call:
+    inputs:
+      ec2_runner_variant:
+        description: "EC2 instance type of runner"
+        type: string
+        default: "g6e.12xlarge" # TODO: consider smaller default instance.
+      pytest_mark:
+        type: string
+        description: "pytest.mark of tests that will be run"
+        default: "fast"
+
+jobs:
+  start-ec2-runner:
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start-ec2-runner.outputs.label }}
+      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
+
+    steps:
+      - name: "Harden runner"
+        # v2.10.1
+        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
+        with:
+          egress-policy: audit
+
+      - name: "Configure AWS credentials"
+        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: "Start EC2 runner"
+        id: start-ec2-runner
+        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
+        with:
+          mode: start
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          ec2-image-id: ${{ vars.AWS_EC2_AMI }}
+          ec2-instance-type: ${{ inputs.ec2_runner_variant }}
+          subnet-id: subnet-024298cefa3bedd61
+          security-group-id: sg-06300447c4a5fbef3
+          iam-role-name: instructlab-ci-runner
+          aws-resource-tags: >
+            [
+              {"Key": "Name", "Value": "instructlab-training-ci-github-runner"},
+              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
+              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
+              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
+            ]
+
+  run-unit-tests:
+    needs:
+      - start-ec2-runner
+    runs-on: ${{needs.start-ec2-runner.outputs.label}}
+    permissions:
+      pull-requests: write
+    steps:
+      - name: "Harden runner"
+        # v2.10.1
+        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
+        with:
+          egress-policy: audit
+
+      - name: "Install packages"
+        run: |
+          cat /etc/os-release
+          sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel
+
+      - name: "Checkout code"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+
+      - name: "Verify environment variables are setup correctly"
+        run: |
+          export CUDA_HOME="/usr/local/cuda"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export PATH="$PATH:$CUDA_HOME/bin"
+          nvidia-smi
+
+      # installs in $GITHUB_WORKSPACE/venv.
+      # only has to install Tox because Tox will do the other virtual environment management.
+      - name: "Setup Python virtual environment"
+        run: |
+          python3.11 -m venv --upgrade-deps venv
+          . venv/bin/activate
+          pip install tox
+
+      - name: "Show disk utilization BEFORE tests"
+        run: |
+          df -h
+
+      - name: "Run unit tests with Tox and Pytest"
+        run: |
+          # every step gets a fresh shell, so the venv has to be activated again.
+          . venv/bin/activate
+          tox -e py3-unit -- -m ${{inputs.pytest_mark}}
+
+      - name: "Show disk utilization AFTER tests"
+        run: |
+          df -h
+
+  stop-ec2-runner:
+    needs:
+      - start-ec2-runner
+      - run-unit-tests
+    # always tear the instance down, even when the tests failed or were cancelled.
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Harden runner"
+        # v2.10.1
+        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
+        with:
+          egress-policy: audit
+      - name: "Configure AWS credentials"
+        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: "Stop EC2 runner"
+        id: stop-ec2-runner
+        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
+        with:
+          mode: stop
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          label: ${{ needs.start-ec2-runner.outputs.label }}
+          ec2-instance-id: ${{ needs.start-ec2-runner.outputs.ec2-instance-id }}
diff --git a/src/instructlab/training/config.py b/src/instructlab/training/config.py
index bf43f2eb..5cdce064 100644
--- a/src/instructlab/training/config.py
+++ b/src/instructlab/training/config.py
@@ -30,8 +30,8 @@ class DeepSpeedOffloadStrategy(Enum):
 
 # public API
 class DistributedBackend(Enum):
-    FSDP: str = "fsdp"
-    DEEPSPEED: str = "deepspeed"
+    FSDP = "fsdp"
+    DEEPSPEED = "deepspeed"
 
 
 # public API
diff --git a/tests/test_init.py b/tests/test_init.py
new file mode 100644
index 00000000..b361b9ea
--- /dev/null
+++ b/tests/test_init.py
@@ -0,0 +1,7 @@
+# Third Party
+import pytest
+
+
+@pytest.mark.fast
+def test_fake():
+    assert True