diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6eb30c88f..869c6b82c 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -29,6 +29,5 @@ Applicable spec: - [ ] The documentation for charmhub is updated. - [ ] The PR is tagged with appropriate label (`urgent`, `trivial`, `complex`). - [ ] The changelog is updated with changes that affects the users of the charm. -- [ ] The changes do not introduce any regression in code or tests related to LXD runner mode. \ No newline at end of file diff --git a/.github/workflows/e2e_test_run.yaml b/.github/workflows/e2e_test_run.yaml index 7abfa10f5..2cbca1ef3 100644 --- a/.github/workflows/e2e_test_run.yaml +++ b/.github/workflows/e2e_test_run.yaml @@ -12,24 +12,12 @@ on: description: The e2e test runner tag to run the workflow on. type: string required: true - runner-virt-type: - description: The e2e test runner virtualization type. E.g. lxd, or openstack. - # workflow_call does not support choice type. - type: string - required: true workflow_dispatch: inputs: runner-tag: description: The e2e test runner tag to run the workflow on. type: string required: true - runner-virt-type: - description: The e2e test runner virtualization type. - type: choice - required: true - options: - - lxd - - openstack jobs: e2e-test: @@ -37,12 +25,7 @@ jobs: runs-on: [self-hosted, linux, "${{ inputs.runner-tag }}"] steps: - name: Hostname is set to "github-runner" - if: "${{ github.event.inputs.runner-virt-type == 'openstack' }}" run: sudo hostnamectl hostname | grep github-runner - # Snapd can have some issues in privileged LXD containers without setting - # security.nesting=True and this. - - name: Fix snap issue in privileged LXD containers - run: ln -s /bin/true /usr/local/bin/udevadm # Below is a series of simple tests to assess the functionality of the newly spawned runner. 
- name: Echo hello world run: echo "hello world" @@ -104,11 +87,3 @@ jobs: # ~/.local/bin is added to path runner env through in scripts/env.j2 - name: test check-jsonschema run: check-jsonschema --version - - name: Test Firewall - if: "${{ github.event.inputs.runner-virt-type == 'lxd' }}" - run: | - HOST_IP=$(ip route | grep default | cut -f 3 -d" ") - [ $((ping $HOST_IP -c 5 || :) | grep "Destination Port Unreachable" | wc -l) -eq 5 ] - - name: Test sctp support - if: "${{ github.event.inputs.runner-virt-type == 'lxd' }}" - run: sudo apt-get install lksctp-tools -yq && checksctp diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index 3577dba13..fa6410f20 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -23,7 +23,7 @@ jobs: provider: lxd test-tox-env: integration-juju3.1 modules: '["test_charm_scheduled_events", "test_debug_ssh", "test_charm_upgrade"]' - extra-arguments: "-m openstack" + extra-arguments: '-m openstack --log-format="%(asctime)s %(levelname)s %(message)s"' self-hosted-runner: true self-hosted-runner-label: stg-private-endpoint openstack-interface-tests-private-endpoint: @@ -47,8 +47,8 @@ jobs: pre-run-script: scripts/setup-lxd.sh provider: lxd test-tox-env: integration-juju3.6 - modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_runner", "test_reactive"]' - extra-arguments: "-m openstack" + modules: '["test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_repo", "test_charm_fork_path_change", "test_charm_no_runner", "test_charm_runner", "test_reactive"]' + extra-arguments: '-m openstack --log-format="%(asctime)s %(levelname)s %(message)s"' self-hosted-runner: true self-hosted-runner-label: stg-private-endpoint allure-report: diff --git a/.github/workflows/scheduled_e2e_test.yaml b/.github/workflows/scheduled_e2e_test.yaml deleted file mode 100644 index 7e3099109..000000000 --- 
a/.github/workflows/scheduled_e2e_test.yaml +++ /dev/null @@ -1,472 +0,0 @@ -name: Scheduled End-to-End Test - -# Development shifts toward OpenStack support. -# These test is for local LXD and is ran weekly. - -on: - schedule: - - cron: "15 4 * * 2" - - workflow_dispatch: - -jobs: - build-charm: - name: Build Charm - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Remove Unnecessary Components - run: | - rm -rf .git - rm -rf .github - - - name: Write lxd-profile.yaml - run: | - cat << EOF > ./lxd-profile.yaml - config: - security.nesting: true - security.privileged: true - raw.lxc: | - lxc.apparmor.profile=unconfined - lxc.mount.auto=proc:rw sys:rw cgroup:rw - lxc.cgroup.devices.allow=a - lxc.cap.drop= - devices: - kmsg: - path: /dev/kmsg - source: /dev/kmsg - type: unix-char - EOF - - - name: Cache github-runner Charm - uses: actions/cache@v4 - id: cache-charm - with: - path: github-runner_ubuntu-22.04-amd64.charm - key: github-runner-charm-${{ hashFiles('**/*') }} - - - name: Setup LXD - if: steps.cache-charm.outputs.cache-hit != 'true' - uses: canonical/setup-lxd@main - - - name: Install charmcraft - if: steps.cache-charm.outputs.cache-hit != 'true' - run: sudo snap install charmcraft --classic - - - name: Pack github-runner Charm - if: steps.cache-charm.outputs.cache-hit != 'true' - run: charmcraft pack || ( cat ~/.local/state/charmcraft/log/* && exit 1 ) - - - name: Upload github-runner Charm - uses: actions/upload-artifact@v4 - with: - name: dangerous-test-only-github-runner_ubuntu-22.04-amd64.charm - path: github-runner_ubuntu-22.04-amd64.charm - - run-id: - name: Generate Run ID - runs-on: ubuntu-latest - outputs: - run-id: ${{ steps.run-id.outputs.run-id }} - steps: - - name: Generate Run ID - id: run-id - run: | - echo "run-id=e2e-$(LC_ALL=C tr -dc 'a-z' < /dev/urandom | head -c4)" >> $GITHUB_OUTPUT - - deploy-e2e-test-runner: - name: Deploy End-to-End Test Runner (${{ matrix.event.name }}) - runs-on: ubuntu-latest - needs: 
[build-charm, run-id] - strategy: - matrix: - event: - - name: pull_request - abbreviation: pr - - name: workflow_dispatch - abbreviation: wd - - name: push - abbreviation: push - - name: schedule - abbreviation: sd - - name: issues - abbreviation: is - steps: - - name: Setup Lxd Juju Controller - uses: charmed-kubernetes/actions-operator@main - with: - juju-channel: 3.1/stable - provider: lxd - - - name: Install GitHub Cli - run: which gh || sudo apt install gh -y - - - name: Check rate limit - env: - GH_TOKEN: ${{ (matrix.event.name == 'issues' || matrix.event.name == 'schedule') && secrets.E2E_TESTING_TOKEN || secrets.GITHUB_TOKEN }} - run: | - # Check rate limit, this check does not count against the primary rate limit: - # https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit - gh api \ - --method GET \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" --jq ".resources.core" \ - /rate_limit - - name: Create Testing Juju Model - run: juju add-model testing - - - name: Set Testing Model Proxy Configuration - run: | - juju model-config juju-http-proxy=$http_proxy - juju model-config juju-https-proxy=$https_proxy - juju model-config juju-no-proxy=$no_proxy - - - name: Change Testing Model Logging Level - run: juju model-config logging-config="=INFO;unit=DEBUG" - - - name: Download github-runner Charm - uses: actions/download-artifact@v4 - with: - name: dangerous-test-only-github-runner_ubuntu-22.04-amd64.charm - - - name: Enable br_netfilter - run: sudo modprobe br_netfilter - - - name: Generate Runner Name - id: runner-name - run: echo name=${{ matrix.event.abbreviation }}-${{ needs.run-id.outputs.run-id }}${{ github.run_attempt }} >> $GITHUB_OUTPUT - - - name: Copy github-runner Charm - run: | - cp github-runner_ubuntu-22.04-amd64.charm /home/$USER/github-runner_ubuntu-22.04-amd64.charm - - - name: Deploy github-runner Charm (Pull Request, 
Workflow Dispatch and Push) - if: matrix.event.name == 'workflow_dispatch' || matrix.event.name == 'push' || matrix.event.name == 'pull_request' - run: | - juju deploy /home/$USER/github-runner_ubuntu-22.04-amd64.charm \ - ${{ steps.runner-name.outputs.name }} \ - --base ubuntu@22.04 \ - --config path=${{ secrets.E2E_TESTING_REPO }} \ - --config token=${{ secrets.E2E_TESTING_TOKEN }} \ - --config virtual-machines=1 \ - --config denylist=10.0.0.0/8 \ - --config test-mode=insecure - - - name: Checkout branch (Issues, Schedule) - if: matrix.event.name == 'issues' || matrix.event.name == 'schedule' - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - token: ${{ secrets.E2E_TESTING_TOKEN }} - - name: Create temporary orphan branch (Issues, Schedule) - if: matrix.event.name == 'issues' || matrix.event.name == 'schedule' - run: | - # We dont need all content for the test, so create an orphan branch. - git checkout --orphan ${{ steps.runner-name.outputs.name }} - git reset - - WF_FILE=".github/workflows/schedule_issues_test.yaml" - # Replace workflow event in schedule_issues_test.yaml - if [[ ${{ matrix.event.name }} == 'schedule' ]]; then - sed -i "s/workflow_dispatch:/schedule:\n - cron: '*\/5 * * * *'/" $WF_FILE - else - sed -i "s/workflow_dispatch:/issues:\n types: [opened]/" $WF_FILE - fi - git add $WF_FILE - git config user.name github-actions - git config user.email github-actions@github.com - git commit -m"Add ${{matrix.event.name}} workflow" - git push origin ${{ steps.runner-name.outputs.name }} - - name: Deploy github-runner Charm (Issues, Schedule) - if: matrix.event.name == 'issues' || matrix.event.name == 'schedule' - env: - GH_TOKEN: ${{ secrets.E2E_TESTING_TOKEN }} - run: | - # GitHub does not allow to create multiple forks of the same repo under the same user, - # so we need to create a new repository and push the branch to it. 
- gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /user/repos \ - -f name=${{ steps.runner-name.outputs.name }} - - TESTING_REPO=${{ secrets.E2E_TESTING_TOKEN_ORG }}/${{ steps.runner-name.outputs.name }} - - # Create registration token in order to allow listing of runner binaries - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - repos/${TESTING_REPO}/actions/runners/registration-token - - # Push the orphan branch to the newly created repo. - git pull origin ${{ steps.runner-name.outputs.name }} - git remote add testing https://github.com/${TESTING_REPO}.git - git push testing ${{ steps.runner-name.outputs.name }}:main - - juju deploy /home/$USER/github-runner_ubuntu-22.04-amd64.charm \ - ${{ steps.runner-name.outputs.name }} \ - --base ubuntu@22.04 \ - --config path=$TESTING_REPO \ - --config token=${{ secrets.E2E_TESTING_TOKEN }} \ - --config virtual-machines=1 \ - --config denylist=10.0.0.0/8 \ - --config test-mode=insecure - - - name: Watch github-runner (Pull Request) - if: matrix.event.name == 'pull_request' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - timeout-minutes: 30 - run: | - juju debug-log --replay --tail & - - while :; do - JOBS=$(gh api \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ secrets.E2E_TESTING_REPO }}/actions/runs/$GITHUB_RUN_ID/attempts/$GITHUB_RUN_ATTEMPT/jobs) - CONCLUSION=$(echo $JOBS | jq -r '.jobs[] | select(.name == "End-to-End Test / End-to-End Test Run") | .conclusion') - STATUS=$(echo $JOBS | jq -r '.jobs[] | select(.name == "End-to-End Test / End-to-End Test Run") | .status') - if [[ $STATUS != "queued" && $STATUS != "in_progress" ]]; then - break - fi - sleep 10 - done - if [[ $STATUS != "completed" || $CONCLUSION != "success" ]]; then - echo "test workflow failed with status: $STATUS, conclusion: $CONCLUSION" - kill $(jobs -p) - exit 1 - 
fi - - - name: Trigger workflow (Workflow Dispatch and Push) - if: matrix.event.name == 'workflow_dispatch' || matrix.event.name == 'push' - env: - # push requires E2E_TESTING_TOKEN, because if GITHUB_TOKEN is used, no workflow is triggered for a push: - # https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow - GH_TOKEN: ${{ matrix.event.name == 'workflow_dispatch' && secrets.GITHUB_TOKEN || secrets.E2E_TESTING_TOKEN }} - run: | - # Base any future branches on the current branch. - REF_SHA=$(gh api \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ secrets.E2E_TESTING_REPO }}/git/ref/heads/$GITHUB_REF_NAME \ - --jq .object.sha) || (echo "Failed to get REF_SHA using $GITHUB_REF_NAME" && false) - - # Create a temporary reference/branch - # For push, this should trigger the "Push Event Tests" workflow automatically - # because the test is run for branches matching the pattern "push-e2e-*" - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ secrets.E2E_TESTING_REPO }}/git/refs \ - -f ref='refs/heads/${{ steps.runner-name.outputs.name }}' \ - -f sha=$REF_SHA - - # For workflow_dispatch, we need to trigger the "Workflow Dispatch Tests" workflow manually - if ${{ matrix.event.name == 'workflow_dispatch' }}; then - gh workflow run workflow_dispatch_test.yaml \ - -R ${{ secrets.E2E_TESTING_REPO }} \ - --ref ${{ steps.runner-name.outputs.name }} \ - -f runner=${{ steps.runner-name.outputs.name }} - fi - - - name: Watch github-runner (Workflow Dispatch and Push) - if: matrix.event.name == 'workflow_dispatch' || matrix.event.name == 'push' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - juju debug-log --replay --tail & - - get-workflow-status() { - # Search recent workflow runs for the one designated by the run-id ref - output=$(gh run list \ - -R ${{ secrets.E2E_TESTING_REPO }} \ - -L 
100 \ - --json headBranch,status \ - --jq '[.[] | select(.headBranch=="${{ steps.runner-name.outputs.name }}")]') - - # Workflows that have not started have no status - if [ $(echo "$output" | jq 'length') -eq 0 ] - then - echo "not_started" - else - # Parse output with jq to get the status field of the first object - status=$(echo "$output" | jq -r '.[0].status') - echo "$status" - fi - } - - # Wait for the workflow to start while checking its status - for i in {1..360} - do - status=$(get-workflow-status) - echo "workflow status: $status" - if [[ $status != "not_started" && $status != "queued" && $status != "in_progress" ]]; then - break - fi - sleep 10 - done - - # Make sure the workflow was completed or else consider it failed - conclusion=$(gh run list \ - -R ${{ secrets.E2E_TESTING_REPO }} \ - -L 100 \ - --json headBranch,conclusion \ - --jq '.[] | select(.headBranch=="${{ steps.runner-name.outputs.name }}") | .conclusion') - - if [[ $status != "completed" || $conclusion != "success" ]]; then - echo "test workflow failed with status: $status, conclusion: $conclusion" - kill $(jobs -p) - exit 1 - else - echo "Workflow completed with status: $status, conclusion: $conclusion, run-id: ${{ steps.runner-name.outputs.name }}" - kill $(jobs -p) - fi - - - name: Trigger workflow and watch github-runner (Issues, Schedule) - if: matrix.event.name == 'issues' || matrix.event.name == 'schedule' - env: - GH_TOKEN: ${{ secrets.E2E_TESTING_TOKEN }} - run: | - juju debug-log --replay --tail & - - TESTING_REPO=${{ secrets.E2E_TESTING_TOKEN_ORG }}/${{ steps.runner-name.outputs.name }} - - # For issues, we need to trigger the workflow by opening an issue - if ${{ matrix.event.name == 'issues' }}; then - gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${TESTING_REPO}/issues \ - -f title="Test issue ${{ steps.runner-name.outputs.name }}" - fi - - get-workflow-status() { - # Search recent workflow runs for 
the one designated by the run-id ref - output=$(gh run list \ - -R ${TESTING_REPO} \ - -L 100 \ - --json headBranch,status,createdAt \ - --jq '[.[] | select(.headBranch=="main")] | sort_by(.createdAt)') - - # Workflows that have not started have no status - if [ $(echo "$output" | jq 'length') -eq 0 ] - then - echo "not_started" - else - # Parse output with jq to get the status field of the first object - status=$(echo "$output" | jq -r '.[0].status') - echo "$status" - fi - } - - # Wait for the workflow to start while checking its status - for i in {1..360} - do - status=$(get-workflow-status) - echo "workflow status: $status" - if [[ $status != "not_started" && $status != "queued" && $status != "in_progress" ]]; then - break - fi - sleep 10 - done - - # Make sure the workflow was completed or else consider it failed - runs=$(gh api \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${TESTING_REPO}/actions/runs \ - --jq '[.workflow_runs[] | select(.head_branch=="main")] | sort_by(.created_at)') - conclusion=$(echo $runs | jq -r '.[0].conclusion') - wf_run_id=$(echo $runs | jq -r '.[0].id') - - logs_filename=${{matrix.event.name}}-workflow-logs.zip - # We retrieve the logs because the testing repo is deleted at the end of the test - gh api \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${TESTING_REPO}/actions/runs/${wf_run_id}/logs > ${logs_filename} \ - || (echo "Failed to retrieve logs from schedule tests" && rm ${logs_filename}) - - - if [[ $status != "completed" || $conclusion != "success" ]]; then - echo "test workflow failed with status: $status, conclusion: $conclusion" - kill $(jobs -p) - exit 1 - else - echo "Workflow completed with status: $status, conclusion: $conclusion, run-id: ${{ steps.runner-name.outputs.name }}" - kill $(jobs -p) - fi - - name: Upload test logs (Issues, Schedule) - if: always() && (matrix.event.name == 'issues' || matrix.event.name == 
'schedule') - uses: actions/upload-artifact@v4 - with: - name: ${{matrix.event.name}}-workflow-logs.zip - path: ${{matrix.event.name}}-workflow-logs.zip - if-no-files-found: ignore - - - name: Show Firewall Rules - run: | - juju ssh ${{ steps.runner-name.outputs.name }}/0 sudo nft list ruleset - - - name: Clean Up (Workflow Dispatch and Push) - if: always() && (matrix.event.name == 'workflow_dispatch' || matrix.event.name == 'push') - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh api \ - --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/${{ secrets.E2E_TESTING_REPO }}/git/refs/heads/${{ steps.runner-name.outputs.name }}" - echo "Deleted ref ${{ steps.runner-name.outputs.name }}" - - - name: Clean Up (Issues, Schedule) - if: always() && (matrix.event.name == 'issues' || matrix.event.name == 'schedule') - env: - GH_TOKEN: ${{ secrets.E2E_TESTING_TOKEN }} - run: | - set +e - - gh api \ - --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/${{ secrets.E2E_TESTING_REPO }}/git/refs/heads/${{ steps.runner-name.outputs.name }}" \ - && echo "Deleted ref ${{ steps.runner-name.outputs.name }}" - - TESTING_REPO=${{ secrets.E2E_TESTING_TOKEN_ORG }}/${{ steps.runner-name.outputs.name }} - - set -e - - gh api \ - --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/${TESTING_REPO}" - - echo "Deleted repo ${TESTING_REPO}" - - e2e-test: - name: End-to-End Test - needs: [build-charm, run-id] - uses: ./.github/workflows/e2e_test_run.yaml - with: - runner-tag: "pr-${{ needs.run-id.outputs.run-id }}${{ github.run_attempt}}" - runner-virt-type: "lxd" - - required_status_checks: - name: Required E2E Test Status Checks - runs-on: ubuntu-latest - needs: - - deploy-e2e-test-runner - if: always() && !cancelled() - timeout-minutes: 5 - steps: - - run: | - [ '${{ 
needs.deploy-e2e-test-runner.result }}' = 'success' ] || (echo deploy-e2e-test-runner failed && false) diff --git a/.github/workflows/scheduled_integration_test.yaml b/.github/workflows/scheduled_integration_test.yaml deleted file mode 100644 index e4e43dd87..000000000 --- a/.github/workflows/scheduled_integration_test.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: Scheduled integration tests - -# Development shifts toward OpenStack support. -# These tests is for local LXD and is ran weekly. - -on: - schedule: - - cron: "15 5 * * 2" - - workflow_dispatch: - -jobs: - # test option values defined at test/conftest.py are passed on via repository secret - # INTEGRATION_TEST_ARGS to operator-workflows automatically. - integration-tests-juju36: - name: Integration test with juju 3.6 - uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main - secrets: inherit - with: - juju-channel: 3.6/stable - pre-run-script: scripts/pre-integration-test.sh - provider: lxd - test-tox-env: integration-juju3.6 - modules: '["test_charm_base_image", "test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_lxd_runner", "test_charm_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage", "test_debug_ssh", "test_charm_upgrade"]' - integration-tests: - name: Integration test with juju 3.1 - uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main - secrets: inherit - with: - juju-channel: 3.1/stable - pre-run-script: scripts/pre-integration-test.sh - provider: lxd - test-tox-env: integration-juju3.1 - modules: '["test_charm_base_image", "test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_lxd_runner", "test_charm_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage", "test_debug_ssh", 
"test_charm_upgrade"]' diff --git a/.github/workflows/workflow_dispatch_ssh_debug.yaml b/.github/workflows/workflow_dispatch_ssh_debug.yaml index 7dbc9a79a..edbfb3920 100644 --- a/.github/workflows/workflow_dispatch_ssh_debug.yaml +++ b/.github/workflows/workflow_dispatch_ssh_debug.yaml @@ -13,5 +13,5 @@ jobs: runs-on: [self-hosted, linux, "${{ inputs.runner }}"] steps: - name: Setup tmate session - uses: canonical/action-tmate@chore/env_var_change + uses: canonical/action-tmate@main timeout-minutes: 5 diff --git a/.gitignore b/.gitignore index 02e9d4d40..3e550731e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,10 +6,6 @@ placeholders/ build/ .coverage -# testing artifacts -lxd-profile.* -tinyproxy.conf - # development artifacts clouds.yaml .vscode/ diff --git a/README.md b/README.md index 21f7c79b2..27c8f424d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ [![Promote charm](https://github.com/canonical/github-runner-operator/actions/workflows/promote_charm.yaml/badge.svg)](https://github.com/canonical/github-runner-operator/actions/workflows/promote_charm.yaml) [![Discourse Status](https://img.shields.io/discourse/status?server=https%3A%2F%2Fdiscourse.charmhub.io&style=flat&label=CharmHub%20Discourse)](https://discourse.charmhub.io) -This machine charm creates self-hosted runners for running GitHub Actions. Each unit of this charm will start a configurable number of OpenStack or LXD based virtual machines to host them. Every runner performs only one job, after which it unregisters from GitHub to ensure that each job runs in a clean environment. +**Important** Use the [local-lxd](https://charmhub.io/github-runner?channel=local-lxd/stable) track for LXD runners. + +This machine charm creates self-hosted runners for running GitHub Actions. Each unit of this charm will start a configurable number of OpenStack based virtual machines to host them. 
Every runner performs only one job, after which it unregisters from GitHub to ensure that each job runs in a clean environment. The charm will periodically check the number of runners and spawn or destroy runners as necessary to match the number provided by configuration of runners. Both the reconciliation interval and the number of runners to maintain are configurable. @@ -17,10 +19,7 @@ For information about how to deploy, integrate, and manage this charm, see the o ## Get started -In order to get familiar with the charm, it is recommended to follow the [GitHub Runner tutorial](https://charmhub.io/github-runner/docs/tutorial-quick-start) which will guide you through the process of deploying the charm -and executing a workflow job using GitHub actions. - -For more information about a production deployment, the how-to-guide +For information about a production deployment, the how-to-guide [How to spawn OpenStack runner](https://charmhub.io/github-runner/docs/how-to-openstack-runner) can be useful. ### Basic operations diff --git a/charmcraft.yaml b/charmcraft.yaml index a395fac8b..22e2eba24 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -12,15 +12,6 @@ parts: - libssl-dev # for cryptography - rust-all # for cryptography - pkg-config # for cryptography - scripts: - plugin: dump - source: scripts - organize: - build-lxd-image.sh: scripts/build-lxd-image.sh - reactive_runner.py: scripts/reactive_runner.py - repo_policy_compliance_service.py: scripts/repo_policy_compliance_service.py - prime: - - scripts/ bases: - build-on: - name: "ubuntu" diff --git a/config.yaml b/config.yaml index a3036c0f1..cb4e6f4cd 100644 --- a/config.yaml +++ b/config.yaml @@ -2,20 +2,6 @@ # See LICENSE file for licensing details. options: - base-image: - type: string - default: "jammy" - description: >- - The base ubuntu OS image to use for the runners. Codename (e.g. "jammy") or version tag - (e.g. 22.04) is supported as input. 
Currently only supports LTS versions of jammy and higher, - i.e. jammy, noble. Only applicable for LXD mode. - denylist: - type: string - default: "" - description: >- - A comma separated list of IPv4 networks in CIDR notation that runners can not access. - The runner will always have access to essential services such as DHCP and DNS regardless - of the denylist configuration. dockerhub-mirror: type: string default: "" @@ -27,9 +13,8 @@ options: type: string default: "" description: >- - Providing a valid clouds.yaml will enable OpenStack integration. Setting the - openstack-clouds-yaml would enable spawning runners on OpenStack, and disable usage of - local LXD for runners. The format for the clouds.yaml is described in the docs: + The clouds.yaml yaml necessary for OpenStack integration. + The format for the clouds.yaml is described in the docs: (https://docs.openstack.org/python-openstackclient/pike/configuration/index.html#clouds-yaml). openstack-flavor: type: string @@ -100,15 +85,6 @@ options: The URL to the repository-policy-compliance service. This option requires the repo-policy-compliance-token to be set. If not set, the repository-policy-compliance service will not be used. This option is only supported when using OpenStack Cloud. - runner-storage: - type: string - default: "juju-storage" - description: >- - The storage for runner LXD instance to mount as disk. Can be "memory" or "juju-storage". If - set to "memory", memory-based storage created with tmpfs will be used as disk for LXD - instances. If set to "juju-storage", storage provided by juju will be used as disk for LXD - instances. This configuration cannot be changed after deployment. The charm needs to be - redeployed to change the storage option. test-mode: type: string description: >- @@ -128,20 +104,3 @@ options: description: >- The number of virtual machine runners. This charm will spawn or destroy virtual machines runners to match this setting. 
- vm-cpu: - type: int - default: 2 - description: >- - The number of CPUs used per virtual machine runner. - vm-memory: - type: string - default: 7GiB - description: >- - Amount of memory to allocate per virtual machine runner. Positive integers with KiB, MiB, GiB, - TiB, PiB, EiB suffix. - vm-disk: - type: string - default: 10GiB - description: >- - Amount of disk space to allocate to root disk for virtual machine runner. Positive integers - with KiB, MiB, GiB, TiB, PiB, EiB suffix. diff --git a/docs/explanation/charm-architecture.md b/docs/explanation/charm-architecture.md index 5bf4cf9aa..050374754 100644 --- a/docs/explanation/charm-architecture.md +++ b/docs/explanation/charm-architecture.md @@ -4,50 +4,24 @@ A [Juju](https://juju.is/) [charm](https://juju.is/docs/olm/charmed-operators) t Conceptually, the charm can be divided into the following: -- Management of LXD ephemeral virtual machines to host [ephemeral self-hosted runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/autoscaling-with-self-hosted-runners#using-ephemeral-runners-for-autoscaling) +- Management of OpenStack virtual machines to host self-hosted runners - Management of the virtual machine image - Management of the network - GitHub API usage - Management of [Python web service for checking GitHub repository settings](https://github.com/canonical/repo-policy-compliance) - Management of dependencies -## LXD ephemeral virtual machines +## Virtual machines -To ensure a clean and isolated environment for every runner, self-hosted runners use LXD virtual machines. The charm spawns virtual machines, setting resources based on charm configurations. The self-hosted runners start with the ephemeral option and will clean themselves up once the execution has finished, freeing the resources. 
This is [similar to how GitHub hosts their runners due to security concerns](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). +To ensure a clean and isolated environment for every runner, self-hosted runners use OpenStack virtual machines. The charm spawns virtual machines, setting resources based on charm configurations. Virtual machines will not be reused between jobs, this is [similar to how GitHub hosts their runners due to security concerns](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). As the virtual machines are single-use, the charm will replenish virtual machines on a regular schedule. This time period is determined by the [`reconcile-interval` configuration](https://charmhub.io/github-runner/configure#reconcile-interval). On schedule or upon configuration change, the charm performs a reconcile to ensure the number of runners managed by the charm matches the [`virtual-machines` configuration](https://charmhub.io/github-runner/configure#virtual-machines), and the resources used by the runners match the various resource configurations. -To prevent disk IO exhaustion on the Juju machine on disk-intensive GitHub workflows, the charm has two storage options provided by [`runner-storage` configuration](https://charmhub.io/github-runner/configure#runner-storage): - -- Use memory of the Juju machine as disk. Under this option, the [`vm-disk` configuration](https://charmhub.io/github-runner/configure#vm-disk) can impact the memory usage of the Juju machine. -- Use storage mount by Juju as the disk. - ## Virtual machine image -The virtual machine images are built on installation and on a schedule every 6 hours. 
These images are constructed by launching a virtual machine instance, modifying the instance with configurations and software installs, and then exporting the instance as an image. This process reduces the time needed to launch a virtual machine instance for hosting the self-hosted runner application. - -The software installed in the image includes: - -- APT packages: - - docker.io - - npm - - python3-pip - - shellcheck - - jq - - wget -- npm packages: - - yarn -- Binary downloaded: - - yq - -The configurations applied in the image include: - -- Creating a group named `microk8s`. -- Adding the `ubuntu` user to the `microk8s` group. Note that the `microk8s` package is not installed in the image; this preconfigures the group for users who install the package. -- Adding the `ubuntu` user to the `docker` group. -- Adding iptables rules to accept traffic for the DOCKER-USER chain. This resolves a networking conflict with LXD. +The virtual machine images are built on installation and on a schedule using the [github-runner-image-builder](https://github.com/canonical/github-runner-image-builder). ## Network configuration @@ -72,10 +46,6 @@ It's worth noting that this setup deviates from the behaviour when not using apr where these variables are set in the runner environment. In that scenario, traffic to non-standard ports would also be directed to the HTTP(s) proxy, unlike when using aproxy. -### denylist - -The nftables on the Juju machine are configured to deny traffic from the runner virtual machine to IPs on the [`denylist` configuration](https://charmhub.io/github-runner/configure#denylist). The runner will always have access to essential services such as DHCP and DNS, regardless of the denylist configuration. - ## GitHub API usage The charm requires a GitHub personal access token for the [`token` configuration](https://charmhub.io/github-runner/configure#token). 
This token is used for: @@ -97,32 +67,6 @@ The [repo-policy-compliance](https://github.com/canonical/repo-policy-compliance Using the [pre-job script](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job#about-pre--and-post-job-scripts), the self-hosted runners call the Python web service to check if the GitHub repository settings for the job are compliant. If not compliant, it will output an error message and force stop the runner to prevent code from being executed. -## Dependencies management - -Upon installing or upgrading the charm, the kernel will be upgraded, and the Juju machine will be restarted if needed. - -The charm installs the following dependencies: - -- For running repo-policy-compliance - - gunicorn -- For firewall to prevent runners from accessing web service on the denylist - - nftables -- For virtualization and virtual machine management - - lxd - - cpu-checker - - libvirt-clients - - libvirt-daemon-driver-qemu - - apparmor-utils - -These dependencies can be regularly updated using the [landscape-client charm](https://charmhub.io/landscape-client). - -The charm installs the following dependencies and regularly updates them: - -- repo-policy-compliance -- GitHub self-hosted runner application - -The charm checks if the installed versions are the latest and performs upgrades if needed before creating new virtual machines for runners. - ## COS Integration Upon integration through the `cos-agent`, the charm initiates the logging of specific metric events into the file `/var/log/github-runner-metrics.log`. For comprehensive details, please refer to the diff --git a/docs/explanation/ssh-debug.md b/docs/explanation/ssh-debug.md index 617ea5d71..568465cfc 100644 --- a/docs/explanation/ssh-debug.md +++ b/docs/explanation/ssh-debug.md @@ -10,9 +10,3 @@ by default on [tmate-ssh-server charm](https://charmhub.io/tmate-ssh-server/). 
Authorized keys are registered via [action-tmate](https://github.com/canonical/action-tmate/)'s `limit-access-to-actor` feature. This feature uses GitHub users's SSH key to launch an instance of tmate session with `-a` option, which adds the user's SSH key to `~/.ssh/authorized_keys`. - -### Firewall rules - -By default, if there are any overlapping IPs within the `denylist` config option with the IP -assigned to `tmate-ssh-server`, an exception to that IP will be made so that the `debug-ssh` -relation can be set up correctly. diff --git a/docs/index.md b/docs/index.md index b11d83a63..85a26b43e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # GitHub Runner Operator -A [Juju](https://juju.is/) [charm](https://juju.is/docs/olm/charmed-operators) for deploying and managing [GitHub self-hosted runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners) on virtual machines. The charm maintains a set of ephemeral self-hosted runners, each isolated in a single-use virtual machine instance. +A [Juju](https://juju.is/) [charm](https://juju.is/docs/olm/charmed-operators) for deploying and managing [GitHub self-hosted runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners) on virtual machines. The charm maintains a set of self-hosted runners, each isolated in a single-use virtual machine instance. Like any Juju charm, this charm supports one-line deployment, configuration, integration, scaling, and more. For the github-runner-operator charm, this includes: @@ -21,8 +21,10 @@ The charm enforces a set of GitHub repository settings as best practice. This is | | | |--|--| -| [Tutorials](https://charmhub.io/github-runner/docs/quick-start)
Get started - a hands-on introduction to using the GitHub runner charm for new users
| [How-to guides](https://charmhub.io/github-runner/docs/run-on-lxd)
Step-by-step guides covering key operations and common tasks | -| [Reference](https://charmhub.io/github-runner/docs/actions)
Technical information - specifications, APIs, architecture | [Explanation](https://charmhub.io/github-runner/docs/charm-architecture)
Concepts - discussion and clarification of key topics | +| [Overview](https://charmhub.io/github-runner)
Overview of the charm
| [How-to guides](https://charmhub.io/github-runner/docs/how-to-openstack-runner)
Step-by-step guides covering key operations and common tasks | +| [Reference](https://charmhub.io/github-runner/docs/reference-actions)
Technical information - specifications, APIs, architecture | [Explanation](https://charmhub.io/github-runner/docs/explanation-charm-architecture)
Concepts - discussion and clarification of key topics | + +If you want to use ephemeral LXD virtual machines spawned by charm, you can refer to the section [Track local-lxd](https://charmhub.io/github-runner/docs/local-lxd). ## Contributing to this documentation @@ -43,33 +45,53 @@ Thinking about using the GitHub runner charm for your next project? [Get in touc # Contents -1. [Tutorial](tutorial) - 1. [Managing resource usage](tutorial/managing-resource-usage.md) - 1. [Quick start](tutorial/quick-start.md) 1. [How to](how-to) 1. [Add custom labels](how-to/add-custom-labels.md) 1. [Change repository or organization](how-to/change-path.md) 1. [Change GitHub personal access token](how-to/change-token.md) 1. [Comply with security requirements](how-to/comply-security.md) - 1. [Restrict self-hosted runner network access](how-to/configure-denylist.md) - 1. [Configure runner storage](how-to/configure-runner-storage.md) 1. [Contribute](how-to/contribute.md) 1. [Debug with SSH](how-to/debug-with-ssh.md) - 1. [Deploy on ARM64](how-to/deploy-on-arm64.md) 1. [Integrate with COS](how-to/integrate-with-cos.md) 1. [Spawn OpenStack runner](how-to/openstack-runner.md) 1. [Comply with repository policies](how-to/repo-policy.md) - 1. [Run on LXD cloud](how-to/run-on-lxd.md) - 1. [Set base image](how-to/set-base-image.md) 1. [Set up reactive spawning](how-to/reactive.md) 1. [Reference](reference) 1. [Actions](reference/actions.md) - 1. [ARM64](reference/arm64.md) 1. [Configurations](reference/configurations.md) 1. [COS Integration](reference/cos.md) 1. [External Access](reference/external-access.md) 1. [Integrations](reference/integrations.md) 1. [Token scopes](reference/token-scopes.md) 1. [Explanation](explanation) - 1. [ARM64](explanation/arm64.md) 1. [Charm architecture](explanation/charm-architecture.md) + 1. [SSH Debug](explanation/ssh-debug.md) +1. [Track local-lxd](local-lxd) + 1. [Tutorial](local-lxd/tutorial) + 1. 
[Managing resource usage](local-lxd/tutorial/managing-resource-usage.md) + 1. [Quick start](local-lxd/tutorial/quick-start.md) + 1. [How to](local-lxd/how-to) + 1. [Add custom labels](local-lxd/how-to/add-custom-labels.md) + 1. [Change repository or organization](local-lxd/how-to/change-path.md) + 1. [Change GitHub personal access token](local-lxd/how-to/change-token.md) + 1. [Comply with security requirements](local-lxd/how-to/comply-security.md) + 1. [Restrict self-hosted runner network access](local-lxd/how-to/configure-denylist.md) + 1. [Configure runner storage](local-lxd/how-to/configure-runner-storage.md) + 1. [Debug with SSH](local-lxd/how-to/debug-with-ssh.md) + 1. [Deploy on ARM64](local-lxd/how-to/deploy-on-arm64.md) + 1. [Integrate with COS](local-lxd/how-to/integrate-with-cos.md) + 1. [Comply with repository policies](local-lxd/how-to/repo-policy.md) + 1. [Run on LXD cloud](local-lxd/how-to/run-on-lxd.md) + 1. [Set base image](local-lxd/how-to/set-base-image.md) + 1. [Reference](local-lxd/reference) + 1. [Actions](local-lxd/reference/actions.md) + 1. [ARM64](local-lxd/reference/arm64.md) + 1. [Configurations](local-lxd/reference/configurations.md) + 1. [COS Integration](local-lxd/reference/cos.md) + 1. [External Access](local-lxd/reference/external-access.md) + 1. [Integrations](local-lxd/reference/integrations.md) + 1. [Token scopes](local-lxd/reference/token-scopes.md) + 1. [Explanation](local-lxd/explanation) + 1. [ARM64](local-lxd/explanation/arm64.md) + 1. [Charm architecture](local-lxd/explanation/charm-architecture.md) + 1. 
[SSH Debug](local-lxd/explanation/ssh-debug.md) diff --git a/docs/explanation/arm64.md b/docs/local-lxd/explanation/arm64.md similarity index 100% rename from docs/explanation/arm64.md rename to docs/local-lxd/explanation/arm64.md diff --git a/docs/local-lxd/explanation/charm-architecture.md b/docs/local-lxd/explanation/charm-architecture.md new file mode 100644 index 000000000..345733810 --- /dev/null +++ b/docs/local-lxd/explanation/charm-architecture.md @@ -0,0 +1,130 @@ +# Charm architecture + +Conceptually, the charm can be divided into the following: + +- Management of LXD ephemeral virtual machines to host [ephemeral self-hosted runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/autoscaling-with-self-hosted-runners#using-ephemeral-runners-for-autoscaling) +- Management of the virtual machine image +- Management of the network +- GitHub API usage +- Management of [Python web service for checking GitHub repository settings](https://github.com/canonical/repo-policy-compliance) +- Management of dependencies + +## LXD ephemeral virtual machines + +To ensure a clean and isolated environment for every runner, self-hosted runners use LXD virtual machines. The charm spawns virtual machines, setting resources based on charm configurations. The self-hosted runners start with the ephemeral option and will clean themselves up once the execution has finished, freeing the resources. This is similar to [how GitHub hosts their runners due to security concerns](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). + +As the virtual machines are single-use, the charm will replenish virtual machines on a regular schedule. This time period is determined by the [`reconcile-interval` configuration](https://charmhub.io/github-runner/configure#reconcile-interval). 
+ +On schedule or upon configuration change, the charm performs a reconcile to ensure the number of runners managed by the charm matches the [`virtual-machines` configuration](https://charmhub.io/github-runner/configure#virtual-machines), and the resources used by the runners match the various resource configurations. + +To prevent disk IO exhaustion on the Juju machine on disk-intensive GitHub workflows, the charm has two storage options provided by [`runner-storage` configuration](https://charmhub.io/github-runner/configure#runner-storage): + +- Use memory of the Juju machine as disk. Under this option, the [`vm-disk` configuration](https://charmhub.io/github-runner/configure#vm-disk) can impact the memory usage of the Juju machine. +- Use storage mount by Juju as the disk. + +## Virtual machine image + +The virtual machine images are built on installation and on a schedule every 6 hours. These images are constructed by launching a virtual machine instance, modifying the instance with configurations and software installs, and then exporting the instance as an image. This process reduces the time needed to launch a virtual machine instance for hosting the self-hosted runner application. + +The software installed in the image includes: + +- APT packages: + - `docker.io` + - `npm` + - `python3-pip` + - `shellcheck` + - `jq` + - `wget` +- npm packages: + - `yarn` +- Binary downloaded: + - `yq` + +The configurations applied in the image include: + +- Creating a group named `microk8s`. +- Adding the `ubuntu` user to the `microk8s` group. Note that the `microk8s` package is not installed in the image; this preconfigures the group for users who install the package. +- Adding the `ubuntu` user to the `docker` group. +- Adding iptables rules to accept traffic for the DOCKER-USER chain. This resolves a networking conflict with LXD. + +## Network configuration + +The charm respects the HTTP(S) proxy configuration of the model configuration of Juju. 
The configuration can be set with [`juju model-config`](https://juju.is/docs/juju/juju-model-config) using the following keys: `juju-http-proxy`, `juju-https-proxy`, `juju-no-proxy`. +The GitHub self-hosted runner applications will be configured to utilize the proxy configuration. +This involves setting environment variables such as `http_proxy`, `https_proxy`, `no_proxy`, `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` +in various locations within the runner environment, such as `/etc/environment`. + +However, employing this approach with environment variables has its drawbacks. +Not all applications within a workflow may adhere to these variables as they +[lack standardization](https://about.gitlab.com/blog/2021/01/27/we-need-to-talk-no-proxy/). +This inconsistency can result in failed workflows, prompting the introduction of aproxy, as detailed in the subsection below. + +### aproxy +If the proxy configuration is utilized and [aproxy](https://github.com/canonical/aproxy) is specified through the charm's configuration option, +all HTTP(S) requests to standard ports (80, 443) within the GitHub workflow will be automatically directed +to the specified HTTP(s) proxy. Network traffic destined for ports 80 and 443 is redirected to aproxy using iptables. +aproxy then forwards received packets to the designated HTTP(S) proxy. +Beyond that, the environment variables (`http_proxy`, `https_proxy`, `no_proxy`, `HTTP_PROXY`, `HTTPS_PROXY`, `NO_PROXY`) +will no longer be defined in the runner environment. +It's worth noting that this setup deviates from the behavior when not using aproxy, +where these variables are set in the runner environment. In that scenario, traffic to non-standard ports +would also be directed to the HTTP(s) proxy, unlike when using aproxy. + +### denylist + +The nftables on the Juju machine are configured to deny traffic from the runner virtual machine to IPs on the [`denylist` configuration](https://charmhub.io/github-runner/configure#denylist). 
The runner will always have access to essential services such as DHCP and DNS, regardless of the denylist configuration. + +## GitHub API usage + +The charm requires a GitHub personal access token for the [`token` configuration](https://charmhub.io/github-runner/configure#token). This token is used for: + +- Requesting self-hosted runner registration tokens +- Requesting self-hosted runner removal tokens +- Requesting a list of runner applications +- Requesting a list of self-hosted runners configured in an organization or repository +- Deleting self-hosted runners + +The token is also passed to [repo-policy-compliance](https://github.com/canonical/repo-policy-compliance) to access GitHub API for the service. + +Note that the GitHub API uses a [rate-limiting mechanism](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28). When this limit is reached, the charm may not be able to perform the necessary operations and may go into +BlockedStatus. The charm will automatically recover from this state once the rate limit is reset, but using a different token with a higher rate limit may be a better solution depending on your deployment requirements. + +## GitHub repository setting check + +The [repo-policy-compliance](https://github.com/canonical/repo-policy-compliance) is a [Flask application](https://flask.palletsprojects.com/) hosted on [Gunicorn](https://gunicorn.org/) that provides a RESTful HTTP API to check the settings of GitHub repositories. This ensures the GitHub repository settings do not allow the execution of code not reviewed by maintainers on the self-hosted runners. + +Using the [pre-job script](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job#about-pre--and-post-job-scripts), the self-hosted runners call the Python web service to check if the GitHub repository settings for the job are compliant. 
If not compliant, it will output an error message and force stop the runner to prevent code from being executed. + +## Dependencies management + +Upon installing or upgrading the charm, the kernel will be upgraded, and the Juju machine will be restarted if needed. + +The charm installs the following dependencies: + +- For running repo-policy-compliance + - gunicorn +- For firewall to prevent runners from accessing web service on the denylist + - nftables +- For virtualization and virtual machine management + - lxd + - cpu-checker + - libvirt-clients + - libvirt-daemon-driver-qemu + - apparmor-utils + +These dependencies can be regularly updated using the [`landscape-client` charm](https://charmhub.io/landscape-client). + +The charm installs the following dependencies and regularly updates them: + +- repo-policy-compliance +- GitHub self-hosted runner application + +The charm checks if the installed versions are the latest and performs upgrades if needed before creating new virtual machines for runners. + +## COS Integration +Upon integration through the `cos-agent`, the charm initiates the logging of specific metric events +into the file `/var/log/github-runner-metrics.log`. For comprehensive details, please refer to the +pertinent [specification](https://discourse.charmhub.io/t/specification-isd075-github-runner-cos-integration/12084). + +Subsequently, the `grafana-agent` transmits this log file to Loki, facilitating access for Grafana's visualization capabilities. +Notably, most events are transmitted during reconciliation. This approach prioritizes long-term monitoring over real-time updates, aligning with the intended monitoring objectives. 
diff --git a/docs/local-lxd/explanation/ssh-debug.md b/docs/local-lxd/explanation/ssh-debug.md new file mode 100644 index 000000000..be64c96af --- /dev/null +++ b/docs/local-lxd/explanation/ssh-debug.md @@ -0,0 +1,18 @@ +# SSH Debug + +SSH debugging allows a user to identify and resolve issues or errors that occur through the secure +shell (SSH) connection between a client and a server. + +To enhance the security of the runner and the infrastructure behind the runner, only user ssh-keys +registered on [Authorized Keys](https://github.com/tmate-io/tmate-ssh-server/pull/93) are allowed +by default on [tmate-ssh-server charm](https://charmhub.io/tmate-ssh-server/). + +Authorized keys are registered via [action-tmate](https://github.com/canonical/action-tmate/)'s +`limit-access-to-actor` feature. This feature uses a GitHub user's SSH key to launch an instance +of tmate session with `-a` option, which adds the user's SSH key to `~/.ssh/authorized_keys`. + +### Firewall rules + +By default, if there are any overlapping IPs within the `denylist` config option with the IP +assigned to `tmate-ssh-server`, an exception to that IP will be made so that the `debug-ssh` +relation can be set up correctly. diff --git a/docs/local-lxd/how-to/add-custom-labels.md b/docs/local-lxd/how-to/add-custom-labels.md new file mode 100644 index 000000000..6882628a4 --- /dev/null +++ b/docs/local-lxd/how-to/add-custom-labels.md @@ -0,0 +1,15 @@ +# How to add custom labels + +This charm supports adding custom labels to the runners. + +By using [`juju config`](https://juju.is/docs/juju/juju-config) to change the +[charm configuration labels](https://charmhub.io/github-runner/configure#labels), additional +custom labels can be attached to the self-hosted runners. + +```shell +juju config labels= +``` + +Examples of a `COMMA_SEPARATED_LABELS` value include "large,gpu" and "small,arm64". 
+Accepted values are alphanumeric values with underscores (_); whitespaces before and after the +word will be automatically trimmed. diff --git a/docs/local-lxd/how-to/change-path.md b/docs/local-lxd/how-to/change-path.md new file mode 100644 index 000000000..30965a2d5 --- /dev/null +++ b/docs/local-lxd/how-to/change-path.md @@ -0,0 +1,9 @@ +# How to change repository or organization + +This charm supports changing the GitHub repository or GitHub organization the self-hosted runners are connected to. + +By using [`juju config`](https://juju.is/docs/juju/juju-config) to change the [charm configuration path](https://charmhub.io/github-runner/configure#path) to another repository or organization, the charm unregisters and removes the old self-hosted runners and instantiates new ones for the new configuration. + +```shell +juju config path= +``` diff --git a/docs/local-lxd/how-to/change-token.md b/docs/local-lxd/how-to/change-token.md new file mode 100644 index 000000000..c058840bc --- /dev/null +++ b/docs/local-lxd/how-to/change-token.md @@ -0,0 +1,19 @@ +# How to change GitHub personal access token + +This charm supports changing the [GitHub personal access token (PAT)](https://github.com/settings/tokens) used. + +## Changing the token + +Create a new [GitHub Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens). + +An example classic token scope for repository use: + +- `repo` + +For managing token scopes (fine-grained token), refer to [the token scopes Reference page](https://charmhub.io/github-runner/docs/reference-token-scopes). + +By using [`juju config`](https://juju.is/docs/juju/juju-config) to change the [charm configuration token](https://charmhub.io/github-runner/configure#token) the charm unregisters and removes the old self-hosted runners and instantiates new ones. 
+ +```shell +juju config token= +``` diff --git a/docs/local-lxd/how-to/comply-security.md b/docs/local-lxd/how-to/comply-security.md new file mode 100644 index 000000000..9368115d7 --- /dev/null +++ b/docs/local-lxd/how-to/comply-security.md @@ -0,0 +1,23 @@ +# How to comply with security requirements + +According to GitHub, running code inside the GitHub self-hosted runner [poses a significant security risk of arbitrary code execution](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). The self-hosted runners managed by the charm are isolated in its own single-use virtual machine instance. In addition, the charm enforces some repository settings to ensure all code running on the self-hosted runners is reviewed by someone trusted. + +The repository settings are enforced with the [repo-policy-compliance Python library](https://github.com/canonical/repo-policy-compliance). The enforced rules differ depending on how the GitHub Actions workflow is triggered. The details can be found in the [README](https://github.com/canonical/repo-policy-compliance/blob/main/README.md). + +In this guide, a recommended set of policies will be presented, but any set repository settings that passes the [Python library](https://github.com/canonical/repo-policy-compliance) checks will work with the self-hosted runners managed by this charm. + +## Recommended policy + +- For outside collaborators the permission should be set to read. See [here](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/managing-teams-and-people-with-access-to-your-repository#changing-permissions-for-a-team-or-person) for instructions to change collaborator permissions. Outside collaborators will still be able to contribute with pull requests, but reviews will be needed. Details in a later section. 
+- Create the following branch protection rules, with the instructions [here](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-protected-branches/managing-a-branch-protection-rule#creating-a-branch-protection-rule): + - branch name pattern `**` with `Require signed commits` enabled. + - branch name pattern matching only the default branch of the repository, such as `main`, with the following enabled: + - `Dismiss stale pull request approvals when new commits are pushed` + - `Require signed commits` + - `Do not allow bypassing the above settings` + +With these settings, the common workflow of creating branches with pull requests and merging to the default branch is supported. Other GitHub Actions workflow triggers such as `workflow_dispatch`, `push`, and `schedule` are supported as well. + +### Working with outside collaborators + +Generally, outside collaborators are not completely trusted, but still would need to contribute in some manner. As such, this charm requires pull requests by outside collaborators to be reviewed by someone with `write` permission or above. Once the review is completed, the reviewer should add a comment including the following string: `/canonical/self-hosted-runners/run-workflows `, where `` is the commit SHA of the approved commit. Once posted, the self-hosted runners will run the workflow for this commit. 
diff --git a/docs/how-to/configure-denylist.md b/docs/local-lxd/how-to/configure-denylist.md similarity index 100% rename from docs/how-to/configure-denylist.md rename to docs/local-lxd/how-to/configure-denylist.md diff --git a/docs/how-to/configure-runner-storage.md b/docs/local-lxd/how-to/configure-runner-storage.md similarity index 70% rename from docs/how-to/configure-runner-storage.md rename to docs/local-lxd/how-to/configure-runner-storage.md index e659c6648..14bc94748 100644 --- a/docs/how-to/configure-runner-storage.md +++ b/docs/local-lxd/how-to/configure-runner-storage.md @@ -16,7 +16,7 @@ The `runner-storage` configuration needs to be set to `memory` during deployment An example deployment: ```shell -juju deploy github-runner --constraints="cores=4 mem=16G root-disk=20G virt-type=virtual-machine" --config token= --config path= --config runner-storage=memory --config vm-memory=2GiB --config vm-disk=10GiB +juju deploy github-runner --channel=local-lxd/stable --constraints="cores=4 mem=16G root-disk=20G virt-type=virtual-machine" --config token= --config path= --config runner-storage=memory --config vm-memory=2GiB --config vm-disk=10GiB ``` ## Storage provided by Juju @@ -26,7 +26,7 @@ The Juju storage needs to be mounted during deployment, and the `runner-storage` An example deployment: ```shell -juju deploy github-runner --constraints="cores=4 mem=6G root-disk=30G virt-type=virtual-machine" --config token= --config path= --config runner-storage=juju-storage --config vm-memory=2GiB --config vm-memory=10GiB --storage runner=rootfs +juju deploy github-runner --channel=local-lxd/stable --constraints="cores=4 mem=6G root-disk=30G virt-type=virtual-machine" --config token= --config path= --config runner-storage=juju-storage --config vm-memory=2GiB --config vm-memory=10GiB --storage runner=rootfs ``` The above example uses `rootfs`, which is using the root disk of the Juju machine. Hence the root-disk size was increased to 30G. 
diff --git a/docs/local-lxd/how-to/debug-with-ssh.md b/docs/local-lxd/how-to/debug-with-ssh.md new file mode 100644 index 000000000..18453123c --- /dev/null +++ b/docs/local-lxd/how-to/debug-with-ssh.md @@ -0,0 +1,53 @@ +# How to debug with ssh + +The charm exposes an integration `debug-ssh` interface which can be used with +[tmate-ssh-server charm](https://charmhub.io/tmate-ssh-server/) to pre-configure runners with +environment variables to be picked up by [action-tmate](https://github.com/canonical/action-tmate/) +for automatic configuration. + +## Requirements + +To enhance the security of self-hosted runners and its infrastructure, only authorized connections +can be established. Hence, action-tmate users must have +[ssh-key registered](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/adding-a-new-ssh-key-to-your-github-account) +on the GitHub account. + +## Deploying + +Use the following command to deploy and integrate github-runner with tmate-ssh-server. + +```shell +juju deploy tmate-ssh-server +juju integrate tmate-ssh-server github-runner +``` + +Idle runners will be flushed and restarted. Busy runners will be configured automatically on next +spawn. + +## Using the action + +Create a workflow that looks like the following within your workflow to enable action-tmate. + +```yaml +name: SSH Debug workflow example + +on: [pull_request] + +jobs: + build: + runs-on: [self-hosted] + steps: + - uses: actions/checkout@v3 + - name: Setup tmate session + uses: canonical/action-tmate@main +``` + +The output of the action looks like the following. + +``` + +SSH: ssh -p 10022 @ +or: ssh -i -p10022 @ +``` + +Read more about [action-tmate's usage here](https://github.com/canonical/action-tmate). 
diff --git a/docs/how-to/deploy-on-arm64.md b/docs/local-lxd/how-to/deploy-on-arm64.md similarity index 96% rename from docs/how-to/deploy-on-arm64.md rename to docs/local-lxd/how-to/deploy-on-arm64.md index a08456354..c5320d772 100644 --- a/docs/how-to/deploy-on-arm64.md +++ b/docs/local-lxd/how-to/deploy-on-arm64.md @@ -17,7 +17,7 @@ instance to deploy the GitHub Runner on ARM64 architecture. Run the following command: ```shell -juju deploy github-runner \ +juju deploy github-runner --channel=local-lxd/stable \ --constraints="instance-type=a1.metal arch=arm64" \ --config token= --config path= ``` diff --git a/docs/local-lxd/how-to/integrate-with-cos.md b/docs/local-lxd/how-to/integrate-with-cos.md new file mode 100644 index 000000000..167728ff5 --- /dev/null +++ b/docs/local-lxd/how-to/integrate-with-cos.md @@ -0,0 +1,50 @@ +# How to integrate with COS + +This guide demonstrates the process of integrating with the [Canonical Observability Stack (COS)](https://charmhub.io/topics/canonical-observability-stack) using the optional `cos-agent` integration provided by this charm. + +The `cos-agent` integration can be consumed by the [grafana-agent](https://charmhub.io/grafana-agent) charm, which is responsible for transmitting logs, Prometheus metrics, and Grafana dashboards to the COS stack. + +> NOTE: The Github Runner charm and `grafana-agent` charm function as machine charms, while the COS stack contains Kubernetes charms. Therefore, establishing [cross-model integrations](https://juju.is/docs/juju/manage-cross-model-integrations) is necessary, along with potential firewall rule configurations to allow inter-model traffic. + + +## Requirements +1. Deploy the Github Runner Charm with the application name `github-runner` in the `machine-model`. +2. Deploy the COS stack on a Kubernetes cloud (refer to [this tutorial](https://charmhub.io/topics/canonical-observability-stack/tutorials/install-microk8s)). 
+ - Ensure `loki`, `prometheus`, `grafana`, and `traefik` charms are deployed within a model named `k8s-model`. + - Integration between `loki` and `traefik` is required to enable `grafana-agent` to transmit logs by setting a public IP for the Loki service accessible from the machine cloud. + - Confirm that both models exist in the same Juju controller. If not, adjust the model names by appending the respective controller name (followed by ":") in the subsequent steps. Ensure you have the necessary [permissions](https://juju.is/docs/juju/manage-cross-model-integrations#heading--control-access-to-an-offer) to consume the offers. + +## Steps + +1. Deploy the `grafana-agent` charm in the machine model. + ```shell + juju switch machine-model + juju deploy grafana-agent --channel latest/edge + ``` +2. Integrate the `grafana-agent` charm with the Github Runner charm. + ```shell + juju integrate github-runner grafana-agent + ``` +3. Create offers for `loki`, `prometheus`, and `grafana-agent` in the `k8s-model`. + ```shell + juju switch k8s-model + juju offer loki:logging + juju offer prometheus:receive-remote-write + juju offer grafana:grafana-dashboard + ``` +4. Consume the offers in the machine model. + ```shell + juju switch machine-model + juju consume loki + juju consume prometheus + juju consume grafana + ``` +5. Integrate the `grafana-agent` charm with `loki`, `prometheus`, and `grafana`. + ```shell + juju integrate loki-k8s grafana-agent + juju integrate prometheus-k8s grafana-agent + juju integrate grafana-k8s grafana-agent + ``` + +You should now be able to access a Grafana Dashboard named `GitHub Self-Hosted Runner Metrics`, displaying metrics, and another named `System Resources` exhibiting host resources in Grafana. +Additionally, you can explore Loki logs using Grafana's Explore function. 
For detailed information about the specific metrics in the `GitHub Self-Hosted Runner Metrics` dashboard, refer to [Metrics](https://charmhub.io/github-runner/docs/reference-cos). diff --git a/docs/local-lxd/how-to/repo-policy.md b/docs/local-lxd/how-to/repo-policy.md new file mode 100644 index 000000000..242b14861 --- /dev/null +++ b/docs/local-lxd/how-to/repo-policy.md @@ -0,0 +1,21 @@ +# How to comply with repository policies + +The charm enforces a set of best practice GitHub repository settings. Self-hosted runners managed by the charm will not run jobs on repositories not compliant with the practices. This will be opt-in in the future. + +The repository settings are enforced with this [Python library](https://github.com/canonical/repo-policy-compliance). The rules enforced are different depending on how the GitHub Actions workflow is triggered. The details can be found in the README. + +In this guide, a recommended set of policies will be presented, but any set of repository settings that passes the [Python library](https://github.com/canonical/repo-policy-compliance) checks will work with the self-hosted runners managed by this charm. + +## Recommended policy + +- For outside collaborators the permission should be set to read. See [here](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/managing-teams-and-people-with-access-to-your-repository#changing-permissions-for-a-team-or-person) for instructions to change collaborator permissions. Outside collaborators will still be able to contribute with pull requests, but reviews will be needed. Details in a later section. 
+- Create the following branch protection rules, with the instructions [here](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-protected-branches/managing-a-branch-protection-rule#creating-a-branch-protection-rule): + - branch name pattern matching only the default branch of the repository, such as `main`, with the following enabled: + - `Dismiss stale pull request approvals when new commits are pushed` + - `Do not allow bypassing the above settings` + +With these settings, the common workflow of creating branches with pull requests and merging to the default branch is supported. Other GitHub Actions workflow triggers such as workflow_dispatch, push, and schedule are supported as well. + +### Working with outside collaborators + +Contributions from outside collaborators (in the case where a repository is public) need to be handled slightly differently. As such, this charm requires pull requests by outside collaborators to be reviewed by someone with `write` permission or above. Once the review is completed, the reviewer should add a comment including the following string: `/canonical/self-hosted-runners/run-workflows <commit SHA>`, where `<commit SHA>` is the commit SHA of the approved commit. Once posted, the self-hosted runners will run the workflow for this commit. \ No newline at end of file diff --git a/docs/how-to/run-on-lxd.md b/docs/local-lxd/how-to/run-on-lxd.md similarity index 87% rename from docs/how-to/run-on-lxd.md rename to docs/local-lxd/how-to/run-on-lxd.md index 0ddec7a97..87e11bb60 100644 --- a/docs/how-to/run-on-lxd.md +++ b/docs/local-lxd/how-to/run-on-lxd.md @@ -7,7 +7,7 @@ By default, Juju machines on LXD are containers. 
To run this charm on LXD, add `virt-type=virtual-machine` to the constraints during deployment: ```shell -juju deploy github-runner --constraints="cores=2 mem=16G virt-type=virtual-machine" \ +juju deploy github-runner --channel=local-lxd/stable --constraints="cores=2 mem=16G virt-type=virtual-machine" \ --config token= --config path= ``` diff --git a/docs/how-to/set-base-image.md b/docs/local-lxd/how-to/set-base-image.md similarity index 100% rename from docs/how-to/set-base-image.md rename to docs/local-lxd/how-to/set-base-image.md diff --git a/docs/local-lxd/index.md b/docs/local-lxd/index.md new file mode 100644 index 000000000..7ebdf6804 --- /dev/null +++ b/docs/local-lxd/index.md @@ -0,0 +1,6 @@ +# Track local-lxd + +Documentation under this section is related to the [track](https://juju.is/docs/juju/channel) `local-lxd`. + +You will have to deploy or refresh your charm using `--channel=local-lxd/stable` to use the functionalities +related to LXD runners under this section. diff --git a/docs/local-lxd/reference/actions.md b/docs/local-lxd/reference/actions.md new file mode 100644 index 000000000..90946fa32 --- /dev/null +++ b/docs/local-lxd/reference/actions.md @@ -0,0 +1,5 @@ +# Actions + +See [Actions](https://charmhub.io/github-runner/actions?channel=local-lxd/stable). + +> Read more about actions in the Juju docs: [Action](https://juju.is/docs/juju/action) diff --git a/docs/reference/arm64.md b/docs/local-lxd/reference/arm64.md similarity index 100% rename from docs/reference/arm64.md rename to docs/local-lxd/reference/arm64.md diff --git a/docs/local-lxd/reference/configurations.md b/docs/local-lxd/reference/configurations.md new file mode 100644 index 000000000..66c5fdf60 --- /dev/null +++ b/docs/local-lxd/reference/configurations.md @@ -0,0 +1,5 @@ +# Configurations + +See [Configurations](https://charmhub.io/github-runner/configurations?channel=local-lxd/stable). 
+ +> Read more about configurations in the Juju docs: [Configuration](https://juju.is/docs/juju/configuration) diff --git a/docs/local-lxd/reference/cos.md b/docs/local-lxd/reference/cos.md new file mode 100644 index 000000000..48a79c9d1 --- /dev/null +++ b/docs/local-lxd/reference/cos.md @@ -0,0 +1,66 @@ +# COS Integration + +## Metrics + +### Runner and Charm Insights +Upon [COS](https://charmhub.io/topics/canonical-observability-stack) integration, this charm initiates the transmission of various metrics—refer to the relevant [specification](https://discourse.charmhub.io/t/specification-isd075-github-runner-cos-integration/12084) for comprehensive details—regarding the runner instances and the charm itself. + +There are two dashboards. One for fine-granular metrics, called "GitHub Self-Hosted Runner Metrics", and one for long-term metrics, +called "GitHub Self-Hosted Runner Metrics (Long-Term)". + +The "GitHub Self-Hosted Runner Metrics" metrics dashboard presents the following rows: + +- General: Displays general metrics about the charm and runners, such as: + - Lifecycle counters: Tracks the frequency of Runner initialization, start, stop, and crash events. + - Available runners: A horizontal bar graph showing the number of runners available (and max expected) during the last reconciliation event. Note: This data is updated after each reconciliation event and is not real-time. + - Runners after reconciliation: A time series graph showing the number of runners marked as active/idle, the number of expected runners, and the difference between expected and the former (unknown) during the last reconciliation event over time. Note: This data is updated after each reconciliation event and is not real-time. 
+ - Duration observations: Each data point aggregates the last hour and shows the 50th, 90th, 95th percentile and maximum durations for: + - Runner installation + - Runner idle duration + - Charm reconciliation duration + - Job queue duration - how long a job waits in the queue before a runner picks it up + - Max job queue duration by application: Similar to "Job queue duration" panel, but shows maximum durations by charm application. + - Average reconciliation interval: Shows the average time between reconciliation events, broken down by charm application. +- Jobs: Displays certain metrics about the jobs executed by the runners. These metrics can be displayed per repository by specifying a + regular expression on the `Repository` variable. The following metrics are displayed: + - Proportion charts: Share of jobs by completion status, job conclusion, application, repo policy check failure, HTTP codes and GitHub events over time. + - Job duration observation + - Number of jobs per repository + +The "GitHub Self-Hosted Runner Metrics (Long-Term)" metrics dashboard displays the following rows: + +- General: Contains the following panels: + - Total Jobs + - Runners created per application: Shows the number of runners created per charm application. + - Total unique repositories + - Timeseries chart displaying the number of jobs per day + - Percentage of jobs with low queue time (less than 60 seconds) + +Both dashboards allow for filtering by charm application by specifying a regular expression on the `Application` variable. + + +While the dashboard visualizes a subset of potential metrics, these metrics are logged in a file named `/var/log/github-runner-metrics.log`. Use following Loki query to retrieve lines from this file: + +``` +{filename="/var/log/github-runner-metrics.log"} +``` + +These log events contain valuable details such as charm application, GitHub events triggering workflows along with their respective repositories, and more. 
Customizing metric visualization is possible to suit specific needs. + +### Machine Host Metrics +The `grafana-agent` autonomously transmits machine host metrics, which are visualized in the `System Resources` dashboard. + +## Logs + +The `grafana-agent` effectively transmits all logs located at `/var/log/**/*log`, from the charm unit to Loki. Additionally, it collects logs concerning crashed runners with accessible but unshut LXD virtual machines. + + +## Alerts + +The charm contains a number of alerts that are sent to COS using the `grafana-agent`. +Please refer to the COS documentation for more information on how to set up alerts. + +Alerts are divided into two categories: + +- Capacity Alerts: Alerts you when there is a shortage of a particular type of runner. +- Failure Alerts: Notification of runner crashes or repo policy related failures. diff --git a/docs/local-lxd/reference/cryptographic-overview.md b/docs/local-lxd/reference/cryptographic-overview.md new file mode 100644 index 000000000..9dbde31d2 --- /dev/null +++ b/docs/local-lxd/reference/cryptographic-overview.md @@ -0,0 +1,60 @@ +# GitHub runner cryptographic overview +This document provides an overview of the cryptographic technologies used in the GitHub runner charm, including encryption, decryption, hashing and digital signatures. + +## Overall Description of Cryptographic Technology Use +The GitHub runner charm uses various cryptographic technologies to ensure secure communication and data integrity. The following sections describe each of the components. + +### TLS +The GitHub runner communicates with GitHub to receive information about the workflows to be executed, retrieve the repositories config and send back workflow logs to the user. This communication uses [urllib3](https://urllib3.readthedocs.io/en/stable/) under the hood using TLS 1.3. + +In OpenStack mode, the runner creates virtual machines to run the workload in OpenStack. 
The GitHub runner charm interacts with the OpenStack API to create and manage virtual machines. These interactions are secured via TLS. The communication between these virtual machines and runner is done via SSH and secured by 256 byte keypairs. + +[DockerHub Registry](https://charmhub.io/docker-registry) is used as a cache between the official DockerHub and GitHub runners to avoid rate limiting. Communication between GitHub runners and DockerHub cache is secured via TLS 1.3 and certified by [Let’s Encrypt](https://letsencrypt.org/). + +Images that run in the OpenStack VM are built using the [Image Builder](https://github.com/canonical/github-runner-image-builder). This application needs to download the runner binary, yq and the cloud image to base the image on. All these images are downloaded with TLS. + +The GitHub runner charm supports being deployed behind an HTTP proxy. [Aproxy](https://github.com/canonical/aproxy) is installed and enabled when an HTTP proxy is detected so that jobs executing in the runner VMs don’t have to configure the proxy themselves. Aproxy is a transparent proxy service for HTTP and HTTPS/TLS connections. Aproxy works by pre-reading the Host header in HTTP requests and SNI in TLS hellos; it forwards HTTP proxy requests with the hostname therefore, complies with HTTP proxies requiring destination hostname for auditing or access control. Aproxy doesn't and can't decrypt the TLS connections. It works by reading the plaintext SNI information in the client hello during the TLS handshake, so the authentication and encryption of TLS are never compromised. Aproxy supports TLS 1.0 and above except TLS 1.3 Encrypted Client Hello. + +### Signature Verification +Images that run in the OpenStack VM (Cloud images) are verified by SHA256 checksum. Runner binary is also downloaded by [GitHub Runner Charm](https://github.com/canonical/github-runner-operator) and verified by SHA256 in this charm. 
+ +### User SSH Access +Sometimes users need to access the VM instance that is running the workload, to establish this connection [Tmate](https://tmate.io/) is used. Tmate uses the SSH protocol to secure shell connections between users and the GitHub runner. The connection is secured with RSA keypair and ed25519 fingerprints. + +## Cryptographic Technology Used by the Product +The following cryptographic technologies are used internally by our product: + +### TLS +- Communication with GitHub is done via TLS v1.3 +- [The Repo Policy Compliance tool](https://github.com/canonical/repo-policy-compliance) communicates with the [GitHub API](https://docs.github.com/en/rest?apiVersion=2022-11-28) using TLS v1.3. +- Communication with the OpenStack API uses TLS v1.3. + +### Signature Verification +Cloud images that are downloaded by the Image Builder are verified by SHA256 checksum. Runner binary is also downloaded by [the GitHub Runner Charm](https://github.com/canonical/github-runner-operator) and verified by SHA256 in this charm. + +### User SSH Access +Tmate secures SSH connections using [OpenSSL](https://www.openssl.org/). + +### RSA +The communication between these virtual machines and the runner is done via SSH and secured by RSA 256 byte keypairs. + +## Cryptographic Technology Exposed to Users +The following sections describe the cryptographic technologies exposed to the user: + +### Tmate +- [Tmate](https://tmate.io/) uses RSA (384 byte) and ED25519 (32 byte) fingerprints for connections from users to [Tmate](https://tmate.io/) and from the managers to [Tmate](https://tmate.io/). [OpenSSL](https://www.openssl.org/) is being used by [Tmate](https://tmate.io/) to secure the connection between the user/manager to the runner. + +### Docker Hub Cache +[Docker Hub cache](https://github.com/canonical/docker-registry-charm) connection is secured via TLS 1.3 and certified by [Let’s Encrypt](https://letsencrypt.org/). 
+ +### Aproxy +[Aproxy](https://github.com/canonical/aproxy) supports TLS 1.0 and above except TLS 1.3 Encrypted Client Hello. + +## Packages or Technology Providing Cryptographic Functionality +The following packages or technologies provide cryptographic functionality: +- [OpenSSL](https://www.openssl.org/) library is being used for TLS and HTTPS connections. +- Urllib3 python library is being used for TLS and HTTPS connections. +- Default clients in Ubuntu for SSH and TLS are being used. +- [Python hashlib](https://docs.python.org/3/library/hashlib.html) package is being used for SHA256 checksum calculation/verification of runner binary and Cloud Init. +- OpenStack client is being used to generate keypairs. +- [Aproxy](https://github.com/canonical/aproxy) using the [golang.org/x/crypto](http://golang.org/x/crypto) package to parse TLS client hello message. diff --git a/docs/local-lxd/reference/external-access.md b/docs/local-lxd/reference/external-access.md new file mode 100644 index 000000000..887e835ab --- /dev/null +++ b/docs/local-lxd/reference/external-access.md @@ -0,0 +1,16 @@ +# External Access + +The GitHub Runner Charm itself requires access to: + +- GitHub API (e.g. to register and remove runners). +- GitHub website (e.g. to download the runner binary or other applications like yq) +- Ubuntu package repositories (e.g. to install packages) +- Snap store (e.g. to install LXD or aproxy) +- [Ubuntu Cloud Images](https://cloud-images.ubuntu.com/) (for the image used by a runner) +- npm registry (e.g. to download and install specific packages) + +In addition, access is required depending on the requirements of the workloads that the runners +will be running (as they will be running on the same machine as the charm). + +More details on network configuration can be found in the +[charm architecture documentation](https://charmhub.io/github-runner/docs/charm-architecture). 
diff --git a/docs/local-lxd/reference/integrations.md b/docs/local-lxd/reference/integrations.md new file mode 100644 index 000000000..1317ff661 --- /dev/null +++ b/docs/local-lxd/reference/integrations.md @@ -0,0 +1,14 @@ +# Integrations + +## debug-ssh + +_Interface_: debug-ssh +_Supported charms_: [tmate-ssh-server](https://charmhub.io/tmate-ssh-server) + +Debug-ssh integration provides necessary information for runners to provide ssh reverse-proxy +applications to set up inside the runner. + +Example debug-ssh integrate command: +``` +juju integrate github-runner tmate-ssh-server +``` diff --git a/docs/local-lxd/reference/token-scopes.md b/docs/local-lxd/reference/token-scopes.md new file mode 100644 index 000000000..720693a25 --- /dev/null +++ b/docs/local-lxd/reference/token-scopes.md @@ -0,0 +1,48 @@ +# Token scopes + +## Fine grained access token scopes + +**Note**: In addition to having a token with the necessary permissions, the user who owns the +token also must have admin access to the organization or repository. + +### Organizational Runners + +The following are the permissions scopes required for the GitHub runners when registering as an +organizational runner. + +Organization: + +- Self-hosted runners: read & write + +Repository: + +- Actions: read (required if COS integration is enabled and private repositories exist) +- Administration: read +- Contents: read (not required if the charm is configured to use OpenStack runners) +- Pull requests: read (not required if the charm is configured to use OpenStack runners) + +### Repository Runners + +The following are the permissions scopes required for the GitHub runners when registering as a +repository runner. 
+ +- Actions: read (required if COS integration is enabled and the repository is private) +- Administration: read & write +- Contents: read (not required if the charm is configured to use OpenStack runners) +- Metadata: read +- Pull requests: read (not required if the charm is configured to use OpenStack runners) + +## Personal access token scopes + +### Organizational Runners + +To use this charm for GitHub organizations, the following scopes should be selected: + +- `repo` +- `admin:org` + +### Repository Runners + +To use this charm for GitHub repositories, the following scopes should be selected: + +- `repo` diff --git a/docs/tutorial/managing-resource-usage.md b/docs/local-lxd/tutorial/managing-resource-usage.md similarity index 91% rename from docs/tutorial/managing-resource-usage.md rename to docs/local-lxd/tutorial/managing-resource-usage.md index ddac4fb13..a3c38bbd8 100644 --- a/docs/tutorial/managing-resource-usage.md +++ b/docs/local-lxd/tutorial/managing-resource-usage.md @@ -28,4 +28,4 @@ If memory is used as [runner storage](https://charmhub.io/github-runner/docs/con ## Juju machine constraints -During [deployment of the charm](https://juju.is/docs/juju/juju-deploy), constraints can be used to specify the Juju machine resource requirements. For example, `juju deploy github-runner --constraints="cores=4 mem=16G disk=20G"`. +During [deployment of the charm](https://juju.is/docs/juju/juju-deploy), constraints can be used to specify the Juju machine resource requirements. For example, `juju deploy github-runner --channel=local-lxd/stable --constraints="cores=4 mem=16G disk=20G"`. 
diff --git a/docs/tutorial/quick-start.md b/docs/local-lxd/tutorial/quick-start.md similarity index 96% rename from docs/tutorial/quick-start.md rename to docs/local-lxd/tutorial/quick-start.md index 892b86a81..4cd59b34a 100644 --- a/docs/tutorial/quick-start.md +++ b/docs/local-lxd/tutorial/quick-start.md @@ -73,7 +73,7 @@ For information on token scopes, see [How to change GitHub personal access token Once the personal access token is created, the charm can be deployed with: ``` -juju deploy github-runner --constraints="cores=4 mem=16G root-disk=20G virt-type=virtual-machine" --config token=<token> --config path=<path> --config runner-storage=memory --config vm-memory=2GiB --config vm-disk=10GiB +juju deploy github-runner --channel=local-lxd/stable --constraints="cores=4 mem=16G root-disk=20G virt-type=virtual-machine" --config token=<token> --config path=<path> --config runner-storage=memory --config vm-memory=2GiB --config vm-disk=10GiB ``` Replacing the `<token>` with the personal access token, and `<path>` the GitHub account name and GitHub repository separated with `/`. diff --git a/docs/reference/token-scopes.md b/docs/reference/token-scopes.md index 9f783c27a..775256f96 100644 --- a/docs/reference/token-scopes.md +++ b/docs/reference/token-scopes.md @@ -18,8 +18,6 @@ Repository: - Actions: read (required if COS integration is enabled and private repositories exist) - Administration: read -- Contents: read (not required if the charm is configured to use OpenStack runners) -- Pull requests: read (not required if the charm is configured to use OpenStack runners) @@ -28,9 +26,7 @@ repository runner. 
- Actions: read (required if COS integration is enabled and the repository is private) - Administration: read & write -- Contents: read (not required if the charm is configured to use OpenStack runners) - Metadata: read -- Pull requests: read (not required if the charm is configured to use OpenStack runners) ## Personal access token scopes diff --git a/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md b/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md index ac3181fc0..f382dd733 100644 --- a/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md +++ b/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md @@ -10,7 +10,6 @@ Manager for self-hosted runner on OpenStack. - **CREATE_SERVER_TIMEOUT** - **RUNNER_LISTENER_PROCESS** - **RUNNER_WORKER_PROCESS** -- **BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME** - **MAX_METRICS_FILE_SIZE** - **RUNNER_STARTUP_PROCESS** - **OUTDATED_METRICS_STORAGE_IN_SECONDS** @@ -19,7 +18,7 @@ Manager for self-hosted runner on OpenStack. --- - + ## class `OpenStackServerConfig` Configuration for OpenStack server. @@ -50,7 +49,7 @@ __init__(image: str, flavor: str, network: str) → None --- - + ## class `OpenStackRunnerManagerConfig` Configuration for OpenStack runner manager. @@ -93,7 +92,7 @@ __init__( --- - + ## class `OpenStackRunnerManager` Manage self-hosted runner on OpenStack cloud. @@ -104,7 +103,7 @@ Manage self-hosted runner on OpenStack cloud. - `name_prefix`: The name prefix of the runners created. - + ### method `__init__` @@ -136,7 +135,7 @@ The prefix of runner names. --- - + ### method `cleanup` @@ -159,7 +158,7 @@ Cleanup runner and resource on the cloud. --- - + ### method `create_runner` @@ -189,7 +188,7 @@ Create a self-hosted runner. --- - + ### method `delete_runner` @@ -213,7 +212,7 @@ Delete self-hosted runners. --- - + ### method `flush_runners` @@ -236,7 +235,7 @@ Remove idle and/or busy runners. 
--- - + ### method `get_runner` @@ -259,7 +258,7 @@ Get a self-hosted runner by instance id. --- - + ### method `get_runners` diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py index 964d98ee5..b0ec2b42f 100644 --- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py @@ -60,7 +60,6 @@ logger = logging.getLogger(__name__) -BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME = "scripts/build-openstack-image.sh" _CONFIG_SCRIPT_PATH = Path("/home/ubuntu/actions-runner/config.sh") RUNNER_APPLICATION = Path("/home/ubuntu/actions-runner") diff --git a/metadata.yaml b/metadata.yaml index d1e0ce9f3..be9160bc2 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -13,7 +13,7 @@ description: | A [Juju](https://juju.is/) [charm](https://juju.is/docs/olm/charmed-operators) managing [self-hosted runners for GitHub Actions](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners). - Each unit of this charm will start a configurable number of LXD based virtual machines to host + Each unit of this charm will start a configurable number of virtual machines to host self-hosted runners. Each runner performs only one job, after which it unregisters from GitHub to ensure that each job runs in a clean environment. The charm will periodically check the number of runners and spawn or destroy them as necessary to maintain the configured number of runners. 
Both diff --git a/pyproject.toml b/pyproject.toml index 8a430a2e7..094aa0bad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,22 +10,13 @@ skips = ["*/*test.py", "*/test_*.py", "*tests/*.py"] [tool.coverage.run] branch = true omit = [ - # These are covered by `tests/integration/test_runner_manager_openstack.py`. - "src/openstack_cloud/openstack_cloud.py", - "src/openstack_cloud/openstack_runner_manager.py", - # Contains interface for calling LXD. Tested in integration tests and end to end tests. - "src/lxd.py", # Contains interface for calling repo policy compliance service. Tested in integration test # and end to end tests. "src/repo_policy_compliance_client.py", - # TODO: 2024-04-17: These files are pending a major refactor. The refactor includes a RunnerManager - # interface class which will include a complete re-organization of the code in these files. - "src/runner.py", - "src/runner_manager.py", ] [tool.coverage.report] -fail_under = 84 +fail_under = 83 show_missing = true [tool.pytest.ini_options] diff --git a/scripts/build-lxd-image.sh b/scripts/build-lxd-image.sh deleted file mode 100755 index 4feabc0a6..000000000 --- a/scripts/build-lxd-image.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -set -e - -retry() { - local command="$1" - local wait_message="$2" - local max_try="$3" - - local attempt=0 - - while ! $command - do - attempt=$((attempt + 1)) - if [[ attempt -ge $max_try ]]; then - return - fi - - echo "$wait_message" - sleep 10 - done -} - -cleanup() { - local test_command="$1" - local clean_up_command="$2" - local wait_message="$3" - local max_try="$4" - - local attempt=0 - - while bash -c "$test_command" - do - echo "$wait_message" - - $clean_up_command - - attempt=$((attempt + 1)) - if [[ attempt -ge $max_try ]]; then - # Cleanup failure. 
- return 1 - fi - - sleep 10 - done -} - -HTTP_PROXY="$1" -HTTPS_PROXY="$2" -NO_PROXY="$3" -BASE_IMAGE="$4" -MODE="$5" - -if [[ -n "$HTTP_PROXY" ]]; then - /snap/bin/lxc config set core.proxy_http "$HTTP_PROXY" -fi -if [[ -n "$HTTPS_PROXY" ]]; then - /snap/bin/lxc config set core.proxy_https "$HTTPS_PROXY" -fi - -cleanup '/snap/bin/lxc info builder &> /dev/null' '/snap/bin/lxc delete builder --force' 'Cleanup LXD VM of previous run' 10 - -if [[ "$MODE" == "test" ]]; then - retry "/snap/bin/lxc launch ubuntu-daily:$BASE_IMAGE builder --device root,size=5GiB" 'Starting LXD container' -else - retry "/snap/bin/lxc launch ubuntu-daily:$BASE_IMAGE builder --vm --device root,size=8GiB" 'Starting LXD VM' -fi -retry '/snap/bin/lxc exec builder -- /usr/bin/who' 'Wait for lxd agent to be ready' 30 -if [[ -n "$HTTP_PROXY" ]]; then - /snap/bin/lxc exec builder -- echo "HTTP_PROXY=$HTTP_PROXY" >> /etc/environment - /snap/bin/lxc exec builder -- echo "http_proxy=$HTTP_PROXY" >> /etc/environment - /snap/bin/lxc exec builder -- echo "Acquire::http::Proxy \"$HTTP_PROXY\";" >> /etc/apt/apt.conf -fi -if [[ -n "$HTTPS_PROXY" ]]; then - /snap/bin/lxc exec builder -- echo "HTTPS_PROXY=$HTTPS_PROXY" >> /etc/environment - /snap/bin/lxc exec builder -- echo "https_proxy=$HTTPS_PROXY" >> /etc/environment - /snap/bin/lxc exec builder -- echo "Acquire::https::Proxy \"$HTTPS_PROXY\";" >> /etc/apt/apt.conf -fi -if [[ -n "$NO_PROXY" ]]; then - /snap/bin/lxc exec builder -- echo "NO_PROXY=$NO_PROXY" >> /etc/environment - /snap/bin/lxc exec builder -- echo "no_proxy=$NO_PROXY" >> /etc/environment -fi -retry '/snap/bin/lxc exec builder -- /usr/bin/nslookup github.com' 'Wait for network to be ready' 30 - -/snap/bin/lxc exec builder -- /usr/bin/apt-get update -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/apt-get upgrade -yq -# This will remove older version of kernel as HWE is installed now. 
-/snap/bin/lxc exec builder -- /usr/bin/apt-get autoremove --purge - -/snap/bin/lxc restart builder -retry '/snap/bin/lxc exec builder -- /usr/bin/who' 'Wait for lxd agent to be ready' 30 -retry '/snap/bin/lxc exec builder -- /usr/bin/nslookup github.com' 'Wait for network to be ready' 30 - -/snap/bin/lxc exec builder -- /usr/bin/apt-get update -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/apt-get upgrade -yq -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/apt-get install docker.io npm python3-pip shellcheck jq wget unzip gh -yq - -# Uninstall unattended-upgrades, to avoid lock errors when unattended-upgrades is active in the runner -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl stop apt-daily.timer -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl disable apt-daily.timer -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl mask apt-daily.service -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl stop apt-daily-upgrade.timer -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl disable apt-daily-upgrade.timer -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl mask apt-daily-upgrade.service -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/systemctl daemon-reload -/snap/bin/lxc exec builder --env DEBIAN_FRONTEND=noninteractive -- /usr/bin/apt-get purge unattended-upgrades -yq - -if [[ -n "$HTTP_PROXY" ]]; then - /snap/bin/lxc exec builder -- /usr/bin/npm config set proxy "$HTTP_PROXY" -fi -if [[ -n "$HTTPS_PROXY" ]]; then - /snap/bin/lxc exec builder -- /usr/bin/npm config set https-proxy "$HTTPS_PROXY" -fi -/snap/bin/lxc exec builder -- /usr/bin/npm install --global yarn -/snap/bin/lxc exec builder -- /usr/sbin/groupadd microk8s -/snap/bin/lxc exec builder -- 
/usr/sbin/usermod -aG microk8s ubuntu -/snap/bin/lxc exec builder -- /usr/sbin/usermod -aG docker ubuntu -/snap/bin/lxc exec builder -- /usr/sbin/iptables -I DOCKER-USER -j ACCEPT - -# Reduce image size -/snap/bin/lxc exec builder -- /usr/bin/npm cache clean --force -/snap/bin/lxc exec builder -- /usr/bin/apt-get clean - -# Download and verify checksum of yq -if [[ $(uname -m) == 'aarch64' ]]; then - YQ_ARCH="arm64" -elif [[ $(uname -m) == 'arm64' ]]; then - YQ_ARCH="arm64" -elif [[ $(uname -m) == 'x86_64' ]]; then - YQ_ARCH="amd64" -else - echo "Unsupported CPU architecture: $(uname -m)" - return 1 -fi -/usr/bin/wget "https://github.com/mikefarah/yq/releases/latest/download/yq_linux_$YQ_ARCH" -O "yq_linux_$YQ_ARCH" -/usr/bin/wget https://github.com/mikefarah/yq/releases/latest/download/checksums -O checksums -/usr/bin/wget https://github.com/mikefarah/yq/releases/latest/download/checksums_hashes_order -O checksums_hashes_order -/usr/bin/wget https://github.com/mikefarah/yq/releases/latest/download/extract-checksum.sh -O extract-checksum.sh -/usr/bin/bash extract-checksum.sh SHA-256 "yq_linux_$YQ_ARCH" | /usr/bin/awk '{print $2,$1}' | /usr/bin/sha256sum -c | /usr/bin/grep OK -/snap/bin/lxc file push "yq_linux_$YQ_ARCH" builder/usr/bin/yq --mode 755 - -/snap/bin/lxc exec builder -- /usr/bin/sync -/snap/bin/lxc publish builder --alias builder --reuse -f - -# Swap in the built image -/snap/bin/lxc image alias rename "$BASE_IMAGE" "old-$BASE_IMAGE" || true -/snap/bin/lxc image alias rename builder "$BASE_IMAGE" -/snap/bin/lxc image delete "old-$BASE_IMAGE" || true - -# Clean up LXD instance -cleanup '/snap/bin/lxc info builder &> /dev/null' '/snap/bin/lxc delete builder --force' 'Cleanup LXD instance' 10 diff --git a/scripts/build-openstack-image.sh b/scripts/build-openstack-image.sh deleted file mode 100755 index 76362d46f..000000000 --- a/scripts/build-openstack-image.sh +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2024 Canonical Ltd. 
-# See LICENSE file for licensing details. - -set -euo pipefail - -# GitHub runner bin args -RUNNER_TAR_URL="$1" - -# Proxy args -HTTP_PROXY="$2" -HTTPS_PROXY="$3" -NO_PROXY="$4" -BASE_IMAGE="$7" - -# retry function -retry() { - local command="$1" - local wait_message="$2" - local max_try="$3" - - local attempt=0 - - while ! $command - do - attempt=$((attempt + 1)) - if [[ attempt -ge $max_try ]]; then - return - fi - - echo "$wait_message" - sleep 10 - done -} - -# cleanup any existing mounts -cleanup() { - sudo umount /mnt/ubuntu-image/dev/ || true - sudo umount /mnt/ubuntu-image/proc/ || true - sudo umount /mnt/ubuntu-image/sys/ || true - sudo umount /mnt/ubuntu-image || true - sudo qemu-nbd --disconnect /dev/nbd0 -} - -# Check if proxy variables set, doesn't exist or is a different value then update. -if [[ -n "$HTTP_PROXY" ]]; then - if ! grep -q "HTTP_PROXY=" /etc/environment || ! grep -q "HTTP_PROXY=$HTTP_PROXY" /etc/environment; then - sed -i "/^HTTP_PROXY=/d" /etc/environment - echo "HTTP_PROXY=$HTTP_PROXY" >> /etc/environment - fi - if ! grep -q "http_proxy=" /etc/environment || ! grep -q "http_proxy=$HTTP_PROXY" /etc/environment; then - sed -i "/^http_proxy=/d" /etc/environment - echo "http_proxy=$HTTP_PROXY" >> /etc/environment - fi -fi - -if [[ -n "$HTTPS_PROXY" ]]; then - if ! grep -q "HTTPS_PROXY=" /etc/environment || ! grep -q "HTTPS_PROXY=$HTTPS_PROXY" /etc/environment; then - sed -i "/^HTTPS_PROXY=/d" /etc/environment - echo "HTTPS_PROXY=$HTTPS_PROXY" >> /etc/environment - fi - if ! grep -q "https_proxy=" /etc/environment || ! grep -q "https_proxy=$HTTPS_PROXY" /etc/environment; then - sed -i "/^https_proxy=/d" /etc/environment - echo "https_proxy=$HTTPS_PROXY" >> /etc/environment - fi -fi - -if [[ -n "$NO_PROXY" ]]; then - if ! grep -q "NO_PROXY=" /etc/environment || ! grep -q "NO_PROXY=$NO_PROXY" /etc/environment; then - sed -i "/^NO_PROXY=/d" /etc/environment - echo "NO_PROXY=$NO_PROXY" >> /etc/environment - fi - if ! 
grep -q "no_proxy=" /etc/environment || ! grep -q "no_proxy=$NO_PROXY" /etc/environment; then - sed -i "/^no_proxy=/d" /etc/environment - echo "no_proxy=$NO_PROXY" >> /etc/environment - fi -fi - -# Architecture args -ARCH=$(uname -m) -if [[ $ARCH == 'aarch64' ]]; then - BIN_ARCH="arm64" -elif [[ $ARCH == 'arm64' ]]; then - BIN_ARCH="arm64" -elif [[ $ARCH == 'x86_64' ]]; then - BIN_ARCH="amd64" -else - echo "Unsupported CPU architecture: $ARCH" - return 1 -fi - -# qemu-utils required to unpack qcow image -sudo DEBIAN_FRONTEND=noninteractive apt-get install qemu-utils libguestfs-tools -y - -# enable network block device -sudo modprobe nbd - -# cleanup any existing mounts -cleanup - -retry "sudo wget https://cloud-images.ubuntu.com/$BASE_IMAGE/current/$BASE_IMAGE-server-cloudimg-$BIN_ARCH.img \ - -O $BASE_IMAGE-server-cloudimg-$BIN_ARCH.img" "Downloading cloud image" 3 - -# resize image - installing dependencies requires more disk space -sudo qemu-img resize "$BASE_IMAGE-server-cloudimg-$BIN_ARCH.img" +1.5G - -# mount nbd -echo "Connecting network block device to image" -sudo qemu-nbd --connect=/dev/nbd0 "$BASE_IMAGE-server-cloudimg-$BIN_ARCH.img" -sudo mkdir -p /mnt/ubuntu-image -retry "sudo mount -o rw /dev/nbd0p1 /mnt/ubuntu-image" "Mounting nbd0p1 device" 3 - -# mount required system dirs -echo "Mounting sys dirs" -retry "sudo mount --bind /dev/ /mnt/ubuntu-image/dev/" "Mounting /dev/" 3 -retry "sudo mount --bind /proc/ /mnt/ubuntu-image/proc/" "Mounting /proc/" 3 -retry "sudo mount --bind /sys/ /mnt/ubuntu-image/sys/" "Mounting /sys/" 3 -sudo rm /mnt/ubuntu-image/etc/resolv.conf -f -sudo cp /etc/resolv.conf /mnt/ubuntu-image/etc/resolv.conf - -# resize mount -echo "Resizing mounts" -sudo growpart /dev/nbd0 1 # grow partition size to available space -sudo resize2fs /dev/nbd0p1 # resize fs accordingly - -# chroot and install dependencies -echo "Installing dependencies in chroot env" -sudo chroot /mnt/ubuntu-image/ <> "$GITHUB_ENV" diff --git 
a/scripts/repo_policy_compliance_service.py b/scripts/repo_policy_compliance_service.py deleted file mode 100644 index 5e914233c..000000000 --- a/scripts/repo_policy_compliance_service.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Flask application for repo policy compliance. - -This module is loaded into juju unit and run on top of gunicorn. -""" - -from flask import Flask # pylint: disable=import-error -from repo_policy_compliance.blueprint import repo_policy_compliance # pylint: disable=import-error - -app = Flask(__name__) -app.register_blueprint(repo_policy_compliance) diff --git a/src-docs/charm.py.md b/src-docs/charm.py.md index 883cb5500..e1d578a03 100644 --- a/src-docs/charm.py.md +++ b/src-docs/charm.py.md @@ -8,12 +8,10 @@ Charm for creating and managing GitHub self-hosted runner instances. **Global Variables** --------------- - **DEBUG_SSH_INTEGRATION_NAME** -- **GROUP_CONFIG_NAME** - **IMAGE_INTEGRATION_NAME** - **LABELS_CONFIG_NAME** - **PATH_CONFIG_NAME** - **RECONCILE_INTERVAL_CONFIG_NAME** -- **TEST_MODE_CONFIG_NAME** - **TOKEN_CONFIG_NAME** - **RECONCILIATION_INTERVAL_TIMEOUT_FACTOR** - **RECONCILE_RUNNERS_EVENT** @@ -28,7 +26,7 @@ Charm for creating and managing GitHub self-hosted runner instances. --- - + ## function `catch_charm_errors` @@ -54,7 +52,7 @@ Catch common errors in charm. --- - + ## function `catch_action_errors` @@ -83,19 +81,7 @@ Catch common errors in actions. ## class `GithubRunnerCharm` Charm for managing GitHub self-hosted runners. - - -**Attributes:** - - - `service_token_path`: The path to token to access local services. - - `repo_check_web_service_path`: The path to repo-policy-compliance service directory. - - `repo_check_web_service_script`: The path to repo-policy-compliance web service script. - - `repo_check_systemd_service`: The path to repo-policy-compliance unit file. - - `juju_storage_path`: The path to juju storage. 
- - `ram_pool_path`: The path to memdisk storage. - - `kernel_module_path`: The path to kernel modules. - - + ### function `__init__` @@ -113,12 +99,6 @@ Construct the charm. - `kwargs`: List of keyword arguments to be passed to the `CharmBase` class. - -**Raises:** - - - `RuntimeError`: If invalid test configuration was detected. - - --- #### property app diff --git a/src-docs/charm_state.py.md b/src-docs/charm_state.py.md index 64ff18954..68b1df9b8 100644 --- a/src-docs/charm_state.py.md +++ b/src-docs/charm_state.py.md @@ -7,11 +7,8 @@ State of the Charm. **Global Variables** --------------- -- **REACTIVE_MODE_NOT_SUPPORTED_WITH_LXD_ERR_MSG** - **ARCHITECTURES_ARM64** - **ARCHITECTURES_X86** -- **BASE_IMAGE_CONFIG_NAME** -- **DENYLIST_CONFIG_NAME** - **DOCKERHUB_MIRROR_CONFIG_NAME** - **GROUP_CONFIG_NAME** - **LABELS_CONFIG_NAME** @@ -22,20 +19,15 @@ State of the Charm. - **RECONCILE_INTERVAL_CONFIG_NAME** - **REPO_POLICY_COMPLIANCE_TOKEN_CONFIG_NAME** - **REPO_POLICY_COMPLIANCE_URL_CONFIG_NAME** -- **RUNNER_STORAGE_CONFIG_NAME** - **SENSITIVE_PLACEHOLDER** - **TEST_MODE_CONFIG_NAME** - **TOKEN_CONFIG_NAME** - **USE_APROXY_CONFIG_NAME** - **VIRTUAL_MACHINES_CONFIG_NAME** -- **VM_CPU_CONFIG_NAME** -- **VM_MEMORY_CONFIG_NAME** -- **VM_DISK_CONFIG_NAME** - **COS_AGENT_INTEGRATION_NAME** - **DEBUG_SSH_INTEGRATION_NAME** - **IMAGE_INTEGRATION_NAME** - **MONGO_DB_INTEGRATION_NAME** -- **LTS_IMAGE_VERSION_TAG_MAP** --- @@ -69,22 +61,6 @@ Supported system architectures. ---- - -## class `BaseImage` -The ubuntu OS base image to build and deploy runners on. - - - -**Attributes:** - - - `JAMMY`: The jammy ubuntu LTS image. - - `NOBLE`: The noble ubuntu LTS image. - - - - - --- ## class `CharmConfig` @@ -96,7 +72,6 @@ Some charm configurations are grouped into other configuration models. **Attributes:** - - `denylist`: List of IPv4 to block the runners from accessing. - `dockerhub_mirror`: Private docker registry as dockerhub mirror for the runners to use. 
- `labels`: Additional runner labels to append to default (i.e. os, flavor, architecture). - `openstack_clouds_yaml`: The openstack clouds.yaml configuration. @@ -110,7 +85,7 @@ Some charm configurations are grouped into other configuration models. --- - + ### classmethod `check_reconcile_interval` @@ -139,7 +114,7 @@ Validate the general charm configuration. --- - + ### classmethod `from_charm` @@ -178,7 +153,7 @@ Raised when charm config is invalid. - `msg`: Explanation of the error. - + ### function `__init__` @@ -211,7 +186,6 @@ The charm state. - `charm_config`: Configuration of the juju charm. - `is_metrics_logging_available`: Whether the charm is able to issue metrics. - `proxy_config`: Proxy-related configuration. - - `instance_type`: The type of instances, e.g., local lxd, openstack. - `reactive_config`: The charm configuration related to reactive spawning mode. - `runner_config`: The charm configuration related to runner VM configuration. - `ssh_debug_connections`: SSH debug connections configuration information. @@ -221,7 +195,7 @@ The charm state. --- - + ### classmethod `from_charm` @@ -267,7 +241,7 @@ Charm configuration related to GitHub. --- - + ### classmethod `from_charm` @@ -295,154 +269,6 @@ Get github related charm configuration values from charm. The parsed GitHub configuration values. ---- - -## class `ImmutableConfigChangedError` -Represents an error when changing immutable charm state. - - - -### function `__init__` - -```python -__init__(msg: str) -``` - -Initialize a new instance of the ImmutableConfigChangedError exception. - - - -**Args:** - - - `msg`: Explanation of the error. - - - - - ---- - -## class `InstanceType` -Type of instance for runner. - - - -**Attributes:** - - - `LOCAL_LXD`: LXD instance on the local juju machine. - - `OPENSTACK`: OpenStack instance on a cloud. - - - - - ---- - -## class `LocalLxdRunnerConfig` -Runner configurations for local LXD instances. 
- - - -**Attributes:** - - - `base_image`: The ubuntu base image to run the runner virtual machines on. - - `virtual_machines`: Number of virtual machine-based runner to spawn. - - `virtual_machine_resources`: Hardware resource used by one virtual machine for a runner. - - `runner_storage`: Storage to be used as disk for the runner. - - - - ---- - - - -### classmethod `check_virtual_machine_resources` - -```python -check_virtual_machine_resources( - vm_resources: VirtualMachineResources -) → VirtualMachineResources -``` - -Validate the virtual_machine_resources field values. - - - -**Args:** - - - `vm_resources`: the virtual_machine_resources value to validate. - - - -**Raises:** - - - `ValueError`: if an invalid number of cpu was given or invalid memory/disk size was given. - - - -**Returns:** - The validated virtual_machine_resources value. - ---- - - - -### classmethod `check_virtual_machines` - -```python -check_virtual_machines(virtual_machines: int) → int -``` - -Validate the virtual machines configuration value. - - - -**Args:** - - - `virtual_machines`: The virtual machines value to validate. - - - -**Raises:** - - - `ValueError`: if a negative integer was passed. - - - -**Returns:** - Validated virtual_machines value. - ---- - - - -### classmethod `from_charm` - -```python -from_charm(charm: CharmBase) → LocalLxdRunnerConfig -``` - -Initialize the config from charm. - - - -**Args:** - - - `charm`: The charm instance. - - - -**Raises:** - - - `CharmConfigInvalidError`: if an invalid runner charm config has been set on the charm. - - - -**Returns:** - Local LXD runner config of the charm. - - --- ## class `OpenStackCloudsYAML` @@ -475,7 +301,7 @@ OpenstackImage from image builder relation data. --- - + ### classmethod `from_charm` @@ -518,7 +344,7 @@ Runner configuration for OpenStack Instances. --- - + ### classmethod `from_charm` @@ -572,7 +398,7 @@ Return the aproxy address. 
--- - + ### classmethod `check_use_aproxy` @@ -602,7 +428,7 @@ Validate the proxy configuration. --- - + ### classmethod `from_charm` @@ -640,7 +466,7 @@ Represents the configuration for reactive scheduling. --- - + ### classmethod `from_database` @@ -685,7 +511,7 @@ Configuration for the repo policy compliance service. --- - + ### classmethod `from_charm` @@ -715,20 +541,51 @@ Initialize the config from charm. --- -## class `RunnerStorage` -Supported storage as runner disk. +## class `OpenstackRunnerConfig` +Runner configuration for OpenStack Instances. **Attributes:** - - `JUJU_STORAGE`: Represents runner storage from Juju storage. - - `MEMORY`: Represents tempfs storage (ramdisk). + - `virtual_machines`: Number of virtual machine-based runner to spawn. + - `openstack_flavor`: flavor on openstack to use for virtual machines. + - `openstack_network`: Network on openstack to use for virtual machines. + - `openstack_image`: Openstack image to use for virtual machines. + + + + +--- + + + +### classmethod `from_charm` + +```python +from_charm(charm: CharmBase) → OpenstackRunnerConfig +``` + +Initialize the config from charm. + + + +**Args:** + + - `charm`: The charm instance. + +**Raises:** + + - `CharmConfigInvalidError`: Error with charm configuration virtual-machines not of int type. +**Returns:** + Openstack runner config of the charm. + + --- ## class `SSHDebugConnection` @@ -748,7 +605,7 @@ SSH connection information for debug workflow. --- - + ### classmethod `from_charm` @@ -781,7 +638,7 @@ Raised when given machine charm architecture is unsupported. - `arch`: The current machine architecture. - + ### function `__init__` @@ -801,20 +658,3 @@ Initialize a new instance of the CharmConfigInvalidError exception. ---- - -## class `VirtualMachineResources` -Virtual machine resource configuration. - - - -**Attributes:** - - - `cpu`: Number of vCPU for the virtual machine. - - `memory`: Amount of memory for the virtual machine. 
- - `disk`: Amount of disk for the virtual machine. - - - - - diff --git a/src-docs/errors.py.md b/src-docs/errors.py.md index f976fcf0a..46a8560e9 100644 --- a/src-docs/errors.py.md +++ b/src-docs/errors.py.md @@ -16,15 +16,6 @@ Error for juju configuration. ---- - -## class `IssueMetricEventError` -Represents an error when issuing a metric event. - - - - - --- ## class `LogrotateSetupError` @@ -34,15 +25,6 @@ Represents an error raised when logrotate cannot be setup. ---- - -## class `LxdError` -Error for executing LXD actions. - - - - - --- ## class `MissingMongoDBError` @@ -52,96 +34,6 @@ Error for missing integration data. ---- - -## class `MissingRunnerBinaryError` -Error for missing runner binary. - - - - - ---- - -## class `MissingServerConfigError` -Error for unable to create runner due to missing server configurations. - - - - - ---- - -## class `RunnerAproxyError` -Error for setting up aproxy. - - - - - ---- - -## class `RunnerBinaryError` -Error of getting runner binary. - - - - - ---- - -## class `RunnerCreateError` -Error for runner creation failure. - - - - - ---- - -## class `RunnerFileLoadError` -Error for loading file on runner. - - - - - ---- - -## class `RunnerLogsError` -Base class for all runner logs errors. - - - - - ---- - -## class `RunnerRemoveError` -Error for runner removal failure. - - - - - ---- - -## class `SharedFilesystemError` -Base class for all shared filesystem errors. - - - - - ---- - -## class `SharedFilesystemMountError` -Represents an error related to the mounting of the shared filesystem. - - - - - --- ## class `SubprocessError` @@ -156,7 +48,7 @@ Error for Subprocess calls. - `stdout`: Content of stdout of the subprocess. - `stderr`: Content of stderr of the subprocess. 
- + ### function `__init__` diff --git a/src-docs/firewall.py.md b/src-docs/firewall.py.md deleted file mode 100644 index 486c03290..000000000 --- a/src-docs/firewall.py.md +++ /dev/null @@ -1,118 +0,0 @@ - - - - -# module `firewall.py` -The runner firewall manager. - - - ---- - -## class `Firewall` -Represent a firewall and provides methods to refresh its configuration. - - - -### function `__init__` - -```python -__init__(network: str) -``` - -Initialize a new Firewall instance. - - - -**Args:** - - - `network`: The LXD network name. - - - - ---- - - - -### function `get_host_ip` - -```python -get_host_ip() → str -``` - -Get the host IP address for the corresponding LXD network. - - - -**Returns:** - The host IP address. - ---- - - - -### function `refresh_firewall` - -```python -refresh_firewall( - denylist: Iterable[FirewallEntry], - allowlist: Optional[Iterable[FirewallEntry]] = None -) → None -``` - -Refresh the firewall configuration. - - - -**Args:** - - - `denylist`: The list of FirewallEntry rules to allow. - - `allowlist`: The list of FirewallEntry rules to allow. - - ---- - -## class `FirewallEntry` -Represent an entry in the firewall. - - - -**Attributes:** - - - `ip_range`: The IP address range using CIDR notation. - - - - ---- - - - -### classmethod `decode` - -```python -decode(entry: str) → FirewallEntry -``` - -Decode a firewall entry from a string. - - - -**Args:** - - - `entry`: The firewall entry string, e.g. '192.168.0.1:80' or '192.168.0.0/24:80-90:udp'. - - - -**Returns:** - - - `FirewallEntry`: A FirewallEntry instance representing the decoded entry. - - - -**Raises:** - - - `ValueError`: If the entry string is not in the expected format. - - diff --git a/src-docs/github_client.py.md b/src-docs/github_client.py.md deleted file mode 100644 index 795b92d10..000000000 --- a/src-docs/github_client.py.md +++ /dev/null @@ -1,55 +0,0 @@ - - - - -# module `github_client.py` -GitHub API client. - -Migrate to PyGithub in the future. 
PyGithub is still lacking some API such as remove token for runner. - - - ---- - -## class `GithubClient` -GitHub API client. - - - - ---- - - - -### function `get_runner_application` - -```python -get_runner_application( - path: GitHubOrg | GitHubRepo, - arch: Arch, - os: str = 'linux' -) → RunnerApplication -``` - -Get runner application available for download for given arch. - - - -**Args:** - - - `path`: GitHub repository path in the format '/', or the GitHub organization name. - - `arch`: The runner architecture. - - `os`: The operating system that the runner binary should run on. - - - -**Raises:** - - - `RunnerBinaryError`: If the runner application for given architecture and OS is not found. - - - -**Returns:** - The runner application. - - diff --git a/src-docs/lxd.py.md b/src-docs/lxd.py.md deleted file mode 100644 index 3e9bd771d..000000000 --- a/src-docs/lxd.py.md +++ /dev/null @@ -1,891 +0,0 @@ - - - - -# module `lxd.py` -Low-level LXD client interface. - -The LxdClient class offers a low-level interface to isolate the underlying implementation of LXD. - -**Global Variables** ---------------- -- **LXC_BINARY** - - ---- - -## class `LxdClient` -LXD client. - - - -### function `__init__` - -```python -__init__() → None -``` - -Instantiate the LXD client. - - - - - ---- - -## class `LxdImageManager` -LXD image manager. - - - -### function `__init__` - -```python -__init__(pylxd_client: 'Client') -``` - -Instantiate the LXD image manager. - - - -**Args:** - - - `pylxd_client`: Instance of pylxd.Client. - - - - ---- - - - -### function `create` - -```python -create(name: 'str', path: 'Path') → None -``` - -Import a LXD image. - - - -**Args:** - - - `name`: Alias for the image. - - `path`: Path of the LXD image file. - - - -**Raises:** - - - `LxdError`: Unable to import the file as LXD image. - ---- - - - -### function `exists` - -```python -exists(alias: 'str') → bool -``` - -Check if an image with the given name exists. 
- - - -**Args:** - - - `alias`: Alias name of the image to check. - - - -**Returns:** - Whether the image exists. - - ---- - -## class `LxdInstance` -An LXD instance. - - - -**Attributes:** - - - `name` (str): Name of the LXD instance. - - `files` (LxdInstanceFiles): Manager for the files on the LXD instance. - - `status` (str): Status of the LXD instance. - - - -### function `__init__` - -```python -__init__(pylxd_instance: 'Instance') -``` - -Instantiate the LXD instance representation. - - - -**Args:** - - - `pylxd_instance`: Instance of pylxd.models.Instance for the LXD instance. - - ---- - -#### property status - -Status of the LXD instance. - - - -**Returns:** - Status of the LXD instance. - - - ---- - - - -### function `delete` - -```python -delete(wait: 'bool' = False) → None -``` - -Delete the LXD instance. - - - -**Args:** - - - `wait`: Whether to wait until the LXD instance is stopped before returning. - - - -**Raises:** - - - `LxdError`: Unable to delete the LXD instance. - ---- - - - -### function `execute` - -```python -execute( - cmd: 'list[str]', - cwd: 'Optional[str]' = None, - hide_cmd: 'bool' = False, - **kwargs: 'Any' -) → Tuple[int, IO, IO] -``` - -Execute a command within the LXD instance. - -Exceptions are not raised if command execution failed. Caller should check the exit code and stderr for errors. - -The command is executed with `subprocess.run`, additional arguments can be passed to it as keyword arguments. The following arguments to `subprocess.run` should not be set: `capture_output`, `shell`, `check`. As those arguments are used by this function. - - - -**Args:** - - - `cmd`: Commands to be executed. - - `cwd`: Working directory to execute the commands. - - `hide_cmd`: Hide logging of cmd. - - `kwargs`: Additional keyword arguments for the `subprocess.run` call. - - - - - -**Returns:** - Tuple containing the exit code, stdout, stderr. 
- ---- - - - -### function `start` - -```python -start(timeout: 'int' = 30, force: 'bool' = True, wait: 'bool' = False) → None -``` - -Start the LXD instance. - - - -**Args:** - - - `timeout`: Timeout for starting the LXD instance. - - `force`: Whether to force start the LXD instance. - - `wait`: Whether to wait until the LXD instance is started before returning. - - - -**Raises:** - - - `LxdError`: Unable to start the LXD instance. - ---- - - - -### function `stop` - -```python -stop(timeout: 'int' = 30, force: 'bool' = True, wait: 'bool' = False) → None -``` - -Stop the LXD instance. - - - -**Args:** - - - `timeout`: Timeout for stopping the LXD instance. - - `force`: Whether to force stop the LXD instance. - - `wait`: Whether to wait until the LXD instance is stopped before returning. - - - -**Raises:** - - - `LxdError`: Unable to stop the LXD instance. - - ---- - -## class `LxdInstanceFileManager` -File manager of an LXD instance. - - - -**Attributes:** - - - `instance` (LxdInstance): LXD instance where the files are located in. - - - -### function `__init__` - -```python -__init__(instance: 'LxdInstance') -``` - -Instantiate the file manager. - - - -**Args:** - - - `instance`: LXD instance where the files are located in. - - - - ---- - - - -### function `mk_dir` - -```python -mk_dir(dir_name: 'str') → None -``` - -Create a directory in the LXD instance. - - - -**Args:** - - - `dir_name`: Name of the directory to create. - ---- - - - -### function `pull_file` - -```python -pull_file(source: 'str', destination: 'str', is_dir: 'bool' = False) → None -``` - -Pull a file from the LXD instance to the local machine. - - - -**Args:** - - - `source`: Path of the file to pull in the LXD instance. - - `destination`: Path in local machine. - - `is_dir`: Whether the source is a directory. - - - -**Raises:** - - - `LxdError`: Unable to load the file from the LXD instance. 
- ---- - - - -### function `push_file` - -```python -push_file( - source: 'str', - destination: 'str', - mode: 'Optional[str]' = None -) → None -``` - -Push a file to the LXD instance. - - - -**Args:** - - - `source`: Path of the file to push to the LXD instance. - - `destination`: Path in the LXD instance to load the file. - - `mode`: File permissions. - - - -**Raises:** - - - `LxdError`: Unable to load the file into the LXD instance. - ---- - - - -### function `read_file` - -```python -read_file(filepath: 'str') → str -``` - -Read the content of a file in the LXD instance. - - - -**Args:** - - - `filepath`: Path of the file in the LXD instance. - - - -**Raises:** - - - `LxdError`: Unable to load the file from the LXD instance. - - - -**Returns:** - The content of the file. - ---- - - - -### function `write_file` - -```python -write_file( - filepath: 'str', - content: 'Union[str, bytes]', - mode: 'Optional[str]' = None -) → None -``` - -Write a file with the given content into the LXD instance. - - - -**Args:** - - - `filepath`: Path in the LXD instance to load the file. - - `content`: Content of the file. - - `mode`: File permission setting. - - - -**Raises:** - - - `LxdError`: Unable to load the file to the LXD instance. - - ---- - -## class `LxdInstanceManager` -LXD instance manager. - - - -### function `__init__` - -```python -__init__(pylxd_client: 'Client') -``` - -Instantiate the LXD instance manager. - - - -**Args:** - - - `pylxd_client`: Instance of pylxd.Client. - - - - ---- - - - -### function `all` - -```python -all() → list[LxdInstance] -``` - -Get list of LXD instances. - - - -**Raises:** - - - `LxdError`: Unable to get all LXD instances. - - - -**Returns:** - List of LXD instances. - ---- - - - -### function `create` - -```python -create(config: 'LxdInstanceConfig', wait: 'bool') → LxdInstance -``` - -Create an LXD instance. - - - -**Args:** - - - `config`: Configuration for the LXD instance. 
- - `wait`: Whether to wait until the LXD instance is created before returning. - - - -**Raises:** - - - `LxdError`: Unable to get all LXD instances. - - - -**Returns:** - The created LXD instance. - - ---- - -## class `LxdNetworkManager` -LXD network manager. - - - -### function `__init__` - -```python -__init__(pylxd_client: 'Client') -``` - -Instantiate the LXD profile manager. - - - -**Args:** - - - `pylxd_client`: Instance of pylxd.Client. - - - - ---- - - - -### function `get` - -```python -get(name: 'str') → LxdNetwork -``` - -Get the LXD network information. - - - -**Args:** - - - `name`: The name of the LXD network. - - - -**Returns:** - Information on the LXD network. - - ---- - -## class `LxdProfile` -LXD profile. - - - -### function `__init__` - -```python -__init__(pylxd_profile: 'Profile') -``` - -Instantiate the LXD profile. - - - -**Args:** - - - `pylxd_profile`: Instance of the pylxd.models.Profile. - - - - ---- - - - -### function `delete` - -```python -delete() → None -``` - -Delete the profile. - ---- - - - -### function `save` - -```python -save() → None -``` - -Save the current configuration of profile. - - ---- - -## class `LxdProfileManager` -LXD profile manager. - - - -### function `__init__` - -```python -__init__(pylxd_client: 'Client') -``` - -Instantiate the LXD profile manager. - - - -**Args:** - - - `pylxd_client`: Instance of pylxd.Client. - - - - ---- - - - -### function `create` - -```python -create( - name: 'str', - config: 'LxdResourceProfileConfig', - devices: 'LxdResourceProfileDevices' -) → None -``` - -Create an LXD profile. - - - -**Args:** - - - `name`: Name of the LXD profile to create. - - `config`: Configuration of the LXD profile. - - `devices`: Devices configuration of the LXD profile. - - - -**Raises:** - - - `LxdError`: Unable to create the LXD profile. - ---- - - - -### function `exists` - -```python -exists(name: 'str') → bool -``` - -Check whether an LXD profile of a given name exists. 
- - - -**Args:** - - - `name`: Name for LXD profile to check. - - - -**Raises:** - - - `LxdError`: Unable to check the LXD profile existence. - - - -**Returns:** - Whether the LXD profile of the given name exists. - ---- - - - -### function `get` - -```python -get(name: 'str') → LxdProfile -``` - -Get an LXD profile. - - - -**Args:** - - - `name`: Name of the LXD profile. - - - -**Raises:** - - - `LxdError`: Unable to get the LXD profile with the name. - - - -**Returns:** - LXDProfile with given name. - - ---- - -## class `LxdStoragePool` -An LXD storage pool. - - - -**Attributes:** - - - `name` (str): Name of the storage pool. - - `driver` (str): Type of driver of the storage pool. - - `used_by` (list[str]): LXD instances using the storage pool. - - `config` (dict[str, any]): Dictionary of the configuration of the storage pool. - - `managed` (bool): Whether LXD manages the storage pool. - - - -### function `__init__` - -```python -__init__(pylxd_storage_pool: 'StoragePool') -``` - -Instantiate the LXD storage pool. - - - -**Args:** - - - `pylxd_storage_pool`: Instance of the pylxd.models.StoragePool. - - - - ---- - - - -### function `delete` - -```python -delete() → None -``` - -Delete the storage pool. - ---- - - - -### function `save` - -```python -save() → None -``` - -Save the current configuration of storage pool. - - ---- - -## class `LxdStoragePoolManager` -LXD storage pool manager. - - - -### function `__init__` - -```python -__init__(pylxd_client: 'Client') -``` - -Instantiate the LXD storage pool manager. - - - -**Args:** - - - `pylxd_client`: Instance of pylxd.Client. - - - - ---- - - - -### function `all` - -```python -all() → list[LxdStoragePool] -``` - -Get all LXD storage pool. - - - -**Returns:** - List of LXD storage pools. - ---- - - - -### function `create` - -```python -create(config: 'LxdStoragePoolConfiguration') → LxdStoragePool -``` - -Create an LXD storage pool. - - - -**Args:** - - - `config`: Configuration for the storage pool. 
- - - -**Returns:** - The LXD storage pool. - ---- - - - -### function `exists` - -```python -exists(name: 'str') → bool -``` - -Check if an LXD storage pool exists. - - - -**Args:** - - - `name`: Name to check for. - - - -**Returns:** - Whether the storage pool exists. - ---- - - - -### function `get` - -```python -get(name: 'str') → LxdStoragePool -``` - -Get an LXD storage pool. - - - -**Args:** - - - `name`: Name of the storage pool. - - - -**Raises:** - - - `LxdError`: If the storage pool with given name was not found. - - - -**Returns:** - The LXD storage pool. - - diff --git a/src-docs/lxd_type.py.md b/src-docs/lxd_type.py.md deleted file mode 100644 index 0b4ec54bb..000000000 --- a/src-docs/lxd_type.py.md +++ /dev/null @@ -1,134 +0,0 @@ - - - - -# module `lxd_type.py` -Types used by Lxd class. - -The details of the configuration of different types of devices can be found here: https://linuxcontainers.org/lxd/docs/latest/reference/devices/ - -For example, configuration for disk: https://linuxcontainers.org/lxd/docs/latest/reference/devices_disk/# - -The unit of storage and network limits can be found here: https://linuxcontainers.org/lxd/docs/latest/reference/instance_units/#instances-limit-units - - - ---- - -## class `LxdInstanceConfig` -Configuration for the LXD instance. - -See https://documentation.ubuntu.com/lxd/en/latest/howto/instances_create/ - - - -**Attributes:** - - - `name`: Name of the instance. - - `type`: Instance type, i.e. "container" or "virtual-machine". - - `source`: Instance creation source configuration. - - `ephemeral`: Whether the container should be deleted after a single run. - - `profiles`: List of LXD profiles applied to the instance. - - - - - ---- - -## class `LxdInstanceConfigSource` -Configuration for source image in the LXD instance. - - - -**Attributes:** - - - `type`: Type of source configuration, e.g. image, disk - - `server`: The source server URL, e.g. 
https://cloud-images.ubuntu.com/releases - - `protocol`: Protocol of the configuration, e.g. simplestreams - - `alias`: Alias for configuration. - - - - - ---- - -## class `LxdNetwork` -LXD network information. - - - -**Attributes:** - - - `name`: The name of LXD network. - - `description`: LXD network descriptor. - - `type`: Network type, i.e. "bridge", "physical" - - `config`: The LXD network configuration values. - - `managed`: Whether the network is being managed by lxd. - - `used_by`: Number of instances using the network. - - - - - ---- - -## class `LxdNetworkConfig` -Represent LXD network configuration. - - - - - ---- - -## class `LxdResourceProfileConfig` -Configuration LXD profile. - - - - - ---- - -## class `LxdResourceProfileDevicesDisk` -LXD device profile of disk. - - - - - ---- - -## class `LxdStoragePoolConfig` -Configuration of the storage pool. - - - -**Attributes:** - - - `source`: The storage pool configuration source image. - - `size`: The size of the storage pool, e.g. 30GiB - - - - - ---- - -## class `LxdStoragePoolConfiguration` -Configuration for LXD storage pool. - - - -**Attributes:** - - - `name`: The storage pool name. - - `driver`: The storage driver being used, i.e. "dir", "btrfs", ... . See https://documentation.ubuntu.com/lxd/en/stable-5.0/reference/storage_drivers/ for more information. - - `config`: The storage pool configuration. - - - - - diff --git a/src-docs/runner.py.md b/src-docs/runner.py.md deleted file mode 100644 index 96e5102d2..000000000 --- a/src-docs/runner.py.md +++ /dev/null @@ -1,180 +0,0 @@ - - - - -# module `runner.py` -Manage the lifecycle of runners. - -The `Runner` class stores the information on the runners and manages the lifecycle of the runners on LXD and GitHub. - -The `RunnerManager` class from `runner_manager.py` creates and manages a collection of `Runner` instances. 
- -**Global Variables** ---------------- -- **APROXY_ARM_REVISION** -- **APROXY_AMD_REVISION** - - ---- - -## class `CreateRunnerConfig` -The configuration values for creating a single runner instance. - - - -**Attributes:** - - - `image`: Name of the image to launch the LXD instance with. - - `resources`: Resource setting for the LXD instance. - - `binary_path`: Path to the runner binary. - - `registration_token`: Token for registering the runner on GitHub. - - `arch`: Current machine architecture. - - - - - ---- - -## class `Runner` -Single instance of GitHub self-hosted runner. - - - -**Attributes:** - - - `runner_application`: The runner application directory path - - `env_file`: The runner environment source .env file path. - - `config_script`: The runner configuration script file path. - - `runner_script`: The runner start script file path. - - `pre_job_script`: The runner pre_job script file path. This is referenced in the env_file in the ACTIONS_RUNNER_HOOK_JOB_STARTED environment variable. - - - -### function `__init__` - -```python -__init__( - clients: RunnerManagerClients, - runner_config: RunnerConfig, - runner_status: RunnerStatus, - instance: Optional[LxdInstance] = None -) -``` - -Construct the runner instance. - - - -**Args:** - - - `clients`: Clients to access various services. - - `runner_config`: Configuration of the runner instance. - - `runner_status`: Status info of the given runner. - - `instance`: LXD instance of the runner if already created. - - - - ---- - - - -### function `create` - -```python -create(config: CreateRunnerConfig) → None -``` - -Create the runner instance on LXD and register it on GitHub. - - - -**Args:** - - - `config`: The instance config to create the LXD VMs and configure GitHub runner with. - - - -**Raises:** - - - `RunnerCreateError`: Unable to create an LXD instance for runner. - ---- - - - -### function `pull_logs` - -```python -pull_logs() → None -``` - -Pull the logs of the runner into a directory. 
- -Expects the runner to have an instance. - - - -**Raises:** - - - `RunnerLogsError`: If the runner logs could not be pulled. - ---- - - - -### function `remove` - -```python -remove(remove_token: Optional[str]) → None -``` - -Remove this runner instance from LXD and GitHub. - - - -**Args:** - - - `remove_token`: Token for removing the runner on GitHub. - - - -**Raises:** - - - `RunnerRemoveError`: Failure in removing runner. - - ---- - -## class `Snap` -This class represents a snap installation. - - - -**Attributes:** - - - `name`: The snap application name. - - `channel`: The channel to install the snap from. - - `revision`: The revision number of the snap installation. - - - - - ---- - -## class `WgetExecutable` -The executable to be installed through wget. - - - -**Attributes:** - - - `url`: The URL of the executable binary. - - `cmd`: Executable command name. E.g. yq_linux_amd64 -> yq - - - - - diff --git a/src-docs/runner_manager.py.md b/src-docs/runner_manager.py.md deleted file mode 100644 index b1400ec1c..000000000 --- a/src-docs/runner_manager.py.md +++ /dev/null @@ -1,244 +0,0 @@ - - - - -# module `runner_manager.py` -Runner Manager manages the runners on LXD and GitHub. - -**Global Variables** ---------------- -- **RUNNER_INSTALLED_TS_FILE_NAME** -- **REMOVED_RUNNER_LOG_STR** - - ---- - -## class `LXDRunnerManager` -Manage a group of runners according to configuration. - - - -**Attributes:** - - - `runner_bin_path`: The github runner app scripts path. - - `cron_path`: The path to runner build image cron job. - - - -### function `__init__` - -```python -__init__( - app_name: str, - unit: int, - runner_manager_config: LXDRunnerManagerConfig -) → None -``` - -Construct RunnerManager object for creating and managing runners. - - - -**Args:** - - - `app_name`: An name for the set of runners. - - `unit`: Unit number of the set of runners. - - `runner_manager_config`: Configuration for the runner manager. 
- - - - ---- - - - -### function `build_runner_image` - -```python -build_runner_image() → None -``` - -Build the LXD image for hosting runner. - -Build container image in test mode, else virtual machine image. - - - -**Raises:** - - - `SubprocessError`: Unable to build the LXD image. - ---- - - - -### function `check_runner_bin` - -```python -check_runner_bin() → bool -``` - -Check if runner binary exists. - - - -**Returns:** - Whether runner bin exists. - ---- - - - -### function `flush` - -```python -flush(mode: LXDFlushMode = ) → int -``` - -Remove existing runners. - - - -**Args:** - - - `mode`: Strategy for flushing runners. - - - -**Raises:** - - - `GithubClientError`: If there was an error getting remove-token to unregister runners from GitHub. - - - -**Returns:** - Number of runners removed. - ---- - - - -### function `get_github_info` - -```python -get_github_info() → Iterator[RunnerInfo] -``` - -Get information on the runners from GitHub. - - - -**Returns:** - List of information from GitHub on runners. - ---- - - - -### function `get_latest_runner_bin_url` - -```python -get_latest_runner_bin_url(os_name: str = 'linux') → RunnerApplication -``` - -Get the URL for the latest runner binary. - -The runner binary URL changes when a new version is available. - - - -**Args:** - - - `os_name`: Name of operating system. - - - -**Raises:** - - - `RunnerBinaryError`: If an error occurred while fetching runner application info. - - - -**Returns:** - Information on the runner application. - ---- - - - -### function `has_runner_image` - -```python -has_runner_image() → bool -``` - -Check if the runner image exists. - - - -**Returns:** - Whether the runner image exists. - ---- - - - -### function `reconcile` - -```python -reconcile(quantity: int, resources: VirtualMachineResources) → int -``` - -Bring runners in line with target. - - - -**Args:** - - - `quantity`: Number of intended runners. - - `resources`: Configuration of the virtual machine resources. 
- - - -**Returns:** - Difference between intended runners and actual runners. - ---- - - - -### function `schedule_build_runner_image` - -```python -schedule_build_runner_image() → None -``` - -Install cron job for building runner image. - ---- - - - -### function `update_runner_bin` - -```python -update_runner_bin(binary: RunnerApplication) → None -``` - -Download a runner file, replacing the current copy. - -Remove the existing runner binary to prevent it from being used. This is done to prevent security issues arising from outdated runner binaries containing security flaws. The newest version of runner binary should always be used. - - - -**Args:** - - - `binary`: Information on the runner binary to download. - - - -**Raises:** - - - `RunnerBinaryError`: If there was an error updating runner binary info. - - diff --git a/src-docs/runner_manager_type.py.md b/src-docs/runner_manager_type.py.md deleted file mode 100644 index 7cb3b71c6..000000000 --- a/src-docs/runner_manager_type.py.md +++ /dev/null @@ -1,96 +0,0 @@ - - - - -# module `runner_manager_type.py` -Types used by RunnerManager class. - - - ---- - -## class `LXDFlushMode` -Strategy for flushing runners. - -During pre-job (repo-check), the runners are marked as idle and if the pre-job fails, the runner falls back to being idle again. Hence wait_repo_check is required. - - - -**Attributes:** - - - `FLUSH_IDLE`: Flush only idle runners. - - `FLUSH_IDLE_WAIT_REPO_CHECK`: Flush only idle runners, then wait until repo-policy-check is completed for the busy runners. - - `FLUSH_BUSY`: Flush busy runners. - - `FLUSH_BUSY_WAIT_REPO_CHECK`: Wait until the repo-policy-check is completed before flush of busy runners. - - `FORCE_FLUSH_WAIT_REPO_CHECK`: Force flush the runners (remove lxd instances even on gh api issues, like invalid token). Wait until repo-policy-check is completed before force flush of busy runners. - - - - - ---- - -## class `LXDRunnerManagerConfig` -Configuration of runner manager. 
- - - -**Attributes:** - - - `are_metrics_enabled`: Whether metrics for the runners should be collected. - - `charm_state`: The state of the charm. - - `image`: Name of the image for creating LXD instance. - - `lxd_storage_path`: Path to be used as LXD storage. - - `path`: GitHub repository path in the format '/', or the GitHub organization name. - - `service_token`: Token for accessing local service. - - `token`: GitHub personal access token to register runner to the repository or organization. - - `dockerhub_mirror`: URL of dockerhub mirror to use. - - `reactive_config`: The configuration to spawn runners reactively. - - ---- - -#### property are_metrics_enabled - -Whether metrics for the runners should be collected. - - - - ---- - -## class `RunnerInfo` -Information from GitHub of a runner. - -Used as a returned type to method querying runner information. - - - -**Attributes:** - - - `name`: Name of the runner. - - `status`: Status of the runner. - - `busy`: Whether the runner has taken a job. - - - - - ---- - -## class `RunnerManagerClients` -Clients for accessing various services. - - - -**Attributes:** - - - `github`: Used to query GitHub API. - - `jinja`: Used for templating. - - `lxd`: Used to interact with LXD API. - - `repo`: Used to interact with repo-policy-compliance API. - - - - - diff --git a/src-docs/runner_type.py.md b/src-docs/runner_type.py.md deleted file mode 100644 index e3a7025aa..000000000 --- a/src-docs/runner_type.py.md +++ /dev/null @@ -1,102 +0,0 @@ - - - - -# module `runner_type.py` -Types used by Runner class. - - - ---- - -## class `ProxySetting` -Represent HTTP-related proxy settings. - - - -**Attributes:** - - - `no_proxy`: The comma separated URLs to not go through proxy. - - `http`: HTTP proxy URL. - - `https`: HTTPS proxy URL. - - `aproxy_address`: Aproxy URL. - - - - - ---- - -## class `RunnerConfig` -Configuration for runner. - - - -**Attributes:** - - - `app_name`: Application name of the charm. 
- - `issue_metrics`: Whether to issue metrics. - - `labels`: Custom runner labels. - - `lxd_storage_path`: Path to be used as LXD storage. - - `name`: Name of the runner. - - `path`: GitHub repository path in the format '/', or the GitHub organization name. - - `proxies`: HTTP(S) proxy settings. - - `dockerhub_mirror`: URL of dockerhub mirror to use. - - `ssh_debug_connections`: The SSH debug server connections metadata. - - - - - ---- - -## class `RunnerGithubInfo` -GitHub info of a runner. - - - -**Attributes:** - - - `runner_name`: Name of the runner. - - `runner_id`: ID of the runner assigned by GitHub. - - `online`: Whether GitHub marks this runner as online. - - `busy`: Whether GitHub marks this runner as busy. - - - - - ---- - -## class `RunnerNameByHealth` -Set of runners instance by health state. - - - -**Attributes:** - - - `healthy`: Runners that are correctly running runner script. - - `unhealthy`: Runners that are not running runner script. - - - - - ---- - -## class `RunnerStatus` -Status of runner. - - - -**Attributes:** - - - `runner_id`: ID of the runner. - - `exist`: Whether the runner instance exists on LXD. - - `online`: Whether GitHub marks this runner as online. - - `busy`: Whether GitHub marks this runner as busy. - - - - - diff --git a/src-docs/shared_fs.py.md b/src-docs/shared_fs.py.md deleted file mode 100644 index 2c5c8220e..000000000 --- a/src-docs/shared_fs.py.md +++ /dev/null @@ -1,139 +0,0 @@ - - - - -# module `shared_fs.py` -Classes and functions to operate on the shared filesystem between the charm and the runners. - -**Global Variables** ---------------- -- **DIR_NO_MOUNTPOINT_EXIT_CODE** -- **FILESYSTEM_OWNER** -- **FILESYSTEM_SIZE** - ---- - - - -## function `create` - -```python -create(runner_name: str) → MetricsStorage -``` - -Create a shared filesystem for the runner. - -The method is not idempotent and will raise an exception if the shared filesystem already exists. 
- - - -**Args:** - - - `runner_name`: The name of the runner. - - - -**Returns:** - The shared filesystem object. - - - -**Raises:** - - - `CreateMetricsStorageError`: If the creation of the shared filesystem fails. - - ---- - - - -## function `list_all` - -```python -list_all() → Iterator[MetricsStorage] -``` - -List all the metric storages. - - - -**Yields:** - A metrics storage object. - - ---- - - - -## function `get` - -```python -get(runner_name: str) → MetricsStorage -``` - -Get the shared filesystem for the runner. - -Mounts the filesystem if it is not currently mounted. - - - -**Args:** - - - `runner_name`: The name of the runner. - - - -**Returns:** - The shared filesystem object. - - - -**Raises:** - - - `GetMetricsStorageError`: If the shared filesystem could not be retrieved/mounted. - - ---- - - - -## function `delete` - -```python -delete(runner_name: str) → None -``` - -Delete the shared filesystem for the runner. - - - -**Args:** - - - `runner_name`: The name of the runner. - - - -**Raises:** - - - `DeleteMetricsStorageError`: If the shared filesystem could not be deleted. - - ---- - - - -## function `move_to_quarantine` - -```python -move_to_quarantine(runner_name: str) → None -``` - -Archive the mshared filesystem for the runner and delete it. - - - -**Args:** - - - `runner_name`: The name of the runner. - - diff --git a/src-docs/utilities.py.md b/src-docs/utilities.py.md index 75a3c0386..3ba494c15 100644 --- a/src-docs/utilities.py.md +++ b/src-docs/utilities.py.md @@ -74,37 +74,7 @@ Looks for all upper-case and all low-case of the `env_var`. --- - - -## function `bytes_with_unit_to_kib` - -```python -bytes_with_unit_to_kib(num_bytes: str) → int -``` - -Convert a positive integer followed by a unit to number of kibibytes. - - - -**Args:** - - - `num_bytes`: A positive integer followed by one of the following unit: KiB, MiB, GiB, TiB, PiB, EiB. - - - -**Raises:** - - - `ValueError`: If invalid unit was detected. 
- - - -**Returns:** - Number of kilobytes. - - ---- - - + ## function `remove_residual_venv_dirs` diff --git a/src/charm.py b/src/charm.py index 7cd8785ae..643f977ff 100755 --- a/src/charm.py +++ b/src/charm.py @@ -3,11 +3,8 @@ # Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. -# TODO: 2024-03-12 The module contains too many lines which are scheduled for refactoring. -# pylint: disable=too-many-lines - """Charm for creating and managing GitHub self-hosted runner instances.""" -from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry +from utilities import execute_command, remove_residual_venv_dirs # This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335 # It is important that this is run before importation of any other modules. @@ -18,14 +15,8 @@ import functools import logging -import os -import secrets -import shutil -import urllib.error -from pathlib import Path -from typing import Any, Callable, Dict, Sequence, TypeVar - -import jinja2 +from typing import Any, Callable, Sequence, TypeVar + import ops from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires from charms.grafana_agent.v0.cos_agent import COSAgentProvider @@ -49,7 +40,7 @@ from github_runner_manager.reactive.types_ import QueueConfig as ReactiveQueueConfig from github_runner_manager.reactive.types_ import RunnerConfig as ReactiveRunnerConfig from github_runner_manager.types_ import SystemUserConfig -from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus, parse_github_path +from github_runner_manager.types_.github import GitHubPath from ops.charm import ( ActionEvent, CharmBase, @@ -67,36 +58,23 @@ import logrotate from charm_state import ( DEBUG_SSH_INTEGRATION_NAME, - GROUP_CONFIG_NAME, IMAGE_INTEGRATION_NAME, LABELS_CONFIG_NAME, PATH_CONFIG_NAME, RECONCILE_INTERVAL_CONFIG_NAME, - TEST_MODE_CONFIG_NAME, TOKEN_CONFIG_NAME, CharmConfigInvalidError, CharmState, - InstanceType, 
OpenstackImage, - ProxyConfig, - RunnerStorage, - VirtualMachineResources, ) from errors import ( ConfigurationError, LogrotateSetupError, MissingMongoDBError, - MissingRunnerBinaryError, - RunnerBinaryError, - RunnerError, SubprocessError, TokenError, ) from event_timer import EventTimer, TimerStatusError -from firewall import Firewall, FirewallEntry -from runner import LXD_PROFILE_YAML -from runner_manager import LXDRunnerManager, LXDRunnerManagerConfig -from runner_manager_type import LXDFlushMode # We assume a stuck reconcile event when it takes longer # than 10 times a normal interval. Currently, we are only aware of @@ -159,12 +137,6 @@ def func_with_catch_errors(self: "GithubRunnerCharm", event: EventT) -> None: except TokenError as err: logger.exception("Issue with GitHub token") self.unit.status = BlockedStatus(str(err)) - except MissingRunnerBinaryError: - logger.exception("Missing runner binary") - self.unit.status = MaintenanceStatus( - "GitHub runner application not downloaded; the charm will retry download on " - "reconcile interval" - ) except MissingMongoDBError as err: logger.exception("Missing integration data") self.unit.status = WaitingStatus(str(err)) @@ -198,41 +170,15 @@ def func_with_catch_errors(self: "GithubRunnerCharm", event: ActionEvent) -> Non logger.exception("Issue with charm configuration") self.unit.status = BlockedStatus(str(err)) event.fail(str(err)) - except MissingRunnerBinaryError: - logger.exception("Missing runner binary") - err_msg = ( - "GitHub runner application not downloaded; the charm will retry download on " - "reconcile interval" - ) - self.unit.status = MaintenanceStatus(err_msg) - event.fail(err_msg) return func_with_catch_errors class GithubRunnerCharm(CharmBase): - """Charm for managing GitHub self-hosted runners. - - Attributes: - service_token_path: The path to token to access local services. - repo_check_web_service_path: The path to repo-policy-compliance service directory. 
- repo_check_web_service_script: The path to repo-policy-compliance web service script. - repo_check_systemd_service: The path to repo-policy-compliance unit file. - juju_storage_path: The path to juju storage. - ram_pool_path: The path to memdisk storage. - kernel_module_path: The path to kernel modules. - """ + """Charm for managing GitHub self-hosted runners.""" _stored = StoredState() - service_token_path = Path("service_token") - repo_check_web_service_path = Path("/home/ubuntu/repo_policy_compliance_service") - repo_check_web_service_script = Path("scripts/repo_policy_compliance_service.py") - repo_check_systemd_service = Path("/etc/systemd/system/repo-policy-compliance.service") - juju_storage_path = Path("/storage/juju") - ram_pool_path = Path("/storage/ram") - kernel_module_path = Path("/etc/modules") - def __init__(self, *args: Any, **kwargs: Any) -> None: """Construct the charm. @@ -240,26 +186,16 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: args: List of arguments to be passed to the `CharmBase` class. kwargs: List of keyword arguments to be passed to the `CharmBase` class. - - Raises: - RuntimeError: If invalid test configuration was detected. 
""" super().__init__(*args, **kwargs) self._grafana_agent = COSAgentProvider(self) - self.service_token: str | None = None self._event_timer = EventTimer(self.unit.name) - if LXD_PROFILE_YAML.exists(): - if self.config.get(TEST_MODE_CONFIG_NAME) != "insecure": - raise RuntimeError("lxd-profile.yaml detected outside test mode") - logger.critical("test mode is enabled") - self._stored.set_default( path=self.config[PATH_CONFIG_NAME], # for detecting changes token=self.config[TOKEN_CONFIG_NAME], # for detecting changes labels=self.config[LABELS_CONFIG_NAME], # for detecting changes - runner_bin_url=None, ) self.on.define_event("reconcile_runners", ReconcileRunnersEvent) @@ -309,149 +245,9 @@ def _setup_state(self) -> CharmState: except CharmConfigInvalidError as exc: raise ConfigurationError(exc.msg) from exc - def _create_memory_storage(self, path: Path, size: int) -> None: - """Create a tmpfs-based LVM volume group. - - Args: - path: Path to directory for memory storage. - size: Size of the tmpfs in kilobytes. - - Raises: - RunnerError: Unable to setup storage for runner. - """ - if size <= 0: - return - - try: - # Create tmpfs if not exists, else resize it. - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - execute_command( - ["mount", "-t", "tmpfs", "-o", f"size={size}k", "tmpfs", str(path)] - ) - else: - execute_command(["mount", "-o", f"remount,size={size}k", str(path)]) - except (OSError, SubprocessError) as err: - logger.exception("Unable to setup storage directory") - # Remove the path if is not in use. If the tmpfs is in use, the removal will fail. - if path.exists(): - shutil.rmtree(path, ignore_errors=True) - path.rmdir() - logger.info("Cleaned up storage directory") - raise RunnerError("Failed to configure runner storage") from err - - @retry(tries=5, delay=5, max_delay=60, backoff=2, local_logger=logger) - def _ensure_runner_storage(self, size: int, runner_storage: RunnerStorage) -> Path: - """Ensure the runner storage is setup. 
- - Args: - size: Size of the storage needed in kibibytes. - runner_storage: Type of storage to use for virtual machine hosting the runners. - - Raises: - ConfigurationError: If there was an error with runner stoarge configuration. - - Returns: - Runner storage path. - """ - match runner_storage: - case RunnerStorage.MEMORY: - logger.info("Creating tmpfs storage") - path = self.ram_pool_path - self._create_memory_storage(self.ram_pool_path, size) - case RunnerStorage.JUJU_STORAGE: - path = self.juju_storage_path - - # tmpfs storage is not created if required size is 0. - if size > 0: - # Check if the storage mounted has enough space - disk = shutil.disk_usage(path) - # Some storage space might be used by existing runners. - if size * 1024 > disk.total: - raise ConfigurationError( - ( - f"Required disk space for runners {size / 1024}MiB is greater than " - f"storage total size {disk.total / 1024 / 1024}MiB" - ) - ) - return path - - @retry(tries=5, delay=5, max_delay=60, backoff=2, local_logger=logger) - def _ensure_service_health(self) -> None: - """Ensure services managed by the charm is healthy. - - Services managed include: - * repo-policy-compliance - - Raises: - SubprocessError: if there was an error starting repo-policy-compliance service. - """ - logger.info("Checking health of repo-policy-compliance service") - try: - execute_command(["/usr/bin/systemctl", "is-active", "repo-policy-compliance"]) - except SubprocessError: - logger.exception("Found inactive repo-policy-compliance service.") - execute_command(["/usr/bin/systemctl", "restart", "repo-policy-compliance"]) - logger.info("Restart repo-policy-compliance service") - raise - - def _get_runner_manager( - self, state: CharmState, token: str | None = None, path: GitHubPath | None = None - ) -> LXDRunnerManager: - """Get a RunnerManager instance. - - Args: - state: Charm state. - token: GitHub personal access token to manage the runners with. If None the token in - charm state is used. 
- path: GitHub repository path in the format '/', or the GitHub organization - name. If None the path in charm state is used. - - Returns: - An instance of RunnerManager. - """ - if token is None: - token = state.charm_config.token - if path is None: - path = state.charm_config.path - - self._ensure_service_health() - - size_in_kib = ( - bytes_with_unit_to_kib(state.runner_config.virtual_machine_resources.disk) - * state.runner_config.virtual_machines - ) - lxd_storage_path = self._ensure_runner_storage( - size_in_kib, state.runner_config.runner_storage - ) - - if self.service_token is None: - self.service_token = self._get_service_token() - - app_name, unit = self.unit.name.rsplit("/", 1) - - return LXDRunnerManager( - app_name, - unit, - LXDRunnerManagerConfig( - charm_state=state, - dockerhub_mirror=state.charm_config.dockerhub_mirror, - image=state.runner_config.base_image.value, - lxd_storage_path=lxd_storage_path, - path=path, - reactive_config=state.reactive_config, - service_token=self.service_token, - token=token, - ), - ) - - # Pending refactor for RunnerManager class which will unify logic for OpenStack and LXD. - def _common_install_code(self, state: CharmState) -> bool: # noqa: C901 + def _common_install_code(self) -> bool: """Installation code shared between install and upgrade hook. - Args: - state: The charm state instance. - Raises: LogrotateSetupError: Failed to setup logrotate. SubprocessError: Failed to install dependencies. @@ -477,104 +273,23 @@ def _common_install_code(self, state: CharmState) -> bool: # noqa: C901 logger.error("Failed to setup logrotate") raise - if state.instance_type == InstanceType.OPENSTACK: - return True - - self.unit.status = MaintenanceStatus("Installing packages") - try: - # The `_start_services`, `_install_deps` includes retry. 
- self._install_local_lxd_deps() - self._start_services(state.charm_config.token, state.proxy_config) - except SubprocessError: - logger.error("Failed to install or start local LXD runner dependencies") - raise - - self._refresh_firewall(state) - - runner_manager = self._get_runner_manager(state) - if not runner_manager.has_runner_image(): - self.unit.status = MaintenanceStatus("Building runner image") - runner_manager.build_runner_image() - runner_manager.schedule_build_runner_image() - - self._set_reconcile_timer() - - self.unit.status = MaintenanceStatus("Downloading runner binary") - try: - runner_info = runner_manager.get_latest_runner_bin_url() - logger.info( - "Downloading %s from: %s", runner_info["filename"], runner_info["download_url"] - ) - self._stored.runner_bin_url = runner_info["download_url"] - runner_manager.update_runner_bin(runner_info) - # Safe guard against transient unexpected error. - except RunnerBinaryError as err: - logger.exception("Failed to update runner binary") - # Failure to download runner binary is a transient error. - # The charm automatically update runner binary on a schedule. 
- self.unit.status = MaintenanceStatus(f"Failed to update runner binary: {err}") - return False - - self.unit.status = ActiveStatus() return True @catch_charm_errors def _on_install(self, _: InstallEvent) -> None: """Handle the installation of charm.""" - state = self._setup_state() - self._common_install_code(state) + self._common_install_code() @catch_charm_errors def _on_start(self, _: StartEvent) -> None: """Handle the start of the charm.""" state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - self.unit.status = MaintenanceStatus("Starting runners") - if not self._get_set_image_ready_status(): - return - runner_scaler = self._get_runner_scaler(state) - self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) - return - - runner_manager = self._get_runner_manager(state) - - self._check_and_update_local_lxd_dependencies( - runner_manager, state.charm_config.token, state.proxy_config - ) - self.unit.status = MaintenanceStatus("Starting runners") - try: - runner_manager.flush(LXDFlushMode.FLUSH_IDLE) - self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) - except RunnerError as err: - logger.exception("Failed to start runners") - self.unit.status = ActiveStatus(f"Failed to start runners: {err}") + if not self._get_set_image_ready_status(): return - - self.unit.status = ActiveStatus() - - def _update_kernel(self, now: bool = False) -> None: - """Update the Linux kernel if new version is available. - - Do nothing if no new version is available, else update the kernel and reboot. - This method should only call by event handlers, and not action handlers. As juju-reboot - only works with events. - - Args: - now: Whether the reboot should trigger at end of event handler or now. 
- """ - logger.info("Updating kernel (if available)") - self._apt_install(["linux-generic"]) - - _, exit_code = execute_command(["ls", "/var/run/reboot-required"], check_exit=False) - if exit_code == 0: - logger.info("Rebooting system...") - self.unit.reboot(now=now) + runner_scaler = self._get_runner_scaler(state) + self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) def _set_reconcile_timer(self) -> None: """Set the timer for regular reconciliation checks.""" @@ -615,141 +330,31 @@ def _log_juju_processes() -> None: @catch_charm_errors def _on_upgrade_charm(self, _: UpgradeCharmEvent) -> None: """Handle the update of charm.""" - state = self._setup_state() - logger.info("Reinstalling dependencies...") - if not self._common_install_code(state): - return - - if state.instance_type == InstanceType.OPENSTACK: - # No dependency upgrade needed for openstack. - # No need to flush runners as there was no dependency upgrade. - return - - runner_manager = self._get_runner_manager(state) - logger.info("Flushing the runners...") - runner_manager.flush(LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK) - self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) + self._common_install_code() - # Temporarily ignore too-complex since this is subject to refactor. 
@catch_charm_errors - def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901 + def _on_config_changed(self, _: ConfigChangedEvent) -> None: """Handle the configuration change.""" state = self._setup_state() self._set_reconcile_timer() - prev_config_for_flush: dict[str, str] = {} - should_flush_runners = False if state.charm_config.token != self._stored.token: - prev_config_for_flush[TOKEN_CONFIG_NAME] = str(self._stored.token) - self._start_services(state.charm_config.token, state.proxy_config) self._stored.token = None if self.config[PATH_CONFIG_NAME] != self._stored.path: - prev_config_for_flush[PATH_CONFIG_NAME] = parse_github_path( - self._stored.path, self.config[GROUP_CONFIG_NAME] - ) self._stored.path = self.config[PATH_CONFIG_NAME] if self.config[LABELS_CONFIG_NAME] != self._stored.labels: - should_flush_runners = True self._stored.labels = self.config[LABELS_CONFIG_NAME] - if prev_config_for_flush or should_flush_runners: - if state.instance_type != InstanceType.OPENSTACK: - prev_runner_manager = self._get_runner_manager( - state=state, **prev_config_for_flush - ) - if prev_runner_manager: - self.unit.status = MaintenanceStatus("Removing runners due to config change") - # Flush runner in case the prev token has expired. - prev_runner_manager.flush(LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK) state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - if not self._get_set_image_ready_status(): - return - if state.charm_config.token != self._stored.token: - runner_scaler = self._get_runner_scaler(state) - runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) - self._reconcile_openstack_runners( - runner_scaler, state.runner_config.virtual_machines - ) - # TODO: 2024-04-12: Flush on token changes. 
+ if not self._get_set_image_ready_status(): return - - self._refresh_firewall(state) - - runner_manager = self._get_runner_manager(state) if state.charm_config.token != self._stored.token: - runner_manager.flush(LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK) - self._stored.token = state.charm_config.token - self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) - self.unit.status = ActiveStatus() - - def _check_and_update_local_lxd_dependencies( - self, runner_manager: LXDRunnerManager, token: str, proxy_config: ProxyConfig - ) -> bool: - """Check and update runner binary and services for local LXD runners. - - The runners are flushed if needed. - - Args: - runner_manager: RunnerManager used for finding the runner application to download. - token: GitHub personal access token for repo-policy-compliance to use. - proxy_config: Proxy configuration. - - Returns: - Whether the runner binary or the services was updated. - """ - self.unit.status = MaintenanceStatus("Checking for updates") - - # Check if the runner binary file exists. - if not runner_manager.check_runner_bin(): - self._stored.runner_bin_url = None - try: - self.unit.status = MaintenanceStatus("Checking for runner binary updates") - runner_info = runner_manager.get_latest_runner_bin_url() - except urllib.error.URLError as err: - logger.exception("Failed to check for runner updates") - # Failure to download runner binary is a transient error. - # The charm automatically update runner binary on a schedule. 
- self.unit.status = MaintenanceStatus(f"Failed to check for runner updates: {err}") - return False - - logger.debug( - "Current runner binary URL: %s, Queried runner binary URL: %s", - self._stored.runner_bin_url, - runner_info.download_url, - ) - runner_bin_updated = False - if runner_info.download_url != self._stored.runner_bin_url: - self.unit.status = MaintenanceStatus("Updating runner binary") - runner_manager.update_runner_bin(runner_info) - self._stored.runner_bin_url = runner_info.download_url - runner_bin_updated = True - - self.unit.status = MaintenanceStatus("Checking for service updates") - service_updated = self._install_repo_policy_compliance(proxy_config) - - if service_updated or runner_bin_updated: - logger.info( - "Flushing runner due to: service updated=%s, runner binary update=%s", - service_updated, - runner_bin_updated, - ) - self.unit.status = MaintenanceStatus("Flushing runners due to updated deps") - runner_manager.flush(LXDFlushMode.FLUSH_IDLE_WAIT_REPO_CHECK) - self._start_services(token, proxy_config) - - self.unit.status = ActiveStatus() - return service_updated or runner_bin_updated + runner_scaler = self._get_runner_scaler(state) + runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) + self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) + # TODO: 2024-04-12: Flush on token changes. 
@catch_charm_errors def _on_reconcile_runners(self, _: ReconcileRunnersEvent) -> None: @@ -771,30 +376,10 @@ def _trigger_reconciliation(self) -> None: self.unit.status = MaintenanceStatus("Reconciling runners") state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - if not self._get_set_image_ready_status(): - return - runner_scaler = self._get_runner_scaler(state) - self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) + if not self._get_set_image_ready_status(): return - - runner_manager = self._get_runner_manager(state) - - self._check_and_update_local_lxd_dependencies( - runner_manager, state.charm_config.token, state.proxy_config - ) - - runner_info = runner_manager.get_github_info() - if all(not info.busy for info in runner_info): - self._update_kernel(now=True) - - self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) - - self.unit.status = ActiveStatus() + runner_scaler = self._get_runner_scaler(state) + self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) @catch_action_errors def _on_check_runners_action(self, event: ActionEvent) -> None: @@ -803,49 +388,18 @@ def _on_check_runners_action(self, event: ActionEvent) -> None: Args: event: The event fired on check_runners action. 
""" - online = 0 - offline = 0 - unknown = 0 - runner_names = [] - state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - runner_scaler = self._get_runner_scaler(state) - info = runner_scaler.get_runner_info() - event.set_results( - { - "online": info.online, - "busy": info.busy, - "offline": info.offline, - "unknown": info.unknown, - "runners": info.runners, - "busy-runners": info.busy_runners, - } - ) - return - - runner_manager = self._get_runner_manager(state) - if runner_manager.runner_bin_path is None: - event.fail("Missing runner binary") - return - - runner_info = runner_manager.get_github_info() - for runner in runner_info: - if runner.status == GitHubRunnerStatus.ONLINE.value: - online += 1 - runner_names.append(runner.name) - elif runner.status == GitHubRunnerStatus.OFFLINE.value: - offline += 1 - else: - # might happen if runner dies and GH doesn't notice immediately - unknown += 1 + runner_scaler = self._get_runner_scaler(state) + info = runner_scaler.get_runner_info() event.set_results( { - "online": online, - "offline": offline, - "unknown": unknown, - "runners": ", ".join(runner_names), + "online": info.online, + "busy": info.busy, + "offline": info.offline, + "unknown": info.unknown, + "runners": info.runners, + "busy-runners": info.busy_runners, } ) @@ -859,39 +413,22 @@ def _on_reconcile_runners_action(self, event: ActionEvent) -> None: self.unit.status = MaintenanceStatus("Reconciling runners") state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - if not self._get_set_image_ready_status(): - event.fail("Openstack image not yet provided/ready.") - return - runner_scaler = self._get_runner_scaler(state) - - self.unit.status = MaintenanceStatus("Reconciling runners") - try: - delta = runner_scaler.reconcile(state.runner_config.virtual_machines) - except ReconcileError: - logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) - self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) 
- event.fail(FAILED_RECONCILE_ACTION_ERR_MSG) - return - - self.unit.status = ActiveStatus() - event.set_results({"delta": {"virtual-machines": delta}}) + if not self._get_set_image_ready_status(): + event.fail("Openstack image not yet provided/ready.") return + runner_scaler = self._get_runner_scaler(state) - runner_manager = self._get_runner_manager(state) - - self._check_and_update_local_lxd_dependencies( - runner_manager, state.charm_config.token, state.proxy_config - ) + self.unit.status = MaintenanceStatus("Reconciling runners") + try: + delta = runner_scaler.reconcile(state.runner_config.virtual_machines) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) + self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) + event.fail(FAILED_RECONCILE_ACTION_ERR_MSG) + return - delta = self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) self.unit.status = ActiveStatus() - self._on_check_runners_action(event) - event.set_results(delta) + event.set_results({"delta": {"virtual-machines": delta}}) @catch_action_errors def _on_flush_runners_action(self, event: ActionEvent) -> None: @@ -902,31 +439,19 @@ def _on_flush_runners_action(self, event: ActionEvent) -> None: """ state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - # Flushing mode not implemented for OpenStack yet. 
- runner_scaler = self._get_runner_scaler(state) - flushed = runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) - logger.info("Flushed %s runners", flushed) - self.unit.status = MaintenanceStatus("Reconciling runners") - try: - delta = runner_scaler.reconcile(state.runner_config.virtual_machines) - except ReconcileError: - logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) - self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) - event.fail(FAILED_RECONCILE_ACTION_ERR_MSG) - return - self.unit.status = ActiveStatus() - event.set_results({"delta": {"virtual-machines": delta}}) + # Flushing mode not implemented for OpenStack yet. + runner_scaler = self._get_runner_scaler(state) + flushed = runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) + logger.info("Flushed %s runners", flushed) + self.unit.status = MaintenanceStatus("Reconciling runners") + try: + delta = runner_scaler.reconcile(state.runner_config.virtual_machines) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) + self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) + event.fail(FAILED_RECONCILE_ACTION_ERR_MSG) return - - runner_manager = self._get_runner_manager(state) - - runner_manager.flush(LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK) - delta = self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) + self.unit.status = ActiveStatus() event.set_results({"delta": {"virtual-machines": delta}}) @catch_action_errors @@ -936,17 +461,8 @@ def _on_update_dependencies_action(self, event: ActionEvent) -> None: Args: event: Action event of updating dependencies. """ - state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - # No dependencies managed by the charm for OpenStack-based runners. 
- event.set_results({"flush": False}) - return - - runner_manager = self._get_runner_manager(state) - flushed = self._check_and_update_local_lxd_dependencies( - runner_manager, state.charm_config.token, state.proxy_config - ) - event.set_results({"flush": flushed}) + # No dependencies managed by the charm for OpenStack-based runners. + event.set_results({"flush": False}) @catch_charm_errors def _on_update_status(self, _: UpdateStatusEvent) -> None: @@ -959,43 +475,8 @@ def _on_stop(self, _: StopEvent) -> None: """Handle the stopping of the charm.""" self._event_timer.disable_event_timer("reconcile-runners") state = self._setup_state() - - if state.instance_type == InstanceType.OPENSTACK: - runner_scaler = self._get_runner_scaler(state) - runner_scaler.flush(FlushMode.FLUSH_BUSY) - return - - runner_manager = self._get_runner_manager(state) - runner_manager.flush(LXDFlushMode.FLUSH_BUSY) - - def _reconcile_lxd_runners( - self, runner_manager: LXDRunnerManager, num: int, resources: VirtualMachineResources - ) -> Dict[str, Any]: - """Reconcile the current runners state and intended runner state for LXD mode. - - Args: - runner_manager: For querying and managing the runner state. - num: Target number of virtual machines. - resources: Target resource for each virtual machine. - - Raises: - MissingRunnerBinaryError: If the runner binary is not found. - - Returns: - Changes in runner number due to reconciling runners. 
- """ - if not LXDRunnerManager.runner_bin_path.is_file(): - logger.warning("Unable to reconcile due to missing runner binary") - raise MissingRunnerBinaryError("Runner binary not found.") - - self.unit.status = MaintenanceStatus("Reconciling runners") - delta_virtual_machines = runner_manager.reconcile( - num, - resources, - ) - - self.unit.status = ActiveStatus() - return {"delta": {"virtual-machines": delta_virtual_machines}} + runner_scaler = self._get_runner_scaler(state) + runner_scaler.flush(FlushMode.FLUSH_BUSY) def _reconcile_openstack_runners(self, runner_scaler: RunnerScaler, num: int) -> None: """Reconcile the current runners state and intended runner state for OpenStack mode. @@ -1013,199 +494,11 @@ def _reconcile_openstack_runners(self, runner_scaler: RunnerScaler, num: int) -> else: self.unit.status = ActiveStatus() - def _install_repo_policy_compliance(self, proxy_config: ProxyConfig) -> bool: - """Install latest version of repo_policy_compliance service. - - Args: - proxy_config: Proxy configuration. - - Returns: - Whether version install is changed. Going from not installed to - installed will return True. 
- """ - # Prepare environment variables for pip subprocess - env = {} - if http_proxy := proxy_config.http: - env["HTTP_PROXY"] = http_proxy - env["http_proxy"] = http_proxy - if https_proxy := proxy_config.https: - env["HTTPS_PROXY"] = https_proxy - env["https_proxy"] = https_proxy - if no_proxy := proxy_config.no_proxy: - env["NO_PROXY"] = no_proxy - env["no_proxy"] = no_proxy - - old_version = execute_command( - [ - "/usr/bin/python3", - "-m", - "pip", - "show", - "repo-policy-compliance", - ], - check_exit=False, - ) - - execute_command( - [ - "/usr/bin/python3", - "-m", - "pip", - "install", - "--upgrade", - "git+https://github.com/canonical/repo-policy-compliance@main", - ], - env=env, - ) - - new_version = execute_command( - [ - "/usr/bin/python3", - "-m", - "pip", - "show", - "repo-policy-compliance", - ], - check_exit=False, - ) - return old_version != new_version - - def _enable_kernel_modules(self) -> None: - """Enable kernel modules needed by the charm.""" - execute_command(["/usr/sbin/modprobe", "br_netfilter"]) - with self.kernel_module_path.open("a", encoding="utf-8") as modules_file: - modules_file.write("br_netfilter\n") - def _install_deps(self) -> None: """Install dependences for the charm.""" logger.info("Installing charm dependencies.") self._apt_install(["run-one"]) - @retry(tries=5, delay=5, max_delay=60, backoff=2, local_logger=logger) - def _install_local_lxd_deps(self) -> None: - """Install dependencies for running local LXD runners.""" - state = self._setup_state() - - logger.info("Installing local LXD runner dependencies.") - # Snap and Apt will use any proxies configured in the Juju model. - # Binding for snap, apt, and lxd init commands are not available so subprocess.run used. 
- # Install dependencies used by repo-policy-compliance and the firewall - self._apt_install(["gunicorn", "python3-pip", "nftables"]) - # Install repo-policy-compliance package - self._install_repo_policy_compliance(state.proxy_config) - execute_command( - ["/usr/bin/apt-get", "remove", "-qy", "lxd", "lxd-client"], check_exit=False - ) - self._apt_install( - [ - "cpu-checker", - "libvirt-clients", - "libvirt-daemon-driver-qemu", - "apparmor-utils", - ], - ) - execute_command(["/usr/bin/snap", "install", "lxd", "--channel=latest/stable"]) - execute_command(["/usr/bin/snap", "refresh", "lxd", "--channel=latest/stable"]) - # Add ubuntu user to lxd group, to allow building images with ubuntu user - execute_command(["/usr/sbin/usermod", "-aG", "lxd", "ubuntu"]) - execute_command(["/snap/bin/lxd", "waitready"]) - execute_command(["/snap/bin/lxd", "init", "--auto"]) - execute_command(["/snap/bin/lxc", "network", "set", "lxdbr0", "ipv6.address", "none"]) - execute_command(["/snap/bin/lxd", "waitready"]) - if not LXD_PROFILE_YAML.exists(): - self._enable_kernel_modules() - execute_command( - [ - "/snap/bin/lxc", - "profile", - "device", - "set", - "default", - "eth0", - "security.ipv4_filtering=true", - "security.ipv6_filtering=true", - "security.mac_filtering=true", - "security.port_isolation=true", - ] - ) - logger.info("Finished installing local LXD runner dependencies.") - - @retry(tries=5, delay=5, max_delay=60, backoff=2, local_logger=logger) - def _start_services(self, token: str, proxy_config: ProxyConfig) -> None: - """Ensure all services managed by the charm is running. - - Args: - token: GitHub personal access token for repo-policy-compliance to use. - proxy_config: Proxy configuration. 
- """ - logger.info("Starting charm services...") - - if self.service_token is None: - self.service_token = self._get_service_token() - - # Move script to home directory - logger.info("Loading the repo policy compliance flask app...") - os.makedirs(self.repo_check_web_service_path, exist_ok=True) - shutil.copyfile( - self.repo_check_web_service_script, - self.repo_check_web_service_path / "app.py", - ) - - # Move the systemd service. - logger.info("Loading the repo policy compliance gunicorn systemd service...") - environment = jinja2.Environment( - loader=jinja2.FileSystemLoader("templates"), autoescape=True - ) - - service_content = environment.get_template("repo-policy-compliance.service.j2").render( - working_directory=str(self.repo_check_web_service_path), - charm_token=self.service_token, - github_token=token, - proxies=proxy_config, - ) - self.repo_check_systemd_service.write_text(service_content, encoding="utf-8") - - execute_command(["/usr/bin/systemctl", "daemon-reload"]) - execute_command(["/usr/bin/systemctl", "restart", "repo-policy-compliance"]) - execute_command(["/usr/bin/systemctl", "enable", "repo-policy-compliance"]) - - logger.info("Finished starting charm services") - - def _get_service_token(self) -> str: - """Get the service token. - - Returns: - The service token. - """ - logger.info("Getting the secret token...") - if self.service_token_path.exists(): - logger.info("Found existing token file.") - service_token = self.service_token_path.read_text(encoding="utf-8") - else: - logger.info("Generate new token.") - service_token = secrets.token_hex(16) - self.service_token_path.write_text(service_token, encoding="utf-8") - return service_token - - def _refresh_firewall(self, state: CharmState) -> None: - """Refresh the firewall configuration and rules. - - Args: - state: Charm state. - """ - # Temp: Monitor the LXD networks to track down issues with missing network. 
- logger.info(execute_command(["/snap/bin/lxc", "network", "list", "--format", "json"])) - - allowlist = [ - FirewallEntry.decode(str(entry.host)) for entry in state.ssh_debug_connections - ] - firewall = Firewall("lxdbr0") - firewall.refresh_firewall(denylist=state.charm_config.denylist, allowlist=allowlist) - logger.debug( - "firewall update, current firewall: %s", - execute_command(["/usr/sbin/nft", "list", "ruleset"]), - ) - def _apt_install(self, packages: Sequence[str]) -> None: """Execute apt install command. @@ -1227,37 +520,20 @@ def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None: """Handle debug ssh relation changed event.""" state = self._setup_state() - if state.instance_type == InstanceType.OPENSTACK: - if not self._get_set_image_ready_status(): - return - runner_scaler = self._get_runner_scaler(state) - runner_scaler.flush() - try: - runner_scaler.reconcile(state.runner_config.virtual_machines) - except ReconcileError: - logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) + if not self._get_set_image_ready_status(): return - - self._refresh_firewall(state) - runner_manager = self._get_runner_manager(state) - runner_manager.flush(LXDFlushMode.FLUSH_IDLE) - self._reconcile_lxd_runners( - runner_manager, - state.runner_config.virtual_machines, - state.runner_config.virtual_machine_resources, - ) + runner_scaler = self._get_runner_scaler(state) + runner_scaler.flush() + try: + runner_scaler.reconcile(state.runner_config.virtual_machines) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) @catch_charm_errors def _on_image_relation_joined(self, _: ops.RelationJoinedEvent) -> None: """Handle image relation joined event.""" state = self._setup_state() - if state.instance_type != InstanceType.OPENSTACK: - self.unit.status = BlockedStatus( - "Openstack mode not enabled. Please remove the image integration." 
- ) - return - clouds_yaml = state.charm_config.openstack_clouds_yaml cloud = list(clouds_yaml["clouds"].keys())[0] auth_map = clouds_yaml["clouds"][cloud]["auth"] @@ -1270,11 +546,6 @@ def _on_image_relation_changed(self, _: ops.RelationChangedEvent) -> None: state = self._setup_state() self.unit.status = MaintenanceStatus("Update image for runners") - if state.instance_type != InstanceType.OPENSTACK: - self.unit.status = BlockedStatus( - "Openstack mode not enabled. Please remove the image integration." - ) - return if not self._get_set_image_ready_status(): return diff --git a/src/charm_state.py b/src/charm_state.py index ba4e8731b..ea8c69c31 100644 --- a/src/charm_state.py +++ b/src/charm_state.py @@ -1,10 +1,6 @@ # Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. -# TODO: 2024-06-26 The charm contains a lot of states and configuration. The upcoming refactor will -# split each/related class to a file. -# pylint: disable=too-many-lines - """State of the Charm.""" import dataclasses @@ -14,7 +10,7 @@ import re from enum import Enum from pathlib import Path -from typing import NamedTuple, Optional, TypedDict, cast +from typing import Optional, TypedDict, cast from urllib.parse import urlsplit import yaml @@ -33,14 +29,8 @@ ) from errors import MissingMongoDBError -from firewall import FirewallEntry from utilities import get_env_var -REACTIVE_MODE_NOT_SUPPORTED_WITH_LXD_ERR_MSG = ( - "Reactive mode not supported for local LXD instances. " - "Please remove the mongodb integration." 
-) - logger = logging.getLogger(__name__) ARCHITECTURES_ARM64 = {"aarch64", "arm64"} @@ -48,8 +38,6 @@ CHARM_STATE_PATH = Path("charm_state.json") -BASE_IMAGE_CONFIG_NAME = "base-image" -DENYLIST_CONFIG_NAME = "denylist" DOCKERHUB_MIRROR_CONFIG_NAME = "dockerhub-mirror" GROUP_CONFIG_NAME = "group" LABELS_CONFIG_NAME = "labels" @@ -61,16 +49,12 @@ # bandit thinks this is a hardcoded password REPO_POLICY_COMPLIANCE_TOKEN_CONFIG_NAME = "repo-policy-compliance-token" # nosec REPO_POLICY_COMPLIANCE_URL_CONFIG_NAME = "repo-policy-compliance-url" -RUNNER_STORAGE_CONFIG_NAME = "runner-storage" SENSITIVE_PLACEHOLDER = "*****" TEST_MODE_CONFIG_NAME = "test-mode" # bandit thinks this is a hardcoded password. TOKEN_CONFIG_NAME = "token" # nosec USE_APROXY_CONFIG_NAME = "experimental-use-aproxy" VIRTUAL_MACHINES_CONFIG_NAME = "virtual-machines" -VM_CPU_CONFIG_NAME = "vm-cpu" -VM_MEMORY_CONFIG_NAME = "vm-memory" -VM_DISK_CONFIG_NAME = "vm-disk" # Integration names COS_AGENT_INTEGRATION_NAME = "cos-agent" @@ -134,20 +118,6 @@ def from_charm(cls, charm: CharmBase) -> "GithubConfig": return cls(token=cast(str, token), path=path) -class VirtualMachineResources(NamedTuple): - """Virtual machine resource configuration. - - Attributes: - cpu: Number of vCPU for the virtual machine. - memory: Amount of memory for the virtual machine. - disk: Amount of disk for the virtual machine. - """ - - cpu: int - memory: StorageSize - disk: StorageSize - - class Arch(str, Enum): """Supported system architectures. @@ -160,30 +130,6 @@ class Arch(str, Enum): X64 = "x64" -class RunnerStorage(str, Enum): - """Supported storage as runner disk. - - Attributes: - JUJU_STORAGE: Represents runner storage from Juju storage. - MEMORY: Represents tempfs storage (ramdisk). - """ - - JUJU_STORAGE = "juju-storage" - MEMORY = "memory" - - -class InstanceType(str, Enum): - """Type of instance for runner. - - Attributes: - LOCAL_LXD: LXD instance on the local juju machine. - OPENSTACK: OpenStack instance on a cloud. 
- """ - - LOCAL_LXD = "local_lxd" - OPENSTACK = "openstack" - - class CharmConfigInvalidError(Exception): """Raised when charm config is invalid. @@ -336,7 +282,6 @@ class CharmConfig(BaseModel): Some charm configurations are grouped into other configuration models. Attributes: - denylist: List of IPv4 to block the runners from accessing. dockerhub_mirror: Private docker registry as dockerhub mirror for the runners to use. labels: Additional runner labels to append to default (i.e. os, flavor, architecture). openstack_clouds_yaml: The openstack clouds.yaml configuration. @@ -347,31 +292,14 @@ class CharmConfig(BaseModel): token: GitHub personal access token for GitHub API. """ - denylist: list[FirewallEntry] dockerhub_mirror: AnyHttpsUrl | None labels: tuple[str, ...] - openstack_clouds_yaml: OpenStackCloudsYAML | None + openstack_clouds_yaml: OpenStackCloudsYAML path: GitHubPath reconcile_interval: int repo_policy_compliance: RepoPolicyComplianceConfig | None token: str - @classmethod - def _parse_denylist(cls, charm: CharmBase) -> list[FirewallEntry]: - """Read charm denylist configuration and parse it into firewall deny entries. - - Args: - charm: The charm instance. - - Returns: - The firewall deny entries. - """ - denylist_str = cast(str, charm.config.get(DENYLIST_CONFIG_NAME, "")) - - entry_list = [entry.strip() for entry in denylist_str.split(",")] - denylist = [FirewallEntry.decode(entry) for entry in entry_list if entry] - return denylist - @classmethod def _parse_dockerhub_mirror(cls, charm: CharmBase) -> str | None: """Parse and validate dockerhub mirror URL. @@ -405,7 +333,7 @@ def _parse_dockerhub_mirror(cls, charm: CharmBase) -> str | None: return dockerhub_mirror @classmethod - def _parse_openstack_clouds_config(cls, charm: CharmBase) -> OpenStackCloudsYAML | None: + def _parse_openstack_clouds_config(cls, charm: CharmBase) -> OpenStackCloudsYAML: """Parse and validate openstack clouds yaml config value. 
Args: @@ -421,7 +349,7 @@ def _parse_openstack_clouds_config(cls, charm: CharmBase) -> OpenStackCloudsYAML str, charm.config.get(OPENSTACK_CLOUDS_YAML_CONFIG_NAME) ) if not openstack_clouds_yaml_str: - return None + raise CharmConfigInvalidError("No openstack_clouds_yaml") try: openstack_clouds_yaml: OpenStackCloudsYAML = yaml.safe_load( @@ -489,7 +417,6 @@ def from_charm(cls, charm: CharmBase) -> "CharmConfig": f"The {RECONCILE_INTERVAL_CONFIG_NAME} config must be int" ) from err - denylist = cls._parse_denylist(charm) dockerhub_mirror = cast(str, charm.config.get(DOCKERHUB_MIRROR_CONFIG_NAME, "")) or None openstack_clouds_yaml = cls._parse_openstack_clouds_config(charm) @@ -510,7 +437,6 @@ def from_charm(cls, charm: CharmBase) -> "CharmConfig": # pydantic allows to pass str as AnyHttpUrl, mypy complains about it return cls( - denylist=denylist, dockerhub_mirror=dockerhub_mirror, # type: ignore labels=labels, openstack_clouds_yaml=openstack_clouds_yaml, @@ -521,44 +447,6 @@ def from_charm(cls, charm: CharmBase) -> "CharmConfig": ) -LTS_IMAGE_VERSION_TAG_MAP = {"22.04": "jammy", "24.04": "noble"} - - -class BaseImage(str, Enum): - """The ubuntu OS base image to build and deploy runners on. - - Attributes: - JAMMY: The jammy ubuntu LTS image. - NOBLE: The noble ubuntu LTS image. - """ - - JAMMY = "jammy" - NOBLE = "noble" - - def __str__(self) -> str: - """Interpolate to string value. - - Returns: - The enum string value. - """ - return self.value - - @classmethod - def from_charm(cls, charm: CharmBase) -> "BaseImage": - """Retrieve the base image tag from charm. - - Args: - charm: The charm instance. - - Returns: - The base image configuration of the charm. - """ - image_name = cast(str, charm.config.get(BASE_IMAGE_CONFIG_NAME, "jammy")).lower().strip() - if image_name in LTS_IMAGE_VERSION_TAG_MAP: - return cls(LTS_IMAGE_VERSION_TAG_MAP[image_name]) - return cls(image_name) - - class OpenstackImage(BaseModel): """OpenstackImage from image builder relation data. 
@@ -645,129 +533,7 @@ def from_charm(cls, charm: CharmBase) -> "OpenstackRunnerConfig": ) -class LocalLxdRunnerConfig(BaseModel): - """Runner configurations for local LXD instances. - - Attributes: - base_image: The ubuntu base image to run the runner virtual machines on. - virtual_machines: Number of virtual machine-based runner to spawn. - virtual_machine_resources: Hardware resource used by one virtual machine for a runner. - runner_storage: Storage to be used as disk for the runner. - """ - - base_image: BaseImage - virtual_machines: int - virtual_machine_resources: VirtualMachineResources - runner_storage: RunnerStorage - - @classmethod - def from_charm(cls, charm: CharmBase) -> "LocalLxdRunnerConfig": - """Initialize the config from charm. - - Args: - charm: The charm instance. - - Raises: - CharmConfigInvalidError: if an invalid runner charm config has been set on the charm. - - Returns: - Local LXD runner config of the charm. - """ - try: - base_image = BaseImage.from_charm(charm) - except ValueError as err: - raise CharmConfigInvalidError("Invalid base image") from err - - try: - runner_storage = RunnerStorage(charm.config[RUNNER_STORAGE_CONFIG_NAME]) - except ValueError as err: - raise CharmConfigInvalidError( - f"Invalid {RUNNER_STORAGE_CONFIG_NAME} configuration" - ) from err - except CharmConfigInvalidError as exc: - raise CharmConfigInvalidError(f"Invalid runner storage config, {str(exc)}") from exc - - try: - virtual_machines = int(charm.config[VIRTUAL_MACHINES_CONFIG_NAME]) - except ValueError as err: - raise CharmConfigInvalidError( - f"The {VIRTUAL_MACHINES_CONFIG_NAME} configuration must be int" - ) from err - - try: - cpu = int(charm.config[VM_CPU_CONFIG_NAME]) - except ValueError as err: - raise CharmConfigInvalidError(f"Invalid {VM_CPU_CONFIG_NAME} configuration") from err - - virtual_machine_resources = VirtualMachineResources( - cpu, - cast(str, charm.config[VM_MEMORY_CONFIG_NAME]), - cast(str, charm.config[VM_DISK_CONFIG_NAME]), - ) - - 
return cls( - base_image=base_image, - virtual_machines=virtual_machines, - virtual_machine_resources=virtual_machine_resources, - runner_storage=runner_storage, - ) - - @validator("virtual_machines") - @classmethod - def check_virtual_machines(cls, virtual_machines: int) -> int: - """Validate the virtual machines configuration value. - - Args: - virtual_machines: The virtual machines value to validate. - - Raises: - ValueError: if a negative integer was passed. - - Returns: - Validated virtual_machines value. - """ - if virtual_machines < 0: - raise ValueError( - f"The {VIRTUAL_MACHINES_CONFIG_NAME} configuration needs to be greater or equal " - "to 0" - ) - - return virtual_machines - - @validator("virtual_machine_resources") - @classmethod - def check_virtual_machine_resources( - cls, vm_resources: VirtualMachineResources - ) -> VirtualMachineResources: - """Validate the virtual_machine_resources field values. - - Args: - vm_resources: the virtual_machine_resources value to validate. - - Raises: - ValueError: if an invalid number of cpu was given or invalid memory/disk size was - given. - - Returns: - The validated virtual_machine_resources value. - """ - if vm_resources.cpu < 1: - raise ValueError(f"The {VM_CPU_CONFIG_NAME} configuration needs to be greater than 0") - if not _valid_storage_size_str(vm_resources.memory): - raise ValueError( - f"Invalid format for {VM_MEMORY_CONFIG_NAME} configuration, must be int with unit " - "(e.g. 
MiB, GiB)" - ) - if not _valid_storage_size_str(vm_resources.disk): - raise ValueError( - f"Invalid format for {VM_DISK_CONFIG_NAME} configuration, must be int with unit " - "(e.g., MiB, GiB)" - ) - - return vm_resources - - -RunnerConfig = OpenstackRunnerConfig | LocalLxdRunnerConfig +RunnerConfig = OpenstackRunnerConfig class ProxyConfig(BaseModel): @@ -1000,18 +766,6 @@ def from_database(cls, database: DatabaseRequires) -> "ReactiveConfig | None": ) -class ImmutableConfigChangedError(Exception): - """Represents an error when changing immutable charm state.""" - - def __init__(self, msg: str): - """Initialize a new instance of the ImmutableConfigChangedError exception. - - Args: - msg: Explanation of the error. - """ - self.msg = msg - - # Charm State is a list of all the configurations and states of the charm and # has therefore a lot of attributes. @dataclasses.dataclass(frozen=True) @@ -1023,7 +777,6 @@ class CharmState: # pylint: disable=too-many-instance-attributes charm_config: Configuration of the juju charm. is_metrics_logging_available: Whether the charm is able to issue metrics. proxy_config: Proxy-related configuration. - instance_type: The type of instances, e.g., local lxd, openstack. reactive_config: The charm configuration related to reactive spawning mode. runner_config: The charm configuration related to runner VM configuration. ssh_debug_connections: SSH debug connections configuration information. 
@@ -1032,7 +785,6 @@ class CharmState: # pylint: disable=too-many-instance-attributes arch: Arch is_metrics_logging_available: bool proxy_config: ProxyConfig - instance_type: InstanceType charm_config: CharmConfig runner_config: RunnerConfig reactive_config: ReactiveConfig | None @@ -1058,56 +810,6 @@ def _store_state(cls, state: "CharmState") -> None: json_data = json.dumps(state_dict, ensure_ascii=False) CHARM_STATE_PATH.write_text(json_data, encoding="utf-8") - @classmethod - def _check_immutable_config_change( - cls, runner_storage: RunnerStorage, base_image: BaseImage - ) -> None: - """Ensure immutable config has not changed. - - Args: - runner_storage: The current runner_storage configuration. - base_image: The current base_image configuration. - - Raises: - ImmutableConfigChangedError: If an immutable configuration has changed. - """ - if not CHARM_STATE_PATH.exists(): - return - - json_data = CHARM_STATE_PATH.read_text(encoding="utf-8") - prev_state = json.loads(json_data) - - cls._log_prev_state(prev_state) - - try: - if prev_state["runner_config"]["runner_storage"] != runner_storage: - logger.error( - "Storage option changed from %s to %s, blocking the charm", - prev_state["runner_config"]["runner_storage"], - runner_storage, - ) - raise ImmutableConfigChangedError( - msg=( - "runner-storage config cannot be changed after deployment, " - "redeploy if needed" - ) - ) - except KeyError as exc: - logger.info("Key %s not found, this will be updated to current config.", exc.args[0]) - - try: - if prev_state["runner_config"]["base_image"] != base_image.value: - logger.error( - "Base image option changed from %s to %s, blocking the charm", - prev_state["runner_config"]["base_image"], - runner_storage, - ) - raise ImmutableConfigChangedError( - msg="base-image config cannot be changed after deployment, redeploy if needed" - ) - except KeyError as exc: - logger.info("Key %s not found, this will be updated to current config.", exc.args[0]) - @classmethod def 
_log_prev_state(cls, prev_state_dict: dict) -> None: """Log the previous state of the charm. @@ -1164,20 +866,9 @@ def from_charm( # noqa: C901 try: runner_config: RunnerConfig - if charm_config.openstack_clouds_yaml is not None: - instance_type = InstanceType.OPENSTACK - runner_config = OpenstackRunnerConfig.from_charm(charm) - else: - instance_type = InstanceType.LOCAL_LXD - runner_config = LocalLxdRunnerConfig.from_charm(charm) - cls._check_immutable_config_change( - runner_storage=runner_config.runner_storage, - base_image=runner_config.base_image, - ) + runner_config = OpenstackRunnerConfig.from_charm(charm) except ValueError as exc: raise CharmConfigInvalidError(f"Invalid configuration: {str(exc)}") from exc - except ImmutableConfigChangedError as exc: - raise CharmConfigInvalidError(exc.msg) from exc try: arch = _get_supported_arch() @@ -1193,10 +884,6 @@ def from_charm( # noqa: C901 reactive_config = ReactiveConfig.from_database(database) - if instance_type == InstanceType.LOCAL_LXD and reactive_config: - logger.error(REACTIVE_MODE_NOT_SUPPORTED_WITH_LXD_ERR_MSG) - raise CharmConfigInvalidError(REACTIVE_MODE_NOT_SUPPORTED_WITH_LXD_ERR_MSG) - state = cls( arch=arch, is_metrics_logging_available=bool(charm.model.relations[COS_AGENT_INTEGRATION_NAME]), @@ -1205,7 +892,6 @@ def from_charm( # noqa: C901 runner_config=runner_config, reactive_config=reactive_config, ssh_debug_connections=ssh_debug_connections, - instance_type=instance_type, ) cls._store_state(state) diff --git a/src/errors.py b/src/errors.py index 7212b4642..6073e119c 100644 --- a/src/errors.py +++ b/src/errors.py @@ -19,34 +19,6 @@ ) -class RunnerCreateError(RunnerError): - """Error for runner creation failure.""" - - -class RunnerFileLoadError(RunnerError): - """Error for loading file on runner.""" - - -class RunnerRemoveError(RunnerError): - """Error for runner removal failure.""" - - -class RunnerBinaryError(RunnerError): - """Error of getting runner binary.""" - - -class 
RunnerAproxyError(RunnerError): - """Error for setting up aproxy.""" - - -class MissingServerConfigError(RunnerError): - """Error for unable to create runner due to missing server configurations.""" - - -class MissingRunnerBinaryError(Exception): - """Error for missing runner binary.""" - - class ConfigurationError(Exception): """Error for juju configuration.""" @@ -55,10 +27,6 @@ class MissingMongoDBError(Exception): """Error for missing integration data.""" -class LxdError(Exception): - """Error for executing LXD actions.""" - - class SubprocessError(Exception): """Error for Subprocess calls. @@ -92,21 +60,5 @@ def __init__( self.stderr = stderr -class IssueMetricEventError(Exception): - """Represents an error when issuing a metric event.""" - - class LogrotateSetupError(Exception): """Represents an error raised when logrotate cannot be setup.""" - - -class SharedFilesystemError(MetricsStorageError): - """Base class for all shared filesystem errors.""" - - -class SharedFilesystemMountError(SharedFilesystemError): - """Represents an error related to the mounting of the shared filesystem.""" - - -class RunnerLogsError(Exception): - """Base class for all runner logs errors.""" diff --git a/src/firewall.py b/src/firewall.py deleted file mode 100644 index b4bf07c00..000000000 --- a/src/firewall.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""The runner firewall manager.""" -import dataclasses -import ipaddress -import json -import typing - -import yaml - -from utilities import execute_command - -NetworkT = typing.TypeVar("NetworkT", ipaddress.IPv4Network, ipaddress.IPv6Network) - - -@dataclasses.dataclass -class FirewallEntry: - """Represent an entry in the firewall. - - Attributes: - ip_range: The IP address range using CIDR notation. - """ - - ip_range: str - - @classmethod - def decode(cls, entry: str) -> "FirewallEntry": - """Decode a firewall entry from a string. 
- - Args: - entry: The firewall entry string, e.g. '192.168.0.1:80' or '192.168.0.0/24:80-90:udp'. - - Returns: - FirewallEntry: A FirewallEntry instance representing the decoded entry. - - Raises: - ValueError: If the entry string is not in the expected format. - """ - try: - ipaddress.IPv4Network(entry) - except ValueError as exc: - raise ValueError(f"incorrect firewall entry format: {entry}") from exc - return cls(ip_range=entry) - - -class Firewall: # pylint: disable=too-few-public-methods - """Represent a firewall and provides methods to refresh its configuration.""" - - _ACL_RULESET_NAME = "github" - - def __init__(self, network: str): - """Initialize a new Firewall instance. - - Args: - network: The LXD network name. - """ - self._network = network - - def get_host_ip(self) -> str: - """Get the host IP address for the corresponding LXD network. - - Returns: - The host IP address. - """ - address, _ = execute_command( - ["/snap/bin/lxc", "network", "get", self._network, "ipv4.address"] - ) - return str(ipaddress.IPv4Interface(address.strip()).ip) - - def _exclude_network( - self, - networks: list[NetworkT], - exclude: list[NetworkT], - ) -> list[NetworkT]: - """Excludes the network segment from a pool of networks. - - Args: - networks: The network pool to apply. - exclude: The networks to exclude from the pool. - - Returns: - The network pool without the network segments in excludes. 
- """ - total_networks_without_excluded = networks - - for exclude_net in exclude: - scoped_excluded_networks: list[NetworkT] = [] - for net in total_networks_without_excluded: - if net == exclude_net or net.subnet_of(exclude_net): - continue - if net.overlaps(exclude_net): - scoped_excluded_networks.extend(net.address_exclude(exclude_net)) - else: - scoped_excluded_networks.append(net) - total_networks_without_excluded = scoped_excluded_networks - - return total_networks_without_excluded - - def refresh_firewall( - self, - denylist: typing.Iterable[FirewallEntry], - allowlist: typing.Iterable[FirewallEntry] | None = None, - ) -> None: - """Refresh the firewall configuration. - - Args: - denylist: The list of FirewallEntry rules to allow. - allowlist: The list of FirewallEntry rules to allow. - """ - current_acls = [ - acl["name"] - for acl in yaml.safe_load( - execute_command(["lxc", "network", "acl", "list", "-f", "yaml"])[0] - ) - ] - if self._ACL_RULESET_NAME not in current_acls: - execute_command(["/snap/bin/lxc", "network", "acl", "create", self._ACL_RULESET_NAME]) - execute_command( - [ - "/snap/bin/lxc", - "network", - "set", - self._network, - f"security.acls={self._ACL_RULESET_NAME}", - ] - ) - execute_command( - [ - "/snap/bin/lxc", - "network", - "set", - self._network, - "security.acls.default.egress.action=allow", - ] - ) - acl_config = yaml.safe_load( - execute_command(["/snap/bin/lxc", "network", "acl", "show", self._ACL_RULESET_NAME])[0] - ) - host_ip = self.get_host_ip() - egress_rules = [ - { - "action": "reject", - "destination": host_ip, - "destination_port": "1-8079,8081-65535", - "protocol": "tcp", - "state": "enabled", - }, - { - "action": "reject", - "destination": host_ip, - "protocol": "udp", - "state": "enabled", - }, - { - "action": "reject", - "destination": host_ip, - "protocol": "icmp4", - "state": "enabled", - }, - { - "action": "reject", - "destination": "::/0", - "state": "enabled", - }, - ] - - allowed_ips = [ - 
ipaddress.IPv4Network(host_ip), - *[ipaddress.IPv4Network(entry.ip_range) for entry in (allowlist or [])], - ] - ips_to_deny = [ipaddress.IPv4Network(entry.ip_range) for entry in denylist] - denied_ips = self._exclude_network(networks=ips_to_deny, exclude=allowed_ips) - egress_rules.extend( - [ - { - "action": "reject", - "destination": str(ip), - "state": "enabled", - } - for ip in denied_ips - ] - ) - acl_config["egress"] = egress_rules - execute_command( - ["lxc", "network", "acl", "edit", self._ACL_RULESET_NAME], - input=json.dumps(acl_config).encode("ascii"), - ) diff --git a/src/github_client.py b/src/github_client.py deleted file mode 100644 index b14d3b799..000000000 --- a/src/github_client.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""GitHub API client. - -Migrate to PyGithub in the future. PyGithub is still lacking some API such as -remove token for runner. -""" -import logging -from typing import ParamSpec, TypeVar - -from github_runner_manager.github_client import GithubClient as GitHubRunnerManagerGitHubClient -from github_runner_manager.github_client import catch_http_errors -from github_runner_manager.types_.github import ( - GitHubOrg, - GitHubPath, - GitHubRepo, - RunnerApplication, - RunnerApplicationList, -) - -from charm_state import Arch -from errors import RunnerBinaryError - -logger = logging.getLogger(__name__) - -# Parameters of the function decorated with retry -ParamT = ParamSpec("ParamT") -# Return type of the function decorated with retry -ReturnT = TypeVar("ReturnT") - - -class GithubClient(GitHubRunnerManagerGitHubClient): - """GitHub API client.""" - - @catch_http_errors - def get_runner_application( - self, path: GitHubPath, arch: Arch, os: str = "linux" - ) -> RunnerApplication: - """Get runner application available for download for given arch. - - Args: - path: GitHub repository path in the format '/', or the GitHub organization - name. 
- arch: The runner architecture. - os: The operating system that the runner binary should run on. - - Raises: - RunnerBinaryError: If the runner application for given architecture and OS is not - found. - - Returns: - The runner application. - """ - runner_applications: RunnerApplicationList = [] - if isinstance(path, GitHubRepo): - runner_applications = self._client.actions.list_runner_applications_for_repo( - owner=path.owner, repo=path.repo - ) - if isinstance(path, GitHubOrg): - runner_applications = self._client.actions.list_runner_applications_for_org( - org=path.org - ) - logger.debug("Response of runner applications list: %s", runner_applications) - try: - return next( - bin - for bin in runner_applications - if bin["os"] == os and bin["architecture"] == arch - ) - except StopIteration as err: - raise RunnerBinaryError( - f"Unable query GitHub runner binary information for {os} {arch}" - ) from err diff --git a/src/lxd.py b/src/lxd.py deleted file mode 100644 index 8d7cef1e9..000000000 --- a/src/lxd.py +++ /dev/null @@ -1,608 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Low-level LXD client interface. - -The LxdClient class offers a low-level interface to isolate the underlying -implementation of LXD. -""" -from __future__ import annotations - -import io -import logging -import tempfile -from pathlib import Path -from typing import IO, Any, Optional, Tuple, Union - -import pylxd.models - -from errors import LxdError, SubprocessError -from lxd_type import ( - LxdInstanceConfig, - LxdNetwork, - LxdResourceProfileConfig, - LxdResourceProfileDevices, - LxdStoragePoolConfiguration, -) -from utilities import execute_command, secure_run_subprocess - -LXC_BINARY = "/snap/bin/lxc" - -logger = logging.getLogger(__name__) - - -class LxdInstanceFileManager: - """File manager of an LXD instance. - - Attributes: - instance (LxdInstance): LXD instance where the files are located in. 
- """ - - def __init__(self, instance: LxdInstance): - """Instantiate the file manager. - - Args: - instance: LXD instance where the files are located in. - """ - self.instance = instance - - def mk_dir(self, dir_name: str) -> None: - """Create a directory in the LXD instance. - - Args: - dir_name: Name of the directory to create. - """ - self.instance.execute(["/usr/bin/mkdir", "-p", dir_name]) - - def push_file(self, source: str, destination: str, mode: Optional[str] = None) -> None: - """Push a file to the LXD instance. - - Args: - source: Path of the file to push to the LXD instance. - destination: Path in the LXD instance to load the file. - mode: File permissions. - - Raises: - LxdError: Unable to load the file into the LXD instance. - """ - lxc_cmd = [ - LXC_BINARY, - "file", - "push", - "--create-dirs", - source, - f"{self.instance.name}/{destination.lstrip('/')}", - ] - - if mode: - lxc_cmd += ["--mode", mode] - - try: - execute_command(lxc_cmd) - except SubprocessError as err: - logger.exception("Failed to push file") - raise LxdError( - f"Unable to push file into the LXD instance {self.instance.name}" - ) from err - - def write_file( - self, filepath: str, content: Union[str, bytes], mode: Optional[str] = None - ) -> None: - """Write a file with the given content into the LXD instance. - - Args: - filepath: Path in the LXD instance to load the file. - content: Content of the file. - mode: File permission setting. - - Raises: - LxdError: Unable to load the file to the LXD instance. - """ - if isinstance(content, str): - content = content.encode("utf-8") - - with tempfile.NamedTemporaryFile() as file: - file.write(content) - file.flush() - - try: - self.push_file(file.name, filepath, mode) - # TODO: 2024-04-02 - We should define a new error, wrap it and re-raise it. 
- except LxdError: # pylint: disable=try-except-raise - raise - - def pull_file(self, source: str, destination: str, is_dir: bool = False) -> None: - """Pull a file from the LXD instance to the local machine. - - Args: - source: Path of the file to pull in the LXD instance. - destination: Path in local machine. - is_dir: Whether the source is a directory. - - Raises: - LxdError: Unable to load the file from the LXD instance. - """ - lxc_cmd = [ - LXC_BINARY, - "file", - "pull", - *(["-r"] if is_dir else []), - f"{self.instance.name}/{source.lstrip('/')}", - destination, - ] - - try: - execute_command(lxc_cmd) - except SubprocessError as err: - logger.exception("Failed to pull file") - raise LxdError( - f"Unable to pull file {source} from the LXD instance {self.instance.name}" - ) from err - - def read_file(self, filepath: str) -> str: - """Read the content of a file in the LXD instance. - - Args: - filepath: Path of the file in the LXD instance. - - Raises: - LxdError: Unable to load the file from the LXD instance. - - Returns: - The content of the file. - """ - with tempfile.NamedTemporaryFile() as file: - try: - self.pull_file(filepath, file.name) - # TODO: 2024-04-02 - We should define a new error, wrap it and re-raise it. - except LxdError: # pylint: disable=try-except-raise - raise - - return file.read().decode("utf-8") - - -class LxdInstance: - """An LXD instance. - - Attributes: - name (str): Name of the LXD instance. - files (LxdInstanceFiles): Manager for the files on the LXD instance. - status (str): Status of the LXD instance. - """ - - def __init__(self, pylxd_instance: pylxd.models.Instance): - """Instantiate the LXD instance representation. - - Args: - pylxd_instance: Instance of pylxd.models.Instance for the LXD - instance. - """ - self._pylxd_instance = pylxd_instance - self.name = self._pylxd_instance.name - self.files = LxdInstanceFileManager(self) - - @property - def status(self) -> str: - """Status of the LXD instance. 
- - Returns: - Status of the LXD instance. - """ - return self._pylxd_instance.status - - def start(self, timeout: int = 30, force: bool = True, wait: bool = False) -> None: - """Start the LXD instance. - - Args: - timeout: Timeout for starting the LXD instance. - force: Whether to force start the LXD instance. - wait: Whether to wait until the LXD instance is started before - returning. - - Raises: - LxdError: Unable to start the LXD instance. - """ - try: - self._pylxd_instance.start(timeout, force, wait) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to start the LXD instance") - raise LxdError(f"Unable to start the LXD instance {self.name}") from err - - def stop(self, timeout: int = 30, force: bool = True, wait: bool = False) -> None: - """Stop the LXD instance. - - Args: - timeout: Timeout for stopping the LXD instance. - force: Whether to force stop the LXD instance. - wait: Whether to wait until the LXD instance is stopped before - returning. - - Raises: - LxdError: Unable to stop the LXD instance. - """ - try: - self._pylxd_instance.stop(timeout, force, wait) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to stop the LXD instance") - raise LxdError(f"Unable to stop the LXD instance {self.name}") from err - - def delete(self, wait: bool = False) -> None: - """Delete the LXD instance. - - Args: - wait: Whether to wait until the LXD instance is stopped before - returning. - - Raises: - LxdError: Unable to delete the LXD instance. - """ - try: - self._pylxd_instance.delete(wait) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to delete the LXD instance") - raise LxdError(f"Unable to delete the LXD instance {self.name}") from err - - def execute( - self, cmd: list[str], cwd: Optional[str] = None, hide_cmd: bool = False, **kwargs: Any - ) -> Tuple[int, IO, IO]: - """Execute a command within the LXD instance. - - Exceptions are not raised if command execution failed. 
Caller should - check the exit code and stderr for errors. - - The command is executed with `subprocess.run`, additional arguments can be passed to it as - keyword arguments. The following arguments to `subprocess.run` should not be set: - `capture_output`, `shell`, `check`. As those arguments are used by this function. - - Args: - cmd: Commands to be executed. - cwd: Working directory to execute the commands. - hide_cmd: Hide logging of cmd. - kwargs: Additional keyword arguments for the `subprocess.run` call. - - - Returns: - Tuple containing the exit code, stdout, stderr. - """ - lxc_cmd = [LXC_BINARY, "exec", self.name] - if cwd: - lxc_cmd += ["--cwd", cwd] - - lxc_cmd += ["--"] + cmd - - result = secure_run_subprocess(lxc_cmd, hide_cmd, **kwargs) - return (result.returncode, io.BytesIO(result.stdout), io.BytesIO(result.stderr)) - - -class LxdInstanceManager: - """LXD instance manager.""" - - def __init__(self, pylxd_client: pylxd.Client): - """Instantiate the LXD instance manager. - - Args: - pylxd_client: Instance of pylxd.Client. - """ - self._pylxd_client = pylxd_client - - def all(self) -> list[LxdInstance]: - """Get list of LXD instances. - - Raises: - LxdError: Unable to get all LXD instances. - - Returns: - List of LXD instances. - """ - try: - return [LxdInstance(instance) for instance in self._pylxd_client.instances.all()] - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to get all LXD instances") - raise LxdError("Unable to get all LXD instances") from err - - def create(self, config: LxdInstanceConfig, wait: bool) -> LxdInstance: - """Create an LXD instance. - - Args: - config: Configuration for the LXD instance. - wait: Whether to wait until the LXD instance is created before - returning. - - Raises: - LxdError: Unable to get all LXD instances. - - Returns: - The created LXD instance. 
- """ - try: - pylxd_instance = self._pylxd_client.instances.create(config=config, wait=wait) - return LxdInstance(pylxd_instance) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to create the LXD instance") - raise LxdError(f"Unable to create the LXD instance {config['name']}") from err - - -class LxdProfileManager: - """LXD profile manager.""" - - def __init__(self, pylxd_client: pylxd.Client): - """Instantiate the LXD profile manager. - - Args: - pylxd_client: Instance of pylxd.Client. - """ - self._pylxd_client = pylxd_client - - def exists(self, name: str) -> bool: - """Check whether an LXD profile of a given name exists. - - Args: - name: Name for LXD profile to check. - - Raises: - LxdError: Unable to check the LXD profile existence. - - Returns: - Whether the LXD profile of the given name exists. - """ - try: - return self._pylxd_client.profiles.exists(name) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to check if LXD profile exists") - raise LxdError(f"Unable to check if LXD profile {name} exists") from err - - def create( - self, name: str, config: LxdResourceProfileConfig, devices: LxdResourceProfileDevices - ) -> None: - """Create an LXD profile. - - Args: - name: Name of the LXD profile to create. - config: Configuration of the LXD profile. - devices: Devices configuration of the LXD profile. - - Raises: - LxdError: Unable to create the LXD profile. - """ - try: - self._pylxd_client.profiles.create(name, config, devices) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to create LXD profile") - raise LxdError(f"Unable to create LXD profile {name}") from err - - def get(self, name: str) -> LxdProfile: - """Get an LXD profile. - - Args: - name: Name of the LXD profile. - - Raises: - LxdError: Unable to get the LXD profile with the name. - - Returns: - LXDProfile with given name. 
- """ - try: - return self._pylxd_client.profiles.get(name) - except pylxd.exceptions.LXDAPIException as err: - logger.exception("Failed to get LXD profile") - raise LxdError(f"Unable to get LXD profile {name}") from err - - -class LxdProfile: - """LXD profile.""" - - def __init__( - self, - pylxd_profile: pylxd.models.Profile, - ): - """Instantiate the LXD profile. - - Args: - pylxd_profile: Instance of the pylxd.models.Profile. - """ - self._pylxd_profile = pylxd_profile - - self.name = self._pylxd_profile.name - self.description = self._pylxd_profile.description - self.config = self._pylxd_profile.config - self.devices = self._pylxd_profile.devices - self.used_by = self._pylxd_profile.used_by - - def save(self) -> None: - """Save the current configuration of profile.""" - self._pylxd_profile.config = self.config - self._pylxd_profile.save() - - def delete(self) -> None: - """Delete the profile.""" - self._pylxd_profile.delete() - - -# Disable pylint as public method number check as this class can be extended in the future. -class LxdNetworkManager: # pylint: disable=too-few-public-methods - """LXD network manager.""" - - def __init__(self, pylxd_client: pylxd.Client): - """Instantiate the LXD profile manager. - - Args: - pylxd_client: Instance of pylxd.Client. - """ - self._pylxd_client = pylxd_client - - def get(self, name: str) -> LxdNetwork: - """Get the LXD network information. - - Args: - name: The name of the LXD network. - - Returns: - Information on the LXD network. - """ - network = self._pylxd_client.networks.get(name) - return LxdNetwork( - network.name, - network.description, - network.type, - network.config, - network.managed, - network.used_by, - ) - - -class LxdStoragePoolManager: - """LXD storage pool manager.""" - - def __init__(self, pylxd_client: pylxd.Client): - """Instantiate the LXD storage pool manager. - - Args: - pylxd_client: Instance of pylxd.Client. 
- """ - self._pylxd_client = pylxd_client - - def all(self) -> list[LxdStoragePool]: - """Get all LXD storage pool. - - Returns: - List of LXD storage pools. - """ - return [LxdStoragePool(pool) for pool in self._pylxd_client.storage_pools.all()] - - def get(self, name: str) -> LxdStoragePool: - """Get an LXD storage pool. - - Args: - name: Name of the storage pool. - - Raises: - LxdError: If the storage pool with given name was not found. - - Returns: - The LXD storage pool. - """ - try: - return LxdStoragePool(self._pylxd_client.storage_pools.get(name)) - except pylxd.exceptions.NotFound as err: - logger.exception("LXD storage pool not found") - raise LxdError(f"LXD storage pool {name} not found") from err - - def exists(self, name: str) -> bool: - """Check if an LXD storage pool exists. - - Args: - name: Name to check for. - - Returns: - Whether the storage pool exists. - """ - return self._pylxd_client.storage_pools.exists(name) - - def create(self, config: LxdStoragePoolConfiguration) -> LxdStoragePool: - """Create an LXD storage pool. - - Args: - config: Configuration for the storage pool. - - Returns: - The LXD storage pool. - """ - return self._pylxd_client.storage_pools.create(config) - - -class LxdStoragePool: - """An LXD storage pool. - - Attributes: - name (str): Name of the storage pool. - driver (str): Type of driver of the storage pool. - used_by (list[str]): LXD instances using the storage pool. - config (dict[str, any]): Dictionary of the configuration of the - storage pool. - managed (bool): Whether LXD manages the storage pool. - """ - - def __init__( - self, - pylxd_storage_pool: pylxd.models.StoragePool, - ): - """Instantiate the LXD storage pool. - - Args: - pylxd_storage_pool: Instance of the pylxd.models.StoragePool. 
- """ - self._pylxd_storage_pool = pylxd_storage_pool - - self.name = self._pylxd_storage_pool.name - self.driver = self._pylxd_storage_pool.driver - self.used_by = self._pylxd_storage_pool.used_by - self.config = self._pylxd_storage_pool.config - self.managed = self._pylxd_storage_pool.managed - - def save(self) -> None: - """Save the current configuration of storage pool.""" - self._pylxd_storage_pool.config = self.config - self._pylxd_storage_pool.save() - - def delete(self) -> None: - """Delete the storage pool.""" - self._pylxd_storage_pool.delete() - - -class LxdImageManager: # pylint: disable=too-few-public-methods - """LXD image manager.""" - - def __init__(self, pylxd_client: pylxd.Client): - """Instantiate the LXD image manager. - - Args: - pylxd_client: Instance of pylxd.Client. - """ - self._pylxd_client = pylxd_client - - def create(self, name: str, path: Path) -> None: - """Import a LXD image. - - Args: - name: Alias for the image. - path: Path of the LXD image file. - - Raises: - LxdError: Unable to import the file as LXD image. - """ - result = secure_run_subprocess(["lxc", "image", "import", "--alias", name, str(path)]) - if result.returncode != 0: - raise LxdError(result.stdout.decode("utf-8")) - - def exists(self, alias: str) -> bool: - """Check if an image with the given name exists. - - Args: - alias: Alias name of the image to check. - - Returns: - Whether the image exists. - """ - # There is no direct method to check if an image exists by alias in pylxd, we therefore - # use the pylxd client to get the image by alias and catch the NotFound exception. - try: - self._pylxd_client.images.get_by_alias(alias) - except pylxd.exceptions.NotFound: - return False - - return True - - -# Disable pylint as the public methods of this class are split into instances and profiles. 
-class LxdClient: # pylint: disable=too-few-public-methods - """LXD client.""" - - def __init__(self) -> None: - """Instantiate the LXD client.""" - pylxd_client = pylxd.Client() - self.instances = LxdInstanceManager(pylxd_client) - self.profiles = LxdProfileManager(pylxd_client) - self.networks = LxdNetworkManager(pylxd_client) - self.storage_pools = LxdStoragePoolManager(pylxd_client) - self.images = LxdImageManager(pylxd_client) diff --git a/src/lxd_type.py b/src/lxd_type.py deleted file mode 100644 index 77a3b3cb6..000000000 --- a/src/lxd_type.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Types used by Lxd class. - -The details of the configuration of different types of devices can be found here: -https://linuxcontainers.org/lxd/docs/latest/reference/devices/ - -For example, configuration for disk: -https://linuxcontainers.org/lxd/docs/latest/reference/devices_disk/# - -The unit of storage and network limits can be found here: -https://linuxcontainers.org/lxd/docs/latest/reference/instance_units/#instances-limit-units -""" - -from dataclasses import dataclass -from typing import TypedDict - -# The keys are not valid identifiers, hence this is defined with function-based syntax. -LxdResourceProfileConfig = TypedDict( - "LxdResourceProfileConfig", {"limits.cpu": str, "limits.memory": str} -) -LxdResourceProfileConfig.__doc__ = "Configuration LXD profile." - - -# The keys are not valid identifiers, hence this is defined with function-based syntax. -LxdResourceProfileDevicesDisk = TypedDict( - "LxdResourceProfileDevicesDisk", - {"path": str, "pool": str, "type": str, "size": str, "io.cache": str}, -) -LxdResourceProfileDevicesDisk.__doc__ = "LXD device profile of disk." - - -LxdResourceProfileDevices = dict[str, LxdResourceProfileDevicesDisk] - - -class LxdInstanceConfigSource(TypedDict): - """Configuration for source image in the LXD instance. 
- - Attributes: - type: Type of source configuration, e.g. image, disk - server: The source server URL, e.g. https://cloud-images.ubuntu.com/releases - protocol: Protocol of the configuration, e.g. simplestreams - alias: Alias for configuration. - """ - - type: str - server: str - protocol: str - alias: str - - -class LxdInstanceConfig(TypedDict): - """Configuration for the LXD instance. - - See https://documentation.ubuntu.com/lxd/en/latest/howto/instances_create/ - - Attributes: - name: Name of the instance. - type: Instance type, i.e. "container" or "virtual-machine". - source: Instance creation source configuration. - ephemeral: Whether the container should be deleted after a single run. - profiles: List of LXD profiles applied to the instance. - """ - - name: str - type: str - source: LxdInstanceConfigSource - ephemeral: bool - profiles: list[str] - - -# The keys are not valid identifiers, hence this is defined with function-based syntax. -LxdNetworkConfig = TypedDict( - "LxdNetworkConfig", - {"ipv4.address": str, "ipv4.nat": str, "ipv6.address": str, "ipv6.nat": str}, -) -LxdNetworkConfig.__doc__ = "Represent LXD network configuration." - - -class LxdStoragePoolConfig(TypedDict): - """Configuration of the storage pool. - - Attributes: - source: The storage pool configuration source image. - size: The size of the storage pool, e.g. 30GiB - """ - - source: str - size: str - - -class LxdStoragePoolConfiguration(TypedDict): - """Configuration for LXD storage pool. - - Attributes: - name: The storage pool name. - driver: The storage driver being used, i.e. "dir", "btrfs", ... . See \ - https://documentation.ubuntu.com/lxd/en/stable-5.0/reference/storage_drivers/ \ - for more information. - config: The storage pool configuration. - """ - - name: str - driver: str - config: LxdStoragePoolConfig - - -@dataclass -class LxdNetwork: - """LXD network information. - - Attributes: - name: The name of LXD network. - description: LXD network descriptor. 
- type: Network type, i.e. "bridge", "physical" - config: The LXD network configuration values. - managed: Whether the network is being managed by lxd. - used_by: Number of instances using the network. - """ - - name: str - description: str - type: str - config: LxdNetworkConfig - managed: bool - used_by: tuple[str] diff --git a/src/runner.py b/src/runner.py deleted file mode 100644 index a1b64fcfc..000000000 --- a/src/runner.py +++ /dev/null @@ -1,935 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Manage the lifecycle of runners. - -The `Runner` class stores the information on the runners and manages the -lifecycle of the runners on LXD and GitHub. - -The `RunnerManager` class from `runner_manager.py` creates and manages a -collection of `Runner` instances. -""" - -import json -import logging -import pathlib -import secrets -import textwrap -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable, NamedTuple, Optional, Sequence - -import yaml -from github_runner_manager.metrics.runner_logs import SYSLOG_PATH, create_logs_dir -from github_runner_manager.metrics.storage import MetricsStorage -from github_runner_manager.types_.github import GitHubOrg - -import shared_fs -from charm_state import Arch, SSHDebugConnection, VirtualMachineResources -from errors import ( - CreateMetricsStorageError, - GithubClientError, - LxdError, - RunnerAproxyError, - RunnerCreateError, - RunnerError, - RunnerFileLoadError, - RunnerLogsError, - RunnerRemoveError, - SubprocessError, -) -from lxd import LxdInstance -from lxd_type import LxdInstanceConfig -from runner_manager_type import RunnerManagerClients -from runner_type import RunnerConfig, RunnerStatus -from utilities import execute_command, retry - -logger = logging.getLogger(__name__) -LXD_PROFILE_YAML = pathlib.Path(__file__).parent.parent / "lxd-profile.yaml" -if not LXD_PROFILE_YAML.exists(): - LXD_PROFILE_YAML = LXD_PROFILE_YAML.parent / 
"lxd-profile.yml" -LXDBR_DNSMASQ_LEASES_FILE = Path("/var/snap/lxd/common/lxd/networks/lxdbr0/dnsmasq.leases") - -APROXY_ARM_REVISION = 9 -APROXY_AMD_REVISION = 8 - -METRICS_EXCHANGE_PATH = Path("/metrics-exchange") -DIAG_DIR_PATH = Path("/home/ubuntu/github-runner/_diag") - - -class Snap(NamedTuple): - """This class represents a snap installation. - - Attributes: - name: The snap application name. - channel: The channel to install the snap from. - revision: The revision number of the snap installation. - """ - - name: str - channel: str - revision: Optional[int] = None - - -@dataclass -class WgetExecutable: - """The executable to be installed through wget. - - Attributes: - url: The URL of the executable binary. - cmd: Executable command name. E.g. yq_linux_amd64 -> yq - """ - - url: str - cmd: str - - -@dataclass -class CreateRunnerConfig: - """The configuration values for creating a single runner instance. - - Attributes: - image: Name of the image to launch the LXD instance with. - resources: Resource setting for the LXD instance. - binary_path: Path to the runner binary. - registration_token: Token for registering the runner on GitHub. - arch: Current machine architecture. - """ - - image: str - resources: VirtualMachineResources - binary_path: Path - registration_token: str - arch: Arch = Arch.X64 - - -class Runner: - """Single instance of GitHub self-hosted runner. - - Attributes: - runner_application: The runner application directory path - env_file: The runner environment source .env file path. - config_script: The runner configuration script file path. - runner_script: The runner start script file path. - pre_job_script: The runner pre_job script file path. This is referenced in the env_file in - the ACTIONS_RUNNER_HOOK_JOB_STARTED environment variable. 
- """ - - runner_application = Path("/home/ubuntu/github-runner") - env_file = runner_application / ".env" - config_script = runner_application / "config.sh" - runner_script = runner_application / "start.sh" - pre_job_script = runner_application / "pre-job.sh" - - def __init__( - self, - clients: RunnerManagerClients, - runner_config: RunnerConfig, - runner_status: RunnerStatus, - instance: Optional[LxdInstance] = None, - ): - """Construct the runner instance. - - Args: - clients: Clients to access various services. - runner_config: Configuration of the runner instance. - runner_status: Status info of the given runner. - instance: LXD instance of the runner if already created. - """ - # Dependency injection to share the instances across different `Runner` instance. - self._clients = clients - self.config = runner_config - self.status = runner_status - self.instance = instance - - self._shared_fs: Optional[MetricsStorage] = None - - def create(self, config: CreateRunnerConfig) -> None: - """Create the runner instance on LXD and register it on GitHub. - - Args: - config: The instance config to create the LXD VMs and configure GitHub runner with. - - Raises: - RunnerCreateError: Unable to create an LXD instance for runner. - """ - logger.info("Creating runner: %s", self.config.name) - - if self.config.issue_metrics: - try: - self._shared_fs = shared_fs.create(self.config.name) - except CreateMetricsStorageError: - logger.exception( - "Unable to create shared filesystem for runner %s. 
" - "Will not create metrics for this runner.", - self.config.name, - ) - try: - self.instance = self._create_instance(config.image, config.resources) - self._start_instance() - # Wait some initial time for the instance to boot up - time.sleep(60) - self._wait_boot_up() - self._install_binaries(config.binary_path, config.arch) - self._configure_runner() - - self._register_runner( - config.registration_token, - labels=[self.config.app_name, config.image, *self.config.labels], - ) - self._start_runner() - except (RunnerError, LxdError) as err: - raise RunnerCreateError(f"Unable to create runner {self.config.name}") from err - - def _remove_lxd_runner(self, remove_token: Optional[str]) -> None: - """Remove running LXD runner instance. - - Args: - remove_token: The Github remove token to execute removal with config.sh script. - - Raises: - LxdError:If there was an error removing LXD runner instance. - """ - logger.info("Executing command to removal of runner and clean up...") - - if not self.instance: - return - - if remove_token: - self.instance.execute( - [ - "/usr/bin/sudo", - "-u", - "ubuntu", - str(self.config_script), - "remove", - "--token", - remove_token, - ], - hide_cmd=True, - ) - - if self.instance.status == "Running": - logger.info("Removing LXD instance of runner: %s", self.config.name) - try: - self.instance.stop(wait=True, timeout=60) - except LxdError: - logger.exception( - "Unable to gracefully stop runner %s within timeout.", self.config.name - ) - logger.info("Force stopping of runner %s", self.config.name) - try: - self.instance.stop(force=True) - except LxdError as exc: - logger.error("Error stopping instance, %s", exc) - raise - else: - # Delete ephemeral instances that have error or stopped status which LXD failed to - # clean up. 
- logger.warning( - "Found runner %s with status %s, forcing deletion", - self.config.name, - self.instance.status, - ) - try: - self.instance.delete(wait=True) - except LxdError as exc: - logger.error("Error deleting instance, %s", exc) - raise - - def remove(self, remove_token: Optional[str]) -> None: - """Remove this runner instance from LXD and GitHub. - - Args: - remove_token: Token for removing the runner on GitHub. - - Raises: - RunnerRemoveError: Failure in removing runner. - """ - logger.info("Removing runner: %s", self.config.name) - - try: - self._remove_lxd_runner(remove_token=remove_token) - except LxdError as exc: - raise RunnerRemoveError(f"Unable to remove {self.config.name}") from exc - - if self.status.runner_id is None: - return - - logger.info("Removing runner on GitHub: %s", self.config.name) - - # The runner should cleanup itself. Cleanup on GitHub in case of runner cleanup error. - logger.debug( - "Ensure runner %s with id %s is removed from GitHub %s", - self.config.name, - self.status.runner_id, - self.config.path.path(), - ) - try: - self._clients.github.delete_runner(self.config.path, self.status.runner_id) - except GithubClientError: - logger.exception("Unable the remove runner on GitHub: %s", self.config.name) - # This can occur when attempting to remove a busy runner. - # The caller should retry later, after GitHub mark the runner as offline. - - def pull_logs(self) -> None: - """Pull the logs of the runner into a directory. - - Expects the runner to have an instance. - - Raises: - RunnerLogsError: If the runner logs could not be pulled. - """ - if self.instance is None: - raise RunnerLogsError( - f"Cannot pull the logs for {self.config.name} as runner has no running instance." 
- ) - - target_log_path = create_logs_dir(self.config.name) - - try: - self.instance.files.pull_file(str(DIAG_DIR_PATH), str(target_log_path), is_dir=True) - self.instance.files.pull_file(str(SYSLOG_PATH), str(target_log_path)) - except LxdError as exc: - raise RunnerLogsError(f"Cannot pull the logs for {self.config.name}.") from exc - - def _add_shared_filesystem(self, path: Path) -> None: - """Add the shared filesystem to the runner instance. - - Args: - path: Path to the shared filesystem. - """ - try: - execute_command( - [ - "sudo", - "lxc", - "config", - "device", - "add", - self.config.name, - "metrics", - "disk", - f"source={path}", - f"path={METRICS_EXCHANGE_PATH}", - ], - check_exit=True, - ) - except SubprocessError: - logger.exception( - "Unable to add shared filesystem to runner %s. " - "Will not create metrics for this runner.", - self.config.name, - ) - - @retry(tries=5, delay=10, local_logger=logger) - def _create_instance( - self, image: str, resources: VirtualMachineResources, ephemeral: bool = True - ) -> LxdInstance: - """Create an instance of runner. - - Args: - image: Image used to launch the instance hosting the runner. - resources: Configuration of the virtual machine resources. - ephemeral: Whether the instance is ephemeral. - - Raises: - LxdError: if there was an error creating an LXD instance. - - Returns: - LXD instance of the runner. - """ - logger.info("Creating an LXD instance for runner: %s", self.config.name) - - self._ensure_runner_storage_pool() - self._ensure_runner_profile() - resource_profile = self._get_resource_profile(resources) - - # Create runner instance. 
- instance_config: LxdInstanceConfig = { - "name": self.config.name, - "type": "container" if LXD_PROFILE_YAML.exists() else "virtual-machine", - "source": { - "type": "image", - "alias": image, - }, - "ephemeral": ephemeral, - "profiles": ["default", "runner", resource_profile], - } - - try: - instance = self._clients.lxd.instances.create(config=instance_config, wait=True) - except LxdError: - logger.exception( - "Removing resource profile and storage profile due to LXD instance create failure" - ) - - # LxdError on creating LXD instance could be caused by improper initialization of - # storage pool. If other runner LXD instance exists then it cannot be the cause. - if not self._clients.lxd.instances.all(): - # Removing the storage pool and retry can solve the problem. - self._remove_runner_storage_pool() - raise - - self.status.exist = True - - if self._shared_fs: - self._add_shared_filesystem(self._shared_fs.path) - - return instance - - @retry(tries=5, delay=10, local_logger=logger) - def _ensure_runner_profile(self) -> None: - """Ensure the runner profile is present on LXD. - - Raises: - RunnerError: Unable to create the runner profile. - """ - if self._clients.lxd.profiles.exists("runner"): - logger.info("Found existing runner LXD profile") - return - - logger.info("Creating runner LXD profile") - profile_config = {} - profile_devices = {} - if LXD_PROFILE_YAML.exists(): - additional_lxc_profile = yaml.safe_load(LXD_PROFILE_YAML.read_text()) - profile_config = { - k: json.dumps(v) if isinstance(v, bool) else v - for k, v in additional_lxc_profile["config"].items() - } - profile_devices = additional_lxc_profile["devices"] - self._clients.lxd.profiles.create("runner", profile_config, profile_devices) - - # Verify the action is successful. 
- if not self._clients.lxd.profiles.exists("runner"): - raise RunnerError("Failed to create runner LXD profile") - - @retry(tries=5, delay=10, local_logger=logger) - def _ensure_runner_storage_pool(self) -> None: - """Ensure the runner storage pool exists. - - Raises: - RunnerError: If there was an error creating LXD storage pool. - """ - if self._clients.lxd.storage_pools.exists("runner"): - logger.info("Found existing runner LXD storage pool.") - return - - logger.info("Creating runner LXD storage pool.") - self._clients.lxd.storage_pools.create( - { - "name": "runner", - "driver": "dir", - "config": {"source": str(self.config.lxd_storage_path)}, - } - ) - - # Verify the action is successful. - if not self._clients.lxd.storage_pools.exists("runner"): - raise RunnerError("Failed to create runner LXD storage pool") - - def _remove_runner_storage_pool(self) -> None: - """Remove the runner storage pool if exists.""" - if self._clients.lxd.storage_pools.exists("runner"): - logger.info("Removing existing runner LXD storage pool.") - runner_storage_pool = self._clients.lxd.storage_pools.get("runner") - - # The resource profile needs to be removed first as it uses the storage pool. - for used_by in runner_storage_pool.used_by: - _, profile_name = used_by.rsplit("/", 1) - profile = self._clients.lxd.profiles.get(profile_name) - profile.delete() - - runner_storage_pool.delete() - - @classmethod - def _get_resource_profile_name(cls, cpu: int, memory: str, disk: str) -> str: - """Get the LXD profile name for resource limit. - - Args: - cpu: CPU resource limit. - memory: Memory resource limit. - disk: Disk resource limit. - - Returns: - Name for the LXD profile of the given resource limits. - """ - return f"cpu-{cpu}-mem-{memory}-disk-{disk}" - - @retry(tries=5, delay=10, local_logger=logger) - def _get_resource_profile(self, resources: VirtualMachineResources) -> str: - """Get the LXD profile name of given resource limit. 
- - Args: - resources: Resources limit of the runner instance. - - Raises: - RunnerError: Unable to create the profile on LXD. - - Returns: - str: Name of the profile for the given resource limit. - """ - # Ensure the resource profile exists. - profile_name = self._get_resource_profile_name( - resources.cpu, resources.memory, resources.disk - ) - if not self._clients.lxd.profiles.exists(profile_name): - logger.info("Creating LXD profile for resource usage.") - try: - resource_profile_config = { - "limits.cpu": str(resources.cpu), - "limits.memory": resources.memory, - } - resource_profile_devices = { - "root": { - "path": "/", - "pool": "runner", - "type": "disk", - "size": resources.disk, - } - } - # Temporary fix to allow tmpfs to work for LXD VM. - if not LXD_PROFILE_YAML.exists(): - resource_profile_devices["root"]["io.cache"] = "unsafe" - - self._clients.lxd.profiles.create( - profile_name, resource_profile_config, resource_profile_devices - ) - except LxdError as error: - logger.error(error) - raise RunnerError( - "Resources were not provided in the correct format, check the juju config for " - "cpu, memory and disk." - ) from error - - # Verify the action is successful. - if not self._clients.lxd.profiles.exists(profile_name): - raise RunnerError(f"Unable to create {profile_name} LXD profile") - else: - logger.info("Found existing LXD profile for resource usage.") - - return profile_name - - @retry(tries=5, delay=10, local_logger=logger) - def _start_instance(self) -> None: - """Start an instance and wait for it to boot. - - Raises: - RunnerError: If the runner has not instantiated before calling this operation. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - logger.info("Starting LXD instance for runner: %s", self.config.name) - - # Setting `wait=True` only ensure the instance has begin to boot up. 
- self.instance.start(wait=True) - - @retry(tries=20, delay=30, local_logger=logger) - def _wait_boot_up(self) -> None: - """Wait for LXD instance to boot up. - - Raises: - RunnerError: If there was an error while waiting for the runner to boot up. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - # Wait for the instance to finish to boot up and network to be up. - if self.instance.execute(["/usr/bin/who"])[0] != 0: - raise RunnerError("Runner system is not ready") - if self.instance.execute(["/usr/bin/nslookup", "github.com"])[0] != 0: - raise RunnerError("Runner network is not ready") - - logger.info("Finished booting up LXD instance for runner: %s", self.config.name) - - @retry(tries=10, delay=10, max_delay=120, backoff=2, local_logger=logger) - def _install_binaries(self, runner_binary: Path, arch: Arch) -> None: - """Install runner binary and other binaries. - - Args: - runner_binary: Path to the compressed runner binary. - arch: The runner system architecture. - - Raises: - RunnerFileLoadError: Unable to load the runner binary into the runner instance. - RunnerError: If the runner has not instantiated before calling this operation. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - self._snap_install( - [ - Snap( - name="aproxy", - channel="edge", - revision=APROXY_ARM_REVISION if arch == Arch.ARM64 else APROXY_AMD_REVISION, - ) - ] - ) - - # The LXD instance is meant to run untrusted workload. Hardcoding the tmp directory should - # be fine. - binary_path = "/tmp/runner.tgz" # nosec B108 - - logger.info("Installing runner binary on LXD instance: %s", self.config.name) - - # Creating directory and putting the file are idempotent, and can be retried. 
- self.instance.files.mk_dir(str(self.runner_application)) - self.instance.files.push_file(str(runner_binary), binary_path) - - self.instance.execute( - ["/usr/bin/tar", "-xzf", binary_path, "-C", str(self.runner_application)] - ) - self.instance.execute( - ["/usr/bin/chown", "-R", "ubuntu:ubuntu", str(self.runner_application)] - ) - - # Verify the config script is written to runner. - exit_code, _, stderr = self.instance.execute(["test", "-f", str(self.config_script)]) - if exit_code == 0: - logger.info("Runner binary loaded on runner instance %s.", self.config.name) - else: - logger.error( - "Unable to load runner binary on runner instance %s: %s", - self.config.name, - stderr.read(), - ) - raise RunnerFileLoadError(f"Failed to load runner binary on {self.config.name}") - - def _should_render_templates_with_metrics(self) -> bool: - """Whether to render templates with metrics. - - Returns: - True if the runner should render templates with metrics. - """ - return self._shared_fs is not None - - def _get_default_ip(self) -> Optional[str]: - """Get the default IP of the runner. - - Raises: - RunnerError: If the runner has not instantiated before calling this operation. - - Returns: - The default IP of the runner or None if not found. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - default_ip = None - # parse LXD dnsmasq leases file to get the default IP of the runner - # format: timestamp mac-address ip-address hostname client-id - lines = LXDBR_DNSMASQ_LEASES_FILE.read_text("utf-8").splitlines(keepends=False) - for line in lines: - columns = line.split() - if len(columns) >= 4 and columns[3] == self.instance.name: - default_ip = columns[2] - break - - return default_ip - - def _configure_aproxy(self, proxy_address: str) -> None: - """Configure aproxy. - - Args: - proxy_address: Proxy to configure aproxy with. - - Raises: - RunnerAproxyError: If unable to configure aproxy. 
- RunnerError: If the runner has not instantiated before calling this operation. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - logger.info("Configuring aproxy for the runner.") - - aproxy_port = 54969 - - self.instance.execute( - ["snap", "set", "aproxy", f"proxy={proxy_address}", f"listen=:{aproxy_port}"] - ) - exit_code, stdout, _ = self.instance.execute(["snap", "logs", "aproxy.aproxy", "-n=all"]) - stdout_message = stdout.read().decode("utf-8") - if exit_code != 0 or ( - "Started Service for snap application aproxy.aproxy" not in stdout_message - and "Started snap.aproxy.aproxy.service" not in stdout_message - ): - raise RunnerAproxyError("Aproxy service did not configure correctly") - - default_ip = self._get_default_ip() - if not default_ip: - raise RunnerAproxyError("Unable to find default IP for aproxy configuration.") - - nft_input = textwrap.dedent( - f"""\ - define default-ip = {default_ip} - define private-ips = {{ 10.0.0.0/8, 127.0.0.1/8, 172.16.0.0/12, 192.168.0.0/16 }} - table ip aproxy - flush table ip aproxy - table ip aproxy {{ - chain prerouting {{ - type nat hook prerouting priority dstnat; policy accept; - ip daddr != $private-ips tcp dport {{ 80, 443 }} dnat to $default-ip:{aproxy_port} - }} - chain output {{ - type nat hook output priority -100; policy accept; - ip daddr != $private-ips tcp dport {{ 80, 443 }} dnat to $default-ip:{aproxy_port} - }} - }}""" - ) - self.instance.execute(["nft", "-f", "-"], input=nft_input.encode("utf-8")) - - def _configure_docker_proxy(self) -> None: - """Configure docker proxy. - - Raises: - RunnerError: If the runner has not instantiated before calling this operation. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - # Creating directory and putting the file are idempotent, and can be retried. 
- logger.info("Adding proxy setting to the runner.") - docker_proxy_contents = self._clients.jinja.get_template("systemd-docker-proxy.j2").render( - proxies=self.config.proxies - ) - # Set docker daemon proxy config - docker_service_path = Path("/etc/systemd/system/docker.service.d") - docker_service_proxy = docker_service_path / "http-proxy.conf" - self.instance.files.mk_dir(str(docker_service_path)) - self._put_file(str(docker_service_proxy), docker_proxy_contents) - self.instance.execute(["systemctl", "daemon-reload"]) - self.instance.execute(["systemctl", "restart", "docker"]) - # Set docker client proxy config - docker_client_proxy = { - "proxies": { - "default": { - key: value - for key, value in ( - ("httpProxy", self.config.proxies.http), - ("httpsProxy", self.config.proxies.https), - ("noProxy", self.config.proxies.no_proxy), - ) - if value - } - } - } - docker_client_proxy_content = json.dumps(docker_client_proxy) - # Configure the docker client for root user and ubuntu user. - self._put_file("/root/.docker/config.json", docker_client_proxy_content) - self._put_file("/home/ubuntu/.docker/config.json", docker_client_proxy_content) - self.instance.execute(["/usr/bin/chown", "-R", "ubuntu:ubuntu", "/home/ubuntu/.docker"]) - - @retry(tries=5, delay=10, max_delay=60, backoff=2, local_logger=logger) - def _configure_runner(self) -> None: - """Load configuration on to the runner. - - Raises: - RunnerFileLoadError: Unable to load configuration file on the runner. - RunnerError: If the runner has not instantiated before calling this operation. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - # Load the runner startup script. 
- startup_contents = self._clients.jinja.get_template("start.j2").render( - issue_metrics=self._should_render_templates_with_metrics() - ) - try: - self._put_file(str(self.runner_script), startup_contents, mode="0755") - # TODO: 2024-04-02 - We should define a new error, wrap it and re-raise it. - except RunnerFileLoadError: # pylint: disable=try-except-raise - raise - self.instance.execute(["/usr/bin/sudo", "chown", "ubuntu:ubuntu", str(self.runner_script)]) - self.instance.execute(["/usr/bin/sudo", "chmod", "u+x", str(self.runner_script)]) - - # Load the runner pre-job script. - bridge_address_range = self._clients.lxd.networks.get("lxdbr0").config["ipv4.address"] - host_ip, _ = bridge_address_range.split("/") - one_time_token = self._clients.repo.get_one_time_token() - pre_job_contents = self._clients.jinja.get_template("pre-job.j2").render( - repo_policy_base_url=f"http://{host_ip}:8080", - repo_policy_one_time_token=one_time_token, - issue_metrics=self._should_render_templates_with_metrics(), - metrics_exchange_path=str(METRICS_EXCHANGE_PATH), - do_repo_policy_check=True, - ) - self._put_file(str(self.pre_job_script), pre_job_contents) - self.instance.execute( - ["/usr/bin/sudo", "chown", "ubuntu:ubuntu", str(self.pre_job_script)] - ) - self.instance.execute(["/usr/bin/sudo", "chmod", "u+x", str(self.pre_job_script)]) - - # Set permission to the same as GitHub-hosted runner for this directory. - # Some GitHub Actions require this permission setting to run. - # As the user already has sudo access, this does not give the user any additional access. - self.instance.execute(["/usr/bin/sudo", "chmod", "777", "/usr/local/bin"]) - - selected_ssh_connection: SSHDebugConnection | None = ( - secrets.choice(self.config.ssh_debug_connections) - if self.config.ssh_debug_connections - else None - ) - logger.info("SSH Debug info: %s", selected_ssh_connection) - # Load `/etc/environment` file. 
- environment_contents = self._clients.jinja.get_template("environment.j2").render( - proxies=self.config.proxies, ssh_debug_info=selected_ssh_connection - ) - self._put_file("/etc/environment", environment_contents) - - # Load `.env` config file for GitHub self-hosted runner. - env_contents = self._clients.jinja.get_template("env.j2").render( - proxies=self.config.proxies, - pre_job_script=str(self.pre_job_script), - dockerhub_mirror=self.config.dockerhub_mirror, - ssh_debug_info=selected_ssh_connection, - ) - self._put_file(str(self.env_file), env_contents) - self.instance.execute(["/usr/bin/chown", "ubuntu:ubuntu", str(self.env_file)]) - - if self.config.dockerhub_mirror: - docker_daemon_config = {"registry-mirrors": [self.config.dockerhub_mirror]} - self._put_file("/etc/docker/daemon.json", json.dumps(docker_daemon_config)) - self.instance.execute(["systemctl", "restart", "docker"]) - - if self.config.proxies: - if aproxy_address := self.config.proxies.aproxy_address: - self._configure_aproxy(aproxy_address) - else: - self._configure_docker_proxy() - - # Ensure the no existing /usr/bin/python. - self.instance.execute(["rm", "/usr/bin/python"]) - # Make python an alias of python3. - self.instance.execute(["ln", "-s", "/usr/bin/python3", "/usr/bin/python"]) - - @retry(tries=5, delay=30, local_logger=logger) - def _register_runner(self, registration_token: str, labels: Sequence[str]) -> None: - """Register the runner on GitHub. - - Args: - registration_token: Registration token request from GitHub. - labels: Labels to tag the runner with. - - Raises: - RunnerError: If the runner has not instantiated before calling this operation. 
- """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - logger.info("Registering runner %s, labels: %s", self.config.name, labels) - - register_cmd = [ - "/usr/bin/sudo", - "-u", - "ubuntu", - str(self.config_script), - "--url", - f"https://github.com/{self.config.path.path()}", - "--token", - registration_token, - "--ephemeral", - "--unattended", - "--labels", - ",".join(labels), - "--name", - self.instance.name, - ] - - if isinstance(self.config.path, GitHubOrg): - register_cmd += ["--runnergroup", self.config.path.group] - - logger.info("Executing registration command...") - self.instance.execute( - register_cmd, - cwd=str(self.runner_application), - hide_cmd=True, - ) - - @retry(tries=5, delay=30, local_logger=logger) - def _start_runner(self) -> None: - """Start the GitHub runner. - - Raises: - RunnerError: If the runner has not instantiated before calling this operation. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - logger.info("Starting runner %s", self.config.name) - - self.instance.execute( - [ - "/usr/bin/sudo", - "-u", - "ubuntu", - str(self.runner_script), - ] - ) - - logger.info("Started runner %s", self.config.name) - - def _put_file(self, filepath: str, content: str, mode: Optional[str] = None) -> None: - """Put a file into the runner instance. - - Args: - filepath: Path to load the file in the runner instance. - content: Content of the file. - mode: File permission setting. - - Raises: - RunnerFileLoadError: Failed to load the file into the runner instance. - RunnerError: If the runner has not instantiated before calling this operation. 
- """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - self.instance.files.write_file(filepath, content, mode) - content_on_runner = self.instance.files.read_file(filepath) - if content_on_runner != content: - logger.error( - "Loaded file %s in runner %s did not match expected content", - filepath, - self.instance.name, - ) - logger.debug( - "Excepted file content for file %s on runner %s: %s\nFound: %s", - filepath, - self.instance.name, - content, - content_on_runner, - ) - raise RunnerFileLoadError( - f"Failed to load file {filepath} to runner {self.instance.name}" - ) - - def _snap_install(self, snaps: Iterable[Snap]) -> None: - """Installs the given snap packages. - - This is a temporary solution to provide tools not offered by the base ubuntu image. Custom - images based on the GitHub action runner image will be used in the future. - - Raises: - RunnerError: if the runner was not created before calling the method. - RunnerCreateError: If there was an error installing a snap. - - Args: - snaps: snaps to be installed. - """ - if self.instance is None: - raise RunnerError("Runner operation called prior to runner creation.") - - for snap in snaps: - logger.info("Installing %s via snap...", snap.name) - cmd = ["snap", "install", snap.name, f"--channel={snap.channel}"] - if snap.revision is not None: - cmd.append(f"--revision={snap.revision}") - exit_code, _, stderr = self.instance.execute(cmd) - - if exit_code != 0: - err_msg = stderr.read().decode("utf-8") - logger.error("Unable to install %s due to %s", snap.name, err_msg) - raise RunnerCreateError(f"Unable to install {snap.name}") diff --git a/src/runner_manager.py b/src/runner_manager.py deleted file mode 100644 index 727fc6782..000000000 --- a/src/runner_manager.py +++ /dev/null @@ -1,838 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
- -"""Runner Manager manages the runners on LXD and GitHub.""" - -import hashlib -import logging -import random -import secrets -import tarfile -import time -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Iterator, Optional, Type - -import jinja2 -import requests -import requests.adapters -import urllib3 -from github_runner_manager.metrics import events as metric_events -from github_runner_manager.metrics import github as github_metrics -from github_runner_manager.metrics import runner as runner_metrics -from github_runner_manager.metrics import runner_logs -from github_runner_manager.metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME -from github_runner_manager.repo_policy_compliance_client import RepoPolicyComplianceClient -from github_runner_manager.types_.github import RunnerApplication, SelfHostedRunner - -import shared_fs -from charm_state import VirtualMachineResources -from errors import ( - GetMetricsStorageError, - GithubClientError, - GithubMetricsError, - IssueMetricEventError, - RunnerBinaryError, - RunnerCreateError, - RunnerLogsError, - SubprocessError, -) -from github_client import GithubClient -from lxd import LxdClient, LxdInstance -from runner import LXD_PROFILE_YAML, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus -from runner_manager_type import ( - LXDFlushMode, - LXDRunnerManagerConfig, - RunnerInfo, - RunnerManagerClients, -) -from runner_type import ProxySetting as RunnerProxySetting -from runner_type import RunnerNameByHealth -from utilities import execute_command, retry, set_env_var - -REMOVED_RUNNER_LOG_STR = "Removed runner: %s" - -logger = logging.getLogger(__name__) - - -BUILD_IMAGE_SCRIPT_FILENAME = Path("scripts/build-lxd-image.sh") - -IssuedMetricEventsStats = dict[Type[metric_events.Event], int] - - -class LXDRunnerManager: - """Manage a group of runners according to configuration. - - Attributes: - runner_bin_path: The github runner app scripts path. 
- cron_path: The path to runner build image cron job. - """ - - runner_bin_path = Path("/home/ubuntu/github-runner-app") - cron_path = Path("/etc/cron.d") - - def __init__( - self, - app_name: str, - unit: int, - runner_manager_config: LXDRunnerManagerConfig, - ) -> None: - """Construct RunnerManager object for creating and managing runners. - - Args: - app_name: An name for the set of runners. - unit: Unit number of the set of runners. - runner_manager_config: Configuration for the runner manager. - """ - self.app_name = app_name - self.instance_name = f"{app_name}-{unit}" - self.config = runner_manager_config - self.proxies = runner_manager_config.charm_state.proxy_config - - # Setting the env var to this process and any child process spawned. - if no_proxy := self.proxies.no_proxy: - set_env_var("NO_PROXY", no_proxy) - if http_proxy := self.proxies.http: - set_env_var("HTTP_PROXY", http_proxy) - if https_proxy := self.proxies.https: - set_env_var("HTTPS_PROXY", https_proxy) - - self.session = requests.Session() - adapter = requests.adapters.HTTPAdapter( - max_retries=urllib3.Retry( - total=3, backoff_factor=0.3, status_forcelist=[500, 502, 503, 504] - ) - ) - self.session.mount("http://", adapter) - self.session.mount("https://", adapter) - - self._clients = RunnerManagerClients( - GithubClient(token=self.config.token), - jinja2.Environment(loader=jinja2.FileSystemLoader("templates"), autoescape=True), - LxdClient(), - RepoPolicyComplianceClient("http://127.0.0.1:8080", self.config.service_token), - ) - - def check_runner_bin(self) -> bool: - """Check if runner binary exists. - - Returns: - Whether runner bin exists. - """ - return self.runner_bin_path.exists() - - @retry(tries=5, delay=30, local_logger=logger) - def get_latest_runner_bin_url(self, os_name: str = "linux") -> RunnerApplication: - """Get the URL for the latest runner binary. - - The runner binary URL changes when a new version is available. - - Args: - os_name: Name of operating system. 
- - Raises: - RunnerBinaryError: If an error occurred while fetching runner application info. - - Returns: - Information on the runner application. - """ - try: - return self._clients.github.get_runner_application( - path=self.config.path, arch=self.config.charm_state.arch.value, os=os_name - ) - except RunnerBinaryError: - logger.error("Failed to get runner application info.") - raise - - @retry(tries=5, delay=30, local_logger=logger) - def update_runner_bin(self, binary: RunnerApplication) -> None: - """Download a runner file, replacing the current copy. - - Remove the existing runner binary to prevent it from being used. This - is done to prevent security issues arising from outdated runner - binaries containing security flaws. The newest version of runner binary - should always be used. - - Args: - binary: Information on the runner binary to download. - - Raises: - RunnerBinaryError: If there was an error updating runner binary info. - """ - logger.info("Downloading runner binary from: %s", binary["download_url"]) - - try: - # Delete old version of runner binary. - LXDRunnerManager.runner_bin_path.unlink(missing_ok=True) - except OSError as err: - logger.exception("Unable to perform file operation on the runner binary path") - raise RunnerBinaryError("File operation failed on the runner binary path") from err - - try: - # Download the new file - response = self.session.get(binary["download_url"], stream=True, timeout=10 * 60) - - logger.info( - "Download of runner binary from %s return status code: %i", - binary["download_url"], - response.status_code, - ) - - if not binary["sha256_checksum"]: - logger.error("Checksum for runner binary is not found, unable to verify download.") - raise RunnerBinaryError( - "Checksum for runner binary is not found in GitHub response." - ) - - sha256 = hashlib.sha256() - - with LXDRunnerManager.runner_bin_path.open(mode="wb") as file: - # Process with chunk_size of 128 KiB. 
- for chunk in response.iter_content(chunk_size=128 * 1024, decode_unicode=False): - file.write(chunk) - - sha256.update(chunk) - except requests.RequestException as err: - logger.exception("Failed to download of runner binary") - raise RunnerBinaryError("Failed to download runner binary") from err - - logger.info("Finished download of runner binary.") - - # Verify the checksum if checksum is present. - if binary["sha256_checksum"] != sha256.hexdigest(): - logger.error( - "Expected hash of runner binary (%s) doesn't match the calculated hash (%s)", - binary["sha256_checksum"], - sha256, - ) - raise RunnerBinaryError("Checksum mismatch for downloaded runner binary") - - # Verify the file integrity. - if not tarfile.is_tarfile(file.name): - logger.error("Failed to decompress downloaded GitHub runner binary.") - raise RunnerBinaryError("Downloaded runner binary cannot be decompressed.") - - logger.info("Validated newly downloaded runner binary and enabled it.") - - def get_github_info(self) -> Iterator[RunnerInfo]: - """Get information on the runners from GitHub. - - Returns: - List of information from GitHub on runners. - """ - remote_runners = self._get_runner_github_info() - return ( - RunnerInfo(runner.name, runner.status, runner.busy) - for runner in remote_runners.values() - ) - - def _get_runner_health_states(self) -> RunnerNameByHealth: - """Get all runners sorted into health groups. - - Returns: - All runners sorted by health statuses. - """ - local_runners = [ - instance - # Pylint cannot find the `all` method. - for instance in self._clients.lxd.instances.all() # pylint: disable=no-member - if instance.name.startswith(f"{self.instance_name}-") - ] - - healthy = [] - unhealthy = [] - - for runner in local_runners: - # we need to hide the command to prevent sensitive information on the workload - # from being exposed. 
- _, stdout, _ = runner.execute(["ps", "aux"], hide_cmd=True) - if f"/bin/bash {Runner.runner_script}" in stdout.read().decode("utf-8"): - healthy.append(runner.name) - else: - unhealthy.append(runner.name) - - return RunnerNameByHealth(healthy, unhealthy) - - def _create_runner( - self, registration_token: str, resources: VirtualMachineResources, runner: Runner - ) -> None: - """Create a runner. - - Issues RunnerInstalled metric if metrics_logging is enabled. - - Args: - registration_token: Token for registering runner to GitHub. - resources: Configuration of the virtual machine resources. - runner: Runner to be created. - """ - if self.config.are_metrics_enabled: - ts_now = time.time() - runner.create( - config=CreateRunnerConfig( - image=self.config.image, - resources=resources, - binary_path=LXDRunnerManager.runner_bin_path, - registration_token=registration_token, - arch=self.config.charm_state.arch, - ) - ) - ts_after = time.time() - try: - metric_events.issue_event( - event=metric_events.RunnerInstalled( - timestamp=ts_after, - flavor=self.app_name, - duration=ts_after - ts_now, - ), - ) - except IssueMetricEventError: - logger.exception("Failed to issue RunnerInstalled metric") - - try: - fs = shared_fs.get(runner.config.name) - except GetMetricsStorageError: - logger.exception( - "Failed to get shared filesystem for runner %s, " - "will not be able to issue all metrics.", - runner.config.name, - ) - else: - try: - (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).write_text( - str(ts_after), encoding="utf-8" - ) - except FileNotFoundError: - logger.exception( - "Failed to write runner-installed.timestamp into shared filesystem " - "for runner %s, will not be able to issue all metrics.", - runner.config.name, - ) - - else: - runner.create( - config=CreateRunnerConfig( - image=self.config.image, - resources=resources, - binary_path=LXDRunnerManager.runner_bin_path, - registration_token=registration_token, - arch=self.config.charm_state.arch, - ) - ) - - def 
_issue_runner_metrics(self) -> IssuedMetricEventsStats: - """Issue runner metrics. - - Returns: - The stats of issued metric events. - """ - runner_states = self._get_runner_health_states() - - total_stats: IssuedMetricEventsStats = {} - for extracted_metrics in runner_metrics.extract( - metrics_storage_manager=shared_fs, runners=set(runner_states.healthy) - ): - if extracted_metrics.pre_job: - try: - job_metrics = github_metrics.job( - github_client=self._clients.github, - pre_job_metrics=extracted_metrics.pre_job, - runner_name=extracted_metrics.runner_name, - ) - except GithubMetricsError: - logger.exception("Failed to calculate job metrics") - job_metrics = None - else: - logger.debug( - "No pre-job metrics found for %s, will not calculate job metrics.", - extracted_metrics.runner_name, - ) - job_metrics = None - - issued_events = runner_metrics.issue_events( - runner_metrics=extracted_metrics, - job_metrics=job_metrics, - flavor=self.app_name, - ) - for event_type in issued_events: - total_stats[event_type] = total_stats.get(event_type, 0) + 1 - return total_stats - - def _issue_reconciliation_metric( - self, - metric_stats: IssuedMetricEventsStats, - reconciliation_start_ts: float, - reconciliation_end_ts: float, - expected_quantity: int, - ) -> None: - """Issue reconciliation metric. - - Args: - metric_stats: The stats of issued metric events. - reconciliation_start_ts: The timestamp of when reconciliation started. - reconciliation_end_ts: The timestamp of when reconciliation ended. - expected_quantity: The expected quantity of runners. 
- """ - runners = self._get_runners() - runner_states = self._get_runner_health_states() - healthy_runners = set(runner_states.healthy) - online_runners = [ - runner for runner in runners if runner.status.exist and runner.status.online - ] - active_runner_names = { - runner.config.name for runner in online_runners if runner.status.busy - } - offline_runner_names = { - runner.config.name - for runner in runners - if runner.status.exist and not runner.status.online - } - - active_count = len(active_runner_names) - idle_online_count = len(online_runners) - active_count - idle_offline_count = len((offline_runner_names & healthy_runners) - active_runner_names) - - try: - metric_events.issue_event( - event=metric_events.Reconciliation( - timestamp=time.time(), - flavor=self.app_name, - crashed_runners=metric_stats.get(metric_events.RunnerStart, 0) - - metric_stats.get(metric_events.RunnerStop, 0), - idle_runners=idle_online_count + idle_offline_count, - active_runners=active_count, - expected_runners=expected_quantity, - duration=reconciliation_end_ts - reconciliation_start_ts, - ) - ) - except IssueMetricEventError: - logger.exception("Failed to issue Reconciliation metric") - - def _get_runner_config(self, name: str) -> RunnerConfig: - """Get the configuration for a runner. - - Sets the proxy settings for the runner according to the configuration - and creates a new runner configuration object. - - Args: - name: Name of the runner. - - Returns: - Configuration for the runner. - """ - if self.proxies and not self.proxies.use_aproxy: - # If the proxy setting are set, then add NO_PROXY local variables. 
- if self.proxies.no_proxy: - no_proxy = f"{self.proxies.no_proxy}," - else: - no_proxy = "" - no_proxy = f"{no_proxy}{name},.svc" - - proxies = RunnerProxySetting( - no_proxy=no_proxy, - http=self.proxies.http, - https=self.proxies.https, - aproxy_address=None, - ) - elif self.proxies.use_aproxy: - proxies = RunnerProxySetting( - aproxy_address=self.proxies.aproxy_address, no_proxy=None, http=None, https=None - ) - else: - proxies = None - - return RunnerConfig( - app_name=self.app_name, - dockerhub_mirror=self.config.dockerhub_mirror, - issue_metrics=self.config.are_metrics_enabled, - labels=self.config.charm_state.charm_config.labels, - lxd_storage_path=self.config.lxd_storage_path, - path=self.config.path, - proxies=proxies, - name=name, - ssh_debug_connections=self.config.charm_state.ssh_debug_connections, - ) - - def _spawn_new_runners(self, count: int, resources: VirtualMachineResources) -> None: - """Spawn new runners. - - Args: - count: Number of runners to spawn. - resources: Configuration of the virtual machine resources. - - Raises: - RunnerCreateError: If there was an error spawning new runner. 
- """ - if not LXDRunnerManager.runner_bin_path.exists(): - raise RunnerCreateError("Unable to create runner due to missing runner binary.") - logger.info("Getting registration token for GitHub runners.") - registration_token = self._clients.github.get_runner_registration_token(self.config.path) - remove_token = self._clients.github.get_runner_remove_token(self.config.path) - logger.info("Attempting to add %i runner(s).", count) - for _ in range(count): - config = self._get_runner_config(self._generate_runner_name()) - runner = Runner(self._clients, config, RunnerStatus()) - try: - self._create_runner(registration_token, resources, runner) - logger.info("Created runner: %s", runner.config.name) - except RunnerCreateError: - logger.error("Unable to create runner: %s", runner.config.name) - runner.remove(remove_token) - logger.info("Cleaned up runner: %s", runner.config.name) - raise - - def _remove_runners(self, count: int, runners: list[Runner]) -> None: - """Remove runners. - - Args: - count: Number of runners to remove. - runners: List of online runners. - """ - logger.info("Attempting to remove %i runner(s).", count) - # Idle runners are online runners that have not taken a job. - idle_runners = [runner for runner in runners if not runner.status.busy] - offset = min(count, len(idle_runners)) - if offset != 0: - logger.info("Removing %i runner(s).", offset) - remove_runners = idle_runners[:offset] - - logger.info("Cleaning up idle runners.") - - remove_token = self._clients.github.get_runner_remove_token(self.config.path) - - for runner in remove_runners: - runner.remove(remove_token) - logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name) - else: - logger.info("There are no idle runners to remove.") - - def _cleanup_offline_runners( - self, runner_states: RunnerNameByHealth, all_runners: list[Runner] - ) -> None: - """Cleanup runners that are not running the github run.sh script. - - Args: - runner_states: Runner names grouped by health. 
- all_runners: All currently running runners. - """ - if not runner_states.unhealthy: - logger.info("No unhealthy runners.") - return - - logger.info("Cleaning up unhealthy runners.") - remove_token = self._clients.github.get_runner_remove_token(self.config.path) - unhealthy_runners = [ - runner for runner in all_runners if runner.config.name in set(runner_states.unhealthy) - ] - - for runner in unhealthy_runners: - if self.config.are_metrics_enabled: - try: - logger.info("Pulling the logs of the crashed runner %s.", runner.config.name) - runner.pull_logs() - except RunnerLogsError: - logger.exception("Failed to get logs of crashed runner %s", runner.config.name) - runner.remove(remove_token) - logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name) - - def reconcile(self, quantity: int, resources: VirtualMachineResources) -> int: - """Bring runners in line with target. - - Args: - quantity: Number of intended runners. - resources: Configuration of the virtual machine resources. - - Returns: - Difference between intended runners and actual runners. 
- """ - start_ts = time.time() - - runners = self._get_runners() - # Add/Remove runners to match the target quantity - online_runners = [ - runner for runner in runners if runner.status.exist and runner.status.online - ] - runner_states = self._get_runner_health_states() - logger.info( - ( - "Expected runner count: %i, Online count: %i, Offline count: %i, " - "healthy count: %i, unhealthy count: %i" - ), - quantity, - len(online_runners), - len(runners) - len(online_runners), - len(runner_states.healthy), - len(runner_states.unhealthy), - ) - - runner_logs.remove_outdated() - if self.config.are_metrics_enabled: - metric_stats = self._issue_runner_metrics() - - self._cleanup_offline_runners(runner_states=runner_states, all_runners=runners) - - delta = quantity - len(runner_states.healthy) - # Spawn new runners - if delta > 0: - self._spawn_new_runners(delta, resources) - elif delta < 0: - self._remove_runners(count=-delta, runners=online_runners) - else: - logger.info("No changes to number of runners needed.") - - if self.config.are_metrics_enabled: - end_ts = time.time() - self._issue_reconciliation_metric( - metric_stats=metric_stats, - reconciliation_start_ts=start_ts, - reconciliation_end_ts=end_ts, - expected_quantity=quantity, - ) - return delta - - def _runners_in_pre_job(self) -> bool: - """Check there exist runners in the pre-job script stage. - - If a runner has taken a job for 1 minute or more, it is assumed to exit the pre-job script. - - Returns: - Whether there are runners that has taken a job and run for less than 1 minute. - """ - now = datetime.now(timezone.utc) - busy_runners = [ - runner for runner in self._get_runners() if runner.status.exist and runner.status.busy - ] - for runner in busy_runners: - # Check if `_work` directory exists, if it exists the runner has started a job. 
- exit_code, stdout, _ = runner.instance.execute( - ["/usr/bin/stat", "-c", "'%w'", "/home/ubuntu/github-runner/_work"] - ) - if exit_code != 0: - return False - # The date is between two single quotes('). - _, output, _ = stdout.read().decode("utf-8").strip().split("'") - date_str, time_str, timezone_str = output.split(" ") - timezone_str = f"{timezone_str[:3]}:{timezone_str[3:]}" - job_start_time = datetime.fromisoformat(f"{date_str}T{time_str[:12]}{timezone_str}") - if job_start_time + timedelta(minutes=1) > now: - return False - return True - - def flush(self, mode: LXDFlushMode = LXDFlushMode.FLUSH_IDLE) -> int: - """Remove existing runners. - - Args: - mode: Strategy for flushing runners. - - Raises: - GithubClientError: If there was an error getting remove-token to unregister runners \ - from GitHub. - - Returns: - Number of runners removed. - """ - try: - remove_token = self._clients.github.get_runner_remove_token(self.config.path) - except GithubClientError: - logger.exception("Failed to get remove-token to unregister runners from GitHub.") - if mode != LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK: - raise - logger.info("Proceeding with flush without remove-token.") - remove_token = None - - # Removing non-busy runners - runners = [ - runner - for runner in self._get_runners() - if runner.status.exist and not runner.status.busy - ] - - logger.info("Removing existing %i non-busy local runners", len(runners)) - - remove_count = len(runners) - for runner in runners: - runner.remove(remove_token) - logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name) - - if mode in ( - LXDFlushMode.FLUSH_IDLE_WAIT_REPO_CHECK, - LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK, - LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK, - ): - for _ in range(5): - if not self._runners_in_pre_job(): - break - time.sleep(30) - else: - logger.warning( - ( - "Proceed with flush runner after timeout waiting on runner in setup " - "stage, pre-job script might fail in currently running jobs" - ) - ) - - if 
mode in ( - LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK, - LXDFlushMode.FLUSH_BUSY, - LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK, - ): - busy_runners = [runner for runner in self._get_runners() if runner.status.exist] - - logger.info("Removing existing %i busy local runners", len(runners)) - - remove_count += len(busy_runners) - for runner in busy_runners: - runner.remove(remove_token) - logger.info(REMOVED_RUNNER_LOG_STR, runner.config.name) - - return remove_count - - def _generate_runner_name(self) -> str: - """Generate a runner name based on charm name. - - Returns: - Generated name of runner. - """ - suffix = secrets.token_hex(12) - return f"{self.instance_name}-{suffix}" - - def _get_runner_github_info(self) -> dict[str, SelfHostedRunner]: - """Get a mapping of runner name to GitHub self-hosted runner info. - - Returns: - A mapping of runner name to GitHub self-hosted runner info. - """ - remote_runners_list: list[SelfHostedRunner] = self._clients.github.get_runner_github_info( - self.config.path - ) - - logger.debug("List of runners found on GitHub:%s", remote_runners_list) - - return { - runner.name: runner - for runner in remote_runners_list - if runner.name.startswith(f"{self.instance_name}-") - } - - def _get_runners(self) -> list[Runner]: - """Query for the list of runners. - - Returns: - List of `Runner` from information on LXD or GitHub. - """ - - def create_runner_info( - name: str, - local_runner: Optional[LxdInstance], - remote_runner: Optional[SelfHostedRunner], - ) -> Runner: - """Create runner from information from GitHub and LXD. - - Args: - name: Name of the runner. - local_runner: The Lxd runner. - remote_runner: The Github self hosted runner. - - Returns: - Wrapped runner information. 
- """ - logger.debug( - ( - "Found runner %s with GitHub info [status: %s, busy: %s, labels: %s] and LXD " - "info [status: %s]" - ), - name, - getattr(remote_runner, "status", None), - getattr(remote_runner, "busy", None), - getattr(remote_runner, "labels", None), - getattr(local_runner, "status", None), - ) - - runner_id = getattr(remote_runner, "id", None) - running = local_runner is not None - online = getattr(remote_runner, "status", None) == "online" - busy = getattr(remote_runner, "busy", None) - - config = self._get_runner_config(name) - return Runner( - self._clients, - config, - RunnerStatus(runner_id, running, online, busy), - local_runner, - ) - - remote_runners = self._get_runner_github_info() - local_runners = { - instance.name: instance - # Pylint cannot find the `all` method. - for instance in self._clients.lxd.instances.all() # pylint: disable=no-member - if instance.name.startswith(f"{self.instance_name}-") - } - - runners: list[Runner] = [] - for name in set(local_runners.keys()) | set(remote_runners.keys()): - runners.append( - create_runner_info(name, local_runners.get(name), remote_runners.get(name)) - ) - - return runners - - def _build_image_command(self) -> list[str]: - """Get command for building runner image. - - Returns: - Command to execute to build runner image. - """ - http_proxy = self.proxies.http or "" - https_proxy = self.proxies.https or "" - no_proxy = self.proxies.no_proxy or "" - - cmd = [ - "/usr/bin/bash", - str(BUILD_IMAGE_SCRIPT_FILENAME.absolute()), - http_proxy, - https_proxy, - no_proxy, - self.config.image, - ] - if LXD_PROFILE_YAML.exists(): - cmd += ["test"] - return cmd - - def has_runner_image(self) -> bool: - """Check if the runner image exists. - - Returns: - Whether the runner image exists. - """ - return self._clients.lxd.images.exists(self.config.image) - - @retry(tries=3, delay=30, local_logger=logger) - def build_runner_image(self) -> None: - """Build the LXD image for hosting runner. 
- - Build container image in test mode, else virtual machine image. - - Raises: - SubprocessError: Unable to build the LXD image. - """ - try: - execute_command(self._build_image_command()) - except SubprocessError as exc: - logger.error("Error executing build image command, %s", exc) - raise - - def schedule_build_runner_image(self) -> None: - """Install cron job for building runner image.""" - # Replace empty string in the build image command list and form a string. - build_image_command = " ".join( - [part if part else "''" for part in self._build_image_command()] - ) - - cron_file = self.cron_path / "build-runner-image" - # Randomized the time executing the building of image to prevent all instances of the charm - # building images at the same time, using up the disk, and network IO of the server. - # The random number are not used for security purposes. - minute = random.randint(0, 59) # nosec B311 - base_hour = random.randint(0, 5) # nosec B311 - hours = ",".join([str(base_hour + offset) for offset in (0, 6, 12, 18)]) - cron_file.write_text(f"{minute} {hours} * * * ubuntu {build_image_command}\n") diff --git a/src/runner_manager_type.py b/src/runner_manager_type.py deleted file mode 100644 index 4c5ca9820..000000000 --- a/src/runner_manager_type.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Types used by RunnerManager class.""" - -from dataclasses import dataclass -from enum import Enum, auto -from pathlib import Path - -import jinja2 -from github_runner_manager.repo_policy_compliance_client import RepoPolicyComplianceClient -from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus - -from charm_state import CharmState, ReactiveConfig -from github_client import GithubClient -from lxd import LxdClient - - -class LXDFlushMode(Enum): - """Strategy for flushing runners. 
- - During pre-job (repo-check), the runners are marked as idle and if the pre-job fails, the - runner falls back to being idle again. Hence wait_repo_check is required. - - Attributes: - FLUSH_IDLE: Flush only idle runners. - FLUSH_IDLE_WAIT_REPO_CHECK: Flush only idle runners, then wait until repo-policy-check is - completed for the busy runners. - FLUSH_BUSY: Flush busy runners. - FLUSH_BUSY_WAIT_REPO_CHECK: Wait until the repo-policy-check is completed before - flush of busy runners. - FORCE_FLUSH_WAIT_REPO_CHECK: Force flush the runners (remove lxd instances even on - gh api issues, like invalid token). - Wait until repo-policy-check is completed before force flush of busy runners. - """ - - FLUSH_IDLE = auto() - FLUSH_IDLE_WAIT_REPO_CHECK = auto() - FLUSH_BUSY = auto() - FLUSH_BUSY_WAIT_REPO_CHECK = auto() - FORCE_FLUSH_WAIT_REPO_CHECK = auto() - - -@dataclass -class RunnerManagerClients: - """Clients for accessing various services. - - Attributes: - github: Used to query GitHub API. - jinja: Used for templating. - lxd: Used to interact with LXD API. - repo: Used to interact with repo-policy-compliance API. - """ - - github: GithubClient - jinja: jinja2.Environment - lxd: LxdClient - repo: RepoPolicyComplianceClient - - -@dataclass -# The instance attributes are all required. -class LXDRunnerManagerConfig: # pylint: disable=too-many-instance-attributes - """Configuration of runner manager. - - Attributes: - are_metrics_enabled: Whether metrics for the runners should be collected. - charm_state: The state of the charm. - image: Name of the image for creating LXD instance. - lxd_storage_path: Path to be used as LXD storage. - path: GitHub repository path in the format '/', or the - GitHub organization name. - service_token: Token for accessing local service. - token: GitHub personal access token to register runner to the - repository or organization. - dockerhub_mirror: URL of dockerhub mirror to use. 
- reactive_config: The configuration to spawn runners reactively. - """ - - charm_state: CharmState - image: str - lxd_storage_path: Path - path: GitHubPath - service_token: str - token: str - dockerhub_mirror: str | None = None - reactive_config: ReactiveConfig | None = None - - @property - def are_metrics_enabled(self) -> bool: - """Whether metrics for the runners should be collected.""" - return self.charm_state.is_metrics_logging_available - - -@dataclass -class RunnerInfo: - """Information from GitHub of a runner. - - Used as a returned type to method querying runner information. - - Attributes: - name: Name of the runner. - status: Status of the runner. - busy: Whether the runner has taken a job. - """ - - name: str - status: GitHubRunnerStatus - busy: bool diff --git a/src/runner_type.py b/src/runner_type.py deleted file mode 100644 index eec8793ee..000000000 --- a/src/runner_type.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Types used by Runner class.""" - - -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from github_runner_manager.types_.github import GitHubPath - -from charm_state import SSHDebugConnection - - -@dataclass -class RunnerNameByHealth: - """Set of runners instance by health state. - - Attributes: - healthy: Runners that are correctly running runner script. - unhealthy: Runners that are not running runner script. - """ - - healthy: tuple[str, ...] - unhealthy: tuple[str, ...] - - -@dataclass -class ProxySetting: - """Represent HTTP-related proxy settings. - - Attributes: - no_proxy: The comma separated URLs to not go through proxy. - http: HTTP proxy URL. - https: HTTPS proxy URL. - aproxy_address: Aproxy URL. - """ - - no_proxy: Optional[str] - http: Optional[str] - https: Optional[str] - aproxy_address: Optional[str] - - -@dataclass -# The instance attributes are all required and is better standalone each. 
-class RunnerConfig: # pylint: disable=too-many-instance-attributes - """Configuration for runner. - - Attributes: - app_name: Application name of the charm. - issue_metrics: Whether to issue metrics. - labels: Custom runner labels. - lxd_storage_path: Path to be used as LXD storage. - name: Name of the runner. - path: GitHub repository path in the format '/', or the GitHub organization - name. - proxies: HTTP(S) proxy settings. - dockerhub_mirror: URL of dockerhub mirror to use. - ssh_debug_connections: The SSH debug server connections metadata. - """ - - app_name: str - issue_metrics: bool - labels: tuple[str] - lxd_storage_path: Path - name: str - path: GitHubPath - proxies: ProxySetting - dockerhub_mirror: str | None = None - ssh_debug_connections: list[SSHDebugConnection] | None = None - - -@dataclass -class RunnerStatus: - """Status of runner. - - Attributes: - runner_id: ID of the runner. - exist: Whether the runner instance exists on LXD. - online: Whether GitHub marks this runner as online. - busy: Whether GitHub marks this runner as busy. - """ - - runner_id: Optional[int] = None - exist: bool = False - online: bool = False - busy: bool = False - - -@dataclass -class RunnerGithubInfo: - """GitHub info of a runner. - - Attributes: - runner_name: Name of the runner. - runner_id: ID of the runner assigned by GitHub. - online: Whether GitHub marks this runner as online. - busy: Whether GitHub marks this runner as busy. - """ - - runner_name: str - runner_id: int - online: bool - busy: bool diff --git a/src/shared_fs.py b/src/shared_fs.py deleted file mode 100644 index e8af848c1..000000000 --- a/src/shared_fs.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
- -"""Classes and functions to operate on the shared filesystem between the charm and the runners.""" -import logging -import shutil -from pathlib import Path -from typing import Iterator - -import github_runner_manager.metrics.storage as metrics_storage -from github_runner_manager.types_ import SystemUserConfig - -from errors import ( - CreateMetricsStorageError, - DeleteMetricsStorageError, - GetMetricsStorageError, - SharedFilesystemMountError, - SubprocessError, -) -from utilities import execute_command - -DIR_NO_MOUNTPOINT_EXIT_CODE = 32 -METRICS_STORAGE_USER_CONFIG = SystemUserConfig( - user="ubuntu", - group="ubuntu", -) - -logger = logging.getLogger(__name__) - -FILESYSTEM_OWNER = "ubuntu:ubuntu" -FILESYSTEM_IMAGES_PATH = Path("/home/ubuntu/runner-fs-images") -FILESYSTEM_QUARANTINE_PATH = Path("/home/ubuntu/runner-fs-quarantine") -FILESYSTEM_SIZE = "1M" - - -class _UnmountSharedFilesystemError(Exception): - """Represents an error unmounting a shared filesystem.""" - - -def create(runner_name: str) -> metrics_storage.MetricsStorage: - """Create a shared filesystem for the runner. - - The method is not idempotent and will raise an exception - if the shared filesystem already exists. - - Args: - runner_name: The name of the runner. - - Returns: - The shared filesystem object. - - Raises: - CreateMetricsStorageError: If the creation of the shared filesystem fails. 
- """ - ms = metrics_storage.StorageManager(system_user_config=METRICS_STORAGE_USER_CONFIG).create( - runner_name - ) - try: - FILESYSTEM_IMAGES_PATH.mkdir(exist_ok=True) - except OSError as exc: - raise CreateMetricsStorageError("Failed to create shared filesystem images path") from exc - - runner_img_path = _get_runner_image_path(runner_name) - - try: - execute_command( - ["dd", "if=/dev/zero", f"of={runner_img_path}", f"bs={FILESYSTEM_SIZE}", "count=1"], - check_exit=True, - ) - execute_command(["mkfs.ext4", f"{runner_img_path}"], check_exit=True) - _mount(runner_fs_path=ms.path, runner_image_path=runner_img_path) - execute_command(["sudo", "chown", FILESYSTEM_OWNER, str(ms.path)], check_exit=True) - except (SubprocessError, SharedFilesystemMountError) as exc: - raise CreateMetricsStorageError( - f"Failed to create shared filesystem for runner {runner_name}" - ) from exc - return ms - - -def list_all() -> Iterator[metrics_storage.MetricsStorage]: - """List all the metric storages. - - Yields: - A metrics storage object. - """ - for ms in metrics_storage.StorageManager( - system_user_config=METRICS_STORAGE_USER_CONFIG - ).list_all(): - try: - # we try to check if it is mounted by using this module's get function - get(ms.runner_name) - except GetMetricsStorageError: - logger.error("Failed to get shared filesystem for runner %s", ms.runner_name) - else: - yield ms - - -def get(runner_name: str) -> metrics_storage.MetricsStorage: - """Get the shared filesystem for the runner. - - Mounts the filesystem if it is not currently mounted. - - Args: - runner_name: The name of the runner. - - Returns: - The shared filesystem object. - - Raises: - GetMetricsStorageError: If the shared filesystem could not be retrieved/mounted. 
- """ - ms = metrics_storage.StorageManager(system_user_config=METRICS_STORAGE_USER_CONFIG).get( - runner_name - ) - - try: - is_mounted = _is_mountpoint(ms.path) - except SharedFilesystemMountError as exc: - raise GetMetricsStorageError( - f"Failed to determine if shared filesystem is mounted for runner {runner_name}" - ) from exc - - if not is_mounted: - logger.info( - "Shared filesystem for runner %s is not mounted (may happen after reboot). " - "Will be mounted now.", - runner_name, - ) - runner_img_path = _get_runner_image_path(runner_name) - try: - _mount(runner_fs_path=ms.path, runner_image_path=runner_img_path) - except SharedFilesystemMountError as exc: - raise GetMetricsStorageError( - f"Shared filesystem for runner {runner_name} could not be mounted." - ) from exc - - return ms - - -def delete(runner_name: str) -> None: - """Delete the shared filesystem for the runner. - - Args: - runner_name: The name of the runner. - - Raises: - DeleteMetricsStorageError: If the shared filesystem could not be deleted. 
- """ - try: - runner_fs_path = ( - metrics_storage.StorageManager(system_user_config=METRICS_STORAGE_USER_CONFIG) - .get(runner_name) - .path - ) - except GetMetricsStorageError as exc: - raise DeleteMetricsStorageError( - f"Failed to get shared filesystem for runner {runner_name}" - ) from exc - - try: - _unmount_runner_fs_path(runner_fs_path) - except _UnmountSharedFilesystemError as exc: - raise DeleteMetricsStorageError( - "Unexpected error while deleting shared Filesystem for runner " - f"{runner_name}: {str(exc)}" - ) from exc - - runner_image_path = _get_runner_image_path(runner_name) - try: - runner_image_path.unlink(missing_ok=True) - except OSError as exc: - raise DeleteMetricsStorageError( - f"Failed to remove runner image for shared filesystem of runner {runner_name}" - ) from exc - - try: - shutil.rmtree(runner_fs_path) - except OSError as exc: - raise DeleteMetricsStorageError( - f"Failed to remove shared filesystem for runner {runner_name}" - ) from exc - - -def move_to_quarantine( - runner_name: str, -) -> None: - """Archive the mshared filesystem for the runner and delete it. - - Args: - runner_name: The name of the runner. - """ - metrics_storage.StorageManager( - system_user_config=METRICS_STORAGE_USER_CONFIG - ).move_to_quarantine(runner_name) - - -def _unmount_runner_fs_path(runner_fs_path: Path) -> Path: - """Unmount shared filesystem for given runner. - - Args: - runner_fs_path: The path to unmount. - - Raises: - _UnmountSharedFilesystemError: If there was an error trying to unmount shared filesystem. - - Returns: - The runner shared filesystem path that was unmounted. 
- """ - if not runner_fs_path.exists(): - raise _UnmountSharedFilesystemError(f"Shared filesystem '{runner_fs_path}' not found.") - try: - is_mounted = _is_mountpoint(runner_fs_path) - except SharedFilesystemMountError as exc: - raise _UnmountSharedFilesystemError( - f"Failed to determine if shared filesystem is mounted {runner_fs_path}" - ) from exc - - if not is_mounted: - logger.warning("Shared filesystem for runner %s is not mounted", runner_fs_path) - else: - try: - execute_command( - ["sudo", "umount", str(runner_fs_path)], - check_exit=True, - ) - except SubprocessError as exc: - raise _UnmountSharedFilesystemError( - f"Failed to unmount shared filesystem for runner {runner_fs_path}" - ) from exc - - return runner_fs_path - - -def _is_mountpoint(path: Path) -> bool: - """Check if the path is a mountpoint. - - Args: - path: The path to check. - - Returns: - True if the path is a mountpoint, False otherwise. - - Raises: - SharedFilesystemMountError: If the check fails. - """ - _, ret_code = execute_command(["mountpoint", "-q", str(path)], check_exit=False) - if ret_code not in (0, DIR_NO_MOUNTPOINT_EXIT_CODE): - raise SharedFilesystemMountError( - f"Failed to check if path {path} is a mountpoint. " - f"mountpoint command return code: {ret_code}" - ) - return ret_code == 0 - - -def _mount(runner_fs_path: Path, runner_image_path: Path) -> None: - """Mount the shared filesystem. - - Args: - runner_fs_path: The path of the shared filesystem. - runner_image_path: The path of the runner image. - - Raises: - SharedFilesystemMountError: If the mount fails. - """ - try: - execute_command( - ["sudo", "mount", "-o", "loop", str(runner_image_path), str(runner_fs_path)], - check_exit=True, - ) - except SubprocessError as exc: - raise SharedFilesystemMountError( - f"Failed to mount shared filesystem {runner_fs_path}" - ) from exc - - -def _get_runner_image_path(runner_name: str) -> Path: - """Get the path of the runner image. - - Args: - runner_name: The name of the runner. 
- - Returns: - The path of the runner image. - """ - return FILESYSTEM_IMAGES_PATH / f"{runner_name}.img" diff --git a/src/utilities.py b/src/utilities.py index 86c32c4d2..f41144764 100644 --- a/src/utilities.py +++ b/src/utilities.py @@ -83,38 +83,6 @@ def get_env_var(env_var: str) -> Optional[str]: return os.environ.get(env_var.upper(), os.environ.get(env_var.lower(), None)) -def bytes_with_unit_to_kib(num_bytes: str) -> int: - """Convert a positive integer followed by a unit to number of kibibytes. - - Args: - num_bytes: A positive integer followed by one of the following unit: KiB, MiB, GiB, TiB, - PiB, EiB. - - Raises: - ValueError: If invalid unit was detected. - - Returns: - Number of kilobytes. - """ - num_of_kib = { - "KiB": 1024**0, - "MiB": 1024**1, - "GiB": 1024**2, - "TiB": 1024**3, - "PiB": 1024**4, - "EiB": 1024**5, - } - - num = num_bytes[:-3] - unit = num_bytes[-3:] - if unit not in num_of_kib: - raise ValueError( - "Must be a positive integer followed by a unit", - ) - - return num_of_kib[unit] * int(num) - - # This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335 def remove_residual_venv_dirs() -> None: # pragma: no cover """Remove the residual empty directories from last revision if it exists.""" diff --git a/templates/env.j2 b/templates/env.j2 deleted file mode 100644 index c0de54aad..000000000 --- a/templates/env.j2 +++ /dev/null @@ -1,27 +0,0 @@ -PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin -{% if proxies.http %} -HTTP_PROXY={{proxies.http}} -http_proxy={{proxies.http}} -{% endif %} -{% if proxies.https %} -HTTPS_PROXY={{proxies.https}} -https_proxy={{proxies.https}} -{% endif %} -{% if proxies.ftp_proxy %} -{% endif %} -{% if proxies.no_proxy %} -NO_PROXY={{proxies.no_proxy}} -no_proxy={{proxies.no_proxy}} -{% endif %} -{% if dockerhub_mirror %} -DOCKERHUB_MIRROR={{dockerhub_mirror}} -CONTAINER_REGISTRY_URL={{dockerhub_mirror}} -{% endif %} -LANG=C.UTF-8 
-ACTIONS_RUNNER_HOOK_JOB_STARTED={{pre_job_script}} -{% if ssh_debug_info %} -TMATE_SERVER_HOST={{ssh_debug_info['host']}} -TMATE_SERVER_PORT={{ssh_debug_info['port']}} -TMATE_SERVER_RSA_FINGERPRINT={{ssh_debug_info['rsa_fingerprint']}} -TMATE_SERVER_ED25519_FINGERPRINT={{ssh_debug_info['ed25519_fingerprint']}} -{% endif %} diff --git a/templates/environment.j2 b/templates/environment.j2 deleted file mode 100644 index e5a873229..000000000 --- a/templates/environment.j2 +++ /dev/null @@ -1,19 +0,0 @@ -PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin -{% if proxies.http %} -HTTP_PROXY={{proxies.http}} -http_proxy={{proxies.http}} -{% endif %} -{% if proxies.https %} -HTTPS_PROXY={{proxies.https}} -https_proxy={{proxies.https}} -{% endif %} -{% if proxies.no_proxy %} -NO_PROXY={{proxies.no_proxy}} -no_proxy={{proxies.no_proxy}} -{% endif %} -{% if ssh_debug_info %} -TMATE_SERVER_HOST={{ssh_debug_info['host']}} -TMATE_SERVER_PORT={{ssh_debug_info['port']}} -TMATE_SERVER_RSA_FINGERPRINT={{ssh_debug_info['rsa_fingerprint']}} -TMATE_SERVER_ED25519_FINGERPRINT={{ssh_debug_info['ed25519_fingerprint']}} -{% endif %} diff --git a/templates/pre-job.j2 b/templates/pre-job.j2 deleted file mode 100644 index c3e810988..000000000 --- a/templates/pre-job.j2 +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env bash - -timestamp=$(date +%s) - -# Disable exit-on-error, due the need for error handling. -set +e - -{% if issue_metrics %} -jq -n \ - --arg workflow "$GITHUB_WORKFLOW" \ - --arg repository "$GITHUB_REPOSITORY" \ - --arg event "$GITHUB_EVENT_NAME" \ - --argjson timestamp "$timestamp" \ - --arg workflow_run_id "$GITHUB_RUN_ID" \ - '{ - "workflow": $workflow, - "repository": $repository, - "event": $event, - "timestamp": $timestamp, - "workflow_run_id": $workflow_run_id - }' > "{{ metrics_exchange_path }}/pre-job-metrics.json" || true -{% endif %} - -{% if do_repo_policy_check %} - - # Log common env variables. 
- logger -s "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}, \ - GITHUB_REPOSITORY: ${GITHUB_REPOSITORY}, \ - GITHUB_SHA: ${GITHUB_SHA}" - - # Prepare curl arguments - CURL_ARGS=( - --silent - --show-error - --max-time 60 - --noproxy '*' - --fail-with-body - -o repo_check_output.txt - --stderr repo_check_error.txt - --write-out "%{http_code}" - -H 'Authorization: Bearer {{repo_policy_one_time_token}}' - -H 'Content-Type: application/json' - ) - - # Set REPO_CHECK to a failure code as a safe guard. - REPO_CHECK=1 - - # Special Workflow dispatch repo-policy-compliance service check designed to fail: - if [[ "${GITHUB_WORKFLOW}" == "Workflow Dispatch Failure Tests 2a34f8b1-41e4-4bcb-9bbf-7a74e6c482f7" ]]; then - logger -s "Running the test workflow for integration tests, this test is configured to fail" - - REPO_CHECK_HTTP_CODE=$(curl "${CURL_ARGS[@]}" \ - -X POST \ - {{repo_policy_base_url}}/always-fail/check-run) - REPO_CHECK=$? - - # Pull request - Request repo-policy-compliance service check: - elif [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then - - GITHUB_SOURCE_REPOSITORY=$(cat "${GITHUB_EVENT_PATH}" | jq -r '.pull_request.head.repo.full_name') - COMMIT_SHA=$(cat "${GITHUB_EVENT_PATH}" | jq -r '.pull_request.head.sha') - - logger -s " \ - GITHUB_SOURCE_REPOSITORY: ${GITHUB_SOURCE_REPOSITORY} \ - GITHUB_BASE_REF: ${GITHUB_BASE_REF}, \ - GITHUB_HEAD_REF: ${GITHUB_HEAD_REF}, \ - COMMIT_SHA: ${COMMIT_SHA}" - - REPO_CHECK_HTTP_CODE=$(curl "${CURL_ARGS[@]}" \ - -d "{\"repository_name\": \"${GITHUB_REPOSITORY}\", \"source_repository_name\": \"${GITHUB_SOURCE_REPOSITORY}\", \"target_branch_name\": \"${GITHUB_BASE_REF}\", \"source_branch_name\": \"${GITHUB_HEAD_REF}\", \"commit_sha\": \"${COMMIT_SHA}\"}" \ - {{repo_policy_base_url}}/pull_request/check-run) - REPO_CHECK=$? - - else - # Workflow dispatch, Push and Schedule use their respective endpoints, all other events use default by default. 
- CHECK_NAME="default" - if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]] || [[ "${GITHUB_EVENT_NAME}" == "push" ]] || [[ "${GITHUB_EVENT_NAME}" == "schedule" ]]; then - CHECK_NAME="${GITHUB_EVENT_NAME}" - fi - - logger -s "GITHUB_REF_NAME: ${GITHUB_REF_NAME}" - - REPO_CHECK_HTTP_CODE=$(curl "${CURL_ARGS[@]}" \ - -d "{\"repository_name\": \"${GITHUB_REPOSITORY}\"}" \ - {{repo_policy_base_url}}/${CHECK_NAME}/check-run) - REPO_CHECK=$? - - fi - - if [[ $REPO_CHECK -ne 0 ]]; then - if [[ -s repo_check_output.txt ]]; then - logger -p user.error -s -f repo_check_output.txt - fi - if [[ -s repo_check_error.txt ]]; then - logger -p user.error -s -f repo_check_error.txt - fi - if [[ $REPO_CHECK_HTTP_CODE -ge 500 ]] && [[ $REPO_CHECK_HTTP_CODE -lt 600 ]]; then - logger -p user.error -s "The repository setup check failed with HTTP code ${REPO_CHECK_HTTP_CODE}." - logger -p user.error -s "This runner will be stopped or lost, please contact the repo-policy-compliance server operators or try again later." - else - logger -p user.error -s "Stopping execution of jobs due to repository setup is not compliant with policies." - logger -p user.error -s "This runner will be stopped or lost, please fix the setup of the repository, then rerun this job." - fi - - # Killing the runner.Listener process to stop the runner application. This will prevent jobs from being executed. - pkill -2 Runner.Listener - - - {% if issue_metrics %} - # Write Post Job metrics with status "repo-policy-check-failure" . - # We write it here, rather than relying on the post-job script, - # as it may not run due to the poweroff command below. 
- post_job_timestamp=$(date +%s) - - jq -n \ - --argjson timestamp "$post_job_timestamp" \ - --argjson http_code "$REPO_CHECK_HTTP_CODE" \ - '{ - "timestamp": $timestamp, - "status": "repo-policy-check-failure", - "status_info": {code: $http_code} - }' > "{{ metrics_exchange_path }}/post-job-metrics.json" || true - {% endif %} - - # Shutdown the instance as a safe guard. The time delay is needed for the runner application to upload the logs. - bash -c "sleep 10; sudo systemctl poweroff -i" & - - exit 1 - - fi - - logger -s "The repository setup check has passed, proceeding to execute jobs" -{% endif %} - -if [[ -n "$DOCKERHUB_MIRROR" ]]; then - logger -s "A private docker registry is setup as a dockerhub mirror for this self-hosted runner." - logger -s "The docker daemon on this self-hosted runner is configured to use the dockerhub mirror." - logger -s "The URL to the private docker registry is in the DOCKERHUB_MIRROR environment variable." - logger -s "For microk8s, see instructions here: https://microk8s.io/docs/dockerhub-limits" -fi diff --git a/templates/repo-policy-compliance.service.j2 b/templates/repo-policy-compliance.service.j2 deleted file mode 100644 index 34a900509..000000000 --- a/templates/repo-policy-compliance.service.j2 +++ /dev/null @@ -1,23 +0,0 @@ -[Unit] -Description=Gunicorn instance to serve repo policy compliance endpoints -After=network.target - -[Service] -User=ubuntu -Group=www-data -WorkingDirectory={{working_directory}} -Environment="GITHUB_TOKEN={{github_token}}" -Environment="CHARM_TOKEN={{charm_token}}" -{% if proxies.http %} -Environment="HTTP_PROXY={{proxies.http}}" -Environment="http_proxy={{proxies.http}}" -{% endif %} -{% if proxies.https %} -Environment="HTTPS_PROXY={{proxies.https}}" -Environment="https_proxy={{proxies.https}}" -{% endif %} -{% if proxies.no_proxy %} -Environment="NO_PROXY={{proxies.no_proxy}}" -Environment="no_proxy={{proxies.no_proxy}}" -{% endif %} -ExecStart=/usr/bin/gunicorn --bind 0.0.0.0:8080 --timeout 
60 app:app diff --git a/templates/start.j2 b/templates/start.j2 deleted file mode 100644 index 625edf6df..000000000 --- a/templates/start.j2 +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# Don't exit on error to make sure the machine gets powered off. -set +e - -{% if issue_metrics %} - -write_post_metrics(){ - # Expects the exit code of the run.sh script as the first argument. - - # Only write the post-job metrics if the file does not already exist - which may indicate - # that the job has failed inside pre-job. - - if [ -f /metrics-exchange/post-job-metrics.json ]; then - return - fi - - timestamp=$(date +%s) - - # Write the post-job metrics using status abnormal and exit code if exit code is non-zero - if [ "$1" != "0" ]; then - jq -n \ - --argjson timestamp "$timestamp" \ - --arg status "abnormal" \ - --argjson exit_code "$1" \ - '{ - "timestamp": $timestamp, - "status": $status, - "status_info": {code: $exit_code} - }' > /metrics-exchange/post-job-metrics.json - return - else - # If exit code is zero, write the post-job metrics using status normal - jq -n \ - --argjson timestamp "$timestamp" \ - '{ - "timestamp": $timestamp, - "status": "normal" - }' > /metrics-exchange/post-job-metrics.json - fi -} - -(/home/ubuntu/github-runner/run.sh; write_post_metrics $?; sudo systemctl poweroff -i) &>/dev/null & -{% else %} -(/home/ubuntu/github-runner/run.sh; sudo systemctl poweroff -i) &>/dev/null & -{% endif %} diff --git a/templates/systemd-docker-proxy.j2 b/templates/systemd-docker-proxy.j2 deleted file mode 100644 index d0cbc3f02..000000000 --- a/templates/systemd-docker-proxy.j2 +++ /dev/null @@ -1,10 +0,0 @@ -[Service] -{% if proxies.http %} -Environment="HTTP_PROXY={{proxies.http}}" -{% endif %} -{% if proxies.https %} -Environment="HTTPS_PROXY={{proxies.https}}" -{% endif %} -{% if proxies.no_proxy %} -Environment="NO_PROXY={{proxies.no_proxy}}" -{% endif %} diff --git a/tests/conftest.py b/tests/conftest.py index c07f2e872..4942b019c 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -47,11 +47,6 @@ def pytest_addoption(parser: Parser): action="store", help="No proxy configuration value for juju model proxy configuration.", ) - parser.addoption( - "--loop-device", - action="store", - help="The loop device to create shared FS for metrics logging", - ) parser.addoption( "--openstack-clouds-yaml", action="store", diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index cb41d84a5..20a5642da 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -19,6 +19,7 @@ from github import Github, GithubException from github.Branch import Branch from github.Repository import Repository +from github_runner_manager.github_client import GithubClient from juju.application import Application from juju.client._definitions import FullStatus, UnitStatus from juju.model import Model @@ -33,18 +34,13 @@ PATH_CONFIG_NAME, USE_APROXY_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME, - InstanceType, ) -from github_client import GithubClient from tests.integration.helpers.common import ( MONGODB_APP_NAME, - InstanceHelper, deploy_github_runner_charm, - inject_lxd_profile, reconcile, wait_for, ) -from tests.integration.helpers.lxd import LXDInstanceHelper, ensure_charm_has_runner from tests.integration.helpers.openstack import OpenStackInstanceHelper, PrivateEndpointConfigs from tests.status_name import ACTIVE @@ -55,20 +51,6 @@ nest_asyncio.apply() -@pytest_asyncio.fixture(scope="module", name="instance_type") -async def instance_type_fixture( - request: pytest.FixtureRequest, pytestconfig: pytest.Config -) -> InstanceType: - # Due to scope being module we cannot use request.node.get_closes_marker as openstack - # mark is not available in this scope. 
- openstack_marker = pytestconfig.getoption("-m") == "openstack" - - if openstack_marker: - return InstanceType.OPENSTACK - else: - return InstanceType.LOCAL_LXD - - @pytest.fixture(scope="module") def metadata() -> dict[str, Any]: """Metadata information of the charm.""" @@ -102,18 +84,12 @@ def openstack_clouds_yaml_fixture(pytestconfig: pytest.Config) -> str | None: @pytest.fixture(scope="module") -def charm_file( - pytestconfig: pytest.Config, loop_device: Optional[str], openstack_clouds_yaml: Optional[str] -) -> str: +def charm_file(pytestconfig: pytest.Config, openstack_clouds_yaml: Optional[str]) -> str: """Path to the built charm.""" charm = pytestconfig.getoption("--charm-file") assert charm, "Please specify the --charm-file command line option" charm_path_str = f"./{charm}" - if openstack_clouds_yaml: - return charm_path_str - - inject_lxd_profile(charm_file=Path(charm_path_str), loop_device=loop_device) return charm_path_str @@ -192,12 +168,6 @@ def openstack_no_proxy_fixture(pytestconfig: pytest.Config) -> str: return "" if no_proxy is None else no_proxy -@pytest.fixture(scope="module") -def loop_device(pytestconfig: pytest.Config) -> Optional[str]: - """Configured loop_device setting.""" - return pytestconfig.getoption("--loop-device") - - @pytest.fixture(scope="module", name="private_endpoint_config") def private_endpoint_config_fixture(pytestconfig: pytest.Config) -> PrivateEndpointConfigs | None: """The private endpoint configuration values.""" @@ -341,34 +311,12 @@ def runner_manager_github_client(token: str) -> GithubClient: @pytest_asyncio.fixture(scope="module") async def app_no_runner( model: Model, - charm_file: str, - app_name: str, - path: str, - token: str, - http_proxy: str, - https_proxy: str, - no_proxy: str, - existing_app: Optional[str], + basic_app: Application, ) -> AsyncIterator[Application]: """Application with no runner.""" - if existing_app: - application = model.applications[existing_app] - else: - # Set the scheduled event to 1 
hour to avoid interfering with the tests. - application = await deploy_github_runner_charm( - model=model, - charm_file=charm_file, - app_name=app_name, - path=path, - token=token, - runner_storage="memory", - http_proxy=http_proxy, - https_proxy=https_proxy, - no_proxy=no_proxy, - reconcile_interval=60, - ) - await model.wait_for_idle(apps=[application.name], status=ACTIVE) - return application + await basic_app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) + await model.wait_for_idle(apps=[basic_app.name], status=ACTIVE, timeout=90 * 60) + yield basic_app @pytest_asyncio.fixture(scope="module", name="image_builder") @@ -433,7 +381,6 @@ async def app_openstack_runner_fixture( app_name=app_name, path=path, token=token, - runner_storage="juju-storage", http_proxy=openstack_http_proxy, https_proxy=openstack_https_proxy, no_proxy=openstack_no_proxy, @@ -450,7 +397,6 @@ async def app_openstack_runner_fixture( LABELS_CONFIG_NAME: app_name, }, wait_idle=False, - use_local_lxd=False, ) await model.integrate(f"{image_builder.name}:image", f"{application.name}:image") await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=90 * 60) @@ -458,18 +404,6 @@ async def app_openstack_runner_fixture( return application -@pytest_asyncio.fixture(scope="module") -async def app_one_runner(model: Model, app_no_runner: Application) -> AsyncIterator[Application]: - """Application with a single runner. - - Test should ensure it returns with the application in a good state and has - one runner. 
- """ - await ensure_charm_has_runner(app=app_no_runner, model=model) - - return app_no_runner - - @pytest_asyncio.fixture(scope="module", name="app_scheduled_events") async def app_scheduled_events_fixture( model: Model, @@ -514,7 +448,6 @@ async def app_runner( app_name=f"{app_name}-test", path=path, token=token, - runner_storage="memory", http_proxy=http_proxy, https_proxy=https_proxy, no_proxy=no_proxy, @@ -541,7 +474,6 @@ async def app_no_wait_fixture( app_name=app_name, path=path, token=token, - runner_storage="juju-storage", http_proxy=http_proxy, https_proxy=https_proxy, no_proxy=no_proxy, @@ -573,7 +505,7 @@ async def tmate_ssh_server_unit_ip_fixture( status: FullStatus = await model.get_status([tmate_ssh_server_app.name]) try: unit_status: UnitStatus = next( - iter(status.applications[tmate_ssh_server_app.name].units.values()) + iter(status.applications[tmate_ssh_server_app.name].units.values()) # type: ignore[union-attr] ) assert unit_status.public_address, "Invalid unit address" return unit_status.public_address @@ -664,34 +596,6 @@ async def app_with_forked_repo( return basic_app -@pytest_asyncio.fixture(scope="module") -async def app_juju_storage( - model: Model, - charm_file: str, - app_name: str, - path: str, - token: str, - http_proxy: str, - https_proxy: str, - no_proxy: str, -) -> AsyncIterator[Application]: - """Application with juju storage setup.""" - # Set the scheduled event to 1 hour to avoid interfering with the tests. 
- application = await deploy_github_runner_charm( - model=model, - charm_file=charm_file, - app_name=app_name, - path=path, - token=token, - runner_storage="juju-storage", - http_proxy=http_proxy, - https_proxy=https_proxy, - no_proxy=no_proxy, - reconcile_interval=60, - ) - return application - - @pytest_asyncio.fixture(scope="module", name="test_github_branch") async def test_github_branch_fixture(github_repository: Repository) -> AsyncIterator[Branch]: """Create a new branch for testing, from latest commit in current branch.""" @@ -727,22 +631,8 @@ def get_branch(): @pytest_asyncio.fixture(scope="module", name="app_for_metric") async def app_for_metric_fixture( - model: Model, basic_app: Application, - instance_type: InstanceType, - existing_app: Optional[str], ) -> AsyncIterator[Application]: - # OpenStack integration does not need the grafana agent to collect metric. - if instance_type == InstanceType.LOCAL_LXD and not existing_app: - grafana_agent = await model.deploy( - "grafana-agent", - application_name=f"grafana-agent-{basic_app.name}", - channel="latest/edge", - ) - await model.relate(f"{basic_app.name}:cos-agent", f"{grafana_agent.name}:cos-agent") - await model.wait_for_idle(apps=[basic_app.name], status=ACTIVE) - await model.wait_for_idle(apps=[grafana_agent.name]) - yield basic_app @@ -774,26 +664,13 @@ async def app_for_reactive_fixture( @pytest_asyncio.fixture(scope="module", name="basic_app") -async def basic_app_fixture( - request: pytest.FixtureRequest, instance_type: InstanceType -) -> Application: +async def basic_app_fixture(request: pytest.FixtureRequest) -> Application: """Setup the charm with the basic configuration.""" - if instance_type == InstanceType.OPENSTACK: - app = request.getfixturevalue("app_openstack_runner") - else: - app = request.getfixturevalue("app_no_runner") - return app + return request.getfixturevalue("app_openstack_runner") @pytest_asyncio.fixture(scope="function", name="instance_helper") -async def 
instance_helper_fixture( - request: pytest.FixtureRequest, instance_type: InstanceType -) -> InstanceHelper: +async def instance_helper_fixture(request: pytest.FixtureRequest) -> OpenStackInstanceHelper: """Instance helper fixture.""" - helper: InstanceHelper - if instance_type == InstanceType.OPENSTACK: - openstack_connection = request.getfixturevalue("openstack_connection") - helper = OpenStackInstanceHelper(openstack_connection=openstack_connection) - else: - helper = LXDInstanceHelper() - return helper + openstack_connection = request.getfixturevalue("openstack_connection") + return OpenStackInstanceHelper(openstack_connection=openstack_connection) diff --git a/tests/integration/helpers/charm_metrics.py b/tests/integration/helpers/charm_metrics.py index 6baea6990..330da17f1 100644 --- a/tests/integration/helpers/charm_metrics.py +++ b/tests/integration/helpers/charm_metrics.py @@ -20,12 +20,8 @@ from juju.application import Application from juju.unit import Unit -from tests.integration.helpers.common import ( - InstanceHelper, - get_file_content, - run_in_unit, - wait_for, -) +from tests.integration.helpers.common import get_file_content, run_in_unit, wait_for +from tests.integration.helpers.openstack import OpenStackInstanceHelper logger = logging.getLogger(__name__) @@ -39,7 +35,7 @@ async def wait_for_workflow_to_start( unit: Unit, workflow: Workflow, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, branch: Branch | None = None, started_time: float | None = None, timeout: int = 20 * 60, @@ -105,28 +101,6 @@ async def clear_metrics_log(unit: Unit) -> None: assert retcode == 0, f"Failed to clear metrics log, {stderr}" -async def print_loop_device_info(unit: Unit, loop_device: str) -> None: - """Print loop device info on the unit. - - Args: - unit: The unit to print the loop device info on. - loop_device: The loop device to print the info for. 
- """ - retcode, stdout, stderr = await run_in_unit( - unit=unit, - command="sudo losetup -lJ", - ) - assert retcode == 0, f"Failed to get loop devices: {stdout} {stderr}" - assert stdout is not None, "Failed to get loop devices, no stdout message" - loop_devices_info = json.loads(stdout) - for loop_device_info in loop_devices_info["loopdevices"]: - if loop_device_info["name"] == loop_device: - logging.info("Loop device %s info: %s", loop_device, loop_device_info) - break - else: - logging.info("Loop device %s not found", loop_device) - - async def get_metrics_log(unit: Unit) -> str: """Retrieve the metrics log from the unit. @@ -140,7 +114,10 @@ async def get_metrics_log(unit: Unit) -> str: async def cancel_workflow_run( - unit: Unit, workflow: Workflow, instance_helper: InstanceHelper, branch: Branch | None = None + unit: Unit, + workflow: Workflow, + instance_helper: OpenStackInstanceHelper, + branch: Branch | None = None, ): """Cancel the workflow run. diff --git a/tests/integration/helpers/common.py b/tests/integration/helpers/common.py index fcc0731fa..b4ac857cb 100644 --- a/tests/integration/helpers/common.py +++ b/tests/integration/helpers/common.py @@ -6,10 +6,8 @@ import inspect import logging import pathlib -import subprocess import time import typing -import zipfile from datetime import datetime, timezone from functools import partial from typing import Awaitable, Callable, ParamSpec, TypeVar, cast @@ -27,15 +25,12 @@ from juju.unit import Unit from charm_state import ( - DENYLIST_CONFIG_NAME, PATH_CONFIG_NAME, RECONCILE_INTERVAL_CONFIG_NAME, - RUNNER_STORAGE_CONFIG_NAME, TEST_MODE_CONFIG_NAME, TOKEN_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME, ) -from runner_manager import LXDRunnerManager from tests.status_name import ACTIVE DISPATCH_TEST_WORKFLOW_FILENAME = "workflow_dispatch_test.yaml" @@ -51,143 +46,6 @@ logger = logging.getLogger(__name__) -class InstanceHelper(typing.Protocol): - """Helper for running commands in instances.""" - - async def 
run_in_instance( - self, - unit: Unit, - command: str, - timeout: int | None = None, - assert_on_failure: bool = False, - assert_msg: str | None = None, - ) -> tuple[int, str | None, str | None]: - """Run command in instance. - - Args: - unit: Juju unit to execute the command in. - command: Command to execute. - timeout: Amount of time to wait for the execution. - assert_on_failure: Perform assertion on non-zero exit code. - assert_msg: Message for the failure assertion. - """ - ... - - async def expose_to_instance( - self, - unit: Unit, - port: int, - host: str = "localhost", - ) -> None: - """Expose a port on the juju machine to the OpenStack instance. - - Uses SSH remote port forwarding from the juju machine to the OpenStack instance containing - the runner. - - Args: - unit: The juju unit of the github-runner charm. - port: The port on the juju machine to expose to the runner. - host: Host for the reverse tunnel. - """ - ... - - async def ensure_charm_has_runner(self, app: Application): - """Ensure charm has a runner. - - Args: - app: The GitHub Runner Charm app to create the runner for. - """ - ... - - async def get_runner_names(self, unit: Unit) -> list[str]: - """Get the name of all the runners in the unit. - - Args: - unit: The GitHub Runner Charm unit to get the runner names for. - """ - ... - - async def get_runner_name(self, unit: Unit) -> str: - """Get the name of the runner. - - Args: - unit: The GitHub Runner Charm unit to get the runner name for. - """ - ... - - async def delete_single_runner(self, unit: Unit) -> None: - """Delete the only runner. - - Args: - unit: The GitHub Runner Charm unit to delete the runner name for. - """ - ... - - -async def check_runner_binary_exists(unit: Unit) -> bool: - """Checks if runner binary exists in the charm. - - Args: - unit: Unit instance to check for the LXD profile. - - Returns: - Whether the runner binary file exists in the charm. 
- """ - return_code, _, _ = await run_in_unit(unit, f"test -f {LXDRunnerManager.runner_bin_path}") - return return_code == 0 - - -async def get_repo_policy_compliance_pip_info(unit: Unit) -> None | str: - """Get pip info for repo-policy-compliance. - - Args: - unit: Unit instance to check for the LXD profile. - - Returns: - If repo-policy-compliance is installed, returns the pip show output, else returns none. - """ - return_code, stdout, stderr = await run_in_unit( - unit, "python3 -m pip show repo-policy-compliance" - ) - - if return_code == 0: - return stdout or stderr - - return None - - -async def install_repo_policy_compliance_from_git_source(unit: Unit, source: None | str) -> None: - """Install repo-policy-compliance pip package from the git source. - - Args: - unit: Unit instance to check for the LXD profile. - source: The git source to install the package. If none the package is removed. - """ - return_code, stdout, stderr = await run_in_unit( - unit, "python3 -m pip uninstall --yes repo-policy-compliance" - ) - assert return_code == 0, f"Failed to uninstall repo-policy-compliance: {stdout} {stderr}" - - if source: - return_code, stdout, stderr = await run_in_unit(unit, f"python3 -m pip install {source}") - assert ( - return_code == 0 - ), f"Failed to install repo-policy-compliance from source, {stdout} {stderr}" - - -async def remove_runner_bin(unit: Unit) -> None: - """Remove runner binary. - - Args: - unit: Unit instance to check for the LXD profile. - """ - await run_in_unit(unit, f"rm {LXDRunnerManager.runner_bin_path}") - - # No file should exists under with the filename. 
- return_code, _, _ = await run_in_unit(unit, f"test -f {LXDRunnerManager.runner_bin_path}") - assert return_code != 0 - - async def run_in_unit( unit: Unit, command: str, timeout=None, assert_on_failure=False, assert_msg="" ) -> tuple[int, str | None, str | None]: @@ -238,7 +96,6 @@ async def deploy_github_runner_charm( app_name: str, path: str, token: str, - runner_storage: str, http_proxy: str, https_proxy: str, no_proxy: str, @@ -247,7 +104,6 @@ async def deploy_github_runner_charm( config: dict | None = None, deploy_kwargs: dict | None = None, wait_idle: bool = True, - use_local_lxd: bool = True, ) -> Application: """Deploy github-runner charm. @@ -257,7 +113,6 @@ async def deploy_github_runner_charm( app_name: Application name for the deployment. path: Path representing the GitHub repo/org. token: GitHub Personal Token for the application to use. - runner_storage: Runner storage to use, i.e. "memory" or "juju_storage", http_proxy: HTTP proxy for the application to use. https_proxy: HTTPS proxy for the application to use. no_proxy: No proxy configuration for the application. @@ -267,14 +122,10 @@ async def deploy_github_runner_charm( config: Additional custom config to use. deploy_kwargs: Additional model deploy arguments. wait_idle: wait for model to become idle. - use_local_lxd: Whether to use local LXD or not. Returns: The charm application that was deployed. 
""" - if use_local_lxd: - subprocess.run(["sudo", "modprobe", "br_netfilter"]) - await model.set_config( { "juju-http-proxy": http_proxy, @@ -284,20 +135,13 @@ async def deploy_github_runner_charm( } ) - storage = {} - if runner_storage == "juju-storage": - storage["runner"] = {"pool": "rootfs", "size": 11} - default_config = { PATH_CONFIG_NAME: path, TOKEN_CONFIG_NAME: token, VIRTUAL_MACHINES_CONFIG_NAME: 0, TEST_MODE_CONFIG_NAME: "insecure", RECONCILE_INTERVAL_CONFIG_NAME: reconcile_interval, - RUNNER_STORAGE_CONFIG_NAME: runner_storage, } - if use_local_lxd: - default_config[DENYLIST_CONFIG_NAME] = "10.10.0.0/16" if config: default_config.update(config) @@ -308,7 +152,6 @@ async def deploy_github_runner_charm( base="ubuntu@22.04", config=default_config, constraints=constraints or DEFAULT_RUNNER_CONSTRAINTS, - storage=storage, # type: ignore[arg-type] **(deploy_kwargs or {}), ) @@ -535,43 +378,6 @@ async def wait_for( raise TimeoutError() -def inject_lxd_profile(charm_file: pathlib.Path, loop_device: str | None) -> None: - """Injects LXD profile to charm file. - - Args: - charm_file: Path to charm file to deploy. - loop_device: Loop device used to mount runner image. - """ - lxd_profile_str = """config: - security.nesting: true - security.privileged: true - raw.lxc: | - lxc.apparmor.profile=unconfined - lxc.mount.auto=proc:rw sys:rw cgroup:rw - lxc.cgroup.devices.allow=a - lxc.cap.drop= -devices: - kmsg: - path: /dev/kmsg - source: /dev/kmsg - type: unix-char -""" - if loop_device: - lxd_profile_str += f""" loop-control: - path: /dev/loop-control - type: unix-char - loop14: - path: {loop_device} - type: unix-block -""" - - with zipfile.ZipFile(charm_file, mode="a") as file: - file.writestr( - "lxd-profile.yaml", - lxd_profile_str, - ) - - async def is_upgrade_charm_event_emitted(unit: Unit) -> bool: """Check if the upgrade_charm event is emitted. 
diff --git a/tests/integration/helpers/lxd.py b/tests/integration/helpers/lxd.py deleted file mode 100644 index aee139ca4..000000000 --- a/tests/integration/helpers/lxd.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import logging -from typing import Any - -import yaml -from juju.application import Application -from juju.model import Model -from juju.unit import Unit - -from charm_state import VIRTUAL_MACHINES_CONFIG_NAME -from runner import Runner -from tests.integration.helpers.common import InstanceHelper, reconcile, run_in_unit, wait_for - -logger = logging.getLogger(__name__) - - -class LXDInstanceHelper(InstanceHelper): - """Helper class to interact with LXD instances.""" - - async def run_in_instance( - self, - unit: Unit, - command: str, - timeout: int | None = None, - assert_on_failure: bool = False, - assert_msg: str | None = None, - ) -> tuple[int, str | None, str | None]: - """Run command in LXD instance. - - Args: - unit: Juju unit to execute the command in. - command: Command to execute. - timeout: Amount of time to wait for the execution. - assert_on_failure: Not used in lxd - assert_msg: Not used in lxd - - Returns: - Tuple of return code, stdout and stderr. - """ - name = await self.get_runner_name(unit) - return await run_in_lxd_instance(unit, name, command, timeout=timeout) - - async def expose_to_instance( - self, - unit: Unit, - port: int, - host: str = "localhost", - ) -> None: - """Expose a port on the juju machine to the OpenStack instance. - - Uses SSH remote port forwarding from the juju machine to the OpenStack instance containing - the runner. - - Args: - unit: The juju unit of the github-runner charm. - port: The port on the juju machine to expose to the runner. - host: Host for the reverse tunnel. - - Raises: - NotImplementedError: Not implemented yet. 
- """ - raise NotImplementedError - - async def ensure_charm_has_runner(self, app: Application): - """Reconcile the charm to contain one runner. - - Args: - app: The GitHub Runner Charm app to create the runner for. - """ - await ensure_charm_has_runner(app, app.model) - - async def get_runner_name(self, unit: Unit) -> str: - """Get the name of the runner. - - Expects only one runner to be present. - - Args: - unit: The GitHub Runner Charm unit to get the runner name for. - - Returns: - The Github runner name deployed in the given unit. - """ - return await get_runner_name(unit) - - async def get_runner_names(self, unit: Unit) -> list[str]: - """Get the name of all the runners in the unit. - - Args: - unit: The GitHub Runner Charm unit to get the runner names for. - - Raises: - NotImplementedError: Not implemented yet. - """ - raise NotImplementedError - - async def delete_single_runner(self, unit: Unit) -> None: - """Delete the only runner. - - - Args: - unit: The GitHub Runner Charm unit to check. - - Raises: - NotImplementedError: Not implemented yet. - """ - raise NotImplementedError - - -async def assert_resource_lxd_profile(unit: Unit, configs: dict[str, Any]) -> None: - """Check for LXD profile of the matching resource config. - - Args: - unit: Unit instance to check for the LXD profile. - configs: Configs of the application. - """ - cpu = configs["vm-cpu"]["value"] - mem = configs["vm-memory"]["value"] - disk = configs["vm-disk"]["value"] - resource_profile_name = Runner._get_resource_profile_name(cpu, mem, disk) - - # Verify the profile exists. - return_code, stdout, _ = await run_in_unit(unit, "lxc profile list --format json") - assert return_code == 0 - assert stdout is not None - profiles = json.loads(stdout) - profile_names = [profile["name"] for profile in profiles] - assert resource_profile_name in profile_names - - # Verify the profile contains the correct resource settings. 
- return_code, stdout, _ = await run_in_unit(unit, f"lxc profile show {resource_profile_name}") - assert return_code == 0 - assert stdout is not None - profile_content = yaml.safe_load(stdout) - assert f"{cpu}" == profile_content["config"]["limits.cpu"] - assert mem == profile_content["config"]["limits.memory"] - assert disk == profile_content["devices"]["root"]["size"] - - -async def get_runner_names(unit: Unit) -> tuple[str, ...]: - """Get names of the runners in LXD. - - Args: - unit: Unit instance to check for the LXD profile. - - Returns: - Tuple of runner names. - """ - return_code, stdout, _ = await run_in_unit(unit, "lxc list --format json") - - assert return_code == 0 - assert stdout is not None - - lxc_instance: list[dict[str, str]] = json.loads(stdout) - return tuple(runner["name"] for runner in lxc_instance if runner["name"] != "builder") - - -async def wait_till_num_of_runners(unit: Unit, num: int, timeout: int = 10 * 60) -> None: - """Wait and check the number of runners. - - Args: - unit: Unit instance to check for the LXD profile. - num: Number of runner instances to check for. - timeout: Number of seconds to wait for the runners. - """ - - async def get_lxc_instances() -> None | list[dict]: - """Get lxc instances list info. - - Returns: - List of lxc instance dictionaries, None if failed to get list. - """ - return_code, stdout, _ = await run_in_unit(unit, "lxc list --format json") - if return_code != 0 or not stdout: - logger.error("Failed to run lxc list, %s", return_code) - return None - return json.loads(stdout) - - async def is_desired_num_runners(): - """Return whether there are desired number of lxc instances running. - - Returns: - Whether the desired number of lxc runners have been reached. 
- """ - lxc_instances = await get_lxc_instances() - if lxc_instances is None: - return False - return len(lxc_instances) == num - - await wait_for(is_desired_num_runners, timeout=timeout, check_interval=30) - - instances = await get_lxc_instances() - if not instances: - return - - for instance in instances: - return_code, stdout, _ = await run_in_unit(unit, f"lxc exec {instance['name']} -- ps aux") - assert return_code == 0 - - assert stdout is not None - assert f"/bin/bash {Runner.runner_script}" in stdout - - -async def run_in_lxd_instance( - unit: Unit, - name: str, - command: str, - env: dict[str, str] | None = None, - cwd: str | None = None, - timeout: int | None = None, -) -> tuple[int, str | None, str | None]: - """Run command in LXD instance of a juju unit. - - Args: - unit: Juju unit to execute the command in. - name: Name of LXD instance. - command: Command to execute. - env: Mapping of environment variable name to value. - cwd: Work directory of the command. - timeout: Amount of time to wait for the execution. - - Returns: - Tuple of return code and stdout. - """ - lxc_cmd = f"/snap/bin/lxc exec {name}" - if env: - for key, value in env.items(): - lxc_cmd += f"--env {key}={value}" - if cwd: - lxc_cmd += f" --cwd {cwd}" - lxc_cmd += f" -- {command}" - return await run_in_unit(unit, lxc_cmd, timeout) - - -async def start_test_http_server(unit: Unit, port: int): - """Start test http server. - - Args: - unit: The unit to start the test server in. - port: Http server port. 
- """ - await run_in_unit( - unit, - f"""cat < /etc/systemd/system/test-http-server.service -[Unit] -Description=Simple HTTP server for testing -After=network.target - -[Service] -User=ubuntu -Group=www-data -WorkingDirectory=/home/ubuntu -ExecStart=python3 -m http.server {port} -EOT""", - ) - await run_in_unit(unit, "/usr/bin/systemctl daemon-reload") - await run_in_unit(unit, "/usr/bin/systemctl start test-http-server") - - async def server_is_ready() -> bool: - """Check if the server is ready. - - Returns: - Whether the server is ready. - """ - return_code, stdout, _ = await run_in_unit(unit, f"curl http://localhost:{port}") - return return_code == 0 and bool(stdout) - - await wait_for(server_is_ready, timeout=30, check_interval=3) - - -async def set_app_runner_amount(app: Application, model: Model, num_runners: int) -> None: - """Reconcile the application to a runner amount. - - Args: - app: The GitHub Runner Charm app to create the runner for. - model: The machine charm model. - num_runners: The number of runners. - """ - await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: f"{num_runners}"}) - await reconcile(app=app, model=model) - await wait_till_num_of_runners(unit=app.units[0], num=num_runners) - - -async def ensure_charm_has_runner(app: Application, model: Model) -> None: - """Reconcile the charm to contain one runner. - - Args: - app: The GitHub Runner Charm app to create the runner for. - model: The machine charm model. - """ - await set_app_runner_amount(app, model, 1) - - -async def get_runner_name(unit: Unit) -> str: - """Get the name of the runner. - - Expects only one runner to be present. - - Args: - unit: The GitHub Runner Charm unit to get the runner name for. - - Returns: - The Github runner name deployed in the given unit. 
- """ - runners = await get_runner_names(unit) - assert len(runners) == 1 - return runners[0] diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py index 9696f5d10..7fc3c0b90 100644 --- a/tests/integration/helpers/openstack.py +++ b/tests/integration/helpers/openstack.py @@ -12,12 +12,12 @@ from charm import RUNNER_MANAGER_USER from charm_state import VIRTUAL_MACHINES_CONFIG_NAME -from tests.integration.helpers.common import InstanceHelper, reconcile, run_in_unit, wait_for +from tests.integration.helpers.common import reconcile, run_in_unit, wait_for logger = logging.getLogger(__name__) -class OpenStackInstanceHelper(InstanceHelper): +class OpenStackInstanceHelper: """Helper class to interact with OpenStack instances.""" def __init__(self, openstack_connection: openstack.connection.Connection): @@ -266,7 +266,7 @@ async def _install_repo_policy( """Start the repo policy compliance service. Args: - unit: Unit instance to check for the LXD profile. + unit: Unit instance to check for the profile. github_token: GitHub token to use in the repo-policy service. charm_token: Charm token to use in the repo-policy service. https_proxy: HTTPS proxy url to use. diff --git a/tests/integration/test_charm_base_image.py b/tests/integration/test_charm_base_image.py deleted file mode 100644 index ceee4a6b7..000000000 --- a/tests/integration/test_charm_base_image.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
- -"""Integration tests for github-runner charm containing one runner.""" - -from github.Branch import Branch -from github.Repository import Repository -from juju.application import Application -from juju.model import Model - -from charm_state import BASE_IMAGE_CONFIG_NAME -from tests.integration.helpers.common import ( - DISPATCH_E2E_TEST_RUN_WORKFLOW_FILENAME, - dispatch_workflow, -) -from tests.integration.helpers.lxd import ( - ensure_charm_has_runner, - get_runner_name, - run_in_lxd_instance, -) - - -async def test_runner_base_image( - model: Model, - app_no_wait: Application, - github_repository: Repository, - test_github_branch: Branch, -) -> None: - """ - arrange: A runner with noble as base image. - act: Dispatch a workflow. - assert: A runner is created with noble OS base and the workflow job is successfully run. - """ - await app_no_wait.set_config( - { - BASE_IMAGE_CONFIG_NAME: "noble", - } - ) - await model.wait_for_idle(apps=[app_no_wait.name], timeout=35 * 60) - await ensure_charm_has_runner(app_no_wait, model) - - # Runner with noble base image is created - unit = app_no_wait.units[0] - runner_name = await get_runner_name(unit) - code, stdout, stderr = await run_in_lxd_instance(unit, runner_name, "lsb_release -a") - assert code == 0, f"Unable to get release name, {stdout} {stderr}" - assert "noble" in str(stdout) - - # Workflow completes successfully - await dispatch_workflow( - app=app_no_wait, - branch=test_github_branch, - github_repository=github_repository, - conclusion="success", - workflow_id_or_name=DISPATCH_E2E_TEST_RUN_WORKFLOW_FILENAME, - dispatch_input={"runner-tag": app_no_wait.name, "runner-virt-type": "lxd"}, - ) diff --git a/tests/integration/test_charm_fork_path_change.py b/tests/integration/test_charm_fork_path_change.py new file mode 100644 index 000000000..300220c69 --- /dev/null +++ b/tests/integration/test_charm_fork_path_change.py @@ -0,0 +1,65 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. 
+ +"""Integration tests for github-runner charm with a fork repo. + +Tests a path change in the repo. +""" +import logging + +import pytest +from github.Repository import Repository +from juju.application import Application +from juju.model import Model +from ops.model import ActiveStatus + +from charm_state import PATH_CONFIG_NAME +from tests.integration.helpers.common import reconcile +from tests.integration.helpers.openstack import OpenStackInstanceHelper + +logger = logging.getLogger(__name__) + + +@pytest.mark.openstack +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_path_config_change( + model: Model, + app_with_forked_repo: Application, + github_repository: Repository, + path: str, + instance_helper: OpenStackInstanceHelper, +) -> None: + """ + arrange: A working application with one runner in a forked repository. + act: Change the path configuration to the main repository and reconcile runners. + assert: No runners connected to the forked repository and one runner in the main repository. 
+ """ + logger.info("test_path_config_change") + await model.wait_for_idle( + apps=[app_with_forked_repo.name], status=ActiveStatus.name, idle_period=30, timeout=10 * 60 + ) + + unit = app_with_forked_repo.units[0] + + logger.info("Ensure there is a runner (this calls reconcile)") + await instance_helper.ensure_charm_has_runner(app_with_forked_repo) + + await app_with_forked_repo.set_config({PATH_CONFIG_NAME: path}) + + logger.info("Reconciling (again)") + await reconcile(app=app_with_forked_repo, model=model) + + runner_names = await instance_helper.get_runner_names(unit) + logger.info("runners: %s", runner_names) + assert len(runner_names) == 1 + runner_name = runner_names[0] + + runners_in_repo = github_repository.get_self_hosted_runners() + logger.info("runners in github repo: %s", list(runners_in_repo)) + + runner_in_repo_with_same_name = tuple( + filter(lambda runner: runner.name == runner_name, runners_in_repo) + ) + + assert len(runner_in_repo_with_same_name) == 1 diff --git a/tests/integration/test_charm_fork_repo.py b/tests/integration/test_charm_fork_repo.py index 925387778..607721e3e 100644 --- a/tests/integration/test_charm_fork_repo.py +++ b/tests/integration/test_charm_fork_repo.py @@ -15,16 +15,11 @@ from juju.application import Application from juju.model import Model -from charm_state import PATH_CONFIG_NAME from tests.integration.helpers.common import ( DISPATCH_FAILURE_TEST_WORKFLOW_FILENAME, - InstanceHelper, dispatch_workflow, - reconcile, ) -from tests.integration.helpers.lxd import ensure_charm_has_runner, get_runner_names from tests.integration.helpers.openstack import OpenStackInstanceHelper, setup_repo_policy -from tests.status_name import ACTIVE @pytest.mark.openstack @@ -35,7 +30,7 @@ async def test_dispatch_workflow_failure( app_with_forked_repo: Application, forked_github_repository: Repository, forked_github_branch: Branch, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, token: str, https_proxy: str, ) 
-> None: @@ -49,25 +44,12 @@ async def test_dispatch_workflow_failure( """ start_time = datetime.now(timezone.utc) - if isinstance(instance_helper, OpenStackInstanceHelper): - await setup_repo_policy( - app=app_with_forked_repo, - openstack_connection=instance_helper.openstack_connection, - token=token, - https_proxy=https_proxy, - ) - else: - grafana_agent = await model.deploy( - "grafana-agent", - application_name=f"grafana-agent-{app_with_forked_repo.name}", - channel="latest/edge", - ) - await model.relate( - f"{app_with_forked_repo.name}:cos-agent", f"{grafana_agent.name}:cos-agent" - ) - await model.wait_for_idle(apps=[app_with_forked_repo.name], status=ACTIVE) - await model.wait_for_idle(apps=[grafana_agent.name]) - await instance_helper.ensure_charm_has_runner(app_with_forked_repo) + await setup_repo_policy( + app=app_with_forked_repo, + openstack_connection=instance_helper.openstack_connection, + token=token, + https_proxy=https_proxy, + ) workflow = forked_github_repository.get_workflow( id_or_file_name=DISPATCH_FAILURE_TEST_WORKFLOW_FILENAME @@ -98,36 +80,3 @@ async def test_dispatch_workflow_failure( ) assert "Endpoint designed for testing that always fails" in logs assert "Should not echo if pre-job script failed" not in logs - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_path_config_change( - model: Model, - app_with_forked_repo: Application, - github_repository: Repository, - path: str, -) -> None: - """ - arrange: A working application with one runner in a forked repository. - act: Change the path configuration to the main repository and reconcile runners. - assert: No runners connected to the forked repository and one runner in the main repository. 
- """ - unit = app_with_forked_repo.units[0] - await ensure_charm_has_runner(app=app_with_forked_repo, model=model) - - await app_with_forked_repo.set_config({PATH_CONFIG_NAME: path}) - - await reconcile(app=app_with_forked_repo, model=model) - - runner_names = await get_runner_names(unit) - assert len(runner_names) == 1 - runner_name = runner_names[0] - - runners_in_repo = github_repository.get_self_hosted_runners() - - runner_in_repo_with_same_name = tuple( - filter(lambda runner: runner.name == runner_name, runners_in_repo) - ) - - assert len(runner_in_repo_with_same_name) == 1 diff --git a/tests/integration/test_charm_lxd_runner.py b/tests/integration/test_charm_lxd_runner.py deleted file mode 100644 index c656f724d..000000000 --- a/tests/integration/test_charm_lxd_runner.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Integration tests for github-runner charm containing one runner.""" -from typing import AsyncIterator - -import pytest -import pytest_asyncio -from github.Repository import Repository -from juju.application import Application -from juju.model import Model - -from charm import GithubRunnerCharm -from charm_state import RUNNER_STORAGE_CONFIG_NAME, TOKEN_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME -from tests.integration.helpers.lxd import ( - ensure_charm_has_runner, - get_runner_names, - reconcile, - run_in_lxd_instance, - run_in_unit, - start_test_http_server, - wait_till_num_of_runners, -) -from tests.status_name import ACTIVE, BLOCKED - - -@pytest_asyncio.fixture(scope="function", name="app") -async def app_fixture( - model: Model, - app_one_runner: Application, -) -> AsyncIterator[Application]: - """Setup and teardown the charm after each test. - - Ensure the charm has one runner before starting a test. 
- """ - await ensure_charm_has_runner(app_one_runner, model) - yield app_one_runner - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_network_access(app: Application) -> None: - """ - arrange: A working application with one runner. Setup a HTTP server in the juju unit. - act: Make HTTP call to the HTTP server from inside a runner. - assert: The HTTP call failed. - """ - unit = app.units[0] - port = 4040 - - await start_test_http_server(unit, port) - - names = await get_runner_names(unit) - assert names - - return_code, stdout, stderr = await run_in_unit(unit, "lxc network get lxdbr0 ipv4.address") - assert return_code == 0, f"Failed to get network address {stdout} {stderr}" - assert stdout is not None - host_ip, _ = stdout.split("/", 1) - - return_code, stdout, _ = await run_in_lxd_instance( - unit, names[0], f"curl http://{host_ip}:{port}" - ) - - assert return_code == 7 - assert stdout is None - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_token_config_changed(model: Model, app: Application, token_alt: str) -> None: - """ - arrange: A working application with one runner. - act: Change the token configuration. - assert: The repo-policy-compliance using the new token. - """ - unit = app.units[0] - - await app.set_config({TOKEN_CONFIG_NAME: token_alt}) - await model.wait_for_idle(status=ACTIVE, timeout=30 * 60) - - return_code, stdout, stderr = await run_in_unit( - unit, "cat /etc/systemd/system/repo-policy-compliance.service" - ) - - assert ( - return_code == 0 - ), f"Failed to get repo-policy-compliance unit file contents {stdout} {stderr}" - assert stdout is not None - assert f"GITHUB_TOKEN={token_alt}" in stdout - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_reconcile_runners_with_lxd_storage_pool_failure( - model: Model, app: Application -) -> None: - """ - arrange: A working application with one runners. - act: - 1. a. Set virtual-machines config to 0. - b. Run reconcile_runners action. - c. 
Delete content in the runner LXD storage directory. - 2. a. Set virtual-machines config to 1. - b. Run reconcile_runners action. - assert: - 1. No runner should exist. - 2. One runner should exist. - """ - unit = app.units[0] - - # 1. - await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) - - await reconcile(app=app, model=model) - await wait_till_num_of_runners(unit, 0) - - exit_code, stdout, stderr = await run_in_unit( - unit, f"rm -rf {GithubRunnerCharm.ram_pool_path}/*" - ) - assert exit_code == 0, f"Failed to delete ram pool {stdout} {stderr}" - - # 2. - await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "1"}) - - await reconcile(app=app, model=model) - - await wait_till_num_of_runners(unit, 1) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_change_runner_storage(model: Model, app: Application) -> None: - """ - arrange: A working application with one runners using memory as disk. - act: - 1. Change runner-storage to juju-storage. - 2. Change runner-storage back to memory. - assert: - 1. Application in blocked state. - 2. Application back to active state. - """ - unit = app.units[0] - - # 1. - await app.set_config({RUNNER_STORAGE_CONFIG_NAME: "juju-storage"}) - await model.wait_for_idle(status=BLOCKED, timeout=1 * 60) - assert ( - "runner-storage config cannot be changed after deployment" in unit.workload_status_message - ) - - # 2. - await app.set_config({RUNNER_STORAGE_CONFIG_NAME: "memory"}) - await model.wait_for_idle(status=ACTIVE, timeout=1 * 60) - - -async def test_runner_labels( - model: Model, app: Application, github_repository: Repository -) -> None: - """ - arrange: A working application with one runner. - act: Change the runner label. - assert: A runner with the testing label is found. 
- """ - unit = app.units[0] - - test_labels = ("label_test", "additional_label", app.name) - await app.set_config({"labels": f"{test_labels[0]}, {test_labels[1]}"}) - await model.wait_for_idle() - - await wait_till_num_of_runners(unit, num=1) - - found = False - for runner in github_repository.get_self_hosted_runners(): - runner_labels = tuple(label["name"] for label in runner.labels()) - if all(test_label in runner_labels for test_label in test_labels): - found = True - - assert found, "Runner with testing label not found." - - -async def test_disabled_apt_daily_upgrades(model: Model, app: Application) -> None: - """ - arrange: Given a github runner running on lxd image. - act: When the runner is spawned. - assert: No apt related background services are running. - """ - await model.wait_for_idle() - unit = app.units[0] - await wait_till_num_of_runners(unit, num=1) - names = await get_runner_names(unit) - assert names, "LXD runners not ready" - - ret_code, stdout, stderr = await run_in_lxd_instance( - unit, names[0], "sudo systemctl list-units --no-pager" - ) - assert ret_code == 0, f"Failed to list systemd units {stdout} {stderr}" - assert stdout, "No units listed in stdout" - - assert "apt-daily" not in stdout # this also checks for apt-daily-upgrade service - assert "unattended-upgrades" not in stdout diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py index 6ce23fa0d..5a7636b3b 100644 --- a/tests/integration/test_charm_metrics_failure.py +++ b/tests/integration/test_charm_metrics_failure.py @@ -10,7 +10,6 @@ import pytest_asyncio from github.Branch import Branch from github.Repository import Repository -from github_runner_manager.metrics import runner_logs from github_runner_manager.metrics.runner import PostJobStatus from juju.application import Application from juju.model import Model @@ -20,37 +19,26 @@ assert_events_after_reconciliation, cancel_workflow_run, clear_metrics_log, - 
print_loop_device_info, wait_for_runner_to_be_marked_offline, wait_for_workflow_to_start, ) from tests.integration.helpers.common import ( DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, DISPATCH_FAILURE_TEST_WORKFLOW_FILENAME, - InstanceHelper, dispatch_workflow, reconcile, - run_in_unit, -) -from tests.integration.helpers.lxd import ( - ensure_charm_has_runner, - get_runner_name, - run_in_lxd_instance, ) from tests.integration.helpers.openstack import OpenStackInstanceHelper, setup_repo_policy @pytest_asyncio.fixture(scope="function", name="app") -async def app_fixture( - model: Model, app_for_metric: Application, loop_device: str -) -> AsyncIterator[Application]: +async def app_fixture(model: Model, app_for_metric: Application) -> AsyncIterator[Application]: """Setup and teardown the charm after each test. Clear the metrics log before each test. """ unit = app_for_metric.units[0] await clear_metrics_log(unit) - await print_loop_device_info(unit, loop_device) await app_for_metric.set_config( { VIRTUAL_MACHINES_CONFIG_NAME: "0", @@ -73,7 +61,7 @@ async def test_charm_issues_metrics_for_failed_repo_policy( forked_github_branch: Branch, token: str, https_proxy: str, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ): """ arrange: A properly integrated charm with a runner registered on the fork repo. 
@@ -83,15 +71,12 @@ async def test_charm_issues_metrics_for_failed_repo_policy( """ await app.set_config({PATH_CONFIG_NAME: forked_github_repository.full_name}) - if isinstance(instance_helper, OpenStackInstanceHelper): - await setup_repo_policy( - app=app, - openstack_connection=instance_helper.openstack_connection, - token=token, - https_proxy=https_proxy, - ) - else: - await instance_helper.ensure_charm_has_runner(app) + await setup_repo_policy( + app=app, + openstack_connection=instance_helper.openstack_connection, + token=token, + https_proxy=https_proxy, + ) # Clear metrics log to make reconciliation event more predictable unit = app.units[0] @@ -127,7 +112,7 @@ async def test_charm_issues_metrics_for_abnormal_termination( app: Application, github_repository: Repository, test_github_branch: Branch, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ): """ arrange: A properly integrated charm with a runner registered on the fork repo. @@ -180,41 +165,3 @@ async def test_charm_issues_metrics_for_abnormal_termination( github_repository=github_repository, post_job_status=PostJobStatus.ABNORMAL, ) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_retrieves_logs_from_unhealthy_runners( - model: Model, - app: Application, -): - """ - arrange: A properly integrated charm with one runner. - act: Kill the start.sh script, which marks the runner as unhealthy. After that, reconcile. - assert: The logs are pulled from the crashed runner. - """ - await ensure_charm_has_runner(app=app, model=model) - - unit = app.units[0] - runner_name = await get_runner_name(unit) - - kill_start_sh_cmd = "pkill -9 start.sh" - ret_code, stdout, stderr = await run_in_lxd_instance(unit, runner_name, kill_start_sh_cmd) - assert ret_code == 0, f"Failed to kill start.sh, {stdout} {stderr}" - - # Set the number of virtual machines to 0 to avoid to speedup reconciliation. 
- await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) - await reconcile(app=app, model=model) - - ret_code, stdout, stderr = await run_in_unit(unit, f"ls {runner_logs.RUNNER_LOGS_DIR_PATH}") - assert ret_code == 0, f"Failed to list crashed runner logs {stdout} {stderr}" - assert stdout - assert runner_name in stdout, "Failed to find crashed runner log" - - ret_code, stdout, _ = await run_in_unit( - unit, f"ls {runner_logs.RUNNER_LOGS_DIR_PATH}/{runner_name}" - ) - assert ret_code == 0, "Failed to list crashed runner log" - assert stdout - assert "_diag" in stdout, "Failed to find crashed runner diag log" - assert "syslog" in stdout, "Failed to find crashed runner syslog log" diff --git a/tests/integration/test_charm_metrics_success.py b/tests/integration/test_charm_metrics_success.py index 283d7eba1..24261ed41 100644 --- a/tests/integration/test_charm_metrics_success.py +++ b/tests/integration/test_charm_metrics_success.py @@ -14,34 +14,28 @@ from juju.application import Application from juju.model import Model -from charm_state import PATH_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME +from charm_state import VIRTUAL_MACHINES_CONFIG_NAME from tests.integration.helpers.charm_metrics import ( assert_events_after_reconciliation, clear_metrics_log, get_metrics_log, - print_loop_device_info, ) from tests.integration.helpers.common import ( DISPATCH_TEST_WORKFLOW_FILENAME, - InstanceHelper, dispatch_workflow, reconcile, - run_in_unit, ) -from tests.integration.helpers.lxd import ensure_charm_has_runner, get_runner_name +from tests.integration.helpers.openstack import OpenStackInstanceHelper @pytest_asyncio.fixture(scope="function", name="app") -async def app_fixture( - model: Model, app_for_metric: Application, loop_device: str -) -> AsyncIterator[Application]: +async def app_fixture(model: Model, app_for_metric: Application) -> AsyncIterator[Application]: """Setup and teardown the charm after each test. Clear the metrics log before each test. 
""" unit = app_for_metric.units[0] await clear_metrics_log(unit) - await print_loop_device_info(unit, loop_device) yield app_for_metric @@ -50,7 +44,7 @@ async def app_fixture( @pytest.mark.asyncio @pytest.mark.abort_on_fail async def test_charm_issues_runner_installed_metric( - app: Application, model: Model, instance_helper: InstanceHelper + app: Application, model: Model, instance_helper: OpenStackInstanceHelper ): """ arrange: A charm integrated with grafana-agent using the cos-agent integration. @@ -83,7 +77,7 @@ async def test_charm_issues_metrics_after_reconciliation( app: Application, github_repository: Repository, test_github_branch: Branch, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ): """ arrange: A properly integrated charm with a runner registered on the fork repo. @@ -111,43 +105,3 @@ async def test_charm_issues_metrics_after_reconciliation( await assert_events_after_reconciliation( app=app, github_repository=github_repository, post_job_status=PostJobStatus.NORMAL ) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_remounts_shared_fs( - model: Model, - app: Application, - forked_github_repository: Repository, - forked_github_branch: Branch, -): - """ - arrange: A properly integrated charm with a runner registered on the fork repo. - act: Dispatch a test workflow and afterwards unmount the shared fs. After that, reconcile. - assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. 
- """ - await app.set_config({PATH_CONFIG_NAME: forked_github_repository.full_name}) - await ensure_charm_has_runner(app=app, model=model) - - # Clear metrics log to make reconciliation event more predictable - unit = app.units[0] - runner_name = await get_runner_name(unit) - await clear_metrics_log(unit) - await dispatch_workflow( - app=app, - branch=forked_github_branch, - github_repository=forked_github_repository, - conclusion="success", - workflow_id_or_name=DISPATCH_TEST_WORKFLOW_FILENAME, - ) - - # unmount shared fs - await run_in_unit(unit, f"sudo umount /home/ubuntu/runner-fs/{runner_name}") - - # Set the number of virtual machines to 0 to speedup reconciliation - await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) - await reconcile(app=app, model=model) - - await assert_events_after_reconciliation( - app=app, github_repository=forked_github_repository, post_job_status=PostJobStatus.NORMAL - ) diff --git a/tests/integration/test_charm_no_runner.py b/tests/integration/test_charm_no_runner.py index 52891a768..dd82c529c 100644 --- a/tests/integration/test_charm_no_runner.py +++ b/tests/integration/test_charm_no_runner.py @@ -2,28 +2,15 @@ # See LICENSE file for licensing details. 
"""Integration tests for github-runner charm with no runner.""" -import functools -import json import logging -from datetime import datetime, timezone import pytest from juju.application import Application from juju.model import Model from charm_state import VIRTUAL_MACHINES_CONFIG_NAME -from tests.integration.helpers.common import ( - check_runner_binary_exists, - get_repo_policy_compliance_pip_info, - install_repo_policy_compliance_from_git_source, - is_upgrade_charm_event_emitted, - reconcile, - remove_runner_bin, - run_in_unit, - wait_for, -) -from tests.integration.helpers.lxd import wait_till_num_of_runners -from tests.status_name import ACTIVE +from tests.integration.helpers.common import reconcile, wait_for +from tests.integration.helpers.openstack import OpenStackInstanceHelper logger = logging.getLogger(__name__) @@ -32,119 +19,7 @@ "repo-policy-compliance@48b36c130b207278d20c3847ce651ac13fb9e9d7" ) - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_update_dependencies_action_latest_service( - model: Model, app_no_runner: Application -) -> None: - """ - arrange: A working application with latest version of repo-policy-compliance service. - act: Run update-dependencies action. - assert: - a. Service is installed in the charm. - b. Action did not flushed the runners. - """ - unit = app_no_runner.units[0] - - action = await unit.run_action("update-dependencies") - await action.wait() - assert action.results["flush"] == "False" - - await model.wait_for_idle(status=ACTIVE) - assert await get_repo_policy_compliance_pip_info(unit) is not None - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_update_dependencies_action_no_service( - model: Model, app_no_runner: Application -) -> None: - """ - arrange: Remove repo-policy-compliance service installation. - act: Run update-dependencies action. - assert: - a. Service is installed in the charm. - b. Action flushed the runners. 
- """ - unit = app_no_runner.units[0] - - await install_repo_policy_compliance_from_git_source(unit, None) - assert await get_repo_policy_compliance_pip_info(unit) is None - - action = await unit.run_action("update-dependencies") - await action.wait() - await model.wait_for_idle(status=ACTIVE) - - assert action.results["flush"] == "True" - assert await get_repo_policy_compliance_pip_info(unit) is not None - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_update_dependencies_action_old_service( - model: Model, app_no_runner: Application -) -> None: - """ - arrange: Replace repo-policy-compliance service installation to a older version. - act: Run update-dependencies action. - assert: - a. Service is installed in the charm. - b. Action flushed the runners. - """ - unit = app_no_runner.units[0] - latest_version_info = await get_repo_policy_compliance_pip_info(unit) - - await install_repo_policy_compliance_from_git_source( - unit, REPO_POLICY_COMPLIANCE_VER_0_2_GIT_SOURCE - ) - assert await get_repo_policy_compliance_pip_info(unit) != latest_version_info - - action = await unit.run_action("update-dependencies") - await action.wait() - await model.wait_for_idle(status=ACTIVE) - - assert action.results["flush"] == "True" - assert await get_repo_policy_compliance_pip_info(unit) is not None - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_update_dependencies_action_on_runner_binary( - model: Model, app_no_runner: Application -) -> None: - """ - arrange: Remove runner binary if exists. - act: - 1. Run update-dependencies action. - 2. Run update-dependencies action. - assert: - 1. a. Runner binary exists in the charm. - b. Action flushed the runners. - 2. a. Runner binary exists in the charm. - b. Action did not flushed the runners. 
- """ - unit = app_no_runner.units[0] - - await remove_runner_bin(unit) - - action = await unit.run_action("update-dependencies") - await action.wait() - await model.wait_for_idle(status=ACTIVE) - - # The runners should be flushed on update of runner binary. - assert action.results["flush"] == "True" - - assert await check_runner_binary_exists(unit) - - action = await unit.run_action("update-dependencies") - await action.wait() - await model.wait_for_idle(status=ACTIVE) - - # The runners should be flushed on update of runner binary. - assert action.results["flush"] == "False" - - assert await check_runner_binary_exists(unit) +pytestmark = pytest.mark.openstack @pytest.mark.asyncio @@ -163,12 +38,16 @@ async def test_check_runners_no_runners(app_no_runner: Application) -> None: assert action.results["online"] == "0" assert action.results["offline"] == "0" assert action.results["unknown"] == "0" - assert not action.results["runners"] + assert action.results["runners"] == "()" @pytest.mark.asyncio @pytest.mark.abort_on_fail -async def test_reconcile_runners(model: Model, app_no_runner: Application) -> None: +async def test_reconcile_runners( + model: Model, + app_no_runner: Application, + instance_helper: OpenStackInstanceHelper, +) -> None: """ arrange: A working application with no runners. act: @@ -193,54 +72,15 @@ async def test_reconcile_runners(model: Model, app_no_runner: Application) -> No await reconcile(app=app, model=model) - await wait_till_num_of_runners(unit, 1) + async def _runners_number(number) -> bool: + """Check if there is the expected number of runners.""" + return len(await instance_helper.get_runner_names(unit)) == number + + await wait_for(lambda: _runners_number(1), timeout=10 * 60, check_interval=10) # 2. 
await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) await reconcile(app=app, model=model) - await wait_till_num_of_runners(unit, 0) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_no_runner_upgrade( - model: Model, app_no_runner: Application, charm_file: str -) -> None: - """ - arrange: A working application with no runners. - act: Upgrade the charm. - assert: The upgrade_charm hook ran successfully and the image has not been rebuilt. - """ - logger.info("Wait for idlle before test start") - await model.wait_for_idle(apps=[app_no_runner.name]) - start_time = datetime.now(tz=timezone.utc) - - logger.info("Refreshing runner") - await app_no_runner.refresh(path=charm_file) - - unit = app_no_runner.units[0] - logger.info("Waiting for upgrade event") - await wait_for( - functools.partial(is_upgrade_charm_event_emitted, unit), timeout=360, check_interval=60 - ) - await model.wait_for_idle(status=ACTIVE) - - logger.info("Running 'lxd image list' in unit") - ret_code, stdout, stderr = await run_in_unit( - unit=unit, command="/snap/bin/lxc image list --format json" - ) - assert ret_code == 0, f"Failed to read the image list: {stderr}" - assert stdout is not None, f"Failed to read the image list: {stderr}" - images = json.loads(stdout) - jammy_image = next( - (image for image in images if "jammy" in {alias["name"] for alias in image["aliases"]}), - None, - ) - assert jammy_image is not None, "Jammy image not found." 
- # len("2024-04-10T00:00:00") == 19 - assert ( - datetime.fromisoformat(jammy_image["created_at"][:19]).replace(tzinfo=timezone.utc) - <= start_time - ), f"Image has been rebuilt after the upgrade: {jammy_image['created_at'][:19]} > {start_time}" + await wait_for(lambda: _runners_number(0), timeout=10 * 60, check_interval=10) diff --git a/tests/integration/test_charm_runner.py b/tests/integration/test_charm_runner.py index 3fd587200..a4135d546 100644 --- a/tests/integration/test_charm_runner.py +++ b/tests/integration/test_charm_runner.py @@ -12,18 +12,10 @@ from juju.application import Application from juju.model import Model -from charm_state import ( - VIRTUAL_MACHINES_CONFIG_NAME, - VM_CPU_CONFIG_NAME, - VM_DISK_CONFIG_NAME, - VM_MEMORY_CONFIG_NAME, - InstanceType, -) -from tests.integration.helpers import lxd +from charm_state import VIRTUAL_MACHINES_CONFIG_NAME from tests.integration.helpers.common import ( DISPATCH_TEST_WORKFLOW_FILENAME, DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, - InstanceHelper, dispatch_workflow, reconcile, wait_for, @@ -35,7 +27,7 @@ async def app_fixture( model: Model, basic_app: Application, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ) -> AsyncIterator[Application]: """Setup and teardown the charm after each test. @@ -72,7 +64,6 @@ async def test_check_runner(app: Application) -> None: @pytest.mark.abort_on_fail async def test_flush_runner_and_resource_config( app: Application, - instance_type: InstanceType, github_repository: Repository, test_github_branch: Branch, ) -> None: @@ -88,16 +79,10 @@ async def test_flush_runner_and_resource_config( assert: 1. One runner exists. 2. Check the resource matches the configuration. - 3. Nothing. - 4. a. The runner name should be different to the runner prior running - the action. - b. LXD profile matching virtual machine resources of step 2 exists. - 5. The runner is not flushed since by default it flushes idle. (Only valid for OpenStack) + 3. 
The runner is not flushed since by default it flushes idle. Test are combined to reduce number of runner spawned. """ - unit = app.units[0] - # 1. action: Action = await app.units[0].run_action("check-runners") await action.wait() @@ -111,26 +96,9 @@ async def test_flush_runner_and_resource_config( assert len(runner_names) == 1 # 2. - # Check if the LXD profile is checked by the charm. Only for local LXD. - configs = await app.get_config() - if instance_type == InstanceType.LOCAL_LXD: - await lxd.assert_resource_lxd_profile(unit, configs) - # OpenStack flavor is not managed by the charm. The charm takes it as a config option. - # Therefore no need to check it. - - # 3. - await app.set_config( - {VM_CPU_CONFIG_NAME: "1", VM_MEMORY_CONFIG_NAME: "3GiB", VM_DISK_CONFIG_NAME: "8GiB"} - ) - - # 4. action = await app.units[0].run_action("flush-runners") await action.wait() - configs = await app.get_config() - if instance_type == InstanceType.LOCAL_LXD: - await lxd.assert_resource_lxd_profile(unit, configs) - action = await app.units[0].run_action("check-runners") await action.wait() @@ -143,23 +111,22 @@ async def test_flush_runner_and_resource_config( assert len(new_runner_names) == 1 assert new_runner_names[0] != runner_names[0] - # 5. - if instance_type == InstanceType.OPENSTACK: - workflow = await dispatch_workflow( - app=app, - branch=test_github_branch, - github_repository=github_repository, - conclusion="success", - workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, - dispatch_input={"runner": app.name, "minutes": "5"}, - wait=False, - ) - await wait_for(lambda: workflow.update() or workflow.status == "in_progress") - action = await app.units[0].run_action("flush-runners") - await action.wait() - - assert action.status == "completed" - assert action.results["delta"]["virtual-machines"] == "0" + # 3. 
+ workflow = await dispatch_workflow( + app=app, + branch=test_github_branch, + github_repository=github_repository, + conclusion="success", + workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, + dispatch_input={"runner": app.name, "minutes": "5"}, + wait=False, + ) + await wait_for(lambda: workflow.update() or workflow.status == "in_progress") + action = await app.units[0].run_action("flush-runners") + await action.wait() + + assert action.status == "completed" + assert action.results["delta"]["virtual-machines"] == "0" @pytest.mark.openstack @@ -171,20 +138,19 @@ async def test_repo_policy_enabled( test_github_branch: Branch, token: str, https_proxy: str, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ) -> None: """ arrange: A working application with one runner with repo policy enabled. act: Dispatch a workflow. assert: Workflow run successfully passed. """ - if isinstance(instance_helper, OpenStackInstanceHelper): - await setup_repo_policy( - app=app, - openstack_connection=instance_helper.openstack_connection, - token=token, - https_proxy=https_proxy, - ) + await setup_repo_policy( + app=app, + openstack_connection=instance_helper.openstack_connection, + token=token, + https_proxy=https_proxy, + ) await dispatch_workflow( app=app, diff --git a/tests/integration/test_charm_scheduled_events.py b/tests/integration/test_charm_scheduled_events.py index 6e2224f85..a3e06ca5e 100644 --- a/tests/integration/test_charm_scheduled_events.py +++ b/tests/integration/test_charm_scheduled_events.py @@ -7,15 +7,18 @@ scheduled events are in its own module. 
""" +import logging from asyncio import sleep import pytest from juju.application import Application from juju.model import Model -from tests.integration.helpers.common import InstanceHelper, wait_for +from tests.integration.helpers.common import wait_for +from tests.integration.helpers.openstack import OpenStackInstanceHelper from tests.status_name import ACTIVE +logger = logging.getLogger(__name__) pytestmark = pytest.mark.openstack @@ -24,7 +27,7 @@ async def test_update_interval( model: Model, app_scheduled_events: Application, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ) -> None: """ arrange: A working application with one runner. @@ -52,6 +55,7 @@ async def _no_runners_available() -> bool: await wait_for(_no_runners_available, timeout=30, check_interval=3) + logger.info("Wait for 10 minutes") await sleep(10 * 60) await model.wait_for_idle(status=ACTIVE, timeout=20 * 60) diff --git a/tests/integration/test_charm_upgrade.py b/tests/integration/test_charm_upgrade.py index 2f9fb3caf..fabcb5c94 100644 --- a/tests/integration/test_charm_upgrade.py +++ b/tests/integration/test_charm_upgrade.py @@ -33,7 +33,6 @@ async def test_charm_upgrade( model: Model, ops_test: OpsTest, charm_file: str, - loop_device: str | None, app_name: str, path: str, token: str, @@ -53,7 +52,7 @@ async def test_charm_upgrade( """ latest_stable_path = tmp_path / "github-runner.charm" latest_stable_revision = 302 # update this value every release to stable. 
- # download the charm and inject lxd profile for testing + # download the charm retcode, stdout, stderr = await ops_test.juju( "download", "github-runner", @@ -76,7 +75,6 @@ async def test_charm_upgrade( app_name=app_name, path=path, token=token, - runner_storage="juju-storage", http_proxy=openstack_http_proxy, https_proxy=openstack_https_proxy, no_proxy=openstack_no_proxy, @@ -90,7 +88,6 @@ async def test_charm_upgrade( VIRTUAL_MACHINES_CONFIG_NAME: 1, }, wait_idle=False, - use_local_lxd=False, ) await model.integrate(f"{image_builder.name}:image", f"{application.name}:image") await model.wait_for_idle( diff --git a/tests/integration/test_charm_with_juju_storage.py b/tests/integration/test_charm_with_juju_storage.py deleted file mode 100644 index 566e02d10..000000000 --- a/tests/integration/test_charm_with_juju_storage.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Integration tests for github-runner charm with juju-storage as disk.""" - -import pytest -from juju.application import Application -from juju.model import Model - -from charm_state import VIRTUAL_MACHINES_CONFIG_NAME -from tests.integration.helpers.common import reconcile -from tests.integration.helpers.lxd import wait_till_num_of_runners - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_spawn_one_runner(model: Model, app_juju_storage: Application) -> None: - """ - arrange: A working application with no runners and juju storage setup. - act: Spawn one runner. - assert: One runner should exist. 
- """ - await app_juju_storage.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "1"}) - await reconcile(app=app_juju_storage, model=model) - - await wait_till_num_of_runners(unit=app_juju_storage.units[0], num=1) diff --git a/tests/integration/test_charm_with_proxy.py b/tests/integration/test_charm_with_proxy.py deleted file mode 100644 index 0006ae4bf..000000000 --- a/tests/integration/test_charm_with_proxy.py +++ /dev/null @@ -1,455 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Test the usage of a proxy server.""" -import logging -import subprocess -from asyncio import sleep -from pathlib import Path -from typing import AsyncIterator, Optional -from urllib.parse import urlparse - -import pytest -import pytest_asyncio -from juju.application import Application -from juju.model import Model -from juju.unit import Unit - -from charm_state import ( - DENYLIST_CONFIG_NAME, - PATH_CONFIG_NAME, - RECONCILE_INTERVAL_CONFIG_NAME, - TEST_MODE_CONFIG_NAME, - TOKEN_CONFIG_NAME, - USE_APROXY_CONFIG_NAME, - VIRTUAL_MACHINES_CONFIG_NAME, -) -from tests.integration.helpers.lxd import ( - ensure_charm_has_runner, - get_runner_names, - reconcile, - run_in_lxd_instance, -) -from tests.status_name import ACTIVE -from utilities import execute_command - -NO_PROXY = "127.0.0.1,localhost,::1" -PROXY_PORT = 8899 -NON_STANDARD_PORT = 9432 - - -@pytest.fixture(scope="module", name="proxy_logs_filepath") -def proxy_logs_filepath_fixture(tmp_path_factory) -> Path: - """Get the path to the proxy logs file.""" - return tmp_path_factory.mktemp("tinyproxy") / "tinyproxy.log" - - -@pytest_asyncio.fixture(scope="module", name="proxy") -async def proxy_fixture(proxy_logs_filepath: Path) -> AsyncIterator[str]: - """Start tinyproxy and return the proxy server address.""" - result = subprocess.run(["which", "tinyproxy"]) - assert ( - result.returncode == 0 - ), "Cannot find tinyproxy in PATH, install tinyproxy with `apt install tinyproxy -y`" - - tinyproxy_config = 
Path("tinyproxy.conf") - tinyproxy_config_value = f"""Port {PROXY_PORT} -Listen 0.0.0.0 -Timeout 600 -LogFile "{proxy_logs_filepath}" -LogLevel Connect -""" - - logging.info("tinyproxy config: %s", tinyproxy_config_value) - tinyproxy_config.write_text(tinyproxy_config_value) - - process = subprocess.Popen(["tinyproxy", "-d", "-c", str(tinyproxy_config)]) - - # Get default ip using following commands - stdout, _ = execute_command( - [ - "/bin/bash", - "-c", - r"ip route get $(ip route show 0.0.0.0/0 | grep -oP 'via \K\S+') |" - r" grep -oP 'src \K\S+'", - ], - check_exit=True, - ) - default_ip = stdout.strip() - - yield f"http://{default_ip}:{PROXY_PORT}" - - process.terminate() - if tinyproxy_config.exists(): - tinyproxy_config.unlink() - - -@pytest_asyncio.fixture(scope="module", name="app_with_prepared_machine") -async def app_with_prepared_machine_fixture( - model: Model, - charm_file: str, - app_name: str, - path: str, - token: str, - proxy: str, -) -> Application: - """Application with proxy setup and firewall to block all other network access.""" - await model.set_config( - { - "apt-http-proxy": proxy, - "apt-https-proxy": proxy, - "apt-no-proxy": NO_PROXY, - "juju-http-proxy": proxy, - "juju-https-proxy": proxy, - "juju-no-proxy": NO_PROXY, - "snap-http-proxy": proxy, - "snap-https-proxy": proxy, - "snap-no-proxy": NO_PROXY, - "logging-config": "=INFO;unit=DEBUG", - } - ) - - machine = await model.add_machine(constraints={"root-disk": 15}, series="jammy") - # Wait until juju agent has the hostname of the machine. - for _ in range(120): - if machine.hostname is not None: - break - await sleep(10) - else: - assert False, "Timeout waiting for machine to start" - - # Disable external network access for the juju machine. 
- proxy_url = urlparse(proxy) - await machine.ssh(f"sudo iptables -I OUTPUT -d {proxy_url.hostname} -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 0.0.0.0/8 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 10.0.0.0/8 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 100.64.0.0/10 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 127.0.0.0/8 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 169.254.0.0/16 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 172.16.0.0/12 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 192.0.0.0/24 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 192.0.2.0/24 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 192.88.99.0/24 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 192.168.0.0/16 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 198.18.0.0/15 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 198.51.100.0/24 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 203.0.113.0/24 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 224.0.0.0/4 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 233.252.0.0/24 -j ACCEPT") - await machine.ssh("sudo iptables -I OUTPUT -d 240.0.0.0/4 -j ACCEPT") - await machine.ssh("sudo iptables -P OUTPUT DROP") - - # Test the external network access is disabled. - await machine.ssh("ping -c1 canonical.com 2>&1 | grep '100% packet loss'") - - # Ensure iptables rules are restored on reboot, which might happen during the test. - await machine.ssh("sudo iptables-save | sudo tee /etc/iptables.rules.v4") - await machine.ssh( - """cat < None: - """Clear the tinyproxy log file content. - - Args: - proxy_logs_filepath: The path to the tinyproxy log file. 
- """ - proxy_logs_filepath.write_text("") - - -@pytest_asyncio.fixture(scope="function", name="app") -async def app_fixture( - app_with_prepared_machine: Application, model: Model, proxy_logs_filepath: Path -) -> AsyncIterator[Application]: - """Setup and teardown the app. - - Make sure before each test: - - no runner exists - - Proxy logs are cleared - """ - await app_with_prepared_machine.set_config( - { - VIRTUAL_MACHINES_CONFIG_NAME: "0", - } - ) - await reconcile(app=app_with_prepared_machine, model=model) - - _clear_tinyproxy_log(proxy_logs_filepath) - - yield app_with_prepared_machine - - -def _assert_proxy_var_in(text: str, not_in=False): - """Assert that proxy environment variables are set / not set. - - Args: - text: The text to search for proxy environment variables. - not_in: Whether the proxy environment variables should be set or not. - """ - for proxy_var in ( - "http_proxy", - "https_proxy", - "no_proxy", - "HTTP_PROXY", - "HTTPS_PROXY", - "NO_PROXY", - ): - assert (proxy_var in text) != not_in - - -async def _assert_proxy_vars_in_file(unit: Unit, runner_name: str, file_path: str, not_set=False): - """Assert that proxy environment variables are set / not set in a file. - - Args: - unit: The unit to run the command on. - runner_name: The name of the runner. - file_path: The path to the file to check for proxy environment variables. - not_set: Whether the proxy environment variables should be set or not. - """ - return_code, stdout, stderr = await run_in_lxd_instance(unit, runner_name, f"cat {file_path}") - assert return_code == 0, f"Failed to read file {stdout} {stderr}" - assert stdout, "File is empty" - _assert_proxy_var_in(stdout, not_in=not_set) - - -async def _assert_docker_proxy_vars(unit: Unit, runner_name: str, not_set=False): - """Assert that proxy environment variables are set / not set for docker. - - Args: - unit: The unit to run the command on. - runner_name: The name of the runner. 
- not_set: Whether the proxy environment variables should be set or not. - """ - return_code, _, _ = await run_in_lxd_instance( - unit, runner_name, "docker run --rm alpine sh -c 'env | grep -i _PROXY'" - ) - assert return_code == (1 if not_set else 0) - - -async def _assert_proxy_vars(unit: Unit, runner_name: str, not_set=False): - """Assert that proxy environment variables are set / not set in the runner. - - Args: - unit: The unit to run the command on. - runner_name: The name of the runner. - not_set: Whether the proxy environment variables should be set or not. - """ - await _assert_proxy_vars_in_file(unit, runner_name, "/etc/environment", not_set=not_set) - await _assert_proxy_vars_in_file( - unit, runner_name, "/home/ubuntu/github-runner/.env", not_set=not_set - ) - await _assert_docker_proxy_vars(unit, runner_name, not_set=not_set) - - -async def _assert_proxy_vars_set(unit: Unit, runner_name: str): - """Assert that proxy environment variables are set in the runner. - - Args: - unit: The unit to run the command on. - runner_name: The name of the runner. - """ - await _assert_proxy_vars(unit, runner_name, not_set=False) - - -async def _assert_proxy_vars_not_set(unit: Unit, runner_name: str): - """Assert that proxy environment variables are not set in the runner. - - Args: - unit: The unit to run the command on. - runner_name: The name of the runner. - """ - await _assert_proxy_vars(unit, runner_name, not_set=True) - - -async def _get_aproxy_logs(unit: Unit, runner_name: str) -> Optional[str]: - """Get the aproxy logs. - - Args: - runner_name: The name of the runner. - unit: The unit to run the command on. - - Returns: - The aproxy logs if existent, otherwise None. 
- """ - return_code, stdout, stderr = await run_in_lxd_instance( - unit, runner_name, "snap logs aproxy.aproxy -n=all" - ) - assert return_code == 0, f"Failed to get aproxy logs {stdout} {stderr}" - return stdout - - -async def _curl_as_ubuntu_user( - unit: Unit, runner_name: str, url: str -) -> tuple[int, str | None, str | None]: - """Run curl as a logged in ubuntu user. - - This should simulate the bevahiour of a curl inside the runner with environment variables set. - - Args: - unit: The unit to run the command on. - runner_name: The name of the runner. - url: The URL to curl. - - Returns: - The return code, stdout, stderr of the curl command. - """ - return await run_in_lxd_instance( - unit, - runner_name, - f"su - ubuntu -c 'curl {url}'", - ) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_usage_of_aproxy(model: Model, app: Application, proxy_logs_filepath: Path) -> None: - """ - arrange: A working application with a runner using aproxy configured for a proxy server. - act: Run curl in the runner - 1. URL with standard port - 2. URL with non-standard port - assert: That no proxy vars are set in the runner and that - 1. the aproxy and tinyproxy log contains the request - 2. neither the aproxy nor the tinyproxy log contains the request - """ - await app.set_config( - { - USE_APROXY_CONFIG_NAME: "true", - } - ) - await ensure_charm_has_runner(app, model) - unit = app.units[0] - names = await get_runner_names(unit) - assert names - runner_name = names[0] - - # Clear the logs to avoid false positives if the log already contains matching requests - _clear_tinyproxy_log(proxy_logs_filepath) - - # 1. URL with standard port, should succeed, gets intercepted by aproxy - return_code, stdout, stderr = await _curl_as_ubuntu_user( - unit, runner_name, "http://canonical.com" - ) - assert ( - return_code == 0 - ), f"Expected successful connection to http://canonical.com. Error msg: {stdout} {stderr}" - - # 2. 
URL with non-standard port, should fail, request does not get intercepted by aproxy - return_code, stdout, stderr = await _curl_as_ubuntu_user( - unit, - runner_name, - f"http://canonical.com:{NON_STANDARD_PORT}", - ) - assert return_code == 7, ( - f"Expected cannot connect error for http://canonical.com:{NON_STANDARD_PORT}. " - f"Error msg: {stdout} {stderr}" - ) - - aproxy_logs = await _get_aproxy_logs(unit, runner_name) - assert aproxy_logs is not None - assert "canonical.com" in aproxy_logs - assert f"http://canonical.com:{NON_STANDARD_PORT}" not in aproxy_logs - - proxy_logs = proxy_logs_filepath.read_text(encoding="utf-8") - assert "GET http://canonical.com/" in proxy_logs - assert f"GET http://canonical.com:{NON_STANDARD_PORT}/" not in proxy_logs - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_use_proxy_without_aproxy( - model: Model, app: Application, proxy_logs_filepath: Path -) -> None: - """ - arrange: A working application with a runner not using aproxy configured for a proxy server. - act: Run curl in the runner - 1. URL with standard port - 2. URL with non-standard port - assert: That the proxy vars are set in the runner, aproxy logs are empty, and that - the tinyproxy log contains both requests - (requests to non-standard ports will be forwarded when using env vars). - """ - await app.set_config( - { - USE_APROXY_CONFIG_NAME: "false", - } - ) - await ensure_charm_has_runner(app, model) - unit = app.units[0] - names = await get_runner_names(unit) - assert names - runner_name = names[0] - - await _assert_proxy_vars_set(unit, runner_name) - - # Clear the logs to avoid false positives if the log already contains matching requests - _clear_tinyproxy_log(proxy_logs_filepath) - - # 1. URL with standard port, should succeed - return_code, stdout, stderr = await _curl_as_ubuntu_user( - unit, runner_name, "http://canonical.com" - ) - assert ( - return_code == 0 - ), f"Expected successful connection to http://canonical.com. 
Error msg: {stdout} {stderr}" - - # 2. URL with non-standard port, should return an error message by the proxy like this: - # - # - # 500 Unable to connect - # - #

Unable to connect

- #

Tinyproxy was unable to connect to the remote web server.

- #
- #

Generated by tinyproxy version 1.11.0.

- # - # - return_code, stdout, stderr = await _curl_as_ubuntu_user( - unit, - runner_name, - f"http://canonical.com:{NON_STANDARD_PORT}", - ) - assert return_code == 0, ( - f"Expected error response from proxy for http://canonical.com:{NON_STANDARD_PORT}. " - f"Error msg: {stdout} {stderr}" - ) - - proxy_logs = proxy_logs_filepath.read_text(encoding="utf-8") - assert "GET http://canonical.com/" in proxy_logs - assert f"GET http://canonical.com:{NON_STANDARD_PORT}/" in proxy_logs - - aproxy_logs = await _get_aproxy_logs(unit, runner_name) - assert aproxy_logs is None diff --git a/tests/integration/test_debug_ssh.py b/tests/integration/test_debug_ssh.py index d153b6591..eefaf5854 100644 --- a/tests/integration/test_debug_ssh.py +++ b/tests/integration/test_debug_ssh.py @@ -10,8 +10,8 @@ from juju.application import Application from juju.model import Model -from charm_state import DENYLIST_CONFIG_NAME -from tests.integration.helpers.common import InstanceHelper, dispatch_workflow, get_job_logs +from tests.integration.helpers.common import dispatch_workflow, get_job_logs +from tests.integration.helpers.openstack import OpenStackInstanceHelper from tests.status_name import ACTIVE logger = logging.getLogger(__name__) @@ -27,30 +27,19 @@ async def test_ssh_debug( github_repository: Repository, test_github_branch: Branch, tmate_ssh_server_unit_ip: str, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ): """ - arrange: given an integrated GitHub-Runner charm and tmate-ssh-server charm with a denylist \ - covering ip ranges of tmate-ssh-server. + arrange: given an integrated GitHub-Runner charm and tmate-ssh-server charm. act: when canonical/action-tmate is triggered. assert: the ssh connection info from action-log and tmate-ssh-server matches. 
""" - await app_no_wait_tmate.set_config( - { - DENYLIST_CONFIG_NAME: ( - "0.0.0.0/8,10.0.0.0/8,100.64.0.0/10,169.254.0.0/16," - "172.16.0.0/12,192.0.0.0/24,192.0.2.0/24,192.88.99.0/24,192.168.0.0/16," - "198.18.0.0/15,198.51.100.0/24,203.0.113.0/24,224.0.0.0/4,233.252.0.0/24," - "240.0.0.0/4" - ), - } - ) await model.wait_for_idle(status=ACTIVE, timeout=60 * 120) unit = app_no_wait_tmate.units[0] # We need the runner to connect to the current machine, instead of the tmate_ssh_server unit, # as the tmate_ssh_server is not routable. - dnat_comman_in_runner = "sudo iptables -t nat -A OUTPUT -p tcp --dport 10022 -j DNAT --to-destination 127.0.0.1:10022" + dnat_comman_in_runner = f"sudo iptables -t nat -A OUTPUT -p tcp -d {tmate_ssh_server_unit_ip} --dport 10022 -j DNAT --to-destination 127.0.0.1:10022" _, _, _ = await instance_helper.run_in_instance( unit, dnat_comman_in_runner, diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py index bed193216..ffc371649 100644 --- a/tests/integration/test_e2e.py +++ b/tests/integration/test_e2e.py @@ -9,19 +9,18 @@ from juju.application import Application from juju.model import Model -from charm_state import InstanceType from tests.integration.helpers.common import ( DISPATCH_E2E_TEST_RUN_WORKFLOW_FILENAME, - InstanceHelper, dispatch_workflow, ) +from tests.integration.helpers.openstack import OpenStackInstanceHelper @pytest_asyncio.fixture(scope="function", name="app") async def app_fixture( model: Model, basic_app: Application, - instance_helper: InstanceHelper, + instance_helper: OpenStackInstanceHelper, ) -> AsyncIterator[Application]: """Setup and teardown the charm after each test. @@ -39,24 +38,17 @@ async def test_e2e_workflow( app: Application, github_repository: Repository, test_github_branch: Branch, - instance_type: InstanceType, ): """ arrange: An app connected to an OpenStack cloud with no runners. act: Run e2e test workflow. assert: No exception thrown. 
""" - virt_type: str - if instance_type == InstanceType.OPENSTACK: - virt_type = "openstack" - else: - virt_type = "lxd" - await dispatch_workflow( app=app, branch=test_github_branch, github_repository=github_repository, conclusion="success", workflow_id_or_name=DISPATCH_E2E_TEST_RUN_WORKFLOW_FILENAME, - dispatch_input={"runner-tag": app.name, "runner-virt-type": virt_type}, + dispatch_input={"runner-tag": app.name}, ) diff --git a/tests/integration/test_reactive.py b/tests/integration/test_reactive.py index f99e3ff24..e3bf41928 100644 --- a/tests/integration/test_reactive.py +++ b/tests/integration/test_reactive.py @@ -57,6 +57,7 @@ async def app_fixture( await reconcile(app_for_reactive, app_for_reactive.model) +@pytest.mark.abort_on_fail async def test_reactive_mode_spawns_runner( ops_test: OpsTest, app: Application, @@ -123,6 +124,7 @@ async def _runner_installed_in_metrics_log() -> bool: await _assert_metrics_are_logged(app, github_repository) +@pytest.mark.abort_on_fail async def test_reactive_mode_does_not_consume_jobs_with_unsupported_labels( ops_test: OpsTest, app: Application, @@ -160,6 +162,7 @@ async def test_reactive_mode_does_not_consume_jobs_with_unsupported_labels( run.cancel() # cancel the run to avoid a queued run in GitHub actions page +@pytest.mark.abort_on_fail async def test_reactive_mode_scale_down( ops_test: OpsTest, app: Application, diff --git a/tests/integration/test_self_hosted_runner.py b/tests/integration/test_self_hosted_runner.py deleted file mode 100644 index 46c8280b1..000000000 --- a/tests/integration/test_self_hosted_runner.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
- -"""Integration tests for self-hosted runner managed by the github-runner charm.""" - -from datetime import datetime, timezone -from time import sleep - -import github -import pytest -from github.Repository import Repository -from github_runner_manager.types_.github import GitHubRepo -from juju.application import Application -from juju.model import Model - -from charm_state import ( - DOCKERHUB_MIRROR_CONFIG_NAME, - PATH_CONFIG_NAME, - VIRTUAL_MACHINES_CONFIG_NAME, -) -from github_client import GithubClient -from tests.integration.helpers.common import ( - DISPATCH_TEST_WORKFLOW_FILENAME, - DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, - get_job_logs, - get_workflow_runs, - reconcile, -) -from tests.integration.helpers.lxd import ( - get_runner_names, - run_in_lxd_instance, - wait_till_num_of_runners, -) -from tests.status_name import ACTIVE - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_dispatch_workflow_with_dockerhub_mirror( - model: Model, app_runner: Application, github_repository: Repository -) -> None: - """ - arrange: A working application with no runners. - act: - 1. Set dockerhub-mirror config and spawn one runner. - 2. Dispatch a workflow. - assert: - 1. registry-mirrors is setup in /etc/docker/daemon.json of runner. - 2. Message about dockerhub_mirror appears in logs. - """ - start_time = datetime.now(timezone.utc) - - unit = app_runner.units[0] - - fake_url = "https://example.com:5000" - - # 1. 
- await app_runner.set_config( - {VIRTUAL_MACHINES_CONFIG_NAME: "1", DOCKERHUB_MIRROR_CONFIG_NAME: fake_url} - ) - action = await unit.run_action("reconcile-runners") - await action.wait() - await model.wait_for_idle(status=ACTIVE) - names = await get_runner_names(unit) - assert len(names) == 1 - - runner_to_be_used = names[0] - - return_code, stdout, stderr = await run_in_lxd_instance( - unit, runner_to_be_used, "cat /etc/docker/daemon.json" - ) - assert return_code == 0, f"Failed to get docker daemon contents, {stdout} {stderr}" - assert stdout is not None - assert "registry-mirrors" in stdout - assert fake_url in stdout - - # 2. - main_branch = github_repository.get_branch(github_repository.default_branch) - workflow = github_repository.get_workflow(id_or_file_name=DISPATCH_TEST_WORKFLOW_FILENAME) - - workflow.create_dispatch(main_branch, {"runner": app_runner.name}) - - # Wait until the runner is used up. - for _ in range(30): - runners = await get_runner_names(unit) - if runner_to_be_used not in runners: - break - sleep(30) - else: - assert False, "Timeout while waiting for workflow to complete" - - # Unable to find the run id of the workflow that was dispatched. - # Therefore, all runs after this test start should pass the conditions. - for run in get_workflow_runs(start_time, workflow, runner_to_be_used): - jobs = run.jobs() - try: - logs = get_job_logs(jobs[0]) - except github.GithubException.GithubException: - continue - - if f"Job is about to start running on the runner: {app_runner.name}-" in logs: - assert run.jobs()[0].conclusion == "success" - assert ( - "A private docker registry is setup as a dockerhub mirror for this self-hosted" - " runner." - ) in logs - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_flush_busy_runner( - model: Model, - app_runner: Application, - forked_github_repository: Repository, - runner_manager_github_client: GithubClient, -) -> None: - """ - arrange: A working application with one runner. - act: - 1. 
Dispatch a workflow that waits for 30 mins. - 2. Run flush-runners action. - assert: - 1. The runner is in busy status. - 2. a. The flush-runners action should take less than the timeout. - b. The runner should be flushed. - """ - unit = app_runner.units[0] - - config = await app_runner.get_config() - - await app_runner.set_config( - {PATH_CONFIG_NAME: forked_github_repository.full_name, VIRTUAL_MACHINES_CONFIG_NAME: "1"} - ) - await reconcile(app=app_runner, model=model) - await wait_till_num_of_runners(unit, 1) - - names = await get_runner_names(unit) - assert len(names) == 1 - - runner_to_be_used = names[0] - - # 1. - main_branch = forked_github_repository.get_branch(forked_github_repository.default_branch) - workflow = forked_github_repository.get_workflow( - id_or_file_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME - ) - - assert workflow.create_dispatch(main_branch, {"runner": app_runner.name, "minutes": "30"}) - - # Wait until runner online and then busy. - for _ in range(30): - all_runners = runner_manager_github_client.get_runner_github_info( - GitHubRepo( - owner=forked_github_repository.owner.login, repo=forked_github_repository.name - ) - ) - runners = [runner for runner in all_runners if runner.name == runner_to_be_used] - - if not runners: - # if runner is not online yet. - sleep(30) - continue - - assert len(runners) == 1, "Should not occur as GitHub enforce unique naming of runner" - runner = runners[0] - if runner["busy"]: - start_time = datetime.now(timezone.utc) - break - - sleep(30) - else: - assert False, "Timeout while waiting for runner to take up the workflow" - - # 2. - action = await unit.run_action("flush-runners") - await action.wait() - - end_time = datetime.now(timezone.utc) - - # The flushing of runner should take less than the 30 minutes timeout of the workflow. 
- diff = end_time - start_time - assert diff.total_seconds() < 30 * 60 - - names = await get_runner_names(unit) - assert runner_to_be_used not in names, "Found a runner that should be flushed" - - # Ensure the app_runner is back to 0 runners. - await app_runner.set_config( - {VIRTUAL_MACHINES_CONFIG_NAME: "0", PATH_CONFIG_NAME: config[PATH_CONFIG_NAME]} - ) - await reconcile(app=app_runner, model=model) - await wait_till_num_of_runners(unit, 0) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 5a9182080..bb06bf614 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -11,7 +11,7 @@ from github_runner_manager.manager.runner_scaler import RunnerScaler import utilities -from tests.unit.mock import MockGhapiClient, MockLxdClient, MockRepoPolicyComplianceClient +from tests.unit.mock import MockGhapiClient @pytest.fixture(name="exec_command") @@ -19,16 +19,6 @@ def exec_command_fixture(): return unittest.mock.MagicMock(return_value=("", 0)) -@pytest.fixture(name="lxd_exec_command") -def lxd_exec_command_fixture(): - return unittest.mock.MagicMock(return_value=("", 0)) - - -@pytest.fixture(name="runner_binary_path") -def runner_binary_path_fixture(tmp_path): - return tmp_path / "github-runner-app" - - def disk_usage_mock(total_disk: int): """Mock disk usage factory. 
@@ -45,54 +35,21 @@ def disk_usage_mock(total_disk: int): @pytest.fixture(autouse=True) -def mocks(monkeypatch, tmp_path, exec_command, lxd_exec_command, runner_binary_path): +def mocks(monkeypatch, tmp_path, exec_command): runner_scaler_mock = unittest.mock.MagicMock(spec=RunnerScaler) cron_path = tmp_path / "cron.d" cron_path.mkdir() - monkeypatch.setattr( - "charm.GithubRunnerCharm.service_token_path", tmp_path / "mock_service_token" - ) - monkeypatch.setattr( - "charm.GithubRunnerCharm.repo_check_web_service_path", - tmp_path / "repo_policy_compliance_service", - ) - monkeypatch.setattr( - "charm.GithubRunnerCharm.repo_check_systemd_service", tmp_path / "systemd_service" - ) monkeypatch.setattr("charm.RunnerScaler", runner_scaler_mock) - monkeypatch.setattr("charm.GithubRunnerCharm.kernel_module_path", tmp_path / "modules") - monkeypatch.setattr("charm.GithubRunnerCharm._update_kernel", lambda self, now: None) monkeypatch.setattr("charm.execute_command", exec_command) - monkeypatch.setattr("charm.shutil", unittest.mock.MagicMock()) - monkeypatch.setattr("charm.shutil.disk_usage", disk_usage_mock(30 * 1024 * 1024 * 1024)) monkeypatch.setattr("charm_state.CHARM_STATE_PATH", Path(tmp_path / "charm_state.json")) monkeypatch.setattr("event_timer.jinja2", unittest.mock.MagicMock()) monkeypatch.setattr("event_timer.execute_command", exec_command) - monkeypatch.setattr( - "firewall.Firewall.get_host_ip", unittest.mock.MagicMock(return_value="10.0.0.1") - ) - monkeypatch.setattr("firewall.Firewall.refresh_firewall", unittest.mock.MagicMock()) - monkeypatch.setattr("runner.execute_command", lxd_exec_command) - monkeypatch.setattr("runner.shared_fs", unittest.mock.MagicMock()) monkeypatch.setattr( "github_runner_manager.metrics.events.METRICS_LOG_PATH", Path(tmp_path / "metrics.log") ) - monkeypatch.setattr("runner.time", unittest.mock.MagicMock()) monkeypatch.setattr("github_runner_manager.github_client.GhApi", MockGhapiClient) - 
monkeypatch.setattr("runner_manager_type.jinja2", unittest.mock.MagicMock()) - monkeypatch.setattr("runner_manager_type.LxdClient", MockLxdClient) - monkeypatch.setattr("runner_manager.github_metrics", unittest.mock.MagicMock()) - monkeypatch.setattr("runner_manager.runner_logs", unittest.mock.MagicMock()) - monkeypatch.setattr("runner_manager.LxdClient", MockLxdClient) - monkeypatch.setattr("runner_manager.shared_fs", unittest.mock.MagicMock()) - monkeypatch.setattr("runner_manager.execute_command", exec_command) - monkeypatch.setattr("runner_manager.LXDRunnerManager.runner_bin_path", runner_binary_path) - monkeypatch.setattr("runner_manager.LXDRunnerManager.cron_path", cron_path) - monkeypatch.setattr( - "runner_manager.RepoPolicyComplianceClient", MockRepoPolicyComplianceClient - ) monkeypatch.setattr("github_runner_manager.utilities.time", unittest.mock.MagicMock()) diff --git a/tests/unit/factories.py b/tests/unit/factories.py index 2dc9d2a97..fff8cbdb0 100644 --- a/tests/unit/factories.py +++ b/tests/unit/factories.py @@ -7,6 +7,7 @@ # pylint: disable=too-few-public-methods import random +import secrets from typing import Generic, TypeVar from unittest.mock import MagicMock @@ -14,12 +15,12 @@ import factory.fuzzy import invoke.runners import openstack.compute.v2.server +import yaml from pydantic.networks import IPvAnyAddress from charm_state import ( COS_AGENT_INTEGRATION_NAME, DEBUG_SSH_INTEGRATION_NAME, - DENYLIST_CONFIG_NAME, DOCKERHUB_MIRROR_CONFIG_NAME, GROUP_CONFIG_NAME, LABELS_CONFIG_NAME, @@ -29,14 +30,10 @@ OPENSTACK_NETWORK_CONFIG_NAME, PATH_CONFIG_NAME, RECONCILE_INTERVAL_CONFIG_NAME, - RUNNER_STORAGE_CONFIG_NAME, TEST_MODE_CONFIG_NAME, TOKEN_CONFIG_NAME, USE_APROXY_CONFIG_NAME, VIRTUAL_MACHINES_CONFIG_NAME, - VM_CPU_CONFIG_NAME, - VM_DISK_CONFIG_NAME, - VM_MEMORY_CONFIG_NAME, SSHDebugConnection, ) @@ -127,23 +124,34 @@ class Meta: model = factory.SubFactory(MockGithubRunnerCharmModelFactory) config = factory.Dict( { - DENYLIST_CONFIG_NAME: "", 
DOCKERHUB_MIRROR_CONFIG_NAME: "", GROUP_CONFIG_NAME: "default", LABELS_CONFIG_NAME: "", - OPENSTACK_CLOUDS_YAML_CONFIG_NAME: "", + OPENSTACK_CLOUDS_YAML_CONFIG_NAME: yaml.safe_dump( + { + "clouds": { + "openstack": { + "auth": { + "auth_url": "https://project-keystone.url/", + "password": secrets.token_hex(16), + "project_domain_name": "Default", + "project_name": "test-project-name", + "user_domain_name": "Default", + "username": "test-user-name", + }, + "region_name": secrets.token_hex(16), + } + } + } + ), OPENSTACK_NETWORK_CONFIG_NAME: "external", OPENSTACK_FLAVOR_CONFIG_NAME: "m1.small", PATH_CONFIG_NAME: factory.Sequence(lambda n: f"mock_path_{n}"), RECONCILE_INTERVAL_CONFIG_NAME: 10, - RUNNER_STORAGE_CONFIG_NAME: "juju-storage", TEST_MODE_CONFIG_NAME: "", TOKEN_CONFIG_NAME: factory.Sequence(lambda n: f"mock_token_{n}"), USE_APROXY_CONFIG_NAME: False, VIRTUAL_MACHINES_CONFIG_NAME: 1, - VM_CPU_CONFIG_NAME: 2, - VM_MEMORY_CONFIG_NAME: "7GiB", - VM_DISK_CONFIG_NAME: "10GiB", } ) diff --git a/tests/unit/mock.py b/tests/unit/mock.py index 78c0c6990..61c4c8b4b 100644 --- a/tests/unit/mock.py +++ b/tests/unit/mock.py @@ -6,17 +6,12 @@ from __future__ import annotations import hashlib -import io import logging import secrets -from pathlib import Path -from typing import IO, Optional, Sequence, Union from github_runner_manager.types_.github import RegistrationToken, RemoveToken, RunnerApplication -from errors import LxdError, RunnerError -from lxd_type import LxdNetwork -from runner import LxdInstanceConfig +from errors import RunnerError logger = logging.getLogger(__name__) @@ -32,315 +27,6 @@ ) -class MockLxdClient: - """Mock the behavior of the LXD client.""" - - def __init__(self): - """Fake init implementation for LxdClient.""" - self.instances = MockLxdInstanceManager() - self.profiles = MockLxdProfileManager() - self.networks = MockLxdNetworkManager() - self.storage_pools = MockLxdStoragePoolManager() - self.images = MockLxdImageManager() - - -class 
MockLxdInstanceManager: - """Mock the behavior of the LXD Instances.""" - - def __init__(self): - """Fake init implementation for LxdInstanceManager.""" - self.instances = {} - - def create(self, config: LxdInstanceConfig, wait: bool = False) -> MockLxdInstance: - """Create an instance with given config. - - Args: - config: The instance configuration to create the instance with. - wait: Placeholder for wait argument. - - Returns: - Mock instance that was created. - """ - self.instances[config["name"]] = MockLxdInstance(config["name"]) - return self.instances[config["name"]] - - def get(self, name: str): - """Get an instance with given name. - - Args: - name: The name of the instance to get. - - Returns: - Instance with given name. - """ - return self.instances[name] - - def all(self): - """Return all instances that have not been deleted. - - Returns: - All Lxd fake instances that have not been deleted. - """ - return [i for i in self.instances.values() if not i.deleted] - - -class MockLxdProfileManager: - """Mock the behavior of the LXD Profiles.""" - - def __init__(self): - """Initialization method for LxdProfileManager fake.""" - self.profiles = set() - - def create(self, name: str, config: dict[str, str], devices: dict[str, str]): - """Fake implementation of create method of LxdProfile manager. - - Args: - name: The name of LXD profile. - config: The config of LXD profile to create. - devices: The devices mapping of LXD profile to create with. - """ - self.profiles.add(name) - - def exists(self, name: str) -> bool: - """Fake implementation of exists method of LxdProfile manager. - - Args: - name: The name of LXD profile. - - Returns: - Whether given LXD profile exists. 
- """ - return name in self.profiles - - -class MockLxdNetworkManager: - """Mock the behavior of the LXD networks.""" - - def __init__(self): - """Placeholder for initialization method for LxdInstance stub.""" - pass - - def get(self, name: str) -> LxdNetwork: - """Stub method get for LxdNetworkManager. - - Args: - name: the name of the LxdNetwork to get. - - Returns: - LxdNetwork stub. - """ - return LxdNetwork( - "lxdbr0", "", "bridge", {"ipv4.address": "10.1.1.1/24"}, True, ("default") - ) - - -class MockLxdInstance: - """Mock the behavior of an LXD Instance.""" - - def __init__(self, name: str): - """Fake implementation of initialization method for LxdInstance fake. - - Args: - name: The mock instance name to create. - """ - self.name = name - self.status = "Stopped" - self.deleted = False - - self.files = MockLxdInstanceFileManager() - - def start(self, wait: bool = True, timeout: int = 60): - """Fake implementation of start method for LxdInstance fake. - - Args: - wait: Placeholder for wait argument. - timeout: Placeholder for timeout argument. - """ - self.status = "Running" - - def stop(self, wait: bool = True, timeout: int = 60): - """Fake implementation of stop method for LxdInstance fake. - - Args: - wait: Placeholder for wait argument. - timeout: Placeholder for timeout argument. - """ - self.status = "Stopped" - # Ephemeral virtual machine should be deleted on stop. - self.deleted = True - - def delete(self, wait: bool = True): - """Fake implementation of delete method for LxdInstance fake. - - Args: - wait: Placeholder for wait argument. - """ - self.deleted = True - - def execute( - self, cmd: Sequence[str], cwd: Optional[str] = None, hide_cmd: bool = False - ) -> tuple[int, IO, IO]: - """Implementation for execute for LxdInstance fake. - - Args: - cmd: Placeholder for command to execute. - cwd: Placeholder for working directory to execute command. - hide_cmd: Placeholder for to hide command that is being executed. 
- - Returns: - Empty tuples values that represent a successful command execution. - """ - return 0, io.BytesIO(b""), io.BytesIO(b"") - - -class MockLxdInstanceFileManager: - """Mock the behavior of an LXD Instance's files.""" - - def __init__(self): - """Initializer for fake instance of LxdInstanceFileManager.""" - self.files = {} - - def mk_dir(self, path): - """Placeholder for mk_dir implementation of LxdInstanceFileManager. - - Args: - path: The path to create. - """ - pass - - def push_file(self, source: str, destination: str, mode: Optional[str] = None): - """Fake push_file implementation of LxdInstanceFileManager. - - Args: - source: Placeholder argument for source file path copy from. - destination: File path to write to. - mode: Placeholder for file write mode. - """ - self.files[destination] = "mock_content" - - def write_file(self, filepath: str, data: Union[bytes, str], mode: Optional[str] = None): - """Fake write_file implementation of LxdInstanceFileManager. - - Args: - filepath: The file path to read. - data: File contents to write - mode: Placeholder for file write mode. - """ - self.files[filepath] = data - - def read_file(self, filepath: str): - """Fake read_file implementation of LxdInstanceFileManager. - - Args: - filepath: The file path to read. - - Returns: - Contents of file. - """ - return self.files.get(str(filepath), None) - - -class MockLxdStoragePoolManager: - """Mock the behavior of LXD storage pools.""" - - def __init__(self): - """Initialize fake storage pools.""" - self.pools = {} - - def all(self): - """Get all non-deleted fake lxd storage pools. - - Returns: - List of all non deleted fake LXD storages. - """ - return [pool for pool in self.pools.values() if not pool.deleted] - - def get(self, name): - """Get a fake storage pool of given name. - - Args: - name: Name of the storage pool to get. - - Returns: - Fake storage pool of given name. 
- """ - return self.pools[name] - - def exists(self, name): - """Check if given storage exists in the fake LxdStoragePool. - - Args: - name: Fake storage pool name to check for existence. - - Returns: - If storage pool of given name exists. - """ - if name in self.pools: - return not self.pools[name].deleted - else: - return False - - def create(self, config): - """Fake LxdStoragePoolManager create function. - - Args: - config: The LXD storage pool config. - - Returns: - Created LXDStoragePool fake. - """ - self.pools[config["name"]] = MockLxdStoragePool() - return self.pools[config["name"]] - - -class MockLxdStoragePool: - """Mock the behavior of an LXD storage pool.""" - - def __init__(self): - """LXD storage pool fake initialization method.""" - self.deleted = False - - def save(self): - """LXD storage pool fake save method placeholder.""" - pass - - def delete(self): - """LXD storage pool fake delete method.""" - self.deleted = True - - -class MockLxdImageManager: - """Mock the behavior of LXD images.""" - - def __init__(self, images: set[str] | None = None): - """Fake init implementation for LxdImageManager. - - Args: - images: Set of images to initialize. - """ - self.images: set[str] = images or set() - - def create(self, name: str, _: Path) -> None: - """Import an LXD image into the fake set. - - Args: - name: Alias for the image. - _: Path of the LXD image file. - """ - self.images.add(name) - - def exists(self, name: str) -> bool: - """Check if an image with the given name exists. - - Args: - name: image name. - - Returns: - Whether the image exists. - """ - return name in self.images - - class MockErrorResponse: """Mock of an error response for request library.""" @@ -357,19 +43,6 @@ def json(self): return {"metadata": {"err": "test error"}} -def mock_lxd_error_func(*args, **kwargs): - """A stub function that always raises LxdError. - - Args: - args: Placeholder for positional arguments. - kwargs: Placeholder for key word arguments. 
- - Raises: - LxdError: always. - """ - raise LxdError(MockErrorResponse()) - - def mock_runner_error_func(*args, **kwargs): """A stub function that always raises RunnerError. diff --git a/tests/unit/mock_runner_managers.py b/tests/unit/mock_runner_managers.py index b52afa538..558b8159f 100644 --- a/tests/unit/mock_runner_managers.py +++ b/tests/unit/mock_runner_managers.py @@ -7,6 +7,7 @@ from typing import Iterable, Iterator, Sequence from unittest.mock import MagicMock +from github_runner_manager.github_client import GithubClient from github_runner_manager.manager.cloud_runner_manager import ( CloudRunnerInstance, CloudRunnerManager, @@ -18,7 +19,6 @@ from github_runner_manager.types_.github import GitHubRunnerStatus, SelfHostedRunner from charm_state import GitHubPath -from github_client import GithubClient from tests.unit.mock import MockGhapiClient diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 7e70c6d59..7508f29be 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -7,15 +7,13 @@ import typing import unittest import urllib.error -from pathlib import Path -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch import pytest import yaml from github_runner_manager.errors import ReconcileError from github_runner_manager.manager.runner_manager import FlushMode from github_runner_manager.manager.runner_scaler import RunnerScaler -from github_runner_manager.types_.github import GitHubOrg, GitHubRepo, GitHubRunnerStatus from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus from ops.testing import Harness @@ -27,35 +25,24 @@ catch_charm_errors, ) from charm_state import ( - GROUP_CONFIG_NAME, IMAGE_INTEGRATION_NAME, OPENSTACK_CLOUDS_YAML_CONFIG_NAME, PATH_CONFIG_NAME, - RECONCILE_INTERVAL_CONFIG_NAME, TOKEN_CONFIG_NAME, USE_APROXY_CONFIG_NAME, - VIRTUAL_MACHINES_CONFIG_NAME, - VM_CPU_CONFIG_NAME, - VM_DISK_CONFIG_NAME, Arch, - InstanceType, 
OpenStackCloudsYAML, OpenstackImage, - ProxyConfig, - VirtualMachineResources, ) from errors import ( ConfigurationError, LogrotateSetupError, MissingMongoDBError, - MissingRunnerBinaryError, RunnerError, SubprocessError, TokenError, ) from event_timer import EventTimer, TimerEnableError -from firewall import FirewallEntry -from runner_manager import LXDRunnerManagerConfig, RunnerInfo TEST_PROXY_SERVER_URL = "http://proxy.server:1234" @@ -126,56 +113,47 @@ def mock_download_latest_runner_image(*args): return "www.example.com" -def mock_get_github_info(): - """A stub function that returns mock Github runner information. - - Returns: - RunnerInfo with different name, statuses, busy values. - """ - return [ - RunnerInfo("test runner 0", GitHubRunnerStatus.ONLINE.value, True), - RunnerInfo("test runner 1", GitHubRunnerStatus.ONLINE.value, False), - RunnerInfo("test runner 2", GitHubRunnerStatus.OFFLINE.value, False), - RunnerInfo("test runner 3", GitHubRunnerStatus.OFFLINE.value, False), - RunnerInfo("test runner 4", "unknown", False), - ] - - -def setup_charm_harness(monkeypatch: pytest.MonkeyPatch, runner_bin_path: Path) -> Harness: +def setup_charm_harness(monkeypatch: pytest.MonkeyPatch) -> Harness: """Setup harness with patched runner manager methods. Args: monkeypatch: Instance of pytest monkeypatch for patching RunnerManager methods. - runner_bin_path: Runner binary temporary path fixture. Returns: Harness with patched RunnerManager instance. """ - - def stub_update_runner_bin(*args, **kwargs) -> None: - """Update runner bin stub function. - - Args: - args: Placeholder for positional argument values. - kwargs: Placeholder for keyword argument values. 
- """ - runner_bin_path.touch() - harness = Harness(GithubRunnerCharm) - harness.update_config({PATH_CONFIG_NAME: "mock/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.begin() - monkeypatch.setattr( - "runner_manager.LXDRunnerManager.update_runner_bin", stub_update_runner_bin + cloud_yaml = { + "clouds": { + "microstack": { + "auth": { + "auth_url": secrets.token_hex(16), + "project_name": secrets.token_hex(16), + "project_domain_name": secrets.token_hex(16), + "username": secrets.token_hex(16), + "user_domain_name": secrets.token_hex(16), + "password": secrets.token_hex(16), + }, + "region_name": secrets.token_hex(16), + } + } + } + harness.update_config( + { + PATH_CONFIG_NAME: "mock/repo", + TOKEN_CONFIG_NAME: "mocktoken", + OPENSTACK_CLOUDS_YAML_CONFIG_NAME: yaml.safe_dump(cloud_yaml), + } ) - monkeypatch.setattr("runner_manager.LXDRunnerManager._runners_in_pre_job", lambda self: False) + harness.begin() monkeypatch.setattr("charm.EventTimer.ensure_event_timer", MagicMock()) monkeypatch.setattr("charm.logrotate.setup", MagicMock()) return harness @pytest.fixture(name="harness") -def harness_fixture(monkeypatch, runner_binary_path: Path) -> Harness: - return setup_charm_harness(monkeypatch, runner_binary_path) +def harness_fixture(monkeypatch) -> Harness: + return setup_charm_harness(monkeypatch) @patch.dict( @@ -213,54 +191,16 @@ def test_common_install_code( act: Fire install/upgrade event. assert: Common install commands are run on the mock. 
""" - monkeypatch.setattr("charm.logrotate.setup", setup_logrotate := MagicMock()) + state_mock = MagicMock() + harness.charm._setup_state = MagicMock(return_value=state_mock) - monkeypatch.setattr( - "runner_manager.LXDRunnerManager.schedule_build_runner_image", - schedule_build_runner_image := MagicMock(), - ) + monkeypatch.setattr("charm.logrotate.setup", setup_logrotate := MagicMock()) event_timer_mock = MagicMock(spec=EventTimer) harness.charm._event_timer = event_timer_mock getattr(harness.charm.on, hook).emit() - calls = [ - call(["/usr/bin/snap", "install", "lxd", "--channel=latest/stable"]), - call(["/snap/bin/lxd", "init", "--auto"]), - call(["/usr/bin/systemctl", "enable", "repo-policy-compliance"]), - ] - exec_command.assert_has_calls(calls, any_order=True) setup_logrotate.assert_called_once() - schedule_build_runner_image.assert_called_once() - event_timer_mock.ensure_event_timer.assert_called_once() - - -@pytest.mark.parametrize( - "hook", - [ - pytest.param("install", id="Install"), - pytest.param("upgrade_charm", id="Upgrade"), - ], -) -def test_common_install_code_does_not_rebuild_image( - hook: str, harness: Harness, monkeypatch: pytest.MonkeyPatch -): - """ - arrange: Set up charm and runner manager to not have runner image. - act: Fire upgrade event. - assert: Image is not rebuilt. - """ - monkeypatch.setattr( - "runner_manager.LXDRunnerManager.build_runner_image", - build_runner_image := MagicMock(), - ) - monkeypatch.setattr( - "runner_manager.LXDRunnerManager.has_runner_image", - MagicMock(return_value=True), - ) - getattr(harness.charm.on, hook).emit() - - assert not build_runner_image.called def test_on_config_changed_failure(harness: Harness): @@ -275,36 +215,6 @@ def test_on_config_changed_failure(harness: Harness): assert "Invalid proxy configuration" in harness.charm.unit.status.message -def test_get_runner_manager(harness: Harness): - """ - arrange: Set up charm. - act: Get runner manager. 
- assert: Runner manager is returned with the correct config. - """ - state = harness.charm._setup_state() - runner_manager = harness.charm._get_runner_manager(state) - assert runner_manager is not None - assert runner_manager.config.token == "mocktoken" - assert runner_manager.proxies == ProxyConfig( - http=None, https=None, no_proxy=None, use_aproxy=False - ) - - -def test_on_flush_runners_action_fail(harness: Harness, runner_binary_path: Path): - """ - arrange: Set up charm without runner binary downloaded. - act: Run flush runner action. - assert: Action fail with missing runner binary. - """ - runner_binary_path.unlink(missing_ok=True) - mock_event = MagicMock() - harness.charm._on_flush_runners_action(mock_event) - mock_event.fail.assert_called_with( - "GitHub runner application not downloaded; the charm will retry download on reconcile " - "interval" - ) - - def test_on_flush_runners_reconcile_error_fail(harness: Harness): """ arrange: Set up charm with Openstack mode and ReconcileError. @@ -312,7 +222,6 @@ def test_on_flush_runners_reconcile_error_fail(harness: Harness): assert: Action fails with generic message and goes in ActiveStatus. """ state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK harness.charm._setup_state = MagicMock(return_value=state_mock) runner_scaler_mock = MagicMock(spec=RunnerScaler) @@ -326,18 +235,6 @@ def test_on_flush_runners_reconcile_error_fail(harness: Harness): assert harness.charm.unit.status.message == ACTIVE_STATUS_RECONCILIATION_FAILED_MSG -def test_on_flush_runners_action_success(harness: Harness, runner_binary_path: Path): - """ - arrange: Set up charm without runner binary downloaded. - act: Run flush runner action. - assert: Action fail with missing runner binary. 
- """ - mock_event = MagicMock() - runner_binary_path.touch() - harness.charm._on_flush_runners_action(mock_event) - mock_event.set_results.assert_called() - - def test_on_reconcile_runners_action_reconcile_error_fail( harness: Harness, monkeypatch: pytest.MonkeyPatch ): @@ -347,7 +244,6 @@ def test_on_reconcile_runners_action_reconcile_error_fail( assert: Action fails with generic message and goes in ActiveStatus """ state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK harness.charm._setup_state = MagicMock(return_value=state_mock) runner_scaler_mock = MagicMock(spec=RunnerScaler) @@ -374,7 +270,6 @@ def test_on_reconcile_runners_reconcile_error(harness: Harness, monkeypatch: pyt assert: Unit goes into ActiveStatus with error message. """ state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK harness.charm._setup_state = MagicMock(return_value=state_mock) runner_scaler_mock = MagicMock(spec=RunnerScaler) @@ -400,7 +295,6 @@ def test_on_stop_busy_flush(harness: Harness, monkeypatch: pytest.MonkeyPatch): assert: Runner scaler mock flushes the runners using busy mode. """ state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK harness.charm._setup_state = MagicMock(return_value=state_mock) runner_scaler_mock = MagicMock(spec=RunnerScaler) harness.charm._get_runner_scaler = MagicMock(return_value=runner_scaler_mock) @@ -426,6 +320,8 @@ def test_on_install_failure(hook: str, harness: Harness, monkeypatch: pytest.Mon 2. Mock _install_deps raises error. assert: Charm goes into error state in both cases. 
""" + state_mock = MagicMock() + harness.charm._setup_state = MagicMock(return_value=state_mock) monkeypatch.setattr("charm.logrotate.setup", setup_logrotate := unittest.mock.MagicMock()) setup_logrotate.side_effect = LogrotateSetupError("Failed to setup logrotate") @@ -440,61 +336,6 @@ def test_on_install_failure(hook: str, harness: Harness, monkeypatch: pytest.Mon assert "mock stderr" in str(exc.value) -def test__refresh_firewall(monkeypatch, harness: Harness, runner_binary_path: Path): - """ - arrange: given multiple tmate-ssh-server units in relation. - act: when refresh_firewall is called. - assert: the unit ip addresses are included in allowlist. - """ - runner_binary_path.touch() - - relation_id = harness.add_relation("debug-ssh", "tmate-ssh-server") - harness.add_relation_unit(relation_id, "tmate-ssh-server/0") - harness.add_relation_unit(relation_id, "tmate-ssh-server/1") - harness.add_relation_unit(relation_id, "tmate-ssh-server/2") - test_unit_ip_addresses = ["127.0.0.1", "127.0.0.2", "127.0.0.3"] - - harness.update_relation_data( - relation_id, - "tmate-ssh-server/0", - { - "host": test_unit_ip_addresses[0], - "port": "10022", - "rsa_fingerprint": "SHA256:abcd", - "ed25519_fingerprint": "abcd", - }, - ) - harness.update_relation_data( - relation_id, - "tmate-ssh-server/1", - { - "host": test_unit_ip_addresses[1], - "port": "10022", - "rsa_fingerprint": "SHA256:abcd", - "ed25519_fingerprint": "abcd", - }, - ) - harness.update_relation_data( - relation_id, - "tmate-ssh-server/2", - { - "host": test_unit_ip_addresses[2], - "port": "10022", - "rsa_fingerprint": "SHA256:abcd", - "ed25519_fingerprint": "abcd", - }, - ) - - monkeypatch.setattr("charm.Firewall", mock_firewall := unittest.mock.MagicMock()) - state = harness.charm._setup_state() - harness.charm._refresh_firewall(state) - mocked_firewall_instance = mock_firewall.return_value - allowlist = mocked_firewall_instance.refresh_firewall.call_args_list[0][1]["allowlist"] - assert all( - FirewallEntry(ip) 
in allowlist for ip in test_unit_ip_addresses - ), "Expected IP firewall entry not found in allowlist arg." - - def test_charm_goes_into_waiting_state_on_missing_integration_data( monkeypatch: pytest.MonkeyPatch, harness: Harness ): @@ -541,156 +382,10 @@ def test_database_integration_events_trigger_reconciliation( class TestCharm(unittest.TestCase): """Test the GithubRunner charm.""" - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_org_register(self, run, wt, mkdir, rm): - harness = Harness(GithubRunnerCharm) - harness.update_config( - { - PATH_CONFIG_NAME: "mockorg", - TOKEN_CONFIG_NAME: "mocktoken", - GROUP_CONFIG_NAME: "mockgroup", - RECONCILE_INTERVAL_CONFIG_NAME: 5, - } - ) - harness.begin() - harness.charm.on.config_changed.emit() - token = harness.charm.service_token - state = harness.charm._setup_state() - rm.assert_called_with( - "github-runner", - "0", - LXDRunnerManagerConfig( - path=GitHubOrg(org="mockorg", group="mockgroup"), - token="mocktoken", - image="jammy", - service_token=token, - lxd_storage_path=GithubRunnerCharm.juju_storage_path, - charm_state=state, - ), - ) - - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_repo_register(self, run, wt, mkdir, rm): - harness = Harness(GithubRunnerCharm) - harness.update_config( - { - PATH_CONFIG_NAME: "mockorg/repo", - TOKEN_CONFIG_NAME: "mocktoken", - RECONCILE_INTERVAL_CONFIG_NAME: 5, - } - ) - harness.begin() - harness.charm.on.config_changed.emit() - token = harness.charm.service_token - state = harness.charm._setup_state() - rm.assert_called_with( - "github-runner", - "0", - LXDRunnerManagerConfig( - path=GitHubRepo(owner="mockorg", repo="repo"), - token="mocktoken", - image="jammy", - service_token=token, - lxd_storage_path=GithubRunnerCharm.juju_storage_path, - charm_state=state, - ), - ) - - 
@patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_exceed_free_disk_size(self, run, wt, mkdir, rm): - """ - arrange: Charm with 30GiB of storage for runner. - act: Configuration that uses 100GiB of disk. - assert: Charm enters block state. - """ - rm.return_value = mock_rm = MagicMock() - mock_rm.get_latest_runner_bin_url = mock_get_latest_runner_bin_url - mock_rm.download_latest_runner_image = mock_download_latest_runner_image - - harness = Harness(GithubRunnerCharm) - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.begin() - - harness.update_config({VIRTUAL_MACHINES_CONFIG_NAME: 10}) - harness.charm.on.reconcile_runners.emit() - assert harness.charm.unit.status == BlockedStatus( - ( - "Required disk space for runners 102400.0MiB is greater than storage total size " - "30720.0MiB" - ) - ) - - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_update_config(self, run, wt, mkdir, rm): - rm.return_value = mock_rm = MagicMock() - mock_rm.get_latest_runner_bin_url = mock_get_latest_runner_bin_url - mock_rm.download_latest_runner_image = mock_download_latest_runner_image - - harness = Harness(GithubRunnerCharm) - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.begin() - - # update to 0 virtual machines - harness.update_config({VIRTUAL_MACHINES_CONFIG_NAME: 0}) - harness.charm.on.reconcile_runners.emit() - token = harness.charm.service_token - state = harness.charm._setup_state() - rm.assert_called_with( - "github-runner", - "0", - LXDRunnerManagerConfig( - path=GitHubRepo(owner="mockorg", repo="repo"), - token="mocktoken", - image="jammy", - service_token=token, - lxd_storage_path=GithubRunnerCharm.juju_storage_path, - charm_state=state, - ), - ) - mock_rm.reconcile.assert_called_with(0, 
VirtualMachineResources(2, "7GiB", "10GiB")), - mock_rm.reset_mock() - - # update to 10 VMs with 4 cpu and 7GiB memory - harness.update_config( - {VIRTUAL_MACHINES_CONFIG_NAME: 5, VM_CPU_CONFIG_NAME: 4, VM_DISK_CONFIG_NAME: "6GiB"} - ) - harness.charm.on.reconcile_runners.emit() - token = harness.charm.service_token - state = harness.charm._setup_state() - rm.assert_called_with( - "github-runner", - "0", - LXDRunnerManagerConfig( - path=GitHubRepo(owner="mockorg", repo="repo"), - token="mocktoken", - image="jammy", - service_token=token, - lxd_storage_path=GithubRunnerCharm.juju_storage_path, - charm_state=state, - ), - ) - mock_rm.reconcile.assert_called_with( - 5, VirtualMachineResources(cpu=4, memory="7GiB", disk="6GiB") - ) - mock_rm.reset_mock() - - @patch("charm.LXDRunnerManager") @patch("pathlib.Path.mkdir") @patch("pathlib.Path.write_text") @patch("subprocess.run") - def test_on_update_status(self, run, wt, mkdir, rm): + def test_on_update_status(self, run, wt, mkdir): """ arrange: reconciliation event timer mocked to be \ 1. active. \ @@ -702,10 +397,6 @@ def test_on_update_status(self, run, wt, mkdir, rm): 2. ensure_event_timer is called. 3. Charm throws error. 
""" - rm.return_value = mock_rm = MagicMock() - mock_rm.get_latest_runner_bin_url = mock_get_latest_runner_bin_url - mock_rm.download_latest_runner_image = mock_download_latest_runner_image - harness = Harness(GithubRunnerCharm) harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) @@ -731,41 +422,11 @@ def test_on_update_status(self, run, wt, mkdir, rm): with pytest.raises(TimerEnableError): harness.charm.on.update_status.emit() - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_on_stop(self, run, wt, mkdir, rm): - rm.return_value = mock_rm = MagicMock() - harness = Harness(GithubRunnerCharm) - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.begin() - harness.charm.on.stop.emit() - mock_rm.flush.assert_called() - - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_on_start_failure(self, run, wt, mkdir, rm): - """Test various error thrown during install.""" - rm.return_value = mock_rm = MagicMock() - mock_rm.get_latest_runner_bin_url = mock_get_latest_runner_bin_url - - harness = Harness(GithubRunnerCharm) - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.begin() - - harness.charm._reconcile_lxd_runners = raise_runner_error - harness.charm.on.start.emit() - assert harness.charm.unit.status == ActiveStatus("Failed to start runners: mock error") - - @patch("charm.LXDRunnerManager") @patch("charm.RunnerScaler") @patch("pathlib.Path.mkdir") @patch("pathlib.Path.write_text") @patch("subprocess.run") - def test_on_config_changed_openstack_clouds_yaml(self, run, wt, mkdir, orm, rm): + def test_on_config_changed_openstack_clouds_yaml(self, run, wt, mkdir, orm): """ arrange: Setup mocked charm. act: Fire config changed event to use openstack-clouds-yaml. 
@@ -801,30 +462,10 @@ def test_on_config_changed_openstack_clouds_yaml(self, run, wt, mkdir, orm, rm): assert harness.charm.unit.status == BlockedStatus("Please provide image integration.") - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_check_runners_action(self, run, wt, mkdir, rm): - rm.return_value = mock_rm = MagicMock() - mock_event = MagicMock() - - mock_rm.get_github_info = mock_get_github_info - - harness = Harness(GithubRunnerCharm) - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.begin() - - harness.charm._on_check_runners_action(mock_event) - mock_event.set_results.assert_called_with( - {"online": 2, "offline": 2, "unknown": 1, "runners": "test runner 0, test runner 1"} - ) - - @patch("charm.LXDRunnerManager") @patch("pathlib.Path.mkdir") @patch("pathlib.Path.write_text") @patch("subprocess.run") - def test_check_runners_action_with_errors(self, run, wt, mkdir, rm): + def test_check_runners_action_with_errors(self, run, wt, mkdir): mock_event = MagicMock() harness = Harness(GithubRunnerCharm) @@ -834,32 +475,12 @@ def test_check_runners_action_with_errors(self, run, wt, mkdir, rm): harness.charm._on_check_runners_action(mock_event) mock_event.fail.assert_called_with("Invalid Github config, Missing path configuration") - @patch("charm.LXDRunnerManager") - @patch("pathlib.Path.mkdir") - @patch("pathlib.Path.write_text") - @patch("subprocess.run") - def test_on_flush_runners_action(self, run, wt, mkdir, rm): - mock_event = MagicMock() - - harness = Harness(GithubRunnerCharm) - harness.begin() - - harness.charm._on_flush_runners_action(mock_event) - mock_event.fail.assert_called_with("Invalid Github config, Missing path configuration") - mock_event.reset_mock() - - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.charm._on_flush_runners_action(mock_event) - 
mock_event.set_results.assert_called() - mock_event.reset_mock() - @pytest.mark.parametrize( "exception, expected_status", [ pytest.param(ConfigurationError, BlockedStatus, id="charm config error"), pytest.param(TokenError, BlockedStatus, id="github token error"), - pytest.param(MissingRunnerBinaryError, MaintenanceStatus, id="runner binary error"), ], ) def test_catch_charm_errors( @@ -901,7 +522,6 @@ def test_event_handler(self, _: typing.Any): "exception, expected_status", [ pytest.param(ConfigurationError, BlockedStatus, id="charm config error"), - pytest.param(MissingRunnerBinaryError, MaintenanceStatus, id="runner binary error"), ], ) def test_catch_action_errors( @@ -976,32 +596,6 @@ def test_openstack_image_ready_status( assert is_ready == expected_value -@pytest.mark.parametrize( - "hook", - [ - pytest.param("_on_image_relation_changed", id="image relation changed"), - pytest.param("_on_image_relation_joined", id="image relation joined"), - ], -) -def test__on_image_relation_hooks_not_openstack(hook: str): - """ - arrange: given a hook that is for OpenStack mode but the image relation exists. - act: when the hook is triggered. - assert: the charm falls into BlockedStatus. - """ - harness = Harness(GithubRunnerCharm) - harness.begin() - state_mock = MagicMock() - state_mock.instance_type = InstanceType.LOCAL_LXD - harness.charm._setup_state = MagicMock(return_value=state_mock) - - getattr(harness.charm, hook)(MagicMock()) - - assert harness.charm.unit.status == BlockedStatus( - "Openstack mode not enabled. Please remove the image integration." 
- ) - - def test__on_image_relation_image_not_ready(): """ arrange: given a charm with OpenStack instance type and a monkeypatched \ @@ -1012,7 +606,6 @@ def test__on_image_relation_image_not_ready(): harness = Harness(GithubRunnerCharm) harness.begin() state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK harness.charm._setup_state = MagicMock(return_value=state_mock) harness.charm._get_set_image_ready_status = MagicMock(return_value=False) @@ -1032,7 +625,6 @@ def test__on_image_relation_image_ready(): harness = Harness(GithubRunnerCharm) harness.begin() state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK harness.charm._setup_state = MagicMock(return_value=state_mock) harness.charm._get_set_image_ready_status = MagicMock(return_value=True) runner_manager_mock = MagicMock() @@ -1056,7 +648,6 @@ def test__on_image_relation_joined(): harness.add_relation_unit(relation_id, "image-builder/0") harness.begin() state_mock = MagicMock() - state_mock.instance_type = InstanceType.OPENSTACK state_mock.charm_config.openstack_clouds_yaml = OpenStackCloudsYAML( clouds={ "test-cloud": { diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py index f3d3564f7..f29c47a3a 100644 --- a/tests/unit/test_charm_state.py +++ b/tests/unit/test_charm_state.py @@ -4,8 +4,6 @@ import logging import platform import secrets -import typing -from pathlib import Path from unittest.mock import MagicMock import pytest @@ -18,9 +16,7 @@ import charm_state from charm_state import ( - BASE_IMAGE_CONFIG_NAME, DEBUG_SSH_INTEGRATION_NAME, - DENYLIST_CONFIG_NAME, DOCKERHUB_MIRROR_CONFIG_NAME, GROUP_CONFIG_NAME, IMAGE_INTEGRATION_NAME, @@ -28,30 +24,18 @@ OPENSTACK_CLOUDS_YAML_CONFIG_NAME, PATH_CONFIG_NAME, RECONCILE_INTERVAL_CONFIG_NAME, - RUNNER_STORAGE_CONFIG_NAME, TOKEN_CONFIG_NAME, USE_APROXY_CONFIG_NAME, - VIRTUAL_MACHINES_CONFIG_NAME, - VM_CPU_CONFIG_NAME, - VM_DISK_CONFIG_NAME, - VM_MEMORY_CONFIG_NAME, Arch, - BaseImage, 
CharmConfig, CharmConfigInvalidError, CharmState, - FirewallEntry, GithubConfig, - ImmutableConfigChangedError, - LocalLxdRunnerConfig, OpenstackImage, OpenstackRunnerConfig, ProxyConfig, - ReactiveConfig, - RunnerStorage, SSHDebugConnection, UnsupportedArchitectureError, - VirtualMachineResources, ) from errors import MissingMongoDBError from tests.unit.factories import MockGithubRunnerCharmFactory @@ -220,35 +204,6 @@ def test_parse_labels(labels, expected_valid_labels): assert result == expected_valid_labels -@pytest.mark.parametrize( - "denylist_config, expected_entries", - [ - ("", []), - ("192.168.1.1", [FirewallEntry(ip_range="192.168.1.1")]), - ( - "192.168.1.1, 192.168.1.2, 192.168.1.3", - [ - FirewallEntry(ip_range="192.168.1.1"), - FirewallEntry(ip_range="192.168.1.2"), - FirewallEntry(ip_range="192.168.1.3"), - ], - ), - ], -) -def test_parse_denylist(denylist_config: str, expected_entries: typing.List[FirewallEntry]): - """ - arrange: Create a mock CharmBase instance with provided denylist configuration. - act: Call _parse_denylist method with the mock CharmBase instance. - assert: Verify that the method returns the expected list of FirewallEntry objects. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config[DENYLIST_CONFIG_NAME] = denylist_config - - result = CharmConfig._parse_denylist(mock_charm) - - assert result == expected_entries - - def test_parse_dockerhub_mirror_invalid_scheme(): """ arrange: Create a mock CharmBase instance with an invalid DockerHub mirror configuration. @@ -321,14 +276,13 @@ def test_parse_openstack_clouds_config_empty(): """ arrange: Create a mock CharmBase instance with an empty OpenStack clouds YAML config. act: Call _parse_openstack_clouds_config method with the mock CharmBase instance. - assert: Verify that the method returns None. 
+ assert: Verify that the method raises CharmConfigInvalidError """ mock_charm = MockGithubRunnerCharmFactory() mock_charm.config[OPENSTACK_CLOUDS_YAML_CONFIG_NAME] = "" - result = CharmConfig._parse_openstack_clouds_config(mock_charm) - - assert result is None + with pytest.raises(CharmConfigInvalidError): + CharmConfig._parse_openstack_clouds_config(mock_charm) def test_parse_openstack_clouds_config_invalid_yaml(invalid_yaml_config: str): @@ -452,7 +406,6 @@ def test_charm_config_from_charm_valid(): mock_charm.config = { PATH_CONFIG_NAME: "owner/repo", RECONCILE_INTERVAL_CONFIG_NAME: "5", - DENYLIST_CONFIG_NAME: "192.168.1.1,192.168.1.2", DOCKERHUB_MIRROR_CONFIG_NAME: "https://example.com", # "clouds: { openstack: { auth: { username: 'admin' }}}" OPENSTACK_CLOUDS_YAML_CONFIG_NAME: yaml.safe_dump( @@ -482,70 +435,12 @@ def test_charm_config_from_charm_valid(): assert result.path == GitHubRepo(owner="owner", repo="repo") assert result.reconcile_interval == 5 - assert result.denylist == [ - FirewallEntry(ip_range="192.168.1.1"), - FirewallEntry(ip_range="192.168.1.2"), - ] assert result.dockerhub_mirror == "https://example.com" assert result.openstack_clouds_yaml == test_openstack_config assert result.labels == ("label1", "label2", "label3") assert result.token == "abc123" -@pytest.mark.parametrize( - "base_image, expected_str", - [ - (BaseImage.JAMMY, "jammy"), - (BaseImage.NOBLE, "noble"), - ], -) -def test_base_image_str_parametrized(base_image, expected_str): - """ - Parametrized test case for __str__ method of BaseImage enum. - - arrange: Pass BaseImage enum values and expected string. - act: Call __str__ method on each enum value. - assert: Ensure the returned string matches the expected string. - """ - assert str(base_image) == expected_str - - -def test_base_image_from_charm_invalid_image(): - """ - arrange: Create a mock CharmBase instance with an invalid base image configuration. - act: Call from_charm method with the mock CharmBase instance. 
- assert: Verify that the method raises an error. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config[BASE_IMAGE_CONFIG_NAME] = "invalid" - - with pytest.raises(ValueError): - BaseImage.from_charm(mock_charm) - - -@pytest.mark.parametrize( - "image_name, expected_result", - [ - ("noble", BaseImage.NOBLE), # Valid custom configuration "noble" - ("24.04", BaseImage.NOBLE), # Valid custom configuration "noble" - ("jammy", BaseImage.JAMMY), # Valid custom configuration "jammy" - ("22.04", BaseImage.JAMMY), # Valid custom configuration "jammy" - ], -) -def test_base_image_from_charm(image_name: str, expected_result: BaseImage): - """ - arrange: Create a mock CharmBase instance with the provided image_name configuration. - act: Call from_charm method with the mock CharmBase instance. - assert: Verify that the method returns the expected base image tag. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config[BASE_IMAGE_CONFIG_NAME] = image_name - - result = BaseImage.from_charm(mock_charm) - - assert result == expected_result - - def test_openstack_image_from_charm_no_connections(): """ arrange: Mock CharmBase instance without relation. @@ -607,181 +502,6 @@ def test_openstack_image_from_charm(): assert image.tags == test_tags -@pytest.mark.parametrize("virtual_machines", [(-1), (-5)]) # Invalid value # Invalid value -def test_check_virtual_machines_invalid(virtual_machines): - """ - arrange: Provide an invalid virtual machines value. - act: Call check_virtual_machines method with the provided value. - assert: Verify that the method raises ValueError with the correct message. 
- """ - with pytest.raises(ValueError) as exc_info: - LocalLxdRunnerConfig.check_virtual_machines(virtual_machines) - assert ( - str(exc_info.value) - == "The virtual-machines configuration needs to be greater or equal to 0" - ) - - -@pytest.mark.parametrize( - "virtual_machines", [(0), (5), (10)] # Minimum valid value # Valid value # Valid value -) -def test_check_virtual_machines_valid(virtual_machines): - """ - arrange: Provide a valid virtual machines value. - act: Call check_virtual_machines method with the provided value. - assert: Verify that the method returns the same value. - """ - result = LocalLxdRunnerConfig.check_virtual_machines(virtual_machines) - - assert result == virtual_machines - - -@pytest.mark.parametrize( - "vm_resources", - [ - VirtualMachineResources(cpu=0, memory="1GiB", disk="10GiB"), # Invalid CPU value - VirtualMachineResources(cpu=1, memory="invalid", disk="10GiB"), # Invalid memory value - VirtualMachineResources(cpu=1, memory="1GiB", disk="invalid"), # Invalid disk value - ], -) -def test_check_virtual_machine_resources_invalid(vm_resources): - """ - arrange: Provide an invalid virtual_machine_resources value. - act: Call check_virtual_machine_resources method with the provided value. - assert: Verify that the method raises ValueError. - """ - with pytest.raises(ValueError): - LocalLxdRunnerConfig.check_virtual_machine_resources(vm_resources) - - -@pytest.mark.parametrize( - "vm_resources, expected_result", - [ - ( - VirtualMachineResources(cpu=1, memory="1GiB", disk="10GiB"), - VirtualMachineResources(cpu=1, memory="1GiB", disk="10GiB"), - ), # Valid configuration - ( - VirtualMachineResources(cpu=2, memory="2GiB", disk="20GiB"), - VirtualMachineResources(cpu=2, memory="2GiB", disk="20GiB"), - ), # Valid configuration - ], -) -def test_check_virtual_machine_resources_valid(vm_resources, expected_result): - """ - arrange: Provide a valid virtual_machine_resources value. 
- act: Call check_virtual_machine_resources method with the provided value. - assert: Verify that the method returns the same value. - """ - result = LocalLxdRunnerConfig.check_virtual_machine_resources(vm_resources) - - assert result == expected_result - - -def test_runner_charm_config_from_charm_invalid_base_image(): - """ - arrange: Create a mock CharmBase instance with an invalid base image configuration. - act: Call from_charm method with the mock CharmBase instance. - assert: Verify that the method raises CharmConfigInvalidError with the correct message. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config[BASE_IMAGE_CONFIG_NAME] = "invalid" - - with pytest.raises(CharmConfigInvalidError) as exc_info: - LocalLxdRunnerConfig.from_charm(mock_charm) - assert str(exc_info.value) == "Invalid base image" - - -def test_runner_charm_config_from_charm_invalid_storage_config(): - """ - arrange: Create a mock CharmBase instance with an invalid storage configuration. - act: Call from_charm method with the mock CharmBase instance. - assert: Verify that the method raises CharmConfigInvalidError with the correct message. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config = { - BASE_IMAGE_CONFIG_NAME: "jammy", - RUNNER_STORAGE_CONFIG_NAME: "invalid", - VIRTUAL_MACHINES_CONFIG_NAME: "5", - VM_CPU_CONFIG_NAME: "2", - VM_MEMORY_CONFIG_NAME: "4GiB", - VM_DISK_CONFIG_NAME: "20GiB", - } - - with pytest.raises(CharmConfigInvalidError) as exc_info: - LocalLxdRunnerConfig.from_charm(mock_charm) - assert "Invalid runner-storage config" in str(exc_info.value) - - -def test_runner_charm_config_from_charm_invalid_cpu_config(): - """ - arrange: Create a mock CharmBase instance with an invalid cpu configuration. - act: Call from_charm method with the mock CharmBase instance. - assert: Verify that the method raises CharmConfigInvalidError with the correct message. 
- """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config = { - BASE_IMAGE_CONFIG_NAME: "jammy", - RUNNER_STORAGE_CONFIG_NAME: "memory", - VIRTUAL_MACHINES_CONFIG_NAME: "5", - VM_CPU_CONFIG_NAME: "invalid", - VM_MEMORY_CONFIG_NAME: "4GiB", - VM_DISK_CONFIG_NAME: "20GiB", - } - - with pytest.raises(CharmConfigInvalidError) as exc_info: - LocalLxdRunnerConfig.from_charm(mock_charm) - assert str(exc_info.value) == "Invalid vm-cpu configuration" - - -def test_runner_charm_config_from_charm_invalid_virtual_machines_config(): - """ - arrange: Create a mock CharmBase instance with an invalid virtual machines configuration. - act: Call from_charm method with the mock CharmBase instance. - assert: Verify that the method raises CharmConfigInvalidError with the correct message. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config = { - BASE_IMAGE_CONFIG_NAME: "jammy", - RUNNER_STORAGE_CONFIG_NAME: "memory", - VIRTUAL_MACHINES_CONFIG_NAME: "invalid", - VM_CPU_CONFIG_NAME: "2", - VM_MEMORY_CONFIG_NAME: "4GiB", - VM_DISK_CONFIG_NAME: "20GiB", - } - - with pytest.raises(CharmConfigInvalidError) as exc_info: - LocalLxdRunnerConfig.from_charm(mock_charm) - assert str(exc_info.value) == "The virtual-machines configuration must be int" - - -def test_runner_charm_config_from_charm_valid(): - """ - arrange: Create a mock CharmBase instance with valid configuration. - act: Call from_charm method with the mock CharmBase instance. - assert: Verify that the method returns a LocalLxdRunnerConfig instance with the expected - values. 
- """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config = { - BASE_IMAGE_CONFIG_NAME: "jammy", - RUNNER_STORAGE_CONFIG_NAME: "memory", - VIRTUAL_MACHINES_CONFIG_NAME: "5", - VM_CPU_CONFIG_NAME: "2", - VM_MEMORY_CONFIG_NAME: "4GiB", - VM_DISK_CONFIG_NAME: "20GiB", - } - - result = LocalLxdRunnerConfig.from_charm(mock_charm) - - assert result.base_image == BaseImage.JAMMY - assert result.runner_storage == RunnerStorage("memory") - assert result.virtual_machines == 5 - assert result.virtual_machine_resources == VirtualMachineResources( - cpu=2, memory="4GiB", disk="20GiB" - ) - - @pytest.mark.parametrize( "http, https, use_aproxy, expected_address", [ @@ -1059,130 +779,17 @@ def mock_charm_state_data(): "arch": "x86_64", "is_metrics_logging_available": True, "proxy_config": {"http": "http://example.com", "https": "https://example.com"}, - "charm_config": {"denylist": ["192.168.1.1"], "token": secrets.token_hex(16)}, + "charm_config": {"token": secrets.token_hex(16)}, "reactive_config": {"uri": "mongodb://user:password@localhost:27017"}, "runner_config": { - "base_image": "jammy", "virtual_machines": 2, - "runner_storage": "memory", }, - "instance_type": "local-lxd", "ssh_debug_connections": [ {"host": "10.1.2.4", "port": 22}, ], } -@pytest.mark.parametrize( - "immutable_config", - [ - pytest.param("runner_storage", id="Runner storage"), - pytest.param("base_image", id="Base image"), - ], -) -def test_check_immutable_config_key_error( - mock_charm_state_path: Path, - mock_charm_state_data: dict[str, typing.Any], - immutable_config: str, - monkeypatch: pytest.MonkeyPatch, - caplog: pytest.LogCaptureFixture, -): - """ - arrange: Mock CHARM_STATE_PATH and read_text method to return modified immutable config values. - act: Call _check_immutable_config_change method. - assert: None is returned. 
- """ - mock_charm_state_data["runner_config"].pop(immutable_config) - monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", mock_charm_state_path) - monkeypatch.setattr( - charm_state.CHARM_STATE_PATH, - "read_text", - MagicMock(return_value=json.dumps(mock_charm_state_data)), - ) - - assert CharmState._check_immutable_config_change(RunnerStorage.MEMORY, BaseImage.JAMMY) is None - assert any( - f"Key {immutable_config} not found, this will be updated to current config." in message - for message in caplog.messages - ) - - -def test_check_immutable_config_change_no_previous_state( - mock_charm_state_path: Path, mock_charm_state_data: dict, monkeypatch: pytest.MonkeyPatch -): - """ - arrange: Mock CHARM_STATE_PATH and read_text method to return no previous state. - act: Call _check_immutable_config_change method. - assert: Ensure no exception is raised. - """ - monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", mock_charm_state_path) - monkeypatch.setattr(charm_state.CHARM_STATE_PATH, "exists", MagicMock(return_value=False)) - state = CharmState(**mock_charm_state_data) - - assert state._check_immutable_config_change("new_runner_storage", "new_base_image") is None - - -def test_check_immutable_config_change_storage_changed( - mock_charm_state_path: Path, mock_charm_state_data: dict, monkeypatch: pytest.MonkeyPatch -): - """ - arrange: Mock CHARM_STATE_PATH and read_text method to return previous state with different \ - storage. - act: Call _check_immutable_config_change method. - assert: Ensure ImmutableConfigChangedError is raised. 
- """ - monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", mock_charm_state_path) - monkeypatch.setattr( - charm_state.CHARM_STATE_PATH, - "read_text", - MagicMock(return_value=json.dumps(mock_charm_state_data)), - ) - state = CharmState(**mock_charm_state_data) - - with pytest.raises(ImmutableConfigChangedError): - state._check_immutable_config_change(RunnerStorage.JUJU_STORAGE, BaseImage.JAMMY) - - -def test_check_immutable_config_change_base_image_changed( - mock_charm_state_path, mock_charm_state_data, monkeypatch: pytest.MonkeyPatch -): - """ - arrange: Mock CHARM_STATE_PATH and read_text method to return previous state with different \ - base image. - act: Call _check_immutable_config_change method. - assert: Ensure ImmutableConfigChangedError is raised. - """ - monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", mock_charm_state_path) - monkeypatch.setattr( - charm_state.CHARM_STATE_PATH, - "read_text", - MagicMock(return_value=json.dumps(mock_charm_state_data)), - ) - state = CharmState(**mock_charm_state_data) - - with pytest.raises(ImmutableConfigChangedError): - state._check_immutable_config_change(RunnerStorage.MEMORY, BaseImage.NOBLE) - - -def test_check_immutable_config( - mock_charm_state_path, mock_charm_state_data, monkeypatch: pytest.MonkeyPatch -): - """ - arrange: Mock CHARM_STATE_PATH and read_text method to return previous state with same config. - act: Call _check_immutable_config_change method. - assert: None is returned. 
- """ - monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", mock_charm_state_path) - monkeypatch.setattr( - charm_state.CHARM_STATE_PATH, - "read_text", - MagicMock(return_value=json.dumps(mock_charm_state_data)), - ) - state = CharmState(**mock_charm_state_data) - - assert state._check_immutable_config_change(RunnerStorage.MEMORY, BaseImage.JAMMY) is None - - class MockModel(BaseModel): """A Mock model class used for pydantic error testing.""" @@ -1196,11 +803,6 @@ class MockModel(BaseModel): ValidationError([], MockModel), ), (ProxyConfig, "from_charm", ValueError), - ( - CharmState, - "_check_immutable_config_change", - ImmutableConfigChangedError("Immutable config changed"), - ), (CharmConfig, "from_charm", ValidationError([], MockModel)), (CharmConfig, "from_charm", ValueError), (charm_state, "_get_supported_arch", UnsupportedArchitectureError(arch="testarch")), @@ -1224,7 +826,6 @@ def test_charm_state_from_charm_invalid_cases( mock_charm_config_from_charm.return_value = mock_charm_config monkeypatch.setattr(CharmConfig, "from_charm", mock_charm_config_from_charm) monkeypatch.setattr(OpenstackRunnerConfig, "from_charm", MagicMock()) - monkeypatch.setattr(LocalLxdRunnerConfig, "from_charm", MagicMock()) monkeypatch.setattr(charm_state, "_get_supported_arch", MagicMock()) monkeypatch.setattr(SSHDebugConnection, "from_charm", MagicMock()) monkeypatch.setattr(module, target, MagicMock(side_effect=exc)) @@ -1244,8 +845,6 @@ def test_charm_state_from_charm(monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(ProxyConfig, "from_charm", MagicMock()) monkeypatch.setattr(CharmConfig, "from_charm", MagicMock()) monkeypatch.setattr(OpenstackRunnerConfig, "from_charm", MagicMock()) - monkeypatch.setattr(LocalLxdRunnerConfig, "from_charm", MagicMock()) - monkeypatch.setattr(CharmState, "_check_immutable_config_change", MagicMock()) monkeypatch.setattr(charm_state, "_get_supported_arch", MagicMock()) monkeypatch.setattr(charm_state, "ReactiveConfig", MagicMock()) 
monkeypatch.setattr(SSHDebugConnection, "from_charm", MagicMock()) @@ -1269,38 +868,3 @@ def test_charm_state__log_prev_state_redacts_sensitive_information( assert mock_charm_state_data["charm_config"]["token"] not in caplog.text assert charm_state.SENSITIVE_PLACEHOLDER in caplog.text - - -def test_charm_state_from_charm_reactive_with_lxd_raises_error(monkeypatch: pytest.MonkeyPatch): - """ - arrange: Mock CharmBase and necessary methods to enable reactive config and lxd storage. - act: Call CharmState.from_charm. - assert: Ensure an error is raised - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_database = MagicMock(spec=DatabaseRequires) - - monkeypatch.setattr( - ReactiveConfig, - "from_database", - MagicMock(return_value=ReactiveConfig(mq_uri="mongodb://localhost:27017")), - ) - charm_config_mock = MagicMock() - charm_config_mock.openstack_clouds_yaml = None - monkeypatch.setattr(CharmConfig, "from_charm", MagicMock(return_value=charm_config_mock)) - - # mock all other required methods - monkeypatch.setattr(ProxyConfig, "from_charm", MagicMock()) - monkeypatch.setattr(OpenstackRunnerConfig, "from_charm", MagicMock()) - monkeypatch.setattr(LocalLxdRunnerConfig, "from_charm", MagicMock()) - monkeypatch.setattr(CharmState, "_check_immutable_config_change", MagicMock()) - monkeypatch.setattr(charm_state, "_get_supported_arch", MagicMock()) - monkeypatch.setattr(SSHDebugConnection, "from_charm", MagicMock()) - monkeypatch.setattr(json, "loads", MagicMock()) - monkeypatch.setattr(json, "dumps", MagicMock()) - monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock()) - - with pytest.raises(CharmConfigInvalidError) as exc: - CharmState.from_charm(mock_charm, mock_database) - - assert "Reactive mode not supported for local LXD instances" in str(exc.value) diff --git a/tests/unit/test_firewall.py b/tests/unit/test_firewall.py deleted file mode 100644 index 917c0bde4..000000000 --- a/tests/unit/test_firewall.py +++ /dev/null @@ -1,160 +0,0 @@ -# 
Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Test cases for firewall module.""" - -from ipaddress import IPv4Network - -import pytest - -from firewall import Firewall - - -@pytest.mark.parametrize( - "domain_ranges, exclude_ranges, expected", - [ - pytest.param([], [], [], id="empty domain[no exclude]"), - pytest.param([], (IPv4Network("127.0.0.1/32")), [], id="empty domain[one ip exclude]"), - pytest.param( - [], - [IPv4Network("127.0.0.1/32"), IPv4Network("127.0.0.2/32")], - [], - id="empty domain[multiple ips exclude]", - ), - pytest.param( - [IPv4Network("127.0.0.1/32")], - [IPv4Network("127.0.0.2/32")], - [IPv4Network("127.0.0.1/32")], - id="single ip single exclude ip[no overlap]", - ), - pytest.param( - [IPv4Network("127.0.0.1/32")], - [IPv4Network("127.0.0.1/32")], - [], - id="single ip single exclude ip[overlap]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30")], - [IPv4Network("127.0.0.2/32")], - [IPv4Network("127.0.0.0/31"), IPv4Network("127.0.0.3/32")], - id="single domain single exclude ip[overlap single ip]", - ), - pytest.param( - [IPv4Network("127.0.0.0/28")], # 127.0.0.0-14 - [IPv4Network("127.0.0.1/32"), IPv4Network("127.0.1.1/32")], - [ - IPv4Network("127.0.0.0/32"), # 127.0.0.0 - IPv4Network("127.0.0.2/31"), # 127.0.0.2-3 - IPv4Network("127.0.0.4/30"), # 127.0.0.4-7 - IPv4Network("127.0.0.8/29"), # 127.0.0.8-14 - ], - id="single domain multiple exclude ips[overlap partial ips]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30")], # 127.0.0.0-3 - [ - IPv4Network("127.0.0.0/32"), - IPv4Network("127.0.0.1/32"), - IPv4Network("127.0.0.2/32"), - IPv4Network("127.0.0.3/32"), - ], - [], - id="single domain multiple exclude ips[overlap all ips]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30")], - [IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.0.0/30")], - id="single domain single exclude domain[no overlap]", - ), - pytest.param( - [IPv4Network("127.0.0.0/28")], # 127.0.0.0-15 - 
[IPv4Network("127.0.0.0/30")], # 127.0.0.0-4 - [ - IPv4Network("127.0.0.8/29"), # 127.0.0.8-15 - IPv4Network("127.0.0.4/30"), # 127.0.0.5-7 - ], - id="single domain single exclude domain[overlap partial range]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30")], - [IPv4Network("127.0.0.0/30")], - [], - id="single domain single exclude domain[overlap full range]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.2.0/30")], - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - id="multiple domain single exclude domain[no overlap]", - ), - pytest.param( - [IPv4Network("127.0.0.0/28"), IPv4Network("127.0.1.0/28")], - [IPv4Network("127.0.0.0/30")], - [ - IPv4Network("127.0.0.8/29"), # 127.0.0.8-15 - IPv4Network("127.0.0.4/30"), # 127.0.0.5-7 - IPv4Network("127.0.1.0/28"), - ], - id="multiple domain single exclude domain[partial overlap]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.0.0/30")], - id="multiple domain single exclude domain[full overlap(equivalent network)]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.0.0/8")], - [], - id="multiple domain single exclude domain[full overlap(bigger network)]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.2.0/30"), IPv4Network("127.0.3.0/30")], - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - id="multiple domain multiple exclude domain[no overlaps]", - ), - pytest.param( - [IPv4Network("127.0.0.0/28"), IPv4Network("127.0.1.0/28")], - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [ - IPv4Network("127.0.0.4/30"), # 127.0.0.5-7 - IPv4Network("127.0.0.8/29"), # 127.0.0.8-15 - IPv4Network("127.0.1.4/30"), # 127.0.1.5-7 - IPv4Network("127.0.1.8/29"), # 127.0.1.8-15 - ], - id="multiple domain multiple exclude domain[multiple 
partial overlaps]", - ), - pytest.param( - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [], - id=( - "multiple domain multiple exclude domain[multiple full " - "overlaps(equivalent network)]" - ), - ), - pytest.param( - [IPv4Network("127.0.0.0/30"), IPv4Network("127.0.1.0/30")], - [IPv4Network("127.0.0.0/8")], - [], - id="multiple domain multiple exclude domain[multiple full overlaps(bigger network)]", - ), - ], -) -def test__exclude_network( - domain_ranges: list[IPv4Network], - exclude_ranges: list[IPv4Network], - expected: list[IPv4Network], -): - """ - arrange: given domain networks and some IPs to exclude from the domains. - act: when _exclude_network is called. - assert: new ip networks are returned with excluded target IP ranges. - """ - result = Firewall("test")._exclude_network(domain_ranges, exclude_ranges) - assert all(net in result for net in expected) and all( - net in expected for net in result - ), f"Difference in networks found, expected: {expected}, got: {result}." diff --git a/tests/unit/test_lxd_runner_manager.py b/tests/unit/test_lxd_runner_manager.py deleted file mode 100644 index b55757622..000000000 --- a/tests/unit/test_lxd_runner_manager.py +++ /dev/null @@ -1,568 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
- -"""Test cases of LXDRunnerManager class.""" -import random -import secrets -from pathlib import Path -from unittest.mock import MagicMock, call - -import github_runner_manager.reactive.runner_manager -import pytest -from github_runner_manager.metrics.events import ( - Reconciliation, - RunnerInstalled, - RunnerStart, - RunnerStop, -) -from github_runner_manager.metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME -from github_runner_manager.metrics.storage import MetricsStorage -from github_runner_manager.types_.github import GitHubOrg, GitHubRepo, RunnerApplication -from pytest import MonkeyPatch - -import shared_fs -from charm_state import Arch, CharmConfig, CharmState, ProxyConfig, VirtualMachineResources -from errors import IssueMetricEventError, RunnerBinaryError -from runner import Runner, RunnerStatus -from runner_manager import BUILD_IMAGE_SCRIPT_FILENAME, LXDRunnerManager, LXDRunnerManagerConfig -from runner_type import RunnerNameByHealth -from tests.unit.mock import TEST_BINARY, MockLxdImageManager - -FAKE_MONGODB_URI = "mongodb://example.com/db" - -IMAGE_NAME = "jammy" - -RUNNER_MANAGER_TIME_MODULE = "runner_manager.time.time" -TEST_PROXY_SERVER_URL = "http://proxy.server:1234" - - -@pytest.fixture(scope="function", name="token") -def token_fixture(): - return secrets.token_hex() - - -@pytest.fixture(scope="function", name="charm_config") -def charm_config_fixture(): - """Mock charm config instance.""" - mock_charm_config = MagicMock(spec=CharmConfig) - mock_charm_config.labels = ("test",) - return mock_charm_config - - -@pytest.fixture(scope="function", name="charm_state") -def charm_state_fixture(charm_config: MagicMock): - mock = MagicMock(spec=CharmState) - mock.is_metrics_logging_available = False - mock.arch = Arch.X64 - mock.ssh_debug_connections = None - mock.charm_config = charm_config - return mock - - -@pytest.fixture( - scope="function", - name="runner_manager", - params=[ - (GitHubOrg("test_org", "test_group"), ProxyConfig()), - ( - 
GitHubRepo("test_owner", "test_repo"), - ProxyConfig( - no_proxy="test_no_proxy", - http=TEST_PROXY_SERVER_URL, - https=TEST_PROXY_SERVER_URL, - use_aproxy=False, - ), - ), - ], -) -def runner_manager_fixture(request, tmp_path, monkeypatch, token, charm_state): - charm_state.proxy_config = request.param[1] - monkeypatch.setattr( - "runner_manager.LXDRunnerManager.runner_bin_path", tmp_path / "mock_runner_binary" - ) - pool_path = tmp_path / "test_storage" - pool_path.mkdir(exist_ok=True) - - runner_manager = LXDRunnerManager( - "test app", - "0", - LXDRunnerManagerConfig( - path=request.param[0], - token=token, - image=IMAGE_NAME, - service_token=secrets.token_hex(16), - lxd_storage_path=pool_path, - charm_state=charm_state, - ), - ) - runner_manager.runner_bin_path.write_bytes(TEST_BINARY) - return runner_manager - - -@pytest.fixture(autouse=True, name="issue_event_mock") -def issue_event_mock_fixture(monkeypatch: MonkeyPatch) -> MagicMock: - """Mock the issue_event function.""" - issue_event_mock = MagicMock() - monkeypatch.setattr("github_runner_manager.metrics.events.issue_event", issue_event_mock) - return issue_event_mock - - -@pytest.fixture(autouse=True, name="shared_fs") -def shared_fs_fixture(tmp_path: Path, monkeypatch: MonkeyPatch) -> MagicMock: - """Mock the shared filesystem module.""" - shared_fs_mock = MagicMock(spec=shared_fs) - monkeypatch.setattr("runner_manager.shared_fs", shared_fs_mock) - monkeypatch.setattr("runner.shared_fs", shared_fs_mock) - return shared_fs_mock - - -@pytest.fixture(autouse=True, name="runner_metrics") -def runner_metrics_fixture(monkeypatch: MonkeyPatch) -> MagicMock: - """Mock the runner metrics module.""" - runner_metrics_mock = MagicMock() - monkeypatch.setattr("runner_manager.runner_metrics", runner_metrics_mock) - return runner_metrics_mock - - -@pytest.fixture(name="reactive_reconcile_mock") -def reactive_reconcile_fixture(monkeypatch: MonkeyPatch, tmp_path: Path) -> MagicMock: - """Mock the job class.""" - 
reconcile_mock = MagicMock(spec=github_runner_manager.reactive.runner_manager.reconcile) - monkeypatch.setattr("runner_manager.reactive_runner_manager.reconcile", reconcile_mock) - reconcile_mock.side_effect = lambda quantity, **kwargs: quantity - return reconcile_mock - - -@pytest.mark.parametrize( - "arch", - [ - pytest.param(Arch.ARM64), - pytest.param(Arch.X64), - ], -) -def test_get_latest_runner_bin_url(runner_manager: LXDRunnerManager, arch: Arch, charm_state): - """ - arrange: Nothing. - act: Get runner bin url of existing binary. - assert: Correct mock data returned. - """ - charm_state.arch = arch - mock_gh_client = MagicMock() - app = RunnerApplication( - os="linux", - architecture=arch.value, - download_url=(download_url := "https://www.example.com"), - filename=(filename := "test_runner_binary"), - ) - mock_gh_client.get_runner_application.return_value = app - runner_manager._clients.github = mock_gh_client - - runner_bin = runner_manager.get_latest_runner_bin_url(os_name="linux") - assert runner_bin["os"] == "linux" - assert runner_bin["architecture"] == arch.value - assert runner_bin["download_url"] == download_url - assert runner_bin["filename"] == filename - - -def test_get_latest_runner_bin_url_missing_binary(runner_manager: LXDRunnerManager): - """ - arrange: Given a mocked GH API client that does not return any runner binaries. - act: Get runner bin url of non-existing binary. - assert: Error related to runner bin raised. - """ - runner_manager._clients.github = MagicMock() - runner_manager._clients.github.get_runner_application.side_effect = RunnerBinaryError - - with pytest.raises(RunnerBinaryError): - runner_manager.get_latest_runner_bin_url(os_name="not_exist") - - -def test_update_runner_bin(runner_manager: LXDRunnerManager): - """ - arrange: Remove the existing runner binary. - act: Update runner binary. - assert: Runner binary in runner manager is set. 
- """ - - class MockRequestLibResponse: - """A mock requests library response.""" - - def __init__(self, *args, **kwargs): - """Initialize successful requests library response. - - Args: - args: Placeholder for positional arguments. - kwargs: Placeholder for keyword arguments. - """ - self.status_code = 200 - - def iter_content(self, *args, **kwargs): - """Mock content iterator returning an iterator over a single test runner binary. - - Args: - args: Placeholder positional arguments. - kwargs: Placeholder keyword arguments. - - Returns: - An iterator over a single test runner binary. - """ - return iter([TEST_BINARY]) - - runner_manager.runner_bin_path.unlink(missing_ok=True) - - runner_manager.session.get = MockRequestLibResponse - runner_bin = runner_manager.get_latest_runner_bin_url(os_name="linux") - - runner_manager.update_runner_bin(runner_bin) - - assert runner_manager.runner_bin_path.read_bytes() == TEST_BINARY - - -def test_reconcile_zero_count(runner_manager: LXDRunnerManager): - """ - arrange: Nothing. - act: Reconcile with the current amount of runner. - assert: No error should be raised. - """ - # Reconcile with no change to runner count. - delta = runner_manager.reconcile(0, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert delta == 0 - - -def test_reconcile_create_runner(runner_manager: LXDRunnerManager): - """ - arrange: Nothing. - act: Reconcile to create a runner. - assert: One runner should be created. - """ - # Create a runner. - delta = runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert delta == 1 - - -def test_reconcile_remove_runner(runner_manager: LXDRunnerManager): - """ - arrange: Create online runners. - act: Reconcile to remove a runner. - assert: One runner should be removed. - """ - - def mock_get_runners(): - """Create three mock runners. - - Returns: - Three mock runners. - """ - runners = [] - for _ in range(3): - # 0 is a mock runner id. 
- status = RunnerStatus(0, True, True, False) - runners.append(Runner(MagicMock(), MagicMock(), status, None)) - return runners - - # Create online runners. - runner_manager._get_runners = mock_get_runners - runner_manager._get_runner_health_states = lambda: RunnerNameByHealth( - ( - f"{runner_manager.instance_name}-0", - f"{runner_manager.instance_name}-1", - f"{runner_manager.instance_name}-2", - ), - (), - ) - - delta = runner_manager.reconcile(2, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert delta == -1 - - -def test_reconcile(runner_manager: LXDRunnerManager, tmp_path: Path): - """ - arrange: Setup one runner. - act: Reconcile with the current amount of runner. - assert: Still have one runner. - """ - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - # Reconcile with no change to runner count. - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert len(runner_manager._get_runners()) == 1 - - -def test_empty_flush(runner_manager: LXDRunnerManager): - """ - arrange: No initial runners. - act: Perform flushing with no runners. - assert: No error thrown. - """ - # Verifying the RunnerManager does not crash if flushing with no runners. - runner_manager.flush() - - -def test_flush(runner_manager: LXDRunnerManager, tmp_path: Path): - """ - arrange: Create some runners. - act: Perform flushing. - assert: No runners. - """ - # Create a runner. - runner_manager.reconcile(2, VirtualMachineResources(2, "7GiB", "10Gib")) - - runner_manager.flush() - assert len(runner_manager._get_runners()) == 0 - - -def test_reconcile_issues_runner_installed_event( - runner_manager: LXDRunnerManager, - monkeypatch: MonkeyPatch, - issue_event_mock: MagicMock, - charm_state: MagicMock, -): - """ - arrange: Enable issuing of metrics and mock timestamps. - act: Reconcile to create a runner. - assert: The expected event is issued. 
- """ - charm_state.is_metrics_logging_available = True - t_mock = MagicMock(return_value=12345) - monkeypatch.setattr(RUNNER_MANAGER_TIME_MODULE, t_mock) - - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - issue_event_mock.assert_has_calls( - [call(event=RunnerInstalled(timestamp=12345, flavor=runner_manager.app_name, duration=0))] - ) - - -def test_reconcile_issues_no_runner_installed_event_if_metrics_disabled( - runner_manager: LXDRunnerManager, issue_event_mock: MagicMock, charm_state: MagicMock -): - """ - arrange: Disable issuing of metrics. - act: Reconcile to create a runner. - assert: The expected event is not issued. - """ - charm_state.is_metrics_logging_available = False - - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - issue_event_mock.assert_not_called() - - -def test_reconcile_error_on_issue_event_is_ignored( - runner_manager: LXDRunnerManager, - issue_event_mock: MagicMock, - charm_state: MagicMock, -): - """ - arrange: Enable issuing of metrics and mock the metric issuing to raise an expected error. - act: Reconcile. - assert: No error is raised. - """ - charm_state.is_metrics_logging_available = True - - issue_event_mock.side_effect = IssueMetricEventError("test error") - - delta = runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert delta == 1 - - -def test_reconcile_issues_reconciliation_metric_event( - runner_manager: LXDRunnerManager, - monkeypatch: MonkeyPatch, - issue_event_mock: MagicMock, - runner_metrics: MagicMock, - charm_state: MagicMock, -): - """ - arrange: \ - - Enable issuing of metrics \ - - Mock timestamps \ - - Mock the result of runner_metrics.issue_event to contain 2 RunnerStart and 1 RunnerStop \ - events, meaning one runner was active and one crashed. \ - - Create two online runners , one active and one idle. - act: Reconcile. - assert: The expected event is issued. We expect two idle runners and one crashed runner - to be reported. 
- """ - charm_state.is_metrics_logging_available = True - t_mock = MagicMock(return_value=12345) - monkeypatch.setattr(RUNNER_MANAGER_TIME_MODULE, t_mock) - runner_metrics.extract.return_value = (MagicMock() for _ in range(2)) - runner_metrics.issue_events.side_effect = [{RunnerStart, RunnerStop}, {RunnerStart}] - - online_idle_runner_name = f"{runner_manager.instance_name}-0" - offline_idle_runner_name = f"{runner_manager.instance_name}-1" - active_runner_name = f"{runner_manager.instance_name}-2" - - def mock_get_runners(): - """Create three mock runners where one is busy. - - Returns: - Mock runners with one busy runner. - """ - runners = [] - - online_idle_runner = RunnerStatus(runner_id=0, exist=True, online=True, busy=False) - offline_idle_runner = RunnerStatus(runner_id=1, exist=True, online=False, busy=False) - active_runner = RunnerStatus(runner_id=2, exist=True, online=True, busy=True) - - for runner_status, runner_config in zip( - (online_idle_runner, offline_idle_runner, active_runner), - (online_idle_runner_name, offline_idle_runner_name, active_runner_name), - ): - config = MagicMock() - config.name = runner_config - runners.append( - Runner( - clients=MagicMock(), - runner_config=config, - runner_status=runner_status, - instance=None, - ) - ) - - return runners - - # Create online runners. 
- runner_manager._get_runners = mock_get_runners - runner_manager._get_runner_health_states = lambda: RunnerNameByHealth( - healthy=( - online_idle_runner_name, - offline_idle_runner_name, - active_runner_name, - ), - unhealthy=(), - ) - - quantity = random.randint(0, 5) - runner_manager.reconcile( - quantity=quantity, resources=VirtualMachineResources(2, "7GiB", "10Gib") - ) - - issue_event_mock.assert_any_call( - event=Reconciliation( - timestamp=12345, - flavor=runner_manager.app_name, - crashed_runners=1, - idle_runners=2, - active_runners=1, - expected_runners=quantity, - duration=0, - ) - ) - - -def test_reconcile_places_timestamp_in_newly_created_runner( - runner_manager: LXDRunnerManager, - monkeypatch: MonkeyPatch, - shared_fs: MagicMock, - tmp_path: Path, - charm_state: MagicMock, -): - """ - arrange: Enable issuing of metrics, mock timestamps and create the directory for the shared\ - filesystem. - act: Reconcile to create a runner. - assert: The expected timestamp is placed in the shared filesystem. - """ - charm_state.is_metrics_logging_available = True - t_mock = MagicMock(return_value=12345) - monkeypatch.setattr(RUNNER_MANAGER_TIME_MODULE, t_mock) - runner_shared_fs = tmp_path / "runner_fs" - runner_shared_fs.mkdir() - fs = MetricsStorage(path=runner_shared_fs, runner_name="test_runner") - shared_fs.get.return_value = fs - - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).exists() - assert (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).read_text() == "12345" - - -def test_reconcile_error_on_placing_timestamp_is_ignored( - runner_manager: LXDRunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock -): - """ - arrange: Enable issuing of metrics and do not create the directory for the shared filesystem\ - in order to let a FileNotFoundError to be raised inside the RunnerManager. - act: Reconcile to create a runner. - assert: No exception is raised. 
- """ - charm_state.is_metrics_logging_available = True - runner_shared_fs = tmp_path / "runner_fs" - fs = MetricsStorage(path=runner_shared_fs, runner_name="test_runner") - shared_fs.get.return_value = fs - - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert not (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).exists() - - -def test_reconcile_places_no_timestamp_in_newly_created_runner_if_metrics_disabled( - runner_manager: LXDRunnerManager, shared_fs: MagicMock, tmp_path: Path, charm_state: MagicMock -): - """ - arrange: Disable issuing of metrics, mock timestamps and the shared filesystem module. - act: Reconcile to create a runner. - assert: No timestamp is placed in the shared filesystem. - """ - charm_state.is_metrics_logging_available = False - - fs = MetricsStorage(path=tmp_path, runner_name="test_runner") - shared_fs.get.return_value = fs - - runner_manager.reconcile(1, VirtualMachineResources(2, "7GiB", "10Gib")) - - assert not (fs.path / RUNNER_INSTALLED_TS_FILE_NAME).exists() - - -def test_schedule_build_runner_image( - runner_manager: LXDRunnerManager, - tmp_path: Path, - charm_state: CharmState, - monkeypatch: MonkeyPatch, -): - """ - arrange: Mock the cron path and the randint function. - act: Schedule the build runner image. - assert: The cron file is created with the expected content. 
- """ - runner_manager.cron_path = tmp_path / "cron" - runner_manager.cron_path.mkdir() - monkeypatch.setattr(random, "randint", MagicMock(spec=random.randint, return_value=4)) - - runner_manager.schedule_build_runner_image() - - cronfile = runner_manager.cron_path / "build-runner-image" - http = charm_state.proxy_config.http or "''" - https = charm_state.proxy_config.https or "''" - no_proxy = charm_state.proxy_config.no_proxy or "''" - - cmd = f"/usr/bin/bash {BUILD_IMAGE_SCRIPT_FILENAME.absolute()} {http} {https} {no_proxy}" - - assert cronfile.exists() - assert cronfile.read_text() == f"4 4,10,16,22 * * * ubuntu {cmd} jammy\n" - - -def test_has_runner_image(runner_manager: LXDRunnerManager): - """ - arrange: Multiple setups. - 1. no runner image exists. - 2. runner image with wrong name exists. - 3. runner image with correct name exists. - act: Check if runner image exists. - assert: - 1 and 2. False is returned. - 3. True is returned. - """ - assert not runner_manager.has_runner_image() - - runner_manager._clients.lxd.images = MockLxdImageManager({"hirsute"}) - assert not runner_manager.has_runner_image() - - runner_manager._clients.lxd.images = MockLxdImageManager({IMAGE_NAME}) - assert runner_manager.has_runner_image() diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py deleted file mode 100644 index e6d57f305..000000000 --- a/tests/unit/test_runner.py +++ /dev/null @@ -1,565 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
- -"""Test cases of Runner class.""" - -import secrets -import unittest -from pathlib import Path -from unittest.mock import MagicMock, call - -import github_runner_manager.metrics.runner_logs -import jinja2 -import pytest -from _pytest.monkeypatch import MonkeyPatch -from github_runner_manager.metrics.storage import MetricsStorage -from github_runner_manager.types_.github import GitHubOrg, GitHubRepo - -from charm_state import SSHDebugConnection, VirtualMachineResources -from errors import ( - CreateMetricsStorageError, - LxdError, - RunnerCreateError, - RunnerLogsError, - RunnerRemoveError, -) -from lxd import LxdInstance, LxdInstanceFileManager -from runner import DIAG_DIR_PATH, CreateRunnerConfig, Runner, RunnerConfig, RunnerStatus -from runner_manager_type import RunnerManagerClients -from runner_type import ProxySetting -from tests.unit.factories import SSHDebugInfoFactory -from tests.unit.mock import ( - MockLxdClient, - MockRepoPolicyComplianceClient, - mock_lxd_error_func, - mock_runner_error_func, -) - -TEST_PROXY_SERVER_URL = "http://proxy.server:1234" - - -@pytest.fixture(scope="module", name="vm_resources") -def vm_resources_fixture(): - return VirtualMachineResources(2, "7Gib", "10Gib") - - -@pytest.fixture(scope="function", name="token") -def token_fixture(): - return secrets.token_hex() - - -@pytest.fixture(scope="function", name="binary_path") -def binary_path_fixture(tmp_path: Path): - return tmp_path / "test_binary" - - -@pytest.fixture(scope="module", name="instance", params=["Running", "Stopped", None]) -def instance_fixture(request): - if request.param[0] is None: - return None - - attrs = {"status": request.param[0], "execute.return_value": (0, "", "")} - instance = unittest.mock.MagicMock(**attrs) - return instance - - -@pytest.fixture(scope="function", name="lxd") -def mock_lxd_client_fixture(): - return MockLxdClient() - - -@pytest.fixture(autouse=True, scope="function", name="shared_fs") -def shared_fs_fixture(monkeypatch: MonkeyPatch) -> 
MagicMock: - """Mock the module for handling the Shared Filesystem.""" - mock = MagicMock() - monkeypatch.setattr("runner.shared_fs", mock) - return mock - - -@pytest.fixture(autouse=True, scope="function", name="exc_cmd_mock") -def exc_command_fixture(monkeypatch: MonkeyPatch) -> MagicMock: - """Mock the execution of a command.""" - exc_cmd_mock = MagicMock() - monkeypatch.setattr("runner.execute_command", exc_cmd_mock) - return exc_cmd_mock - - -@pytest.fixture(name="log_dir_base_path") -def log_dir_base_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: - """Mock the create_logs_dir function and return the base path of the log directory.""" - log_dir_base_path = tmp_path / "log_dir" - - def create_logs_dir(runner_name: str) -> Path: - """Create the directory to store the logs of the crashed runners. - - Args: - runner_name: The name of the runner. - - Returns: - The path to the directory where the logs of the crashed runners will be stored. - """ - target_log_path = log_dir_base_path / runner_name - target_log_path.mkdir(parents=True, exist_ok=True) - - return target_log_path - - create_logs_dir_mock = MagicMock( - spec=github_runner_manager.metrics.runner_logs.create_logs_dir - ) - create_logs_dir_mock.side_effect = create_logs_dir - monkeypatch.setattr("runner.create_logs_dir", create_logs_dir_mock) - - return log_dir_base_path - - -@pytest.fixture(scope="function", name="jinja") -def jinja2_environment_fixture() -> MagicMock: - """Mock the jinja2 environment. - - Provides distinct mocks for each template. 
- """ - jinja2_mock = MagicMock(spec=jinja2.Environment) - template_mocks = { - "start.j2": MagicMock(), - "pre-job.j2": MagicMock(), - "env.j2": MagicMock(), - "environment.j2": MagicMock(), - "systemd-docker-proxy.j2": MagicMock(), - } - jinja2_mock.get_template.side_effect = lambda x: template_mocks.get(x, MagicMock()) - return jinja2_mock - - -@pytest.fixture(scope="function", name="ssh_debug_connections") -def ssh_debug_connections_fixture() -> list[SSHDebugConnection]: - """A list of randomly generated ssh_debug_connections.""" - return SSHDebugInfoFactory.create_batch(size=100) - - -@pytest.fixture( - scope="function", - name="runner", - params=[ - ( - GitHubOrg("test_org", "test_group"), - ProxySetting(no_proxy=None, http=None, https=None, aproxy_address=None), - ), - ( - GitHubRepo("test_owner", "test_repo"), - ProxySetting( - no_proxy="test_no_proxy", - http=TEST_PROXY_SERVER_URL, - https=TEST_PROXY_SERVER_URL, - aproxy_address=None, - ), - ), - ], -) -def runner_fixture( - request, - lxd: MockLxdClient, - jinja: MagicMock, - tmp_path: Path, - ssh_debug_connections: list[SSHDebugConnection], -): - client = RunnerManagerClients( - MagicMock(), - jinja, - lxd, - MockRepoPolicyComplianceClient(), - ) - pool_path = tmp_path / "test_storage" - pool_path.mkdir(exist_ok=True) - config = RunnerConfig( - name="test_runner", - app_name="test_app", - path=request.param[0], - proxies=request.param[1], - lxd_storage_path=pool_path, - labels=("test", "label"), - dockerhub_mirror=None, - issue_metrics=False, - ssh_debug_connections=ssh_debug_connections, - ) - lxd_instance_mock = MagicMock(spec=LxdInstance) - lxd_instance_mock.files = MagicMock(LxdInstanceFileManager) - status = RunnerStatus() - return Runner(client, config, status, lxd_instance_mock) - - -def test_create( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, -): - """ - arrange: Nothing. - act: Create a runner. 
- assert: An lxd instance for the runner is created. - """ - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - - instances = lxd.instances.all() - assert len(instances) == 1 - - if runner.config.proxies: - instance = instances[0] - env_proxy = instance.files.read_file("/home/ubuntu/github-runner/.env") - systemd_docker_proxy = instance.files.read_file( - "/etc/systemd/system/docker.service.d/http-proxy.conf" - ) - # Test the file has being written to. This value does not contain the string as the - # jinja2.environment.Environment is mocked with MagicMock. - assert env_proxy is not None - assert systemd_docker_proxy is not None - - -def test_create_lxd_fail( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, - monkeypatch: pytest.MonkeyPatch, -): - """ - arrange: Setup the create runner to fail with lxd error. - act: Create a runner. - assert: Correct exception should be thrown. Any created instance should be - cleanup. - """ - monkeypatch.setattr(lxd.profiles, "exists", mock_lxd_error_func) - - with pytest.raises(RunnerCreateError): - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - - assert len(lxd.instances.all()) == 0 - - -def test_create_runner_fail( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, -): - """ - arrange: Setup the create runner to fail with runner error. - act: Create a runner. - assert: Correct exception should be thrown. Any created instance should be - cleanup. 
- """ - runner._clients.lxd.instances.create = mock_runner_error_func - - with pytest.raises(RunnerCreateError): - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - - -def test_create_with_metrics( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, - shared_fs: MagicMock, - exc_cmd_mock: MagicMock, - jinja: MagicMock, -): - """ - arrange: Config the runner to issue metrics and mock the shared filesystem. - act: Create a runner. - assert: The command for adding a device has been executed and the templates are - rendered to issue metrics. - """ - runner.config.issue_metrics = True - shared_fs.create.return_value = MetricsStorage( - path=Path("/home/ubuntu/shared_fs"), runner_name="test_runner" - ) - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - - exc_cmd_mock.assert_called_once_with( - [ - "sudo", - "lxc", - "config", - "device", - "add", - "test_runner", - "metrics", - "disk", - "source=/home/ubuntu/shared_fs", - "path=/metrics-exchange", - ], - check_exit=True, - ) - - jinja.get_template("start.j2").render.assert_called_once_with(issue_metrics=True) - jinja.get_template("pre-job.j2").render.assert_called_once() - assert "issue_metrics" in jinja.get_template("pre-job.j2").render.call_args[1] - - -def test_create_with_metrics_and_shared_fs_error( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, - shared_fs: MagicMock, -): - """ - arrange: Config the runner to issue metrics and mock the shared filesystem module\ - to throw an expected error. - act: Create a runner. - assert: The runner is created despite the error on the shared filesystem. 
- """ - runner.config.issue_metrics = True - shared_fs.create.side_effect = CreateMetricsStorageError("") - - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - - instances = lxd.instances.all() - assert len(instances) == 1 - - -def test_remove( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, -): - """ - arrange: Create a runner. - act: Remove the runner. - assert: The lxd instance for the runner is removed. - """ - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - runner.remove("test_token") - assert len(lxd.instances.all()) == 0 - - -def test_remove_failed_instance( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, -): - """ - arrange: Create a stopped runner that failed to remove itself. - act: Remove the runner. - assert: The lxd instance for the runner is removed. - """ - # Cases where the ephemeral instance encountered errors and the status was Stopped but not - # removed was found before. - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - runner.instance.status = "Stopped" - runner.remove("test_token") - assert len(lxd.instances.all()) == 0 - - -def test_remove_none( - runner: Runner, - token: str, - lxd: MockLxdClient, -): - """ - arrange: Not creating a runner. - act: Remove the runner. - assert: The lxd instance for the runner is removed. - """ - runner.remove(token) - assert len(lxd.instances.all()) == 0 - - -def test_remove_with_stop_error( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, -): - """ - arrange: Create a runner. 
Set up LXD stop fails with LxdError. - act: Remove the runner. - assert: RunnerRemoveError is raised. - """ - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - runner.instance.stop = mock_lxd_error_func - - with pytest.raises(RunnerRemoveError): - runner.remove("test_token") - - -def test_remove_with_delete_error( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, - lxd: MockLxdClient, -): - """ - arrange: Create a runner. Set up LXD delete fails with LxdError. - act: Remove the runner. - assert: RunnerRemoveError is raised. - """ - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - runner.instance.status = "Stopped" - runner.instance.delete = mock_lxd_error_func - - with pytest.raises(RunnerRemoveError): - runner.remove("test_token") - - -def test_random_ssh_connection_choice( - runner: Runner, - vm_resources: VirtualMachineResources, - token: str, - binary_path: Path, -): - """ - arrange: given a mock runner with random batch of ssh debug infos. - act: when runner.configure_runner is called. - assert: selected ssh_debug_info is random. - """ - runner.create( - config=CreateRunnerConfig( - image="test_image", - resources=vm_resources, - binary_path=binary_path, - registration_token=token, - ) - ) - runner._configure_runner() - first_call_args = runner._clients.jinja.get_template("env.j2").render.call_args.kwargs - runner._configure_runner() - second_call_args = runner._clients.jinja.get_template("env.j2").render.call_args.kwargs - - assert first_call_args["ssh_debug_info"] != second_call_args["ssh_debug_info"], ( - "Same ssh debug info found, this may have occurred with a very low probability. " - "Just try again." 
- ) - - -def test_pull_logs(runner: Runner, log_dir_base_path: Path): - """ - arrange: Mock the Runner instance and the base log directory path. - act: Get the logs of the runner. - assert: The expected log directory is created and logs are pulled. - """ - runner.config.name = "test-runner" - runner.instance.files.pull_file = MagicMock() - - runner.pull_logs() - - assert log_dir_base_path.exists() - - log_dir_path = log_dir_base_path / "test-runner" - log_dir_base_path.exists() - - runner.instance.files.pull_file.assert_has_calls( - [ - call(str(DIAG_DIR_PATH), str(log_dir_path), is_dir=True), - call(str(github_runner_manager.metrics.runner_logs.SYSLOG_PATH), str(log_dir_path)), - ] - ) - - -@pytest.mark.usefixtures("log_dir_base_path") -def test_pull_logs_no_instance(runner: Runner): - """ - arrange: Mock the Runner instance to be None. - act: Get the logs of the runner. - assert: A RunnerLogsError is raised. - """ - runner.config.name = "test-runner" - runner.instance = None - - with pytest.raises(RunnerLogsError) as exc_info: - runner.pull_logs() - - assert "Cannot pull the logs for test-runner as runner has no running instance." in str( - exc_info.value - ) - - -@pytest.mark.usefixtures("log_dir_base_path") -def test_pull_logs_lxd_error(runner: Runner): - """ - arrange: Mock the Runner instance to raise an LxdError. - act: Get the logs of the runner. - assert: A RunnerLogsError is raised. - """ - runner.config.name = "test-runner" - runner.instance.files.pull_file = MagicMock(side_effect=LxdError("Cannot pull file")) - - with pytest.raises(RunnerLogsError) as exc_info: - runner.pull_logs() - - assert "Cannot pull the logs for test-runner." in str(exc_info.value) - assert "Cannot pull file" in str(exc_info.value.__cause__) diff --git a/tests/unit/test_shared_fs.py b/tests/unit/test_shared_fs.py deleted file mode 100644 index 12dc11506..000000000 --- a/tests/unit/test_shared_fs.py +++ /dev/null @@ -1,334 +0,0 @@ -# Copyright 2024 Canonical Ltd. 
-# See LICENSE file for licensing details. -import secrets -import shutil -from pathlib import Path -from unittest.mock import MagicMock, Mock - -import pytest -from _pytest.monkeypatch import MonkeyPatch -from github_runner_manager.metrics.storage import MetricsStorage - -import shared_fs -from errors import ( - CreateMetricsStorageError, - DeleteMetricsStorageError, - GetMetricsStorageError, - SubprocessError, -) - -MOUNTPOINT_FAILURE_EXIT_CODE = 1 - - -@pytest.fixture(autouse=True, name="filesystem_paths") -def filesystem_paths_fixture(monkeypatch: MonkeyPatch, tmp_path: Path) -> dict[str, Path]: - """Mock the hardcoded filesystem paths.""" - fs_path = tmp_path / "runner-fs" - fs_images_path = tmp_path / "images" - monkeypatch.setattr("shared_fs.FILESYSTEM_IMAGES_PATH", fs_images_path) - return {"base": fs_path, "images": fs_images_path} - - -@pytest.fixture(autouse=True, name="metrics_storage_mock") -def metrics_storage_fixture( - monkeypatch: MonkeyPatch, filesystem_paths: dict[str, Path] -) -> MagicMock: - """Mock the metrics storage.""" - metrics_storage_mock = MagicMock() - storage_manager_cls_mock = MagicMock(return_value=metrics_storage_mock) - monkeypatch.setattr(shared_fs.metrics_storage, "StorageManager", storage_manager_cls_mock) - fs_base_path = filesystem_paths["base"] - fs_base_path.mkdir() - - def create(runner_name: str) -> MetricsStorage: - """Create metrics storage for the runner. - - Args: - runner_name: The name of the runner. - - Raises: - CreateMetricsStorageError: If the creation of the metrics storage fails. - - Returns: - The metrics storage. - """ - if (fs_base_path / runner_name).exists(): - raise CreateMetricsStorageError("Filesystem already exists") - (fs_base_path / runner_name).mkdir() - return MetricsStorage(fs_base_path, runner_name) - - def list_all(): - """List all shared filesystems. - - Returns: - A generator of metrics storage objects. 
- """ - return ( - MetricsStorage(runner_dir, str(runner_dir.name)) - for runner_dir in fs_base_path.iterdir() - ) - - def get(runner_name: str) -> MetricsStorage: - """Get the metrics storage for the runner. - - Args: - runner_name: The name of the runner. - - Raises: - GetMetricsStorageError: If the filesystem is not found. - - Returns: - The metrics storage. - """ - if not (fs_base_path / runner_name).exists(): - raise GetMetricsStorageError("Filesystem not found") - return MetricsStorage(fs_base_path / runner_name, runner_name) - - metrics_storage_mock.create.side_effect = create - metrics_storage_mock.get.side_effect = get - metrics_storage_mock.list_all.side_effect = list_all - metrics_storage_mock.delete.side_effect = lambda runner_name: shutil.rmtree( - fs_base_path / runner_name - ) - - return metrics_storage_mock - - -@pytest.fixture(autouse=True, name="exc_cmd_mock") -def exc_command_fixture(monkeypatch: MonkeyPatch) -> Mock: - """Mock the execution of a command.""" - exc_cmd_mock = Mock(return_value=("", 0)) - monkeypatch.setattr("shared_fs.execute_command", exc_cmd_mock) - return exc_cmd_mock - - -def exc_cmd_side_effect(*args, **_): - """Mock command to return NOT_A_MOUNTPOINT exit code. - - Args: - args: Positional argument placeholder. - - Returns: - Fake exc_cmd return values. - """ - if args[0][0] == "mountpoint": - return "", shared_fs.DIR_NO_MOUNTPOINT_EXIT_CODE - return "", 0 - - -def test_create_creates_directory(): - """ - arrange: Given a runner name and a path for the filesystems. - act: Call create. - assert: The shared filesystem path is created. - """ - runner_name = secrets.token_hex(16) - - fs = shared_fs.create(runner_name) - - assert fs.path.exists() - assert fs.path.is_dir() - - -def test_create_raises_exception(exc_cmd_mock: MagicMock): - """ - arrange: Given a runner name and a mocked execute_command which raises an expected exception. - act: Call create. - assert: The expected exception is raised. 
- """ - runner_name = secrets.token_hex(16) - exc_cmd_mock.side_effect = SubprocessError( - cmd=["mock"], return_code=1, stdout="mock stdout", stderr="mock stderr" - ) - - with pytest.raises(CreateMetricsStorageError): - shared_fs.create(runner_name) - - -def test_create_raises_exception_if_already_exists(): - """ - arrange: Given a runner name and an already existing shared filesystem. - act: Call create. - assert: The expected exception is raised. - """ - runner_name = secrets.token_hex(16) - shared_fs.create(runner_name) - - with pytest.raises(CreateMetricsStorageError): - shared_fs.create(runner_name) - - -def test_list_shared_filesystems(): - """ - arrange: Create shared filesystems for multiple runners. - act: Call list. - assert: A generator listing all the shared filesystems is returned. - """ - runner_names = [secrets.token_hex(16) for _ in range(3)] - for runner_name in runner_names: - shared_fs.create(runner_name) - - fs_list = list(shared_fs.list_all()) - - assert len(fs_list) == 3 - for fs in fs_list: - assert isinstance(fs, MetricsStorage) - assert fs.runner_name in runner_names - - -def test_list_shared_filesystems_empty(): - """ - arrange: Nothing. - act: Call list. - assert: An empty generator is returned. - """ - fs_list = list(shared_fs.list_all()) - - assert len(fs_list) == 0 - - -def test_list_shared_filesystems_ignore_unmounted_fs(exc_cmd_mock: MagicMock): - """ - arrange: Create shared filesystems for multiple runners and mock mountpoint cmd \ - to return NOT_A_MOUNTPOINT exit code for a dedicated runner. - act: Call list. - assert: A generator listing all the shared filesystems except the one of the dedicated runner - is returned. - """ - runner_names = [secrets.token_hex(16) for _ in range(3)] - for runner_name in runner_names: - shared_fs.create(runner_name) - - runner_with_mount_failure = runner_names[0] - - def exc_cmd_side_effect(*args, **_): - """Mock command to return NOT_A_MOUNTPOINT exit code. 
- - Args: - args: Positional argument placeholder. - - Returns: - Fake exc_cmd return values. - """ - if args[0][0] == "mountpoint" and runner_with_mount_failure in args[0][2]: - return "", MOUNTPOINT_FAILURE_EXIT_CODE - return "", 0 - - exc_cmd_mock.side_effect = exc_cmd_side_effect - - fs_list = list(shared_fs.list_all()) - - assert len(fs_list) == 2 - assert runner_with_mount_failure not in [fs.runner_name for fs in fs_list] - - -def test_delete_filesystem(): - """ - arrange: Create a shared filesystem for a runner. - act: Call delete - assert: The shared filesystem is deleted. - """ - runner_name = secrets.token_hex(16) - shared_fs.create(runner_name) - - shared_fs.delete(runner_name) - - with pytest.raises(GetMetricsStorageError): - shared_fs.get(runner_name) - - -def test_delete_raises_error(): - """ - arrange: Nothing. - act: Call delete. - assert: A DeleteMetricsStorageError is raised. - """ - runner_name = secrets.token_hex(16) - - with pytest.raises(DeleteMetricsStorageError): - shared_fs.delete(runner_name) - - -def test_delete_filesystem_ignores_unmounted_filesystem(exc_cmd_mock: MagicMock): - """ - arrange: Create a shared filesystem for a runner and mock mountpoint cmd \ - to return NOT_A_MOUNTPOINT exit code. - act: Call delete. - assert: The shared filesystem is deleted. - """ - runner_name = secrets.token_hex(16) - shared_fs.create(runner_name) - - exc_cmd_mock.side_effect = exc_cmd_side_effect - - shared_fs.delete(runner_name) - - with pytest.raises(GetMetricsStorageError): - shared_fs.get(runner_name) - - -def test_get_shared_filesystem(): - """ - arrange: Given a runner name. - act: Call create and get. - assert: A metrics storage object for this runner is returned. - """ - runner_name = secrets.token_hex(16) - - shared_fs.create(runner_name) - fs = shared_fs.get(runner_name) - - assert isinstance(fs, MetricsStorage) - assert fs.runner_name == runner_name - - -def test_get_raises_error_if_not_found(): - """ - arrange: Nothing. - act: Call get. 
- assert: A GetMetricsStorageError is raised. - """ - runner_name = secrets.token_hex(16) - - with pytest.raises(GetMetricsStorageError): - shared_fs.get(runner_name) - - -def test_get_mounts_if_unmounted(filesystem_paths: dict[str, Path], exc_cmd_mock: MagicMock): - """ - arrange: Given a runner name and a mock mountpoint cmd which returns NOT_A_MOUNTPOINT \ - exit code. - act: Call create and get. - assert: The shared filesystem is mounted. - """ - runner_name = secrets.token_hex(16) - shared_fs.create(runner_name) - - exc_cmd_mock.side_effect = exc_cmd_side_effect - - shared_fs.get(runner_name) - - exc_cmd_mock.assert_any_call( - [ - "sudo", - "mount", - "-o", - "loop", - str(shared_fs._get_runner_image_path(runner_name)), - str(filesystem_paths["base"] / runner_name), - ], - check_exit=True, - ) - - -def test_move_to_quarantine(metrics_storage_mock: MagicMock): - """ - arrange: Given a runner name. - act: Call move_to_quarantine. - assert: The method is called on the metrics storage manager. - """ - runner_name = secrets.token_hex(16) - - shared_fs.move_to_quarantine(runner_name) - - metrics_storage_mock.move_to_quarantine.assert_called_once_with(runner_name) diff --git a/tox.ini b/tox.ini index f58f1cb86..181e4073a 100644 --- a/tox.ini +++ b/tox.ini @@ -9,9 +9,8 @@ envlist = lint, unit, static, coverage-report [vars] src_path = {toxinidir}/src/ tst_path = {toxinidir}/tests/ -scripts_path = {toxinidir}/scripts/ github_runner_manager_path = {toxinidir}/github-runner-manager/ -all_path = {[vars]src_path} {[vars]tst_path} {[vars]scripts_path} +all_path = {[vars]src_path} {[vars]tst_path} [testenv] @@ -72,8 +71,8 @@ commands = isort --check-only --diff {[vars]all_path} black --check --diff {[vars]all_path} mypy {[vars]all_path} - pylint {[vars]src_path} {[vars]scripts_path} - pydocstyle {[vars]src_path} {[vars]scripts_path} + pylint {[vars]src_path} + pydocstyle {[vars]src_path} [testenv:unit] description = Run unit tests