From 6af36edc3d32e1de74ae968275dc78f6e375473c Mon Sep 17 00:00:00 2001
From: Gonzalo Reyero Ferreras <87083379+greyerof@users.noreply.github.com>
Date: Mon, 5 Feb 2024 17:00:45 +0100
Subject: [PATCH] Github CI: Step to use /mnt for docker storage. (#1857)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Step to use /mnt for docker storage.

The job "smoke-tests-container" of the "Test Incoming Changes" workflow
was running out of disk space again. Debugging with tmate I saw that
there's another 75GB disk mounted (sdb) in /mnt that's not being used.

runner@fv-az975-229:~/work/cnf-certification-test/cnf-certification-test$ df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/root        73G   73G  463M 100% /
tmpfs           7.9G  172K  7.9G   1% /dev/shm
tmpfs           3.2G  1.6M  3.2G   1% /run
tmpfs           5.0M     0  5.0M   0% /run/lock
/dev/sda15      105M  6.1M   99M   6% /boot/efi
/dev/sdb1        74G  4.1G   66G   6% /mnt
tmpfs           1.6G   12K  1.6G   1% /run/user/1001

runner@fv-az975-229:~/work/cnf-certification-test/cnf-certification-test$ lsblk
NAME    MAJ:MIN RM   SIZE RO TYPE MOUNTPOINTS
loop0     7:0    0 111.9M  1 loop /snap/lxd/24322
loop1     7:1    0  63.9M  1 loop /snap/core20/2105
loop2     7:2    0  40.4M  1 loop /snap/snapd/20671
sda       8:0    0    75G  0 disk
├─sda1    8:1    0  74.9G  0 part /
├─sda14   8:14   0     4M  0 part
└─sda15   8:15   0   106M  0 part /boot/efi
sdb       8:16   0    75G  0 disk
└─sdb1    8:17   0    75G  0 part /mnt

I've reconfigured the docker daemon to use a newly created folder
/mnt/docker-storage as storage folder, so all the images (including the
ones kind uses) should be stored there.

With this change, before the cnf test suite starts, this is the new disk usage: runner@fv-az570-650:~/work/cnf-certification-test/cnf-certification-test$ df -h Filesystem Size Used Avail Use% Mounted on /dev/root 73G 54G 20G 74% / tmpfs 7.9G 172K 7.9G 1% /dev/shm tmpfs 3.2G 1.6M 3.2G 1% /run tmpfs 5.0M 0 5.0M 0% /run/lock /dev/sda15 105M 6.1M 99M 6% /boot/efi /dev/sdb1 74G 23G 48G 32% /mnt tmpfs 1.6G 12K 1.6G 1% /run/user/1001 The kind cluster and its containers are now created in /mnt, so now there's 20G of free space left in /. Added the same workaround for the "smoke-tests-local" job. * Added same step to workflow for QE in ubuntu. --- .github/workflows/pre-main.yaml | 28 ++++++++++++++++++++++++++++ .github/workflows/qe-hosted.yml | 18 ++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-main.yaml b/.github/workflows/pre-main.yaml index 7f672d5ae..a9642fb6b 100644 --- a/.github/workflows/pre-main.yaml +++ b/.github/workflows/pre-main.yaml @@ -232,6 +232,20 @@ jobs: max_attempts: 3 command: cd ${GITHUB_WORKSPACE}/cnf-certification-test-partner && make bootstrap-cluster && make bootstrap-docker-ubuntu-local && make bootstrap-python-ubuntu-local && pip3 install pyyaml + # Restart docker using /mnt/docker-storage (sdb) instead of /var/lib/docker (sda). + # This step needs to be done right after the partner repo's bootstrap scripts, as they + # overwrite the docker's daemon.json. + - name: Make docker to use /mnt (sdb) for storage + run: | + df -h + lsblk + sudo mkdir /mnt/docker-storage + sudo jq '. 
+={"data-root" : "/mnt/docker-storage"}' < /etc/docker/daemon.json > /tmp/docker-daemon.json + sudo cp /tmp/docker-daemon.json /etc/docker/daemon.json + cat /etc/docker/daemon.json + sudo systemctl restart docker + sudo ls -la /mnt/docker-storage + - name: Run 'make rebuild-cluster' uses: nick-fields/retry@v3 with: @@ -327,6 +341,20 @@ jobs: max_attempts: 3 command: cd ${GITHUB_WORKSPACE}/cnf-certification-test-partner && make bootstrap-cluster && make bootstrap-docker-ubuntu-local && make bootstrap-python-ubuntu-local && pip3 install pyyaml + # Restart docker using /mnt/docker-storage (sdb) instead of /var/lib/docker (sda). + # This step needs to be done right after the partner repo's bootstrap scripts, as they + # overwrite the docker's daemon.json. + - name: Make docker to use /mnt (sdb) for storage + run: | + df -h + lsblk + sudo mkdir /mnt/docker-storage + sudo jq '. +={"data-root" : "/mnt/docker-storage"}' < /etc/docker/daemon.json > /tmp/docker-daemon.json + sudo cp /tmp/docker-daemon.json /etc/docker/daemon.json + cat /etc/docker/daemon.json + sudo systemctl restart docker + sudo ls -la /mnt/docker-storage + - name: Run 'make rebuild-cluster' uses: nick-fields/retry@v3 with: diff --git a/.github/workflows/qe-hosted.yml b/.github/workflows/qe-hosted.yml index eb845dcb9..fa22ceee8 100644 --- a/.github/workflows/qe-hosted.yml +++ b/.github/workflows/qe-hosted.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-22.04 strategy: fail-fast: false - matrix: + matrix: suite: [accesscontrol, affiliatedcertification, manageability, networking, lifecycle, performance, platformalteration, observability, operator] env: SHELL: /bin/bash @@ -64,6 +64,20 @@ jobs: max_attempts: 3 command: cd ${GITHUB_WORKSPACE}/cnf-certification-test-partner; make bootstrap-cluster; make make bootstrap-docker-ubuntu-local; make bootstrap-python-ubuntu-local; pip3 install pyyaml + # Restart docker using /mnt/docker-storage (sdb) instead of /var/lib/docker (sda). 
+ # This step needs to be done right after the partner repo's bootstrap scripts, as they + # overwrite the docker's daemon.json. + - name: Make docker to use /mnt (sdb) for storage + run: | + df -h + lsblk + sudo mkdir /mnt/docker-storage + sudo jq '. +={"data-root" : "/mnt/docker-storage"}' < /etc/docker/daemon.json > /tmp/docker-daemon.json + sudo cp /tmp/docker-daemon.json /etc/docker/daemon.json + cat /etc/docker/daemon.json + sudo systemctl restart docker + sudo ls -la /mnt/docker-storage + - name: Run 'make rebuild-cluster' uses: nick-fields/retry@v3 with: @@ -73,7 +87,7 @@ jobs: - name: Install partner resources uses: nick-fields/retry@v3 - with: + with: timeout_minutes: 90 max_attempts: 3 command: cd ${GITHUB_WORKSPACE}/cnf-certification-test-partner; make install-for-qe