Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ci] Run scheduler unittests with GitHub Actions #3268

Merged
merged 10 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions .github/pseudo-cluster/docker-compose.yml
teojgo marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Compose definition of the pseudo-cluster used to run the scheduler
# unit tests: a munge key service, a Slurm master, three compute nodes
# and a frontend container that is built from this repository.
services:
  # Reports healthy once /scratch/munge.key exists on the shared scratch
  # volume; every other service waits for that condition.
  munge-key-generator:
    image: ghcr.io/reframe-hpc/munge-ubuntu:20.04
    hostname: munge-host
    healthcheck:
      test: ["CMD-SHELL", "test -f /scratch/munge.key"]
      interval: 10s
      timeout: 10s
      retries: 5
    volumes:
      - shared-scratch:/scratch

  # Built from the repository root (context is two levels above this file).
  frontend:
    image: slurm-reframe
    container_name: frontend
    build:
      dockerfile: .github/pseudo-cluster/reframe/Dockerfile
      context: ../../
    hostname: login
    user: admin
    init: true
    volumes:
      - shared-home:/home/admin:rw
      - shared-scratch:/scratch:rw
    links:
      - slurm-master
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
      node0:
        condition: service_started
      node1:
        condition: service_started
      node2:
        condition: service_started
    environment:
      - SLURM_CPUS_ON_NODE=1
      # Taken from the invoking environment; falls back to squeue.
      - BACKEND=${BACKEND:-squeue}

  slurm-master:
    image: ghcr.io/reframe-hpc/slurm-master-ubuntu:20.04
    hostname: slurm-master
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    depends_on:
      munge-key-generator:
        condition: service_healthy
    environment:
      - SLURM_CPUS_ON_NODE=1

  # The three compute nodes differ only in hostname, container name and
  # SLURM_NODENAME.
  node0:
    image: ghcr.io/reframe-hpc/slurm-node-ubuntu:20.04
    hostname: nid00
    container_name: slurm-node0
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    environment:
      - SLURM_NODENAME=nid00
      - SLURM_CPUS_ON_NODE=1
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
    links:
      - slurm-master

  node1:
    image: ghcr.io/reframe-hpc/slurm-node-ubuntu:20.04
    hostname: nid01
    container_name: slurm-node1
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    environment:
      - SLURM_NODENAME=nid01
      - SLURM_CPUS_ON_NODE=1
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
    links:
      - slurm-master

  node2:
    image: ghcr.io/reframe-hpc/slurm-node-ubuntu:20.04
    hostname: nid02
    container_name: slurm-node2
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    environment:
      - SLURM_NODENAME=nid02
      - SLURM_CPUS_ON_NODE=1
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
    links:
      - slurm-master

# Named volumes mounted by the services above: the admin home directory
# and the /scratch area.
volumes:
  shared-home:
  shared-scratch:
42 changes: 42 additions & 0 deletions .github/pseudo-cluster/reframe/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Image of the frontend container: Slurm/Torque client tools, compilers,
# MPI and Python on Ubuntu 20.04, plus a copy of the repository that the
# entrypoint runs the unit tests from.
FROM ubuntu:20.04

ARG DEBIAN_FRONTEND=noninteractive

# Use apt-get rather than apt: the apt front end does not offer a stable
# command-line interface for scripted use and warns about it.
RUN apt-get update -y && \
    apt-get install -y \
        build-essential \
        clang \
        jq \
        libomp-dev \
        tree \
        vim \
        git \
        mariadb-client \
        munge \
        slurm-client \
        slurm-wlm-torque \
        sudo \
        python3 \
        python3-pip \
        wget \
        curl \
        mpich \
        libmpich-dev && \
    rm -rf /var/lib/apt/lists/*

# Unprivileged "admin" user with passwordless sudo; the entrypoint uses
# sudo to install the munge key and patch slurm.conf.
RUN useradd -m admin -s /usr/bin/bash -d /home/admin && \
    echo "admin:admin" | chpasswd && adduser admin sudo && \
    echo "admin ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Paths are relative to the build context (the repository root).
COPY .github/pseudo-cluster/reframe/slurm.conf \
     .github/pseudo-cluster/reframe/cgroup.conf \
     .github/pseudo-cluster/reframe/docker-entrypoint.sh \
     /etc/slurm-llnl/
COPY . /usr/local/share/reframe

RUN mkdir /scratch && \
    chown -R admin:admin /scratch && \
    chmod +rx /etc/slurm-llnl/docker-entrypoint.sh

WORKDIR /home/admin

# key=value form; the whitespace-separated "ENV key value" form is legacy.
ENV USER=admin \
    SHELL=bash

ENTRYPOINT ["/etc/slurm-llnl/docker-entrypoint.sh"]
1 change: 1 addition & 0 deletions .github/pseudo-cluster/reframe/cgroup.conf
24 changes: 24 additions & 0 deletions .github/pseudo-cluster/reframe/docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
#
# Container entrypoint: waits for the munge key, starts munge, patches the
# CPU count into slurm.conf, copies the repository into the current
# directory and runs the unit tests against the selected scheduler backend.
#
# Environment:
#   SLURM_CPUS_ON_NODE  value substituted for REPLACE_IT in slurm.conf
#   BACKEND             scheduler backend to test (defaults to squeue)

# Exit with status 0 on SIGINT. The action has to be a single quoted
# argument: an unquoted `trap exit 0 INT` registers the action `exit` for
# the signals 0 (EXIT) and INT instead of running `exit 0`.
trap 'exit 0' INT

# Block until the munge key shows up in /scratch.
while [ ! -f /scratch/munge.key ]; do
    sleep 1
done

sudo cp /scratch/munge.key /etc/munge/munge.key
sudo service munge start
sudo sed -i "s/REPLACE_IT/CPUs=${SLURM_CPUS_ON_NODE}/g" /etc/slurm-llnl/slurm.conf

# Needs to be copied in the shared home directory
cp -r /usr/local/share/reframe .
# Stop here if the copy is missing rather than running the remaining
# commands from the wrong directory.
cd reframe || exit 1
./bootstrap.sh

# Resolve the default once so the message and the command line agree.
backend=${BACKEND:-squeue}
echo "Running unittests with backend scheduler: ${backend}"

# Temporary files of the test run go under /scratch.
tempdir=$(mktemp -d -p /scratch)
TMPDIR="$tempdir" ./test_reframe.py -v \
    --rfm-user-config=ci-scripts/configs/ci-cluster.py \
    --rfm-user-system="pseudo-cluster:compute-${backend}"
1 change: 1 addition & 0 deletions .github/pseudo-cluster/reframe/slurm.conf
24 changes: 24 additions & 0 deletions .github/workflows/test-schedulers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Runs the unit tests once per scheduler backend inside the
# docker-compose pseudo-cluster.
name: ReFrame CI / Scheduler backend tests
on:
  pull_request: []

jobs:
  scheduler-test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        scheduler: ['pbs', 'squeue', 'torque']
    steps:
      - uses: actions/checkout@v4
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build Images
        run: |
          docker compose -f .github/pseudo-cluster/docker-compose.yml build
      - name: Run Unittests with ${{ matrix.scheduler }} scheduler
        # Passed through the environment instead of being interpolated
        # into the script text; docker compose reads BACKEND from there.
        env:
          BACKEND: ${{ matrix.scheduler }}
        run: |
          docker compose -f .github/pseudo-cluster/docker-compose.yml up \
            --abort-on-container-exit --exit-code-from frontend
169 changes: 0 additions & 169 deletions Jenkinsfile

This file was deleted.

Loading
Loading