From 993808bf1863c37a75771dcdbdc131953409992c Mon Sep 17 00:00:00 2001 From: mloubout Date: Mon, 3 Feb 2025 22:00:55 -0500 Subject: [PATCH 1/2] arch: switch amd openmp offload to native offload arch --- devito/arch/compiler.py | 2 +- tests/test_gpu_common.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 58ee41c437..f509f0b580 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -563,7 +563,7 @@ def __init_finalize__(self, **kwargs): if language in ['C', 'openmp']: self.ldflags += ['-target', 'x86_64-pc-linux-gnu'] self.ldflags += ['-fopenmp'] - self.ldflags += ['--offload-arch=%s' % platform.march] + self.ldflags += ['--offload-arch=native'] elif platform in [POWER8, POWER9]: # It doesn't make much sense to use AOMP on Power, but it should work self.cflags.append('-mcpu=native') diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py index a22ab93df4..9a6c2d16cf 100644 --- a/tests/test_gpu_common.py +++ b/tests/test_gpu_common.py @@ -643,6 +643,7 @@ def test_streaming_multi_input(self, opt, ntmps): assert np.all(grad.data == grad1.data) + @switchconfig(safe_math=True) def test_streaming_multi_input_conddim_foward(self): nt = 10 grid = Grid(shape=(4, 4)) @@ -671,6 +672,7 @@ def test_streaming_multi_input_conddim_foward(self): assert np.all(v.data == v1.data) + @switchconfig(safe_math=True) def test_streaming_multi_input_conddim_backward(self): nt = 10 grid = Grid(shape=(4, 4)) From 3bf89eb1daf4c448dc84f02a91a44223aff389af Mon Sep 17 00:00:00 2001 From: mloubout Date: Mon, 3 Feb 2025 22:14:05 -0500 Subject: [PATCH 2/2] tests: switch sensitive amd gpu tests to safe math --- .github/workflows/docker-bases.yml | 6 ++-- devito/arch/compiler.py | 2 +- docker/Dockerfile.amd | 4 +++ docker/Dockerfile.devito | 48 ++++++++++++++++-------------- tests/test_gpu_common.py | 2 -- 5 files changed, 35 insertions(+), 27 deletions(-) diff --git a/.github/workflows/docker-bases.yml 
b/.github/workflows/docker-bases.yml index 31627c0fe1..c0e0360f89 100644 --- a/.github/workflows/docker-bases.yml +++ b/.github/workflows/docker-bases.yml @@ -222,6 +222,10 @@ jobs: file: './docker/Dockerfile.amd' push: true target: 'amdclang' + build-args: | + ROCM_VERSION=5.5.1 + UCX_BRANCH=v1.13.1 + OMPI_BRANCH=v4.1.4 tags: devitocodes/bases:amd - name: AMD HIP image @@ -231,6 +235,4 @@ jobs: file: './docker/Dockerfile.amd' push: true target: 'hip' - build-args: | - arch=hip tags: devitocodes/bases:amd-hip diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index f509f0b580..58ee41c437 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -563,7 +563,7 @@ def __init_finalize__(self, **kwargs): if language in ['C', 'openmp']: self.ldflags += ['-target', 'x86_64-pc-linux-gnu'] self.ldflags += ['-fopenmp'] - self.ldflags += ['--offload-arch=native'] + self.ldflags += ['--offload-arch=%s' % platform.march] elif platform in [POWER8, POWER9]: # It doesn't make much sense to use AOMP on Power, but it should work self.cflags.append('-mcpu=native') diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index 1cf4183bdc..31aff9ff14 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -45,6 +45,10 @@ ENV ROCM_HOME=/opt/rocm \ UCX_HOME=/opt/ucx \ OMPI_HOME=/opt/ompi +# Adding ROCM +ENV PATH=$ROCM_HOME/bin:$PATH \ + LD_LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib/llvm/lib:$LD_LIBRARY_PATH + # Until rocm base has it fixed RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch | echo "offload-arch already exis" diff --git a/docker/Dockerfile.devito b/docker/Dockerfile.devito index 99b21c87fb..e5a6932ccb 100644 --- a/docker/Dockerfile.devito +++ b/docker/Dockerfile.devito @@ -12,42 +12,46 @@ ARG USER_ID=1000 ARG GROUP_ID=1000 ################## Install devito ############################################ -# Copy Devito -ADD . 
/app/devito -# Update if outdated -RUN apt-get update +# Update if outdated and install extras +RUN apt-get update && \ + apt-get install -y git cmake libncurses5-dev libncursesw5-dev libdrm-dev libsystemd-dev -# Remove git files -RUN rm -rf /app/devito/.git +# Useful utilities +# Nvtop +RUN git clone https://github.com/Syllo/nvtop.git /app/nvtop && \ + mkdir -p /app/nvtop/build && cd /app/nvtop/build && \ + cmake .. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON && \ + make && make install -# Install pip dependencies and devito as a pip package +# Install pip dependencies RUN python3 -m venv /venv && \ /venv/bin/pip install --no-cache-dir --upgrade pip && \ /venv/bin/pip install --no-cache-dir jupyter && \ /venv/bin/pip install --no-cache-dir wheel && \ - eval "$MPI4PY_FLAGS /venv/bin/pip install --no-cache-dir -r /app/devito/requirements-mpi.txt" && \ - /venv/bin/pip install --no-cache-dir -e /app/devito[extras,tests] && \ - rm -rf ~/.cache/pip - -# Usefull utilities -# Nvtop -RUN apt-get install -y git cmake libncurses5-dev libncursesw5-dev libdrm-dev libsystemd-dev cmake && \ - git clone https://github.com/Syllo/nvtop.git /app/nvtop && \ - mkdir -p /app/nvtop/build && cd /app/nvtop/build && \ - cmake .. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON && \ - make && \ - make install && \ ln -fs /app/nvtop/build/src/nvtop /venv/bin/nvtop -# Safety cleanup -RUN apt-get clean && apt-get autoclean && apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* +# Copy Devito +ADD . /app/devito + +# Remove git files +RUN rm -rf /app/devito/.git + +# Mpi4py +RUN eval "$MPI4PY_FLAGS /venv/bin/pip install --no-cache-dir --verbose -r /app/devito/requirements-mpi.txt" + +# Devito +RUN /venv/bin/pip install --no-cache-dir -e /app/devito[extras,tests] && rm -rf ~/.cache/pip FROM $base as user # COPY is much faster than RUN chown by order of magnitude so we have a final step that # just copies the built image into the user. 
+# Last installs (such as gdb needed in user mode) and cleanup +RUN apt-get update && apt install gdb -y && \ + apt-get clean && apt-get autoclean && apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + # User/Group Ids ARG USER_ID=1000 ARG GROUP_ID=1000 diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py index 9a6c2d16cf..a22ab93df4 100644 --- a/tests/test_gpu_common.py +++ b/tests/test_gpu_common.py @@ -643,7 +643,6 @@ def test_streaming_multi_input(self, opt, ntmps): assert np.all(grad.data == grad1.data) - @switchconfig(safe_math=True) def test_streaming_multi_input_conddim_foward(self): nt = 10 grid = Grid(shape=(4, 4)) @@ -672,7 +671,6 @@ def test_streaming_multi_input_conddim_foward(self): assert np.all(v.data == v1.data) - @switchconfig(safe_math=True) def test_streaming_multi_input_conddim_backward(self): nt = 10 grid = Grid(shape=(4, 4))