From 72deb8c26ee901d2a56e8400b72c29a12f59866c Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Tue, 13 Apr 2021 15:52:42 +0300 Subject: [PATCH] updates compilation for cuda/hip code --- Makefile.in | 2 +- configure | 38 +++++++++---------- configure.ac | 4 +- src/gpu/kernels/compute_gradient_kernel.cpp | 30 --------------- ...nt_kernel.cu => compute_gradient_kernel.h} | 6 ++- .../compute_kernels_acoustic_kernel.cpp | 2 +- .../compute_kernels_acoustic_kernel.cu | 3 ++ .../compute_kernels_hess_ac_cudakernel.cpp | 2 +- .../compute_kernels_hess_ac_cudakernel.cu | 3 ++ src/gpu/kernels/kernel_cuda.mk | 1 - src/gpu/kernels/kernel_proto.cu.h | 5 --- src/gpu/prepare_constants_cuda.h | 18 ++++----- src/gpu/rules.mk | 2 +- .../create_specfem3D_gpu_cuda_method_stubs.pl | 2 +- 14 files changed, 46 insertions(+), 72 deletions(-) delete mode 100644 src/gpu/kernels/compute_gradient_kernel.cpp rename src/gpu/kernels/{compute_gradient_kernel.cu => compute_gradient_kernel.h} (95%) diff --git a/Makefile.in b/Makefile.in index 672517902..81336766c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -263,7 +263,7 @@ GENCODE_AMD_MI100 = --amdgpu-target=gfx908 @COND_HIP_TRUE@@COND_HIP_PLATFORM_AMD_TRUE@HIP_CFLAG_ENDING = -x hip # NVIDIA default Tesla @COND_HIP_TRUE@@COND_HIP_PLATFORM_NVIDIA_TRUE@GENCODE_HIP = $(GENCODE_30) -@COND_HIP_TRUE@@COND_HIP_PLATFORM_AMD_TRUE@HIP_CFLAG_ENDING = -x cu +@COND_HIP_TRUE@@COND_HIP_PLATFORM_NVIDIA_TRUE@HIP_CFLAG_ENDING = # no need for ending # specific targets @COND_HIP_TRUE@@COND_HIP_MI8_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI8) # --with-hip=MI8 .. diff --git a/configure b/configure index 6664369be..606100e0c 100755 --- a/configure +++ b/configure @@ -630,13 +630,13 @@ VTK_INCLUDES VTK_MAJOR OMP_LIB OMP_FCFLAGS -HIP_LIBS -HIP_LDFLAGS -HIP_CPPFLAGS COND_HIP_PLATFORM_NVIDIA_FALSE COND_HIP_PLATFORM_NVIDIA_TRUE COND_HIP_PLATFORM_AMD_FALSE COND_HIP_PLATFORM_AMD_TRUE +HIP_LIBS +HIP_LDFLAGS +HIP_CPPFLAGS HIPCONFIG_PROG HIPCC_PROG HIP_LIB @@ -8488,22 +8488,6 @@ $as_echo "$as_me: HIP_FLAGS: ${HIP_FLAGS}" >&6;} ;; GPU_PLATFORM=amd fi - if test x"$GPU_PLATFORM" = xamd; then - COND_HIP_PLATFORM_AMD_TRUE= - COND_HIP_PLATFORM_AMD_FALSE='#' -else - COND_HIP_PLATFORM_AMD_TRUE='#' - COND_HIP_PLATFORM_AMD_FALSE= -fi - - if test x"$GPU_PLATFORM" = xnvidia; then - COND_HIP_PLATFORM_NVIDIA_TRUE= - COND_HIP_PLATFORM_NVIDIA_FALSE='#' -else - COND_HIP_PLATFORM_NVIDIA_TRUE='#' - COND_HIP_PLATFORM_NVIDIA_FALSE= -fi - # for compilation errors like: # /usr/bin/ld: obj/assemble_MPI_scalar_gpu.hip.o: relocation R_X86_64_32 against `.rodata.str1.1' can not be used @@ -8699,6 +8683,22 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu fi + if test x"$GPU_PLATFORM" = xamd; then + COND_HIP_PLATFORM_AMD_TRUE= + COND_HIP_PLATFORM_AMD_FALSE='#' +else + COND_HIP_PLATFORM_AMD_TRUE='#' + COND_HIP_PLATFORM_AMD_FALSE= +fi + + if test x"$GPU_PLATFORM" = xnvidia; then + COND_HIP_PLATFORM_NVIDIA_TRUE= + COND_HIP_PLATFORM_NVIDIA_FALSE='#' +else + COND_HIP_PLATFORM_NVIDIA_TRUE='#' + COND_HIP_PLATFORM_NVIDIA_FALSE= +fi + ### diff --git a/configure.ac b/configure.ac index f123e240d..2855e0184 100644 --- a/configure.ac +++ b/configure.ac @@ -562,8 +562,6 @@ AS_IF([test x"$want_hip" != xno], [ AS_IF([test x"$GPU_PLATFORM" = x],[ GPU_PLATFORM=amd ]) - AM_CONDITIONAL([COND_HIP_PLATFORM_AMD], [test x"$GPU_PLATFORM" = xamd]) - AM_CONDITIONAL([COND_HIP_PLATFORM_NVIDIA], [test x"$GPU_PLATFORM" = xnvidia]) # for compilation errors like: # /usr/bin/ld: obj/assemble_MPI_scalar_gpu.hip.o: relocation R_X86_64_32 against `.rodata.str1.1' can not be used @@ -661,6 +659,8 @@ AS_IF([test x"$want_hip" != xno], [ AC_SUBST([HIP_LDFLAGS]) AC_SUBST([HIP_LIBS]) ]) +AM_CONDITIONAL([COND_HIP_PLATFORM_AMD], [test x"$GPU_PLATFORM" = xamd]) +AM_CONDITIONAL([COND_HIP_PLATFORM_NVIDIA], [test x"$GPU_PLATFORM" = xnvidia]) ### diff --git a/src/gpu/kernels/compute_gradient_kernel.cpp b/src/gpu/kernels/compute_gradient_kernel.cpp deleted file mode 100644 index 1e0d6e483..000000000 --- a/src/gpu/kernels/compute_gradient_kernel.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -!===================================================================== -! -! S p e c f e m 3 D V e r s i o n 3 . 0 -! --------------------------------------- -! -! Main historical authors: Dimitri Komatitsch and Jeroen Tromp -! CNRS, France -! and Princeton University, USA -! (there are currently many more authors!) -! (c) October 2017 -! -! This program is free software; you can redistribute it and/or modify -! it under the terms of the GNU General Public License as published by -! the Free Software Foundation; either version 3 of the License, or -! (at your option) any later version. -! -! This program is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -! GNU General Public License for more details. -! -! You should have received a copy of the GNU General Public License along -! with this program; if not, write to the Free Software Foundation, Inc., -! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -! -!===================================================================== -*/ - -//#include "compute_gradient_kernel.cu" diff --git a/src/gpu/kernels/compute_gradient_kernel.cu b/src/gpu/kernels/compute_gradient_kernel.h similarity index 95% rename from src/gpu/kernels/compute_gradient_kernel.cu rename to src/gpu/kernels/compute_gradient_kernel.h index 5f189f388..f7b92b07c 100644 --- a/src/gpu/kernels/compute_gradient_kernel.cu +++ b/src/gpu/kernels/compute_gradient_kernel.h @@ -27,9 +27,12 @@ !===================================================================== */ +#ifndef COMPUTE_GRADIENT_GPU_H +#define COMPUTE_GRADIENT_GPU_H + // needed in compute_kernels_acoustic_kernel.cu and compute_kernels_hess_ac_cudakernel.cu -__device__ void compute_gradient_kernel(int ijk, +__device__ __forceinline__ void compute_gradient_kernel(int ijk, int ispec,int ispec_irreg, field* scalar_field, field* vector_field_loc, @@ -111,3 +114,4 @@ __device__ void compute_gradient_kernel(int ijk, } +#endif // COMPUTE_GRADIENT_GPU_H diff --git a/src/gpu/kernels/compute_kernels_acoustic_kernel.cpp b/src/gpu/kernels/compute_kernels_acoustic_kernel.cpp index 00616e8ad..593e7dfd5 100644 --- a/src/gpu/kernels/compute_kernels_acoustic_kernel.cpp +++ b/src/gpu/kernels/compute_kernels_acoustic_kernel.cpp @@ -28,6 +28,6 @@ */ // includes device function compute_gradient_kernel() -#include "compute_gradient_kernel.cu" +#include "compute_gradient_kernel.h" #include "compute_kernels_acoustic_kernel.cu" diff --git a/src/gpu/kernels/compute_kernels_acoustic_kernel.cu b/src/gpu/kernels/compute_kernels_acoustic_kernel.cu index 11f8e87ec..07d19e612 100644 --- a/src/gpu/kernels/compute_kernels_acoustic_kernel.cu +++ b/src/gpu/kernels/compute_kernels_acoustic_kernel.cu @@ -27,6 +27,9 @@ !===================================================================== */ +// includes device function compute_gradient_kernel() +#include "compute_gradient_kernel.h" + __global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic, int* d_ibool, diff --git a/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cpp b/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cpp index 86e028b31..94f8c09a6 100644 --- a/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cpp +++ b/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cpp @@ -28,6 +28,6 @@ */ // includes device function compute_gradient_kernel() -#include "compute_gradient_kernel.cu" +#include "compute_gradient_kernel.h" #include "compute_kernels_hess_ac_cudakernel.cu" diff --git a/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu b/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu index 5e5e706dc..f89f82ca2 100644 --- a/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu +++ b/src/gpu/kernels/compute_kernels_hess_ac_cudakernel.cu @@ -27,6 +27,9 @@ !===================================================================== */ +// includes device function compute_gradient_kernel() +#include "compute_gradient_kernel.h" + __global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic, int* d_ibool, diff --git a/src/gpu/kernels/kernel_cuda.mk b/src/gpu/kernels/kernel_cuda.mk index c12f3891a..cf787091e 100644 --- a/src/gpu/kernels/kernel_cuda.mk +++ b/src/gpu/kernels/kernel_cuda.mk @@ -13,7 +13,6 @@ cuda_kernels_OBJS := \ $O/compute_dynamic_fault_cuda.cuda-kernel.o \ $O/compute_elastic_seismogram_kernel.cuda-kernel.o \ $O/compute_element_strain_cudakernel.cuda-kernel.o \ - $O/compute_gradient_kernel.cuda-kernel.o \ $O/compute_kernels_acoustic_kernel.cuda-kernel.o \ $O/compute_kernels_ani_cudakernel.cuda-kernel.o \ $O/compute_kernels_cudakernel.cuda-kernel.o \ diff --git a/src/gpu/kernels/kernel_proto.cu.h b/src/gpu/kernels/kernel_proto.cu.h index 7f2860348..598f60ede 100644 --- a/src/gpu/kernels/kernel_proto.cu.h +++ b/src/gpu/kernels/kernel_proto.cu.h @@ -603,11 +603,6 @@ __global__ void compute_element_strain_cudakernel(int* ispec_is_elastic, int NSPEC_AB) ; -// -// src/gpu/kernels/compute_gradient_kernel.cu -// - - // // src/gpu/kernels/compute_kernels_acoustic_kernel.cu // diff --git a/src/gpu/prepare_constants_cuda.h b/src/gpu/prepare_constants_cuda.h index a25cba8d7..f9e39bba1 100644 --- a/src/gpu/prepare_constants_cuda.h +++ b/src/gpu/prepare_constants_cuda.h @@ -110,15 +110,15 @@ __device__ __constant__ realw d_wgll_cube[NGLL3]; // needed only for gravity cas // If not added dummy kernel, optimizer will delete constant variables, // because they have not used in any of the kernel. __global__ void dummy_kernel(){ - d_hprime_xx[0]=1; -// d_hprime_yy[0]=1; -// d_hprime_zz[0]=1; - d_hprimewgll_xx[0]=1; -// d_hprimewgll_yy[0]=1; -// d_hprimewgll_zz[0]=1; - d_wgllwgll_xy[0]=1; - d_wgllwgll_xz[0]=1; - d_wgllwgll_yz[0]=1; + d_hprime_xx[0]=1; + //d_hprime_yy[0]=1; + //d_hprime_zz[0]=1; + d_hprimewgll_xx[0]=1; + //d_hprimewgll_yy[0]=1; + //d_hprimewgll_zz[0]=1; + d_wgllwgll_xy[0]=1; + d_wgllwgll_xz[0]=1; + d_wgllwgll_yz[0]=1; } #endif diff --git a/src/gpu/rules.mk b/src/gpu/rules.mk index cfdef3bf5..77f9c38d2 100644 --- a/src/gpu/rules.mk +++ b/src/gpu/rules.mk @@ -144,7 +144,7 @@ ifeq ($(CUDA),yes) endif ## HIP compilation -HIPCC_CFLAGS := ${HIP_CFLAGS} ${HIP_CFLAG_ENDING} # adding either -x hip or -x cu depending on platform +HIPCC_CFLAGS := ${HIP_CFLAGS} ${HIP_CFLAG_ENDING} # adding -x hip depending on platform ifeq ($(HIP), yes) BUILD_VERSION_TXT += HIP SELECTOR_CFLAG += $(FC_DEFINE)USE_HIP diff --git a/utils/create_specfem3D_gpu_cuda_method_stubs.pl b/utils/create_specfem3D_gpu_cuda_method_stubs.pl index 4f13df220..785e3747e 100755 --- a/utils/create_specfem3D_gpu_cuda_method_stubs.pl +++ b/utils/create_specfem3D_gpu_cuda_method_stubs.pl @@ -57,7 +57,7 @@ END $warning = <