Skip to content

Commit

Permalink
GPU version of interface-discontinuity-based wavefield injection, wit…
Browse files Browse the repository at this point in the history
…h script to run the small example updated
  • Loading branch information
Tianshi Liu committed Dec 27, 2024
1 parent 0fb684c commit 3c62f5f
Show file tree
Hide file tree
Showing 19 changed files with 359 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module load intel/2020u4 intelmpi/2020u4

## set number of processors
NPROC=4
GPU_MODE=false # set to true if using GPU

currentdir=`pwd`

Expand Down Expand Up @@ -95,10 +96,19 @@ mpirun -np $NPROC ./bin/xgenerate_databases
# DATABASES_MPI/proc*_wavefield_discontinuity.bin files
mpirun -np $NPROC ../fk_coupling/compute_fk_injection_field

echo
echo " launch solver..."
echo
mpirun -np $NPROC ./bin/xspecfem3D
if ${GPU_MODE}; then
sed -i "/^GPU_MODE/c\GPU_MODE = .true." DATA/Par_file
echo
echo " launch solver on GPU..."
echo
CUDA_VISIBLE_DEVICES=0 mpirun -np $NPROC ./bin/xspecfem3D
else
sed -i "/^GPU_MODE/c\GPU_MODE = .false." DATA/Par_file
echo
echo " launch solver on CPU ..."
echo
mpirun -np $NPROC ./bin/xspecfem3D
fi

# compute reference seismograms using FK
../fk_coupling/compute_fk_receiver
Expand Down
16 changes: 16 additions & 0 deletions src/gpu/compute_forces_viscoelastic_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
FORWARD_OR_ADJOINT);
Expand All @@ -1013,6 +1017,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
FORWARD_OR_ADJOINT);
Expand All @@ -1039,6 +1047,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
3); // 3 == backward
Expand All @@ -1063,6 +1075,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
3); // 3 == backward
Expand Down
29 changes: 29 additions & 0 deletions src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,28 @@ __device__ __forceinline__ void load_shared_memory_displ_visco(const int* tx, c

/* ----------------------------------------------------------------------------------------------- */

// add displacement discontinuity in case of wavefield discontinuity

__device__ __forceinline__ void add_displacement_discontinuity_element(
const int* tx, const int* working_element,
realw_const_p d_displ_wd,
const int* ispec_to_elem_wd,
const int* ibool_wd,
realw* sh_displx,
realw* sh_disply,
realw* sh_displz) {
int ispec_wd = ispec_to_elem_wd[(*working_element)]-1;
if (ispec_wd >= 0) {
int offset = ispec_wd * NGLL3_PADDED + (*tx);
int iglob_wd = ibool_wd[offset]-1;
sh_displx[(*tx)] = sh_displx[(*tx)] + d_displ_wd[iglob_wd*3];
sh_disply[(*tx)] = sh_disply[(*tx)] + d_displ_wd[iglob_wd*3 + 1];
sh_displz[(*tx)] = sh_displz[(*tx)] + d_displ_wd[iglob_wd*3 + 2];
}
}

/* ----------------------------------------------------------------------------------------------- */

// loads hprime into shared memory for element

__device__ __forceinline__ void load_shared_memory_hprime(const int* tx,
Expand Down Expand Up @@ -738,6 +760,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
realw_const_p d_kappav,
realw_const_p d_muv,
const int is_wavefield_discontinuity,
realw_const_p d_displ_wd,
const int* d_ispec_to_elem_wd,
const int* d_ibool_wd,
const int pml_conditions,
const int* d_is_CPML,
const int FORWARD_OR_ADJOINT){
Expand Down Expand Up @@ -849,6 +875,9 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
load_shared_memory_displ<3>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
}else{
load_shared_memory_displ<1>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
if (is_wavefield_discontinuity) {
add_displacement_discontinuity_element(&tx,&working_element,d_displ_wd,d_ispec_to_elem_wd,d_ibool_wd,sh_tempx,sh_tempy,sh_tempz);
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/gpu/kernels/kernel_cuda.mk
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cuda_kernels_OBJS := \
$O/transfer_surface_to_host_kernel.cuda-kernel.o \
$O/UpdateDispVeloc_kernel.cuda-kernel.o \
$O/UpdatePotential_kernel.cuda-kernel.o \
$O/wavefield_discontinuity_kernel.cuda-kernel.o \
$(EMPTY_MACRO)


24 changes: 24 additions & 0 deletions src/gpu/kernels/kernel_proto.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
realw_const_p d_kappav,
realw_const_p d_muv,
const int is_wavefield_discontinuity,
realw_const_p d_displ_wd,
const int* d_ispec_to_element_wd,
const int* d_ibool_wd,
const int pml_conditions,
const int* d_is_CPML,
const int FORWARD_OR_ADJOINT);
Expand Down Expand Up @@ -527,6 +531,26 @@ __global__ void compute_coupling_elastic_ac_kernel(field* potential_dot_dot_acou
int backward_simulation) ;


//
// src/gpu/kernels/wavefield_discontinuity_kernel.cu
//

__global__ void add_acceleration_discontinuity_kernel(
realw_const_p accel_wd,
realw_const_p mass_in_wd,
const int* boundary_to_iglob_wd,
const int size, realw* accel
);

__global__ void add_traction_discontinuity_kernel(
realw_const_p traction_wd,
const int* face_ispec_wd,
const int* face_ijk_wd,
realw_const_p face_jacobian2Dw_wd,
const int* d_ibool,
const int size, realw* accel);


//
// src/gpu/kernels/compute_coupling_ocean_cuda_kernel.cu
//
Expand Down
1 change: 1 addition & 0 deletions src/gpu/kernels/wavefield_discontinuity_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "wavefield_discontinuity_kernel.cu"
41 changes: 41 additions & 0 deletions src/gpu/kernels/wavefield_discontinuity_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
__global__ void add_acceleration_discontinuity_kernel(
realw_const_p accel_wd,
realw_const_p mass_in_wd,
const int* boundary_to_iglob_wd,
const int size, realw* accel
) {
int id = threadIdx.x + (blockIdx.x + blockIdx.y*gridDim.x)*blockDim.x;
int iglob = boundary_to_iglob_wd[id] - 1;
realw mass_in = mass_in_wd[id];
if (id < size) {
accel[iglob*3] = accel[iglob*3] - accel_wd[id*3] * mass_in;
accel[iglob*3 + 1] = accel[iglob*3 + 1] - accel_wd[id*3 + 1] * mass_in;
accel[iglob*3 + 2] = accel[iglob*3 + 2] - accel_wd[id*3 + 2] * mass_in;
}
}

__global__ void add_traction_discontinuity_kernel(
realw_const_p traction_wd,
const int* face_ispec_wd,
const int* face_ijk_wd,
realw_const_p face_jacobian2Dw_wd,
const int* d_ibool,
const int size, realw* accel) {
int igll = threadIdx.x;
int iface_wd = blockIdx.x + gridDim.x*blockIdx.y;
int i, j, k, ispec, iglob;
realw jacobianw;
if (iface_wd < size) {
ispec = face_ispec_wd[iface_wd] - 1;
i = face_ijk_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)]-1;
j = face_ijk_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)]-1;
k = face_ijk_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)]-1;

iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;

jacobianw = face_jacobian2Dw_wd[INDEX2(NGLL2,igll,iface_wd)];
atomicAdd(&accel[iglob*3], traction_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)] * jacobianw);
atomicAdd(&accel[iglob*3+1], traction_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)] * jacobianw);
atomicAdd(&accel[iglob*3+2], traction_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)] * jacobianw);
}
}
14 changes: 14 additions & 0 deletions src/gpu/mesh_constants_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,20 @@ typedef struct mesh_ {
int use_Kelvin_Voigt_damping;
realw* d_Kelvin_Voigt_eta;

// prescribed wavefield discontinuity on an interface
int is_wavefield_discontinuity;
int* d_ispec_to_elem_wd;
int* d_ibool_wd;
int* d_boundary_to_iglob_wd;
realw* d_mass_in_wd;
int* d_face_ijk_wd;
int* d_face_ispec_wd;
realw* d_face_normal_wd;
realw* d_face_jacobian2Dw_wd;
realw* d_displ_wd;
realw* d_accel_wd;
realw* d_traction_wd;

// for option NB_RUNS_FOR_ACOUSTIC_GPU
int* run_number_of_the_source;

Expand Down
49 changes: 49 additions & 0 deletions src/gpu/prepare_mesh_constants_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ void FC_FUNC_(prepare_constants_device,
int* SAVE_SEISMOGRAMS_ACCELERATION,int* SAVE_SEISMOGRAMS_PRESSURE,
int* h_NB_RUNS_ACOUSTIC_GPU,
int* FAULT_SIMULATION,
int* IS_WAVEFIELD_DISCONTINUITY,
int* UNDO_ATTENUATION_AND_OR_PML,
int* PML_CONDITIONS) {

Expand Down Expand Up @@ -420,6 +421,9 @@ void FC_FUNC_(prepare_constants_device,
// Kelvin_voigt initialization
mp->use_Kelvin_Voigt_damping = 0;

// prescribed wavefield discontinuity
mp->is_wavefield_discontinuity = *IS_WAVEFIELD_DISCONTINUITY;

GPU_ERROR_CHECKING("prepare_constants_device");
}

Expand Down Expand Up @@ -1542,6 +1546,51 @@ void FC_FUNC_(prepare_fault_device,
}


/* ----------------------------------------------------------------------------------------------- */

// wavefield discontinuity

/* ----------------------------------------------------------------------------------------------- */

extern EXTERN_LANG
void FC_FUNC_(prepare_wavefield_discontinuity_device,
PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
long* Mesh_pointer,
int* ispec_to_elem_wd,
int* nglob_wd,
int* nspec_wd,
int* ibool_wd,
int* boundary_to_iglob_wd,
realw* mass_in_wd,
int* nfaces_wd,
int* face_ijk_wd,
int* face_ispec_wd,
realw* face_normal_wd,
realw* face_jacobian2Dw_wd) {
TRACE("prepare_wavefield_discontinuity_device");
Mesh* mp = (Mesh*)(*Mesh_pointer);

// arrays
gpuCreateCopy_todevice_int((void**)&mp->d_ispec_to_elem_wd, ispec_to_elem_wd, mp->NSPEC_AB);
gpuCreateCopy_todevice_int((void**)&mp->d_boundary_to_iglob_wd, boundary_to_iglob_wd, (*nglob_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_mass_in_wd, mass_in_wd, (*nglob_wd));
gpuCreateCopy_todevice_int((void**)&mp->d_face_ispec_wd, face_ispec_wd, (*nfaces_wd));
gpuCreateCopy_todevice_int((void**)&mp->d_face_ijk_wd, face_ijk_wd, 3*NGLL2*(*nfaces_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_face_normal_wd, face_normal_wd, NDIM*NGLL2*(*nfaces_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_face_jacobian2Dw_wd, face_jacobian2Dw_wd, NGLL2*(*nfaces_wd));
// global indexing for wavefield discontinuity points (padded)
int size_padded = NGLL3_PADDED * (*nspec_wd);
gpuMalloc_int((void**) &mp->d_ibool_wd, size_padded);
gpuMemcpy2D_todevice_int(mp->d_ibool_wd, NGLL3_PADDED, ibool_wd, NGLL3, NGLL3, (*nspec_wd));

// allocate discontinuity field
int size = NDIM * (*nglob_wd);
gpuMalloc_realw((void**)&(mp->d_displ_wd),size);
gpuMalloc_realw((void**)&(mp->d_accel_wd),size);
size = NDIM * NGLL2 * (*nfaces_wd);
gpuMalloc_realw((void**)&(mp->d_traction_wd),size);
}

/* ----------------------------------------------------------------------------------------------- */

// cleanup
Expand Down
1 change: 1 addition & 0 deletions src/gpu/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ gpu_specfem3D_OBJECTS = \
$O/transfer_fields_cuda.o \
$O/update_displacement_cuda.o \
$O/write_seismograms_cuda.o \
$O/wavefield_discontinuity_cuda.o \
$(EMPTY_MACRO)

gpu_specfem3D_STUBS = \
Expand Down
23 changes: 23 additions & 0 deletions src/gpu/transfer_fields_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,29 @@ void FC_FUNC_(transfer_pml_displ_from_device,

/* ----------------------------------------------------------------------------------------------- */

// prescribed wavefield discontinuity. this needs to be called in every time step

/* ----------------------------------------------------------------------------------------------- */

extern EXTERN_LANG
void FC_FUNC_(transfer_wavefield_discontinuity_to_device,
TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)(
int* size_point, int* size_face,
realw* displ_wd, realw* accel_wd,
realw* traction_wd, long* Mesh_pointer) {

TRACE("transfer_wavefield_discontinuity_to_device");

Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container

gpuMemcpy_todevice_realw(mp->d_displ_wd, displ_wd, (*size_point));
gpuMemcpy_todevice_realw(mp->d_accel_wd, accel_wd, (*size_point));
gpuMemcpy_todevice_realw(mp->d_traction_wd, traction_wd, (*size_face));

}

/* ----------------------------------------------------------------------------------------------- */

extern EXTERN_LANG
void FC_FUNC_(transfer_pml_displ_to_device,
TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) {
Expand Down
Loading

0 comments on commit 3c62f5f

Please sign in to comment.