Skip to content

Commit

Permalink
adds improved stacey conditions for acoustic and elastic domains; upd…
Browse files Browse the repository at this point in the history
…ates seismic unix output; adds ipati_water model option
  • Loading branch information
Daniel Peter committed Sep 3, 2012
1 parent be6dff4 commit a3712c1
Show file tree
Hide file tree
Showing 47 changed files with 2,305 additions and 1,620 deletions.
14 changes: 6 additions & 8 deletions src/cuda/check_fields_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,7 @@ __global__ void get_maximum_kernel(realw* array, int size, realw* d_max){
extern "C"
void FC_FUNC_(get_norm_acoustic_from_device,
GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE) {
long* Mesh_pointer_f) {

TRACE("get_norm_acoustic_from_device");
//double start_time = get_time();
Expand Down Expand Up @@ -816,9 +815,9 @@ TRACE("get_norm_acoustic_from_device");
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);

if(*SIMULATION_TYPE == 1 ){
if(mp->simulation_type == 1 ){
get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,size,d_max);
}else if(*SIMULATION_TYPE == 3 ){
}else if(mp->simulation_type == 3 ){
get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,size,d_max);
}

Expand Down Expand Up @@ -925,8 +924,7 @@ __global__ void get_maximum_vector_kernel(realw* array, int size, realw* d_max){
extern "C"
void FC_FUNC_(get_norm_elastic_from_device,
GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE) {
long* Mesh_pointer_f) {

TRACE("get_norm_elastic_from_device");
//double start_time = get_time();
Expand Down Expand Up @@ -957,9 +955,9 @@ void FC_FUNC_(get_norm_elastic_from_device,
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);

if(*SIMULATION_TYPE == 1 ){
if(mp->simulation_type == 1 ){
get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,size,d_max);
}else if(*SIMULATION_TYPE == 3 ){
}else if(mp->simulation_type == 3 ){
get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,size,d_max);
}

Expand Down
2 changes: 0 additions & 2 deletions src/cuda/compute_add_sources_acoustic_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ void FC_FUNC_(compute_add_sources_ac_cuda,
COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
int* myrankf) {

Expand Down Expand Up @@ -153,7 +152,6 @@ void FC_FUNC_(compute_add_sources_ac_s3_cuda,
COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
int* myrankf) {

Expand Down
145 changes: 137 additions & 8 deletions src/cuda/compute_coupling_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,13 @@ extern "C"
void FC_FUNC_(compute_coupling_ac_el_cuda,
COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* num_coupling_ac_el_facesf,
int* SIMULATION_TYPEf) {
int* num_coupling_ac_el_facesf) {
TRACE("compute_coupling_ac_el_cuda");
//double start_time = get_time();

Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
int SIMULATION_TYPE = *SIMULATION_TYPEf;

// way 1: exact blocksize to match NGLLSQUARE
int blocksize = NGLL2;
Expand All @@ -152,7 +150,7 @@ void FC_FUNC_(compute_coupling_ac_el_cuda,
phase_is_inner);

// adjoint simulations
if (SIMULATION_TYPE == 3 ){
if (mp->simulation_type == 3 ){
compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ,
mp->d_b_potential_dot_dot_acoustic,
num_coupling_ac_el_faces,
Expand Down Expand Up @@ -279,15 +277,13 @@ extern "C"
void FC_FUNC_(compute_coupling_el_ac_cuda,
COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* num_coupling_ac_el_facesf,
int* SIMULATION_TYPEf) {
int* num_coupling_ac_el_facesf) {
TRACE("compute_coupling_el_ac_cuda");
//double start_time = get_time();

Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
int SIMULATION_TYPE = *SIMULATION_TYPEf;

// way 1: exact blocksize to match NGLLSQUARE
int blocksize = 25;
Expand Down Expand Up @@ -319,7 +315,7 @@ void FC_FUNC_(compute_coupling_el_ac_cuda,
mp->d_displ);

// adjoint simulations
if (SIMULATION_TYPE == 3 ){
if (mp->simulation_type == 3 ){
compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
mp->d_b_accel,
num_coupling_ac_el_faces,
Expand All @@ -343,3 +339,136 @@ void FC_FUNC_(compute_coupling_el_ac_cuda,
exit_on_cuda_error("compute_coupling_el_ac_cuda");
#endif
}

/* ----------------------------------------------------------------------------------------------- */

/* OCEANS load on free surface */

/* ----------------------------------------------------------------------------------------------- */


__global__ void compute_coupling_ocean_cuda_kernel(realw* accel,
realw* rmassx,realw* rmassy,realw* rmassz,
realw* rmass_ocean_load,
int num_free_surface_faces,
int* free_surface_ispec,
int* free_surface_ijk,
realw* free_surface_normal,
int* ibool,
int* updated_dof_ocean_load) {
// gets spectral element face id
int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
int iface = blockIdx.x + gridDim.x*blockIdx.y;
realw nx,ny,nz;
realw force_normal_comp;

// for all faces on free surface
if( iface < num_free_surface_faces ){

int ispec = free_surface_ispec[iface]-1;

// gets global point index
int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;

int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;

//if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);

// only update this global point once

// daniel: TODO - there might be better ways to implement a mutex like below,
// and find a workaround to not use the temporary update array.
// atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point

if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){

// get normal
nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];

// make updated component of right-hand side
// we divide by rmass() which is 1 / M
// we use the total force which includes the Coriolis term above
force_normal_comp = accel[iglob*3]*nx / rmassx[iglob]
+ accel[iglob*3+1]*ny / rmassy[iglob]
+ accel[iglob*3+2]*nz / rmassz[iglob];

// probably wouldn't need atomicAdd anymore, but just to be sure...
atomicAdd(&accel[iglob*3], + (rmass_ocean_load[iglob] - rmassx[iglob]) * force_normal_comp * nx);
atomicAdd(&accel[iglob*3+1], + (rmass_ocean_load[iglob] - rmassy[iglob]) * force_normal_comp * ny);
atomicAdd(&accel[iglob*3+2], + (rmass_ocean_load[iglob] - rmassz[iglob]) * force_normal_comp * nz);
}
}
}

/* ----------------------------------------------------------------------------------------------- */

extern "C"
void FC_FUNC_(compute_coupling_ocean_cuda,
COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f) {

TRACE("compute_coupling_ocean_cuda");

Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container

// checks if anything to do
if( mp->num_free_surface_faces == 0 ) return;

// block sizes: exact blocksize to match NGLLSQUARE
int blocksize = NGLL2;

int num_blocks_x = mp->num_free_surface_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}

dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);


// initializes temporary array to zero
print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
sizeof(int)*mp->NGLOB_AB),88501);

#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before kernel compute_coupling_ocean_cuda");
#endif

compute_coupling_ocean_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel,
mp->d_rmassx,mp->d_rmassy,mp->d_rmassz,
mp->d_rmass_ocean_load,
mp->num_free_surface_faces,
mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
mp->d_free_surface_normal,
mp->d_ibool,
mp->d_updated_dof_ocean_load);
// for backward/reconstructed potentials
if(mp->simulation_type == 3) {
// re-initializes array
print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
sizeof(int)*mp->NGLOB_AB),88502);

compute_coupling_ocean_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_accel,
mp->d_rmassx,mp->d_rmassy,mp->d_rmassz,
mp->d_rmass_ocean_load,
mp->num_free_surface_faces,
mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
mp->d_free_surface_normal,
mp->d_ibool,
mp->d_updated_dof_ocean_load);

}


#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_coupling_ocean_cuda");
#endif
}

Loading

0 comments on commit a3712c1

Please sign in to comment.