diff --git a/src/cuda/check_fields_cuda.cu b/src/cuda/check_fields_cuda.cu index bd11abd61..f06639c29 100644 --- a/src/cuda/check_fields_cuda.cu +++ b/src/cuda/check_fields_cuda.cu @@ -752,8 +752,7 @@ __global__ void get_maximum_kernel(realw* array, int size, realw* d_max){ extern "C" void FC_FUNC_(get_norm_acoustic_from_device, GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm, - long* Mesh_pointer_f, - int* SIMULATION_TYPE) { + long* Mesh_pointer_f) { TRACE("get_norm_acoustic_from_device"); //double start_time = get_time(); @@ -816,9 +815,9 @@ TRACE("get_norm_acoustic_from_device"); dim3 grid(num_blocks_x,num_blocks_y); dim3 threads(blocksize,1,1); - if(*SIMULATION_TYPE == 1 ){ + if(mp->simulation_type == 1 ){ get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,size,d_max); -}else if(*SIMULATION_TYPE == 3 ){ +}else if(mp->simulation_type == 3 ){ get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,size,d_max); } @@ -925,8 +924,7 @@ __global__ void get_maximum_vector_kernel(realw* array, int size, realw* d_max){ extern "C" void FC_FUNC_(get_norm_elastic_from_device, GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm, - long* Mesh_pointer_f, - int* SIMULATION_TYPE) { + long* Mesh_pointer_f) { TRACE("get_norm_elastic_from_device"); //double start_time = get_time(); @@ -957,9 +955,9 @@ void FC_FUNC_(get_norm_elastic_from_device, dim3 grid(num_blocks_x,num_blocks_y); dim3 threads(blocksize,1,1); - if(*SIMULATION_TYPE == 1 ){ + if(mp->simulation_type == 1 ){ get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,size,d_max); -}else if(*SIMULATION_TYPE == 3 ){ +}else if(mp->simulation_type == 3 ){ get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,size,d_max); } diff --git a/src/cuda/compute_add_sources_acoustic_cuda.cu b/src/cuda/compute_add_sources_acoustic_cuda.cu index 14686f87b..756f4eacd 100644 --- a/src/cuda/compute_add_sources_acoustic_cuda.cu +++ b/src/cuda/compute_add_sources_acoustic_cuda.cu @@ -99,7 +99,6 @@ void FC_FUNC_(compute_add_sources_ac_cuda, COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, int* NSOURCESf, - int* SIMULATION_TYPEf, double* h_stf_pre_compute, int* myrankf) { @@ -153,7 +152,6 @@ void FC_FUNC_(compute_add_sources_ac_s3_cuda, COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, int* NSOURCESf, - int* SIMULATION_TYPEf, double* h_stf_pre_compute, int* myrankf) { diff --git a/src/cuda/compute_coupling_cuda.cu b/src/cuda/compute_coupling_cuda.cu index ce420d857..fc0c87bef 100644 --- a/src/cuda/compute_coupling_cuda.cu +++ b/src/cuda/compute_coupling_cuda.cu @@ -117,15 +117,13 @@ extern "C" void FC_FUNC_(compute_coupling_ac_el_cuda, COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, - int* num_coupling_ac_el_facesf, - int* SIMULATION_TYPEf) { + int* num_coupling_ac_el_facesf) { TRACE("compute_coupling_ac_el_cuda"); //double start_time = get_time(); Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container int phase_is_inner = *phase_is_innerf; int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf; - int SIMULATION_TYPE = *SIMULATION_TYPEf; // way 1: exact blocksize to match NGLLSQUARE int blocksize = NGLL2; @@ -152,7 +150,7 @@ void FC_FUNC_(compute_coupling_ac_el_cuda, phase_is_inner); // adjoint simulations - if (SIMULATION_TYPE == 3 ){ + if (mp->simulation_type == 3 ){ compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ, mp->d_b_potential_dot_dot_acoustic, num_coupling_ac_el_faces, @@ -279,15 +277,13 @@ extern "C" void FC_FUNC_(compute_coupling_el_ac_cuda, 
COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, - int* num_coupling_ac_el_facesf, - int* SIMULATION_TYPEf) { + int* num_coupling_ac_el_facesf) { TRACE("compute_coupling_el_ac_cuda"); //double start_time = get_time(); Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container int phase_is_inner = *phase_is_innerf; int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf; - int SIMULATION_TYPE = *SIMULATION_TYPEf; // way 1: exact blocksize to match NGLLSQUARE int blocksize = 25; @@ -319,7 +315,7 @@ void FC_FUNC_(compute_coupling_el_ac_cuda, mp->d_displ); // adjoint simulations - if (SIMULATION_TYPE == 3 ){ + if (mp->simulation_type == 3 ){ compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic, mp->d_b_accel, num_coupling_ac_el_faces, @@ -343,3 +339,136 @@ void FC_FUNC_(compute_coupling_el_ac_cuda, exit_on_cuda_error("compute_coupling_el_ac_cuda"); #endif } + +/* ----------------------------------------------------------------------------------------------- */ + +/* OCEANS load on free surface */ + +/* ----------------------------------------------------------------------------------------------- */ + + +__global__ void compute_coupling_ocean_cuda_kernel(realw* accel, + realw* rmassx,realw* rmassy,realw* rmassz, + realw* rmass_ocean_load, + int num_free_surface_faces, + int* free_surface_ispec, + int* free_surface_ijk, + realw* free_surface_normal, + int* ibool, + int* updated_dof_ocean_load) { + // gets spectral element face id + int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1) + int iface = blockIdx.x + gridDim.x*blockIdx.y; + realw nx,ny,nz; + realw force_normal_comp; + + // for all faces on free surface + if( iface < num_free_surface_faces ){ + + int ispec = free_surface_ispec[iface]-1; + + // gets global point index + int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface) + int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1; + int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1; + + int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1; + + //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob); + + // only update this global point once + + // daniel: TODO - there might be better ways to implement a mutex like below, + // and find a workaround to not use the temporary update array. + // atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point + + if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){ + + // get normal + nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface) + ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; + nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; + + // make updated component of right-hand side + // we divide by rmass() which is 1 / M + // we use the total force which includes the Coriolis term above + force_normal_comp = accel[iglob*3]*nx / rmassx[iglob] + accel[iglob*3+1]*ny / rmassy[iglob] + accel[iglob*3+2]*nz / rmassz[iglob]; + + // probably wouldn't need atomicAdd anymore, but just to be sure... 
+ atomicAdd(&accel[iglob*3], + (rmass_ocean_load[iglob] - rmassx[iglob]) * force_normal_comp * nx); + atomicAdd(&accel[iglob*3+1], + (rmass_ocean_load[iglob] - rmassy[iglob]) * force_normal_comp * ny); + atomicAdd(&accel[iglob*3+2], + (rmass_ocean_load[iglob] - rmassz[iglob]) * force_normal_comp * nz); + } + } +} + +/* ----------------------------------------------------------------------------------------------- */ + +extern "C" +void FC_FUNC_(compute_coupling_ocean_cuda, + COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f) { + + TRACE("compute_coupling_ocean_cuda"); + + Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container + + // checks if anything to do + if( mp->num_free_surface_faces == 0 ) return; + + // block sizes: exact blocksize to match NGLLSQUARE + int blocksize = NGLL2; + + int num_blocks_x = mp->num_free_surface_faces; + int num_blocks_y = 1; + while(num_blocks_x > 65535) { + num_blocks_x = (int) ceil(num_blocks_x*0.5f); + num_blocks_y = num_blocks_y*2; + } + + dim3 grid(num_blocks_x,num_blocks_y); + dim3 threads(blocksize,1,1); + + + // initializes temporary array to zero + print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0, + sizeof(int)*mp->NGLOB_AB),88501); + +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + exit_on_cuda_error("before kernel compute_coupling_ocean_cuda"); +#endif + + compute_coupling_ocean_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel, + mp->d_rmassx,mp->d_rmassy,mp->d_rmassz, + mp->d_rmass_ocean_load, + mp->num_free_surface_faces, + mp->d_free_surface_ispec, + mp->d_free_surface_ijk, + mp->d_free_surface_normal, + mp->d_ibool, + mp->d_updated_dof_ocean_load); + // for backward/reconstructed potentials + if(mp->simulation_type == 3) { + // re-initializes array + print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0, + sizeof(int)*mp->NGLOB_AB),88502); + + compute_coupling_ocean_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_accel, + mp->d_rmassx,mp->d_rmassy,mp->d_rmassz, + mp->d_rmass_ocean_load, + mp->num_free_surface_faces, + mp->d_free_surface_ispec, + mp->d_free_surface_ijk, + mp->d_free_surface_normal, + mp->d_ibool, + mp->d_updated_dof_ocean_load); + + } + + +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + exit_on_cuda_error("compute_coupling_ocean_cuda"); +#endif +} + diff --git a/src/cuda/compute_forces_acoustic_cuda.cu b/src/cuda/compute_forces_acoustic_cuda.cu index 149aef66d..85f2604d6 100644 --- a/src/cuda/compute_forces_acoustic_cuda.cu +++ b/src/cuda/compute_forces_acoustic_cuda.cu @@ -538,7 +538,6 @@ __global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute, /* ----------------------------------------------------------------------------------------------- */ void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, - int SIMULATION_TYPE, int* d_ibool, realw* d_xix, realw* d_xiy, @@ -599,7 +598,7 @@ void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, d_kappastore, mp->d_wgll_cube); - if(SIMULATION_TYPE == 3) { + if(mp->simulation_type == 3) { Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute, mp->NGLOB_AB, d_ibool, @@ -647,8 +646,7 @@ void FC_FUNC_(compute_forces_acoustic_cuda, COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f, int* iphase, int* nspec_outer_acoustic, - int* nspec_inner_acoustic, - int* SIMULATION_TYPE) { + int* nspec_inner_acoustic) { TRACE("compute_forces_acoustic_cuda"); //double start_time = get_time(); @@ -700,7 +698,6 @@ void FC_FUNC_(compute_forces_acoustic_cuda, nb_blocks_to_compute = 
mp->h_num_elem_colors_acoustic[icolor]; Kernel_2_acoustic(nb_blocks_to_compute,mp,*iphase, - *SIMULATION_TYPE, mp->d_ibool + color_offset_nonpadded, mp->d_xix + color_offset, mp->d_xiy + color_offset, @@ -724,7 +721,6 @@ void FC_FUNC_(compute_forces_acoustic_cuda, // no mesh coloring: uses atomic updates Kernel_2_acoustic(num_elements, mp, *iphase, - *SIMULATION_TYPE, mp->d_ibool, mp->d_xix, mp->d_xiy, @@ -741,129 +737,6 @@ void FC_FUNC_(compute_forces_acoustic_cuda, } } -/* ----------------------------------------------------------------------------------------------- */ - -/* KERNEL 3 */ - -/* ----------------------------------------------------------------------------------------------- */ - - -__global__ void kernel_3_a_acoustic_cuda_device(realw* potential_dot_dot_acoustic, - int size, - realw* rmass_acoustic) { - int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; - - /* because of block and grid sizing problems, there is a small */ - /* amount of buffer at the end of the calculation */ - if(id < size) { - // multiplies pressure with the inverse of the mass matrix - potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id]; - } -} - -/* ----------------------------------------------------------------------------------------------- */ - -__global__ void kernel_3_b_acoustic_cuda_device(realw* potential_dot_acoustic, - realw* potential_dot_dot_acoustic, - int size, - realw deltatover2, - realw* rmass_acoustic) { - int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; - - /* because of block and grid sizing problems, there is a small */ - /* amount of buffer at the end of the calculation */ - if(id < size) { - // Newmark time scheme: corrector term - potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id]; - } -} - -/* ----------------------------------------------------------------------------------------------- */ - -extern "C" -void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( - long* Mesh_pointer, - int* size_F, - int* SIMULATION_TYPE) { - -TRACE("kernel_3_a_acoustic_cuda"); - - Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper - int size = *size_F; - - int blocksize = BLOCKSIZE_KERNEL3; - int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; - int num_blocks_x = size_padded/blocksize; - int num_blocks_y = 1; - while(num_blocks_x > 65535) { - num_blocks_x = (int) ceil(num_blocks_x*0.5f); - num_blocks_y = num_blocks_y*2; - } - dim3 grid(num_blocks_x,num_blocks_y); - dim3 threads(blocksize,1,1); - - kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic, - size, - mp->d_rmass_acoustic); - - if(*SIMULATION_TYPE == 3) { - kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic, - size, - mp->d_rmass_acoustic); - } - -#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING - //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); - exit_on_cuda_error("after kernel 3 a"); -#endif -} - -/* ----------------------------------------------------------------------------------------------- */ - -extern "C" -void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( - long* Mesh_pointer, - int* size_F, - realw* deltatover2_F, - int* SIMULATION_TYPE, - realw* b_deltatover2_F) { - -TRACE("kernel_3_b_acoustic_cuda"); - - Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper - int size = *size_F; - realw 
deltatover2 = *deltatover2_F; - realw b_deltatover2 = *b_deltatover2_F; - - int blocksize = BLOCKSIZE_KERNEL3; - int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; - int num_blocks_x = size_padded/blocksize; - int num_blocks_y = 1; - while(num_blocks_x > 65535) { - num_blocks_x = (int) ceil(num_blocks_x*0.5f); - num_blocks_y = num_blocks_y*2; - } - dim3 grid(num_blocks_x,num_blocks_y); - dim3 threads(blocksize,1,1); - - kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic, - mp->d_potential_dot_dot_acoustic, - size, deltatover2, - mp->d_rmass_acoustic); - - if(*SIMULATION_TYPE == 3) { - kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic, - mp->d_b_potential_dot_dot_acoustic, - size, b_deltatover2, - mp->d_rmass_acoustic); - } - -#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING - //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); - exit_on_cuda_error("after kernel 3 b"); -#endif -} - /* ----------------------------------------------------------------------------------------------- */ @@ -915,8 +788,7 @@ __global__ void enforce_free_surface_cuda_kernel( extern "C" void FC_FUNC_(acoustic_enforce_free_surf_cuda, ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f, - int* SIMULATION_TYPE, - int* ABSORB_FREE_SURFACE) { + int* ABSORB_FREE_SURFACE) { TRACE("acoustic_enforce_free_surf_cuda"); @@ -947,7 +819,7 @@ TRACE("acoustic_enforce_free_surf_cuda"); mp->d_ibool, mp->d_ispec_is_acoustic); // for backward/reconstructed potentials - if(*SIMULATION_TYPE == 3) { + if(mp->simulation_type == 3) { enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic, mp->d_b_potential_dot_acoustic, mp->d_b_potential_dot_dot_acoustic, diff --git a/src/cuda/compute_forces_elastic_cuda.cu b/src/cuda/compute_forces_elastic_cuda.cu index bd4d95110..62df527fc 100644 --- a/src/cuda/compute_forces_elastic_cuda.cu +++ b/src/cuda/compute_forces_elastic_cuda.cu @@ -160,7 +160,6 @@ void FC_FUNC_(transfer_boundary_from_device_a, if( mp->size_mpi_buffer > 0 ){ -//daniel: todo - check below with this... int blocksize = BLOCKSIZE_TRANSFER; int size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize; int num_blocks_x = size_padded/blocksize; @@ -172,20 +171,6 @@ void FC_FUNC_(transfer_boundary_from_device_a, dim3 grid(num_blocks_x,num_blocks_y); dim3 threads(blocksize,1,1); -/* -//daniel: todo - check originally used... 
- int num_blocks_x = *nspec_outer_elastic; - int num_blocks_y = 1; - while(num_blocks_x > 65535) { - num_blocks_x = (int) ceil(num_blocks_x*0.5f); - num_blocks_y = num_blocks_y*2; - } - - int blocksize = NGLL3_PADDED; - dim3 grid(num_blocks_x,num_blocks_y); - dim3 threads(blocksize,1,1); -*/ - prepare_boundary_accel_on_device<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel,mp->d_send_accel_buffer, mp->num_interfaces_ext_mesh, mp->max_nibool_interfaces_ext_mesh, @@ -200,9 +185,6 @@ void FC_FUNC_(transfer_boundary_from_device_a, cudaMemcpyAsync(mp->h_send_accel_buffer,mp->d_send_accel_buffer, mp->size_mpi_buffer*sizeof(realw),cudaMemcpyDeviceToHost,mp->copy_stream); - // cudaMemcpyAsync(mp->h_send_accel_buffer,mp->d_send_accel_buffer, - // 3* mp->max_nibool_interfaces_ext_mesh* mp->num_interfaces_ext_mesh*sizeof(realw), - // cudaMemcpyDeviceToHost,mp->compute_stream); } } @@ -1265,7 +1247,7 @@ __global__ void Kernel_2_impl(int nb_blocks_to_compute, /* ----------------------------------------------------------------------------------------------- */ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase, - int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE, + int COMPUTE_AND_STORE_STRAIN, int ATTENUATION,int ANISOTROPY, int* d_ibool, realw* d_xix, @@ -1374,7 +1356,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase, d_epsilondev_xz, d_epsilondev_yz, d_epsilon_trace_over_3, - SIMULATION_TYPE, + mp->simulation_type, ATTENUATION,mp->NSPEC_AB, d_one_minus_sum_beta, d_factor_common, @@ -1409,7 +1391,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase, mp->d_wgll_cube); - if(SIMULATION_TYPE == 3) { + if(mp->simulation_type == 3) { Kernel_2_impl<<< grid,threads,0,mp->compute_stream>>>(nb_blocks_to_compute, mp->NGLOB_AB, d_ibool, @@ -1432,7 +1414,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase, d_b_epsilondev_xz, d_b_epsilondev_yz, d_b_epsilon_trace_over_3, - SIMULATION_TYPE, + mp->simulation_type, ATTENUATION,mp->NSPEC_AB, d_one_minus_sum_beta, d_factor_common, @@ -1490,7 +1472,6 @@ void FC_FUNC_(compute_forces_elastic_cuda, int* iphase, int* nspec_outer_elastic, int* nspec_inner_elastic, - int* SIMULATION_TYPE, int* COMPUTE_AND_STORE_STRAIN, int* ATTENUATION, int* ANISOTROPY) { @@ -1556,7 +1537,7 @@ void FC_FUNC_(compute_forces_elastic_cuda, //} Kernel_2(nb_blocks_to_compute,mp,*iphase, - *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE, + *COMPUTE_AND_STORE_STRAIN, *ATTENUATION,*ANISOTROPY, mp->d_ibool + color_offset_nonpadded, mp->d_xix + color_offset, @@ -1638,7 +1619,7 @@ void FC_FUNC_(compute_forces_elastic_cuda, // no mesh coloring: uses atomic updates Kernel_2(num_elements,mp,*iphase, - *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE, + *COMPUTE_AND_STORE_STRAIN, *ATTENUATION,*ANISOTROPY, mp->d_ibool, mp->d_xix, @@ -1703,7 +1684,6 @@ void FC_FUNC_(compute_forces_elastic_cuda, /* ----------------------------------------------------------------------------------------------- */ -//daniel: todo - use this instead above call to fortran routine to avoid compilation problems extern "C" void FC_FUNC_(sync_copy_from_device, SYNC_copy_FROM_DEVICE)(long* Mesh_pointer_f, @@ -1729,310 +1709,3 @@ void FC_FUNC_(sync_copy_from_device, // memory copy is now finished, so non-blocking MPI send can proceed } -/* ----------------------------------------------------------------------------------------------- */ - -// KERNEL 3 - -/* ----------------------------------------------------------------------------------------------- */ - -__global__ void kernel_3_cuda_device(realw* veloc, - realw* 
accel, int size, - realw deltatover2, - realw* rmass) { - int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; - - /* because of block and grid sizing problems, there is a small */ - /* amount of buffer at the end of the calculation */ - if(id < size) { - realw new_accel = accel[id] * rmass[id / 3]; - veloc[id] += deltatover2 * new_accel; - accel[id] = new_accel; -/* - accel[3*id] = accel[3*id]*rmass[id]; - accel[3*id+1] = accel[3*id+1]*rmass[id]; - accel[3*id+2] = accel[3*id+2]*rmass[id]; - - veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id]; - veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1]; - veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2]; -*/ - } -} - -/* ----------------------------------------------------------------------------------------------- */ - -__global__ void kernel_3_accel_cuda_device(realw* accel, - int size, - realw* rmass) { - int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; - - /* because of block and grid sizing problems, there is a small */ - /* amount of buffer at the end of the calculation */ - if(id < size) { - accel[id] *= rmass[id / 3]; -/* - accel[3*id] = accel[3*id]*rmass[id]; - accel[3*id+1] = accel[3*id+1]*rmass[id]; - accel[3*id+2] = accel[3*id+2]*rmass[id]; -*/ - } -} - -/* ----------------------------------------------------------------------------------------------- */ - -__global__ void kernel_3_veloc_cuda_device(realw* veloc, - realw* accel, - int size, - realw deltatover2) { - int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; - - /* because of block and grid sizing problems, there is a small */ - /* amount of buffer at the end of the calculation */ - if(id < size) { - veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id]; - veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1]; - veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2]; - } -} - -/* ----------------------------------------------------------------------------------------------- */ - -extern "C" -void FC_FUNC_(kernel_3_a_cuda, - KERNEL_3_A_CUDA)(long* Mesh_pointer, - int* size_F, - realw* deltatover2_F, - int* SIMULATION_TYPE_f, - realw* b_deltatover2_F, - int* OCEANS) { -TRACE("kernel_3_a_cuda"); - - Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper - //int size = *size_F; - int size = *size_F * 3; - int SIMULATION_TYPE = *SIMULATION_TYPE_f; - realw deltatover2 = *deltatover2_F; - realw b_deltatover2 = *b_deltatover2_F; - - int blocksize = BLOCKSIZE_KERNEL3; - int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; - - int num_blocks_x = size_padded/blocksize; - int num_blocks_y = 1; - while(num_blocks_x > 65535) { - num_blocks_x = (int) ceil(num_blocks_x*0.5f); - num_blocks_y = num_blocks_y*2; - } - - dim3 grid(num_blocks_x,num_blocks_y); - dim3 threads(blocksize,1,1); - - // check whether we can update accel and veloc, or only accel at this point - if( *OCEANS == 0 ){ - // updates both, accel and veloc - kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc, - mp->d_accel, - size, deltatover2, mp->d_rmass); - - if(SIMULATION_TYPE == 3) { - kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc, - mp->d_b_accel, - size, b_deltatover2,mp->d_rmass); - } - }else{ - // updates only accel - kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_accel, - size, mp->d_rmass); - - if(SIMULATION_TYPE == 3) { - kernel_3_accel_cuda_device<<< grid, 
threads,0,mp->compute_stream>>>(mp->d_b_accel, - size, mp->d_rmass); - } - } - -#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING - //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); - exit_on_cuda_error("after kernel 3 a"); -#endif -} - -/* ----------------------------------------------------------------------------------------------- */ - -extern "C" -void FC_FUNC_(kernel_3_b_cuda, - KERNEL_3_B_CUDA)(long* Mesh_pointer, - int* size_F, - realw* deltatover2_F, - int* SIMULATION_TYPE_f, - realw* b_deltatover2_F) { - TRACE("kernel_3_b_cuda"); - - Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper - int size = *size_F; - int SIMULATION_TYPE = *SIMULATION_TYPE_f; - realw deltatover2 = *deltatover2_F; - realw b_deltatover2 = *b_deltatover2_F; - - int blocksize = BLOCKSIZE_KERNEL3; - int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; - - int num_blocks_x = size_padded/blocksize; - int num_blocks_y = 1; - while(num_blocks_x > 65535) { - num_blocks_x = (int) ceil(num_blocks_x*0.5f); - num_blocks_y = num_blocks_y*2; - } - - dim3 grid(num_blocks_x,num_blocks_y); - dim3 threads(blocksize,1,1); - - // updates only veloc at this point - kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc, - mp->d_accel, - size,deltatover2); - - if(SIMULATION_TYPE == 3) { - kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc, - mp->d_b_accel, - size,b_deltatover2); - } - -#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING - //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); - exit_on_cuda_error("after kernel 3 b"); -#endif -} - - -/* ----------------------------------------------------------------------------------------------- */ - -/* OCEANS load on free surface */ - -/* ----------------------------------------------------------------------------------------------- */ - - -__global__ void elastic_ocean_load_cuda_kernel(realw* accel, - realw* rmass, - realw* rmass_ocean_load, - int num_free_surface_faces, - int* free_surface_ispec, - int* free_surface_ijk, - realw* free_surface_normal, - int* ibool, - int* updated_dof_ocean_load) { - // gets spectral element face id - int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1) - int iface = blockIdx.x + gridDim.x*blockIdx.y; - realw nx,ny,nz; - realw force_normal_comp,additional_term; - - // for all faces on free surface - if( iface < num_free_surface_faces ){ - - int ispec = free_surface_ispec[iface]-1; - - // gets global point index - int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface) - int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1; - int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1; - - int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1; - - //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob); - - // only update this global point once - - // daniel: TODO - there might be better ways to implement a mutex like below, - // and find a workaround to not use the temporary update array. - // atomicExch: returns the old value, i.e. 
0 indicates that we still have to do this point - - if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){ - - // get normal - nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface) - ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; - nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; - - // make updated component of right-hand side - // we divide by rmass() which is 1 / M - // we use the total force which includes the Coriolis term above - force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob]; - - additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp; - - // probably wouldn't need atomicAdd anymore, but just to be sure... - atomicAdd(&accel[iglob*3], + additional_term * nx); - atomicAdd(&accel[iglob*3+1], + additional_term * ny); - atomicAdd(&accel[iglob*3+2], + additional_term * nz); - } - } -} - -/* ----------------------------------------------------------------------------------------------- */ - -extern "C" -void FC_FUNC_(elastic_ocean_load_cuda, - ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f, - int* SIMULATION_TYPE) { - - TRACE("elastic_ocean_load_cuda"); - - Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container - - // checks if anything to do - if( mp->num_free_surface_faces == 0 ) return; - - // block sizes: exact blocksize to match NGLLSQUARE - int blocksize = NGLL2; - - int num_blocks_x = mp->num_free_surface_faces; - int num_blocks_y = 1; - while(num_blocks_x > 65535) { - num_blocks_x = (int) ceil(num_blocks_x*0.5f); - num_blocks_y = num_blocks_y*2; - } - - dim3 grid(num_blocks_x,num_blocks_y); - dim3 threads(blocksize,1,1); - - - // initializes temporary array to zero - print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0, - sizeof(int)*mp->NGLOB_AB),88501); - -#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING - exit_on_cuda_error("before kernel elastic_ocean_load_cuda"); -#endif - - elastic_ocean_load_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel, - mp->d_rmass, - mp->d_rmass_ocean_load, - mp->num_free_surface_faces, - mp->d_free_surface_ispec, - mp->d_free_surface_ijk, - mp->d_free_surface_normal, - mp->d_ibool, - mp->d_updated_dof_ocean_load); - // for backward/reconstructed potentials - if(*SIMULATION_TYPE == 3) { - // re-initializes array - print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0, - sizeof(int)*mp->NGLOB_AB),88502); - - elastic_ocean_load_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_accel, - mp->d_rmass, - mp->d_rmass_ocean_load, - mp->num_free_surface_faces, - mp->d_free_surface_ispec, - mp->d_free_surface_ijk, - mp->d_free_surface_normal, - mp->d_ibool, - mp->d_updated_dof_ocean_load); - - } - - -#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING - exit_on_cuda_error("elastic_ocean_load_cuda"); -#endif -} diff --git a/src/cuda/compute_stacey_acoustic_cuda.cu b/src/cuda/compute_stacey_acoustic_cuda.cu index 304da3a55..c983c8d16 100644 --- a/src/cuda/compute_stacey_acoustic_cuda.cu +++ b/src/cuda/compute_stacey_acoustic_cuda.cu @@ -50,7 +50,8 @@ __global__ void compute_stacey_acoustic_kernel(realw* potential_dot_acoustic, int* ispec_is_inner, int* ispec_is_acoustic, int phase_is_inner, - int SIMULATION_TYPE, int SAVE_FORWARD, + int SIMULATION_TYPE, + int SAVE_FORWARD, int num_abs_boundary_faces, realw* b_potential_dot_acoustic, realw* b_potential_dot_dot_acoustic, @@ -121,18 +122,15 @@ __global__ void compute_stacey_acoustic_kernel(realw* potential_dot_acoustic, extern "C" void FC_FUNC_(compute_stacey_acoustic_cuda, - 
COMPUTE_STACEY_ACOUSTIC_CUDA)( - long* Mesh_pointer_f, - int* phase_is_innerf, - int* SIMULATION_TYPEf, - int* SAVE_FORWARDf, - realw* h_b_absorb_potential) { + COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f, + int* phase_is_innerf, + int* SAVE_FORWARDf, + realw* h_b_absorb_potential) { TRACE("compute_stacey_acoustic_cuda"); //double start_time = get_time(); Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container int phase_is_inner = *phase_is_innerf; - int SIMULATION_TYPE = *SIMULATION_TYPEf; int SAVE_FORWARD = *SAVE_FORWARDf; // way 1: Elapsed time: 4.385948e-03 @@ -154,7 +152,7 @@ TRACE("compute_stacey_acoustic_cuda"); dim3 threads(blocksize,1,1); // adjoint simulations: reads in absorbing boundary - if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){ + if (mp->simulation_type == 3 && mp->d_num_abs_boundary_faces > 0 ){ // copies array to GPU print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential, mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700); @@ -171,7 +169,8 @@ TRACE("compute_stacey_acoustic_cuda"); mp->d_ispec_is_inner, mp->d_ispec_is_acoustic, phase_is_inner, - SIMULATION_TYPE,SAVE_FORWARD, + mp->simulation_type, + SAVE_FORWARD, mp->d_num_abs_boundary_faces, mp->d_b_potential_dot_acoustic, mp->d_b_potential_dot_dot_acoustic, @@ -179,7 +178,7 @@ TRACE("compute_stacey_acoustic_cuda"); mp->gravity); // adjoint simulations: stores absorbed wavefield part - if (SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){ + if (mp->simulation_type == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){ // copies array to CPU print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential, mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),7701); diff --git a/src/cuda/compute_stacey_elastic_cuda.cu b/src/cuda/compute_stacey_elastic_cuda.cu index a4e7458b7..618c5f9a6 100644 --- a/src/cuda/compute_stacey_elastic_cuda.cu +++ b/src/cuda/compute_stacey_elastic_cuda.cu @@ -132,7 +132,6 @@ extern "C" void FC_FUNC_(compute_stacey_elastic_cuda, COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, - int* SIMULATION_TYPEf, int* SAVE_FORWARDf, realw* h_b_absorb_field) { @@ -144,7 +143,6 @@ TRACE("compute_stacey_elastic_cuda"); if( mp->d_num_abs_boundary_faces == 0 ) return; int phase_is_inner = *phase_is_innerf; - int SIMULATION_TYPE = *SIMULATION_TYPEf; int SAVE_FORWARD = *SAVE_FORWARDf; // way 1 @@ -165,7 +163,7 @@ TRACE("compute_stacey_elastic_cuda"); dim3 grid(num_blocks_x,num_blocks_y); dim3 threads(blocksize,1,1); - if(SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0) { + if(mp->simulation_type == 3 && mp->d_num_abs_boundary_faces > 0) { // The read is done in fortran print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field,h_b_absorb_field, mp->d_b_reclen_field,cudaMemcpyHostToDevice),7700); @@ -187,7 +185,8 @@ TRACE("compute_stacey_elastic_cuda"); mp->d_ispec_is_inner, mp->d_ispec_is_elastic, phase_is_inner, - SIMULATION_TYPE,SAVE_FORWARD, + mp->simulation_type, + SAVE_FORWARD, mp->d_num_abs_boundary_faces, mp->d_b_accel, mp->d_b_absorb_field); @@ -197,10 +196,10 @@ TRACE("compute_stacey_elastic_cuda"); #endif // ! adjoint simulations: stores absorbed wavefield part - // if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) & + // if (mp->simulation_type == 1 .and. SAVE_FORWARD .and. 
num_abs_boundary_faces > 0 ) & // write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field - if(SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) { + if(mp->simulation_type == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) { print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_field,mp->d_b_absorb_field, mp->d_b_reclen_field,cudaMemcpyDeviceToHost),7701); // The write is done in fortran diff --git a/src/cuda/it_update_displacement_cuda.cu b/src/cuda/it_update_displacement_cuda.cu index 08ef470bc..10a3cf4ff 100644 --- a/src/cuda/it_update_displacement_cuda.cu +++ b/src/cuda/it_update_displacement_cuda.cu @@ -68,14 +68,13 @@ __global__ void UpdateDispVeloc_kernel(realw* displ, extern "C" void FC_FUNC_(it_update_displacement_cuda, IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f, - int* size_F, - realw* deltat_F, - realw* deltatsqover2_F, - realw* deltatover2_F, - int* SIMULATION_TYPE, - realw* b_deltat_F, - realw* b_deltatsqover2_F, - realw* b_deltatover2_F) { + int* size_F, + realw* deltat_F, + realw* deltatsqover2_F, + realw* deltatover2_F, + realw* b_deltat_F, + realw* b_deltatsqover2_F, + realw* b_deltatover2_F) { TRACE("it_update_displacement_cuda"); @@ -120,7 +119,7 @@ TRACE("it_update_displacement_cuda"); //#endif // kernel for backward fields - if(*SIMULATION_TYPE == 3) { + if(mp->simulation_type == 3) { realw b_deltat = *b_deltat_F; realw b_deltatsqover2 = *b_deltatsqover2_F; realw b_deltatover2 = *b_deltatover2_F; @@ -178,7 +177,6 @@ void FC_FUNC_(it_update_displacement_ac_cuda, realw* deltat_F, realw* deltatsqover2_F, realw* deltatover2_F, - int* SIMULATION_TYPE, realw* b_deltat_F, realw* b_deltatsqover2_F, realw* b_deltatover2_F) { @@ -212,7 +210,7 @@ TRACE("it_update_displacement_ac_cuda"); mp->d_potential_dot_dot_acoustic, size,deltat,deltatsqover2,deltatover2); - if(*SIMULATION_TYPE == 3) { + if(mp->simulation_type == 3) { realw b_deltat = *b_deltat_F; realw b_deltatsqover2 = *b_deltatsqover2_F; realw b_deltatover2 = *b_deltatover2_F; @@ -229,3 +227,310 @@ TRACE("it_update_displacement_ac_cuda"); exit_on_cuda_error("it_update_displacement_ac_cuda"); #endif } + + +/* ----------------------------------------------------------------------------------------------- */ + +// elastic domains + +// KERNEL 3 + +/* ----------------------------------------------------------------------------------------------- */ + +__global__ void kernel_3_cuda_device(realw* veloc, + realw* accel, + int size, + realw deltatover2, + realw* rmassx, + realw* rmassy, + realw* rmassz) { + int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; + + /* because of block and grid sizing problems, there is a small */ + /* amount of buffer at the end of the calculation */ + if(id < size) { + accel[3*id] = accel[3*id]*rmassx[id]; + accel[3*id+1] = accel[3*id+1]*rmassy[id]; + accel[3*id+2] = accel[3*id+2]*rmassz[id]; + + veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id]; + veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1]; + veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2]; + } +} + +/* ----------------------------------------------------------------------------------------------- */ + +__global__ void kernel_3_accel_cuda_device(realw* accel, + int size, + realw* rmassx, + realw* rmassy, + realw* rmassz) { + int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; + + /* because of block and grid sizing problems, there is a small */ + /* amount of buffer at the end of the calculation */ + if(id < size) { + 
accel[3*id] = accel[3*id]*rmassx[id]; + accel[3*id+1] = accel[3*id+1]*rmassy[id]; + accel[3*id+2] = accel[3*id+2]*rmassz[id]; + } +} + +/* ----------------------------------------------------------------------------------------------- */ + +__global__ void kernel_3_veloc_cuda_device(realw* veloc, + realw* accel, + int size, + realw deltatover2) { + int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; + + /* because of block and grid sizing problems, there is a small */ + /* amount of buffer at the end of the calculation */ + if(id < size) { + veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id]; + veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1]; + veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2]; + } +} + +/* ----------------------------------------------------------------------------------------------- */ + +extern "C" +void FC_FUNC_(kernel_3_a_cuda, + KERNEL_3_A_CUDA)(long* Mesh_pointer, + int* size_F, + realw* deltatover2_F, + realw* b_deltatover2_F, + int* OCEANS) { +TRACE("kernel_3_a_cuda"); + + Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper + + int size = *size_F; + + realw deltatover2 = *deltatover2_F; + realw b_deltatover2 = *b_deltatover2_F; + + int blocksize = BLOCKSIZE_KERNEL3; + int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; + + int num_blocks_x = size_padded/blocksize; + int num_blocks_y = 1; + while(num_blocks_x > 65535) { + num_blocks_x = (int) ceil(num_blocks_x*0.5f); + num_blocks_y = num_blocks_y*2; + } + + dim3 grid(num_blocks_x,num_blocks_y); + dim3 threads(blocksize,1,1); + + // check whether we can update accel and veloc, or only accel at this point + if( *OCEANS == 0 ){ + // updates both, accel and veloc + kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc, + mp->d_accel, + size, deltatover2, + mp->d_rmassx,mp->d_rmassy,mp->d_rmassz); + + if(mp->simulation_type == 3) { + kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc, + mp->d_b_accel, + size, b_deltatover2, + mp->d_rmassx,mp->d_rmassy,mp->d_rmassz); + } + }else{ + // updates only accel + kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_accel, + size, + mp->d_rmassx, + mp->d_rmassy, + mp->d_rmassz); + + if(mp->simulation_type == 3) { + kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_accel, + size, + mp->d_rmassx, + mp->d_rmassy, + mp->d_rmassz); + } + } + +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); + exit_on_cuda_error("after kernel 3 a"); +#endif +} + +/* ----------------------------------------------------------------------------------------------- */ + +extern "C" +void FC_FUNC_(kernel_3_b_cuda, + KERNEL_3_B_CUDA)(long* Mesh_pointer, + int* size_F, + realw* deltatover2_F, + realw* b_deltatover2_F) { + TRACE("kernel_3_b_cuda"); + + Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper + int size = *size_F; + + realw deltatover2 = *deltatover2_F; + realw b_deltatover2 = *b_deltatover2_F; + + int blocksize = BLOCKSIZE_KERNEL3; + int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; + + int num_blocks_x = size_padded/blocksize; + int num_blocks_y = 1; + while(num_blocks_x > 65535) { + num_blocks_x = (int) ceil(num_blocks_x*0.5f); + num_blocks_y = num_blocks_y*2; + } + + dim3 grid(num_blocks_x,num_blocks_y); + dim3 threads(blocksize,1,1); + + // updates only veloc at this point + 
kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc, + mp->d_accel, + size,deltatover2); + + if(mp->simulation_type == 3) { + kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc, + mp->d_b_accel, + size,b_deltatover2); + } + +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); + exit_on_cuda_error("after kernel 3 b"); +#endif +} + + +/* ----------------------------------------------------------------------------------------------- */ + +// acoustic domains + +// KERNEL 3 + +/* ----------------------------------------------------------------------------------------------- */ + + +__global__ void kernel_3_a_acoustic_cuda_device(realw* potential_dot_dot_acoustic, + int size, + realw* rmass_acoustic) { + int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; + + /* because of block and grid sizing problems, there is a small */ + /* amount of buffer at the end of the calculation */ + if(id < size) { + // multiplies pressure with the inverse of the mass matrix + potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id]; + } +} + +/* ----------------------------------------------------------------------------------------------- */ + +__global__ void kernel_3_b_acoustic_cuda_device(realw* potential_dot_acoustic, + realw* potential_dot_dot_acoustic, + int size, + realw deltatover2, + realw* rmass_acoustic) { + int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x; + + /* because of block and grid sizing problems, there is a small */ + /* amount of buffer at the end of the calculation */ + if(id < size) { + // Newmark time scheme: corrector term + potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id]; + } +} + +/* ----------------------------------------------------------------------------------------------- */ + +extern "C" +void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( + long* Mesh_pointer, + int* size_F) { + +TRACE("kernel_3_a_acoustic_cuda"); + + Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper + int size = *size_F; + + int blocksize = BLOCKSIZE_KERNEL3; + int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; + int num_blocks_x = size_padded/blocksize; + int num_blocks_y = 1; + while(num_blocks_x > 65535) { + num_blocks_x = (int) ceil(num_blocks_x*0.5f); + num_blocks_y = num_blocks_y*2; + } + dim3 grid(num_blocks_x,num_blocks_y); + dim3 threads(blocksize,1,1); + + kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic, + size, + mp->d_rmass_acoustic); + + if(mp->simulation_type == 3) { + kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic, + size, + mp->d_rmass_acoustic); + } + +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); + exit_on_cuda_error("after kernel 3 a"); +#endif +} + +/* ----------------------------------------------------------------------------------------------- */ + +extern "C" +void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( + long* Mesh_pointer, + int* size_F, + realw* deltatover2_F, + realw* b_deltatover2_F) { + +TRACE("kernel_3_b_acoustic_cuda"); + + Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper + int size = *size_F; + + realw deltatover2 = 
*deltatover2_F; + realw b_deltatover2 = *b_deltatover2_F; + + int blocksize = BLOCKSIZE_KERNEL3; + int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; + int num_blocks_x = size_padded/blocksize; + int num_blocks_y = 1; + while(num_blocks_x > 65535) { + num_blocks_x = (int) ceil(num_blocks_x*0.5f); + num_blocks_y = num_blocks_y*2; + } + dim3 grid(num_blocks_x,num_blocks_y); + dim3 threads(blocksize,1,1); + + kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic, + mp->d_potential_dot_dot_acoustic, + size, deltatover2, + mp->d_rmass_acoustic); + + if(mp->simulation_type == 3) { + kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic, + mp->d_b_potential_dot_dot_acoustic, + size, b_deltatover2, + mp->d_rmass_acoustic); + } + +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y); + exit_on_cuda_error("after kernel 3 b"); +#endif +} + + diff --git a/src/cuda/mesh_constants_cuda.h b/src/cuda/mesh_constants_cuda.h index 4a7fb4c20..86095c7bc 100644 --- a/src/cuda/mesh_constants_cuda.h +++ b/src/cuda/mesh_constants_cuda.h @@ -192,6 +192,13 @@ typedef struct mesh_ { int myrank; int NPROC; + // constants + int simulation_type; + int use_mesh_coloring_gpu; + int absorbing_conditions; + int gravity; + + // ------------------------------------------------------------------ // // GLL points & weights // ------------------------------------------------------------------ // @@ -210,9 +217,6 @@ typedef struct mesh_ { // inner / outer elements int* d_ispec_is_inner; - // mesh coloring - int use_mesh_coloring_gpu; - // pointers to constant memory arrays realw* d_hprime_xx; //realw* d_hprime_yy; // only needed if NGLLX != NGLLY != NGLLZ @@ -285,7 +289,9 @@ typedef struct mesh_ { int num_colors_outer_elastic,num_colors_inner_elastic; int nspec_elastic; - realw* d_rmass; + realw* d_rmassx; + realw* d_rmassy; + realw* d_rmassz; // mpi buffer realw* d_send_accel_buffer; @@ -481,7 +487,6 @@ typedef struct mesh_ { realw* d_coupling_ac_el_jacobian2Dw; // gravity - int gravity; realw* d_minus_deriv_gravity; realw* d_minus_g; diff --git a/src/cuda/prepare_mesh_constants_cuda.cu b/src/cuda/prepare_mesh_constants_cuda.cu index 55c91dc63..ae62644f7 100644 --- a/src/cuda/prepare_mesh_constants_cuda.cu +++ b/src/cuda/prepare_mesh_constants_cuda.cu @@ -154,6 +154,10 @@ TRACE("prepare_constants_device"); mp->NSPEC_AB = *NSPEC_AB; mp->NGLOB_AB = *NGLOB_AB; + // constants + mp->simulation_type = *SIMULATION_TYPE; + mp->absorbing_conditions = *ABSORBING_CONDITIONS; + // sets constant arrays setConst_hprime_xx(h_hprime_xx,mp); // setConst_hprime_yy(h_hprime_yy,mp); // only needed if NGLLX != NGLLY != NGLLZ @@ -289,41 +293,54 @@ TRACE("prepare_constants_device"); } // inner elements - print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205); - print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner, - mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206); + copy_todevice_int((void**)&mp->d_ispec_is_inner,h_ispec_is_inner,mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner, + // mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206); // absorbing boundaries mp->d_num_abs_boundary_faces = *h_num_abs_boundary_faces; - if( *ABSORBING_CONDITIONS && 
mp->d_num_abs_boundary_faces > 0 ){ - print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ispec), - (mp->d_num_abs_boundary_faces)*sizeof(int)),1101); - print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec, - (mp->d_num_abs_boundary_faces)*sizeof(int), - cudaMemcpyHostToDevice),1102); - - print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ijk), - 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int)),1103); - print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk, - 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int), - cudaMemcpyHostToDevice),1104); - - print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_normal), - 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1105); - print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal, - 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw), - cudaMemcpyHostToDevice),1106); - - print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_jacobian2Dw), - NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1107); - print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw, - NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw), - cudaMemcpyHostToDevice),1108); + if( mp->absorbing_conditions && mp->d_num_abs_boundary_faces > 0 ){ + copy_todevice_int((void**)&mp->d_abs_boundary_ispec,h_abs_boundary_ispec,mp->d_num_abs_boundary_faces); + +// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ispec), +// (mp->d_num_abs_boundary_faces)*sizeof(int)),1101); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec, +// (mp->d_num_abs_boundary_faces)*sizeof(int), +// cudaMemcpyHostToDevice),1102); + + copy_todevice_int((void**)&mp->d_abs_boundary_ijk,h_abs_boundary_ijk, + 3*NGLL2*(mp->d_num_abs_boundary_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ijk), +// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int)),1103); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk, +// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int), +// cudaMemcpyHostToDevice),1104); + + copy_todevice_realw((void**)&mp->d_abs_boundary_normal,h_abs_boundary_normal, + NDIM*NGLL2*(mp->d_num_abs_boundary_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_normal), +// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1105); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal, +// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw), +// cudaMemcpyHostToDevice),1106); + + copy_todevice_realw((void**)&mp->d_abs_boundary_jacobian2Dw,h_abs_boundary_jacobian2Dw, + NGLL2*(mp->d_num_abs_boundary_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_jacobian2Dw), +// NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1107); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw, +// NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw), + // cudaMemcpyHostToDevice),1108); } // sources mp->nsources_local = *nsources_local_f; - if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){ + if (mp->simulation_type == 1 || mp->simulation_type == 3){ // not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2) copy_todevice_realw((void**)&mp->d_sourcearrays,h_sourcearrays,(*NSOURCES)*NDIM*NGLL3); @@ -388,7 +405,6 @@ void FC_FUNC_(prepare_fields_acoustic_device, int* num_free_surface_faces, int* 
free_surface_ispec, int* free_surface_ijk, - int* ABSORBING_CONDITIONS, int* b_reclen_potential, realw* b_absorb_potential, int* ELASTIC_SIMULATION, @@ -406,7 +422,7 @@ void FC_FUNC_(prepare_fields_acoustic_device, Mesh* mp = (Mesh*)(*Mesh_pointer_f); /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */ int size_padded = NGLL3_PADDED * mp->NSPEC_AB; - int size_nonpadded = NGLL3 * mp->NSPEC_AB; +// int size_nonpadded = NGLL3 * mp->NSPEC_AB; int size_glob = mp->NGLOB_AB; // allocates arrays on device (GPU) @@ -421,9 +437,11 @@ void FC_FUNC_(prepare_fields_acoustic_device, } // mass matrix - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005); - print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic, - sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100); + copy_todevice_realw((void**)&mp->d_rmass_acoustic,rmass_acoustic,mp->NGLOB_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic, +// sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100); // padded array print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(realw)),2006); @@ -434,45 +452,63 @@ void FC_FUNC_(prepare_fields_acoustic_device, } // non-padded array - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(realw)),2007); - print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore, - NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),2105); + copy_todevice_realw((void**)&mp->d_kappastore,kappastore,NGLL3*mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(realw)),2007); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore, +// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),2105); // phase elements mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic; - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic), - mp->num_phase_ispec_acoustic*2*sizeof(int)),2008); - print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic, - mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),2101); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic), - mp->NSPEC_AB*sizeof(int)),2009); - print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic, - mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),2102); + copy_todevice_int((void**)&mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic, + 2*mp->num_phase_ispec_acoustic); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic), +// mp->num_phase_ispec_acoustic*2*sizeof(int)),2008); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic, +// mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),2101); + + copy_todevice_int((void**)&mp->d_ispec_is_acoustic,ispec_is_acoustic,mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic), +// mp->NSPEC_AB*sizeof(int)),2009); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic, +// mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),2102); // free surface if( *NOISE_TOMOGRAPHY == 0 ){ // allocate surface arrays mp->num_free_surface_faces = *num_free_surface_faces; if( mp->num_free_surface_faces > 0 ){ - 
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec), - mp->num_free_surface_faces*sizeof(int)),2201); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec, - mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2203); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk), - 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),2202); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk, - 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2204); + + copy_todevice_int((void**)&mp->d_free_surface_ispec,free_surface_ispec,mp->num_free_surface_faces); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec), +// mp->num_free_surface_faces*sizeof(int)),2201); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec, +// mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2203); + + + copy_todevice_int((void**)&mp->d_free_surface_ijk,free_surface_ijk, + 3*NGLL2*mp->num_free_surface_faces); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk), +// 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),2202); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk, +// 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2204); } } // absorbing boundaries - if( *ABSORBING_CONDITIONS ){ + if( mp->absorbing_conditions ){ mp->d_b_reclen_potential = *b_reclen_potential; - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),2301); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential, - mp->d_b_reclen_potential,cudaMemcpyHostToDevice),2302); + + copy_todevice_realw((void**)&mp->d_b_absorb_potential,b_absorb_potential,mp->d_b_reclen_potential); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),2301); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential, +// mp->d_b_reclen_potential,cudaMemcpyHostToDevice),2302); } @@ -488,25 +524,36 @@ void FC_FUNC_(prepare_fields_acoustic_device, // coupling with elastic parts if( *ELASTIC_SIMULATION && *num_coupling_ac_el_faces > 0 ){ - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec), - (*num_coupling_ac_el_faces)*sizeof(int)),2601); - print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec, - (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2602); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk), - 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int)),2603); - print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk, - 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2604); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal), - 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2605); - print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal, - 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2606); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw), - NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2607); - print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw, - NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2608); + + 
copy_todevice_int((void**)&mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,(*num_coupling_ac_el_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec), +// (*num_coupling_ac_el_faces)*sizeof(int)),2601); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec, +// (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2602); + + copy_todevice_int((void**)&mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,3*NGLL2*(*num_coupling_ac_el_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk), +// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int)),2603); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk, +// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2604); + + copy_todevice_realw((void**)&mp->d_coupling_ac_el_normal,coupling_ac_el_normal, + 3*NGLL2*(*num_coupling_ac_el_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal), +// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2605); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal, +// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2606); + + copy_todevice_realw((void**)&mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw, + NGLL2*(*num_coupling_ac_el_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw), +// NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2607); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw, +// NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2608); } @@ -528,7 +575,6 @@ void FC_FUNC_(prepare_fields_acoustic_device, extern "C" void FC_FUNC_(prepare_fields_acoustic_adj_dev, PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f, - int* SIMULATION_TYPE, int* APPROXIMATE_HESS_KL) { TRACE("prepare_fields_acoustic_adj_dev"); @@ -538,7 +584,7 @@ void FC_FUNC_(prepare_fields_acoustic_adj_dev, int size_glob = mp->NGLOB_AB; // kernel simulations - if( *SIMULATION_TYPE != 3 ) return; + if( mp->simulation_type != 3 ) return; // allocates backward/reconstructed arrays on device (GPU) print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(realw)*size_glob),3014); @@ -579,16 +625,17 @@ extern "C" void FC_FUNC_(prepare_fields_elastic_device, PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f, int* size, - realw* rmass, + realw* rmassx, + realw* rmassy, + realw* rmassz, realw* rho_vp, realw* rho_vs, int* num_phase_ispec_elastic, int* phase_ispec_inner_elastic, int* ispec_is_elastic, - int* ABSORBING_CONDITIONS, realw* h_b_absorb_field, int* h_b_reclen_field, - int* SIMULATION_TYPE,int* SAVE_FORWARD, + int* SAVE_FORWARD, int* COMPUTE_AND_STORE_STRAIN, realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy, realw* epsilondev_xz,realw* epsilondev_yz, @@ -636,7 +683,7 @@ TRACE("prepare_fields_elastic_device"); Mesh* mp = (Mesh*)(*Mesh_pointer_f); /* Assuming NGLLX==5. 
Padded is then 128 (5^3+3) */ int size_padded = NGLL3_PADDED * (mp->NSPEC_AB); - int size_nonpadded = NGLL3 * (mp->NSPEC_AB); +// int size_nonpadded = NGLL3 * (mp->NSPEC_AB); print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(realw)*(*size)),4001); print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(realw)*(*size)),4002); @@ -663,23 +710,32 @@ TRACE("prepare_fields_elastic_device"); } // mass matrix - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(realw)*mp->NGLOB_AB),4005); - // transfer element data - print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass, - sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4010); + copy_todevice_realw((void**)&mp->d_rmassx,rmassx,mp->NGLOB_AB); + copy_todevice_realw((void**)&mp->d_rmassy,rmassy,mp->NGLOB_AB); + copy_todevice_realw((void**)&mp->d_rmassz,rmassz,mp->NGLOB_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(realw)*mp->NGLOB_AB),4005); +// // transfer element data +// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass, +// sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4010); // element indices - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),4009); - print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic, - mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),4012); + copy_todevice_int((void**)&mp->d_ispec_is_elastic,ispec_is_elastic,mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),4009); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic, +// mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),4012); // phase elements mp->num_phase_ispec_elastic = *num_phase_ispec_elastic; - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic), - mp->num_phase_ispec_elastic*2*sizeof(int)),4008); - print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic, - mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),4011); + + copy_todevice_int((void**)&mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,2*mp->num_phase_ispec_elastic); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic), +// mp->num_phase_ispec_elastic*2*sizeof(int)),4008); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic, +// mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),4011); // for seismograms if( mp->nrec_local > 0 ){ @@ -691,24 +747,30 @@ TRACE("prepare_fields_elastic_device"); } // absorbing conditions - if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){ + if( mp->absorbing_conditions && mp->d_num_abs_boundary_faces > 0){ // non-padded arrays - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(realw)),4006); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(realw)),4007); + copy_todevice_realw((void**)&mp->d_rho_vp,rho_vp,NGLL3*mp->NSPEC_AB); + copy_todevice_realw((void**)&mp->d_rho_vs,rho_vs,NGLL3*mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(realw)),4006); +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(realw)),4007); // rho_vp, rho_vs non-padded; they are needed for stacey boundary condition - print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp, - NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4013); - 
print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs, - NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4014); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp, +// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4013); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs, +// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4014); // absorb_field array used for file i/o - if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD )){ + if(mp->simulation_type == 3 || ( mp->simulation_type == 1 && *SAVE_FORWARD )){ mp->d_b_reclen_field = *h_b_reclen_field; - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field), - mp->d_b_reclen_field),4016); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field, - mp->d_b_reclen_field,cudaMemcpyHostToDevice),4017); + + copy_todevice_realw((void**)&mp->d_b_absorb_field,h_b_absorb_field,mp->d_b_reclen_field); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field), +// mp->d_b_reclen_field),4016); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field, +// mp->d_b_reclen_field,cudaMemcpyHostToDevice),4017); } } @@ -717,83 +779,117 @@ TRACE("prepare_fields_elastic_device"); // strains int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx, - epsilondev_size*sizeof(realw)),4301); - print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(realw), - cudaMemcpyHostToDevice),4302); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy, - epsilondev_size*sizeof(realw)),4302); - print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(realw), - cudaMemcpyHostToDevice),4303); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy, - epsilondev_size*sizeof(realw)),4304); - print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(realw), - cudaMemcpyHostToDevice),4305); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz, - epsilondev_size*sizeof(realw)),4306); - print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(realw), - cudaMemcpyHostToDevice),4307); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz, - epsilondev_size*sizeof(realw)),4308); - print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(realw), - cudaMemcpyHostToDevice),4309); + copy_todevice_realw((void**)&mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx, +// epsilondev_size*sizeof(realw)),4301); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(realw), +// cudaMemcpyHostToDevice),4302); + + copy_todevice_realw((void**)&mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy, +// epsilondev_size*sizeof(realw)),4302); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(realw), +// cudaMemcpyHostToDevice),4303); + + copy_todevice_realw((void**)&mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy, +// epsilondev_size*sizeof(realw)),4304); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(realw), 
+// cudaMemcpyHostToDevice),4305); + + copy_todevice_realw((void**)&mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz, +// epsilondev_size*sizeof(realw)),4306); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(realw), +// cudaMemcpyHostToDevice),4307); + + copy_todevice_realw((void**)&mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz, +// epsilondev_size*sizeof(realw)),4308); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(realw), +// cudaMemcpyHostToDevice),4309); } // attenuation memory variables if( *ATTENUATION ){ // memory arrays - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx), - (*R_size)*sizeof(realw)),4401); - print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx,R_xx,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),4402); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yy), - (*R_size)*sizeof(realw)),4403); - print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy,R_yy,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),4404); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xy), - (*R_size)*sizeof(realw)),4405); - print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy,R_xy,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),4406); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xz), - (*R_size)*sizeof(realw)),4407); - print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz,R_xz,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),4408); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz), - (*R_size)*sizeof(realw)),4409); - print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),4410); + copy_todevice_realw((void**)&mp->d_R_xx,R_xx,(*R_size)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx), +// (*R_size)*sizeof(realw)),4401); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx,R_xx,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),4402); + + copy_todevice_realw((void**)&mp->d_R_yy,R_yy,(*R_size)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yy), +// (*R_size)*sizeof(realw)),4403); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy,R_yy,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),4404); + + copy_todevice_realw((void**)&mp->d_R_xy,R_xy,(*R_size)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xy), +// (*R_size)*sizeof(realw)),4405); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy,R_xy,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),4406); + + copy_todevice_realw((void**)&mp->d_R_xz,R_xz,(*R_size)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xz), +// (*R_size)*sizeof(realw)),4407); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz,R_xz,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),4408); + + copy_todevice_realw((void**)&mp->d_R_yz,R_yz,(*R_size)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz), +// (*R_size)*sizeof(realw)),4409); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),4410); // attenuation factors - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta), - NGLL3*mp->NSPEC_AB*sizeof(realw)),4430); - print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta, - NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4431); + 
copy_todevice_realw((void**)&mp->d_one_minus_sum_beta,one_minus_sum_beta,NGLL3*mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta), +// NGLL3*mp->NSPEC_AB*sizeof(realw)),4430); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta, +// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4431); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common), - N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw)),4432); - print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common ,factor_common, - N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4433); + copy_todevice_realw((void**)&mp->d_factor_common,factor_common,N_SLS*NGLL3*mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common), +// N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw)),4432); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common ,factor_common, +// N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4433); // alpha,beta,gamma factors - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alphaval), - N_SLS*sizeof(realw)),4434); - print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval, - N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435); + copy_todevice_realw((void**)&mp->d_alphaval,alphaval,N_SLS); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alphaval), +// N_SLS*sizeof(realw)),4434); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval, +// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval), - N_SLS*sizeof(realw)),4436); - print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval, - N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437); + copy_todevice_realw((void**)&mp->d_betaval,betaval,N_SLS); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval), - N_SLS*sizeof(realw)),4438); - print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval, - N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439); +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval), +// N_SLS*sizeof(realw)),4436); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval, +// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437); + + copy_todevice_realw((void**)&mp->d_gammaval,gammaval,N_SLS); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval), +// N_SLS*sizeof(realw)),4438); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval, +// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439); } @@ -896,29 +992,41 @@ TRACE("prepare_fields_elastic_device"); mp->num_free_surface_faces = *num_free_surface_faces; if( mp->num_free_surface_faces > 0 ){ // mass matrix - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load), - sizeof(realw)*mp->NGLOB_AB),4501); - print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load, - sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4502); + copy_todevice_realw((void**)&mp->d_rmass_ocean_load,rmass_ocean_load,mp->NGLOB_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load), +// sizeof(realw)*mp->NGLOB_AB),4501); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load, +// sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4502); // surface normal - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal), - 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw)),4503); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal, - 
3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice),4504); + copy_todevice_realw((void**)&mp->d_free_surface_normal,free_surface_normal, + 3*NGLL2*(mp->num_free_surface_faces)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal), +// 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw)),4503); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal, +// 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice),4504); // temporary global array: used to synchronize updates on global accel array print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load), sizeof(int)*mp->NGLOB_AB),4505); if( *NOISE_TOMOGRAPHY == 0 && *ACOUSTIC_SIMULATION == 0 ){ - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec), - mp->num_free_surface_faces*sizeof(int)),4601); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec, - mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4603); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk), - 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),4602); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk, - 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4604); + + copy_todevice_int((void**)&mp->d_free_surface_ispec,free_surface_ispec,mp->num_free_surface_faces); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec), +// mp->num_free_surface_faces*sizeof(int)),4601); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec, +// mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4603); + + copy_todevice_int((void**)&mp->d_free_surface_ijk,free_surface_ijk, + 3*NGLL2*mp->num_free_surface_faces); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk), +// 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),4602); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk, +// 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4604); } } } @@ -941,7 +1049,6 @@ extern "C" void FC_FUNC_(prepare_fields_elastic_adj_dev, PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f, int* size, - int* SIMULATION_TYPE, int* COMPUTE_AND_STORE_STRAIN, realw* epsilon_trace_over_3, realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy, @@ -958,7 +1065,7 @@ void FC_FUNC_(prepare_fields_elastic_adj_dev, Mesh* mp = (Mesh*)(*Mesh_pointer_f); // checks if kernel simulation - if( *SIMULATION_TYPE != 3 ) return; + if( mp->simulation_type != 3 ) return; // kernel simulations // allocates backward/reconstructed arrays on device (GPU) @@ -985,81 +1092,104 @@ void FC_FUNC_(prepare_fields_elastic_adj_dev, int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing // solid pressure - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3), - NGLL3*mp->NSPEC_AB*sizeof(realw)),5310); - print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3, - NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5311); + copy_todevice_realw((void**)&mp->d_epsilon_trace_over_3,epsilon_trace_over_3,NGLL3*mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3), +// NGLL3*mp->NSPEC_AB*sizeof(realw)),5310); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3, +// 
NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5311); // backward solid pressure - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3), - NGLL3*mp->NSPEC_AB*sizeof(realw)),5312); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3, - NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5313); + + copy_todevice_realw((void**)&mp->d_b_epsilon_trace_over_3,b_epsilon_trace_over_3,NGLL3*mp->NSPEC_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3), +// NGLL3*mp->NSPEC_AB*sizeof(realw)),5312); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3, +// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5313); + // prepares backward strains - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx), - epsilondev_size*sizeof(realw)),5321); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy), - epsilondev_size*sizeof(realw)),5322); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy), - epsilondev_size*sizeof(realw)),5323); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz), - epsilondev_size*sizeof(realw)),5324); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz), - epsilondev_size*sizeof(realw)),5325); - - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx, - epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5326); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy, - epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5327); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy, - epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5328); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz, - epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5329); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz, - epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5330); + + copy_todevice_realw((void**)&mp->d_b_epsilondev_xx,b_epsilondev_xx,epsilondev_size); + copy_todevice_realw((void**)&mp->d_b_epsilondev_yy,b_epsilondev_yy,epsilondev_size); + copy_todevice_realw((void**)&mp->d_b_epsilondev_xy,b_epsilondev_xy,epsilondev_size); + copy_todevice_realw((void**)&mp->d_b_epsilondev_xz,b_epsilondev_xz,epsilondev_size); + copy_todevice_realw((void**)&mp->d_b_epsilondev_yz,b_epsilondev_yz,epsilondev_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx), +// epsilondev_size*sizeof(realw)),5321); +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy), +// epsilondev_size*sizeof(realw)),5322); +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy), +// epsilondev_size*sizeof(realw)),5323); +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz), +// epsilondev_size*sizeof(realw)),5324); +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz), +// epsilondev_size*sizeof(realw)),5325); + +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx, +// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5326); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy, +// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5327); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy, +// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5328); +// 
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz, +// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5329); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz, +// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5330); } // attenuation memory variables if( *ATTENUATION ){ - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx), - (*R_size)*sizeof(realw)),5421); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),5422); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy), - (*R_size)*sizeof(realw)),5423); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),5424); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy), - (*R_size)*sizeof(realw)),5425); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),5426); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz), - (*R_size)*sizeof(realw)),5427); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),5428); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz), - (*R_size)*sizeof(realw)),5429); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(realw), - cudaMemcpyHostToDevice),5420); + copy_todevice_realw((void**)&mp->d_b_R_xx,b_R_xx,(*R_size)); + copy_todevice_realw((void**)&mp->d_b_R_yy,b_R_yy,(*R_size)); + copy_todevice_realw((void**)&mp->d_b_R_xy,b_R_xy,(*R_size)); + copy_todevice_realw((void**)&mp->d_b_R_xz,b_R_xz,(*R_size)); + copy_todevice_realw((void**)&mp->d_b_R_yz,b_R_yz,(*R_size)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx), +// (*R_size)*sizeof(realw)),5421); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),5422); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy), +// (*R_size)*sizeof(realw)),5423); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),5424); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy), +// (*R_size)*sizeof(realw)),5425); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),5426); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz), +// (*R_size)*sizeof(realw)),5427); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),5428); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz), +// (*R_size)*sizeof(realw)),5429); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(realw), +// cudaMemcpyHostToDevice),5420); // alpha,beta,gamma factors for backward fields - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval), - N_SLS*sizeof(realw)),5434); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval, - N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval), - N_SLS*sizeof(realw)),5436); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval, - N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437); - - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval), - N_SLS*sizeof(realw)),5438); - print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval, - N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439); + 
copy_todevice_realw((void**)&mp->d_b_alphaval,b_alphaval,N_SLS); + copy_todevice_realw((void**)&mp->d_b_betaval,b_betaval,N_SLS); + copy_todevice_realw((void**)&mp->d_b_gammaval,b_gammaval,N_SLS); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval), +// N_SLS*sizeof(realw)),5434); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval, +// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval), +// N_SLS*sizeof(realw)),5436); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval, +// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval), +// N_SLS*sizeof(realw)),5438); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval, +// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439); } if( *APPROXIMATE_HESS_KL ){ @@ -1148,7 +1278,6 @@ void FC_FUNC_(prepare_fields_noise_device, int* free_surface_ispec, int* free_surface_ijk, int* num_free_surface_faces, - int* SIMULATION_TYPE, int* NOISE_TOMOGRAPHY, int* NSTEP, realw* noise_sourcearray, @@ -1165,15 +1294,20 @@ void FC_FUNC_(prepare_fields_noise_device, // free surface mp->num_free_surface_faces = *num_free_surface_faces; - print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec, - mp->num_free_surface_faces*sizeof(int)),7001); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec, - mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7002); + copy_todevice_int((void**)&mp->d_free_surface_ispec,free_surface_ispec,mp->num_free_surface_faces); - print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk, - 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),7003); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk, - 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7004); +// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec, +// mp->num_free_surface_faces*sizeof(int)),7001); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec, +// mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7002); + + copy_todevice_int((void**)&mp->d_free_surface_ijk,free_surface_ijk, + 3*NGLL2*mp->num_free_surface_faces); + +// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk, +// 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),7003); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk, +// 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7004); // alloc storage for the surface buffer to be copied print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie, @@ -1181,37 +1315,48 @@ void FC_FUNC_(prepare_fields_noise_device, // prepares noise source array if( *NOISE_TOMOGRAPHY == 1 ){ - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray, - 3*NGLL3*(*NSTEP)*sizeof(realw)),7101); - print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray, - 3*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102); + copy_todevice_realw((void**)&mp->d_noise_sourcearray,noise_sourcearray, + 3*NGLL3*(*NSTEP)); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray, +// 3*NGLL3*(*NSTEP)*sizeof(realw)),7101); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray, +// 3*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102); } // prepares noise directions if( *NOISE_TOMOGRAPHY > 1 ){ 
int nface_size = NGLL2*(*num_free_surface_faces); // allocates memory on GPU - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise, - nface_size*sizeof(realw)),7301); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise, - nface_size*sizeof(realw)),7302); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise, - nface_size*sizeof(realw)),7303); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise, - nface_size*sizeof(realw)),7304); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw, - nface_size*sizeof(realw)),7305); + copy_todevice_realw((void**)&mp->d_normal_x_noise,normal_x_noise,nface_size); + copy_todevice_realw((void**)&mp->d_normal_y_noise,normal_y_noise,nface_size); + copy_todevice_realw((void**)&mp->d_normal_z_noise,normal_z_noise,nface_size); + + copy_todevice_realw((void**)&mp->d_mask_noise,mask_noise,nface_size); + copy_todevice_realw((void**)&mp->d_free_surface_jacobian2Dw,free_surface_jacobian2Dw,nface_size); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise, +// nface_size*sizeof(realw)),7301); +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise, +// nface_size*sizeof(realw)),7302); +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise, +// nface_size*sizeof(realw)),7303); + +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise, +// nface_size*sizeof(realw)),7304); +// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw, +// nface_size*sizeof(realw)),7305); // transfers data onto GPU - print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise, - nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306); - print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise, - nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307); - print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise, - nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308); - print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise, - nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309); - print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw, - nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise, +// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise, +// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise, +// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise, +// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw, +// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310); } // prepares noise strength kernel @@ -1254,15 +1399,20 @@ void FC_FUNC_(prepare_fields_gravity_device, mp->gravity = *GRAVITY; if( mp->gravity ){ - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_deriv_gravity), - (mp->NGLOB_AB)*sizeof(realw)),8000); - print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity, minus_deriv_gravity, - (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8001); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_g), - (mp->NGLOB_AB)*sizeof(realw)),8002); - print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_g, minus_g, - 
(mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8003); + copy_todevice_realw((void**)&mp->d_minus_deriv_gravity,minus_deriv_gravity,mp->NGLOB_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_deriv_gravity), +// (mp->NGLOB_AB)*sizeof(realw)),8000); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity, minus_deriv_gravity, +// (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8001); + + copy_todevice_realw((void**)&mp->d_minus_g,minus_g,mp->NGLOB_AB); + +// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_g), +// (mp->NGLOB_AB)*sizeof(realw)),8002); +// print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_g, minus_g, +// (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8003); if( *ACOUSTIC_SIMULATION == 0 ){ @@ -1280,6 +1430,8 @@ void FC_FUNC_(prepare_fields_gravity_device, } +/* ----------------------------------------------------------------------------------------------- */ + extern "C" void FC_FUNC_(prepare_seismogram_fields, PREPARE_SEISMOGRAM_FIELDS)(long* Mesh_pointer,int* nrec_local, double* nu, double* hxir, double* hetar, double* hgammar) { @@ -1287,19 +1439,19 @@ void FC_FUNC_(prepare_seismogram_fields, TRACE("prepare_constants_device"); Mesh* mp = (Mesh*)(*Mesh_pointer); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_nu),3*3* *nrec_local*sizeof(double)),8100); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hxir),5* *nrec_local*sizeof(double)),8100); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hetar),5* *nrec_local*sizeof(double)),8100); - print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hgammar),5* *nrec_local*sizeof(double)),8100); + print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_nu),3*3*(*nrec_local)*sizeof(double)),8100); + print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hxir),5*(*nrec_local)*sizeof(double)),8100); + print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hetar),5*(*nrec_local)*sizeof(double)),8100); + print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hgammar),5*(*nrec_local)*sizeof(double)),8100); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_d,3**nrec_local*sizeof(realw)),8101); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_v,3**nrec_local*sizeof(realw)),8101); - print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_a,3**nrec_local*sizeof(realw)),8101); + print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_d,3*(*nrec_local)*sizeof(realw)),8101); + print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_v,3*(*nrec_local)*sizeof(realw)),8101); + print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_a,3*(*nrec_local)*sizeof(realw)),8101); - print_CUDA_error_if_any(cudaMemcpy(mp->d_nu,nu,3*3* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101); - print_CUDA_error_if_any(cudaMemcpy(mp->d_hxir,hxir,5* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101); - print_CUDA_error_if_any(cudaMemcpy(mp->d_hetar,hetar,5* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101); - print_CUDA_error_if_any(cudaMemcpy(mp->d_hgammar,hgammar,5* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101); + print_CUDA_error_if_any(cudaMemcpy(mp->d_nu,nu,3*3*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101); + print_CUDA_error_if_any(cudaMemcpy(mp->d_hxir,hxir,5*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101); + print_CUDA_error_if_any(cudaMemcpy(mp->d_hetar,hetar,5*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101); + 
print_CUDA_error_if_any(cudaMemcpy(mp->d_hgammar,hgammar,5*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101); cudaMallocHost((void**)&mp->h_seismograms_d_it,3**nrec_local*sizeof(realw)); cudaMallocHost((void**)&mp->h_seismograms_v_it,3**nrec_local*sizeof(realw)); @@ -1315,7 +1467,6 @@ void FC_FUNC_(prepare_seismogram_fields, extern "C" void FC_FUNC_(prepare_cleanup_device, PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f, - int* SIMULATION_TYPE, int* SAVE_FORWARD, int* ACOUSTIC_SIMULATION, int* ELASTIC_SIMULATION, @@ -1362,7 +1513,7 @@ TRACE("prepare_cleanup_device"); cudaFree(mp->d_ibool); // sources - if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){ + if (mp->simulation_type == 1 || mp->simulation_type == 3){ cudaFree(mp->d_sourcearrays); cudaFree(mp->d_stf_pre_compute); } @@ -1393,7 +1544,7 @@ TRACE("prepare_cleanup_device"); if( *ABSORBING_CONDITIONS ) cudaFree(mp->d_b_absorb_potential); - if( *SIMULATION_TYPE == 3 ) { + if( mp->simulation_type == 3 ) { cudaFree(mp->d_b_potential_acoustic); cudaFree(mp->d_b_potential_dot_acoustic); cudaFree(mp->d_b_potential_dot_dot_acoustic); @@ -1416,7 +1567,10 @@ TRACE("prepare_cleanup_device"); cudaFree(mp->d_veloc); cudaFree(mp->d_accel); cudaFree(mp->d_send_accel_buffer); - cudaFree(mp->d_rmass); + + cudaFree(mp->d_rmassx); + cudaFree(mp->d_rmassy); + cudaFree(mp->d_rmassz); cudaFree(mp->d_phase_ispec_inner_elastic); cudaFree(mp->d_ispec_is_elastic); @@ -1430,11 +1584,11 @@ TRACE("prepare_cleanup_device"); cudaFree(mp->d_rho_vp); cudaFree(mp->d_rho_vs); - if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD )) + if(mp->simulation_type == 3 || ( mp->simulation_type == 1 && *SAVE_FORWARD )) cudaFree(mp->d_b_absorb_field); } - if( *SIMULATION_TYPE == 3 ) { + if( mp->simulation_type == 3 ) { cudaFree(mp->d_b_displ); cudaFree(mp->d_b_veloc); cudaFree(mp->d_b_accel); @@ -1450,7 +1604,7 @@ TRACE("prepare_cleanup_device"); cudaFree(mp->d_epsilondev_xy); cudaFree(mp->d_epsilondev_xz); cudaFree(mp->d_epsilondev_yz); - if( *SIMULATION_TYPE == 3 ){ + if( mp->simulation_type == 3 ){ cudaFree(mp->d_epsilon_trace_over_3); cudaFree(mp->d_b_epsilon_trace_over_3); cudaFree(mp->d_b_epsilondev_xx); @@ -1472,7 +1626,7 @@ TRACE("prepare_cleanup_device"); cudaFree(mp->d_R_xy); cudaFree(mp->d_R_xz); cudaFree(mp->d_R_yz); - if( *SIMULATION_TYPE == 3){ + if( mp->simulation_type == 3){ cudaFree(mp->d_b_R_xx); cudaFree(mp->d_b_R_yy); cudaFree(mp->d_b_R_xy); @@ -1522,7 +1676,7 @@ TRACE("prepare_cleanup_device"); } // ELASTIC_SIMULATION // purely adjoint & kernel array - if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ){ + if( mp->simulation_type == 2 || mp->simulation_type == 3 ){ if(mp->nadj_rec_local > 0 ){ cudaFree(mp->d_adj_sourcearrays); cudaFree(mp->d_pre_computed_irec); diff --git a/src/cuda/specfem3D_gpu_cuda_method_stubs.c b/src/cuda/specfem3D_gpu_cuda_method_stubs.c index c96f208d5..c6231321c 100644 --- a/src/cuda/specfem3D_gpu_cuda_method_stubs.c +++ b/src/cuda/specfem3D_gpu_cuda_method_stubs.c @@ -79,13 +79,11 @@ void FC_FUNC_(get_max_accel, void FC_FUNC_(get_norm_acoustic_from_device, GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm, - long* Mesh_pointer_f, - int* SIMULATION_TYPE) {} + long* Mesh_pointer_f) {} void FC_FUNC_(get_norm_elastic_from_device, GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm, - long* Mesh_pointer_f, - int* SIMULATION_TYPE) {} + long* Mesh_pointer_f) {} // @@ -96,7 +94,6 @@ void FC_FUNC_(compute_add_sources_ac_cuda, COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, int* NSOURCESf, - int* 
SIMULATION_TYPEf, double* h_stf_pre_compute, int* myrankf) {} @@ -104,7 +101,6 @@ void FC_FUNC_(compute_add_sources_ac_s3_cuda, COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, int* NSOURCESf, - int* SIMULATION_TYPEf, double* h_stf_pre_compute, int* myrankf) {} @@ -170,14 +166,15 @@ void FC_FUNC_(add_sources_el_sim_type_2_or_3, void FC_FUNC_(compute_coupling_ac_el_cuda, COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, - int* num_coupling_ac_el_facesf, - int* SIMULATION_TYPEf) {} + int* num_coupling_ac_el_facesf) {} void FC_FUNC_(compute_coupling_el_ac_cuda, COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, - int* num_coupling_ac_el_facesf, - int* SIMULATION_TYPEf) {} + int* num_coupling_ac_el_facesf) {} + +void FC_FUNC_(compute_coupling_ocean_cuda, + COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f) {} // @@ -211,25 +208,11 @@ void FC_FUNC_(compute_forces_acoustic_cuda, COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f, int* iphase, int* nspec_outer_acoustic, - int* nspec_inner_acoustic, - int* SIMULATION_TYPE) {} - -void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( - long* Mesh_pointer, - int* size_F, - int* SIMULATION_TYPE) {} - -void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( - long* Mesh_pointer, - int* size_F, - realw* deltatover2_F, - int* SIMULATION_TYPE, - realw* b_deltatover2_F) {} + int* nspec_inner_acoustic) {} void FC_FUNC_(acoustic_enforce_free_surf_cuda, ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f, - int* SIMULATION_TYPE, - int* ABSORB_FREE_SURFACE) {} + int* ABSORB_FREE_SURFACE) {} // @@ -278,7 +261,6 @@ void FC_FUNC_(compute_forces_elastic_cuda, int* iphase, int* nspec_outer_elastic, int* nspec_inner_elastic, - int* SIMULATION_TYPE, int* COMPUTE_AND_STORE_STRAIN, int* ATTENUATION, int* ANISOTROPY) {} @@ -288,25 +270,6 @@ void FC_FUNC_(sync_copy_from_device, int* iphase, realw* send_buffer) {} -void FC_FUNC_(kernel_3_a_cuda, - KERNEL_3_A_CUDA)(long* Mesh_pointer, - int* size_F, - realw* deltatover2_F, - int* SIMULATION_TYPE_f, - realw* b_deltatover2_F, - int* OCEANS) {} - -void FC_FUNC_(kernel_3_b_cuda, - KERNEL_3_B_CUDA)(long* Mesh_pointer, - int* size_F, - realw* deltatover2_F, - int* SIMULATION_TYPE_f, - realw* b_deltatover2_F) {} - -void FC_FUNC_(elastic_ocean_load_cuda, - ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f, - int* SIMULATION_TYPE) {} - // // src/cuda/compute_kernels_cuda.cu @@ -338,12 +301,10 @@ void FC_FUNC_(compute_kernels_hess_cuda, // void FC_FUNC_(compute_stacey_acoustic_cuda, - COMPUTE_STACEY_ACOUSTIC_CUDA)( - long* Mesh_pointer_f, - int* phase_is_innerf, - int* SIMULATION_TYPEf, - int* SAVE_FORWARDf, - realw* h_b_absorb_potential) {} + COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f, + int* phase_is_innerf, + int* SAVE_FORWARDf, + realw* h_b_absorb_potential) {} // @@ -353,7 +314,6 @@ void FC_FUNC_(compute_stacey_acoustic_cuda, void FC_FUNC_(compute_stacey_elastic_cuda, COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f, int* phase_is_innerf, - int* SIMULATION_TYPEf, int* SAVE_FORWARDf, realw* h_b_absorb_field) {} @@ -375,14 +335,13 @@ void FC_FUNC_(initialize_cuda_device, void FC_FUNC_(it_update_displacement_cuda, IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f, - int* size_F, - realw* deltat_F, - realw* deltatsqover2_F, - realw* deltatover2_F, - int* SIMULATION_TYPE, - realw* b_deltat_F, - realw* b_deltatsqover2_F, - realw* b_deltatover2_F) {} + int* size_F, + realw* deltat_F, + realw* deltatsqover2_F, + realw* deltatover2_F, + 
realw* b_deltat_F, + realw* b_deltatsqover2_F, + realw* b_deltatover2_F) {} void FC_FUNC_(it_update_displacement_ac_cuda, it_update_displacement_ac_cuda)(long* Mesh_pointer_f, @@ -390,11 +349,33 @@ void FC_FUNC_(it_update_displacement_ac_cuda, realw* deltat_F, realw* deltatsqover2_F, realw* deltatover2_F, - int* SIMULATION_TYPE, realw* b_deltat_F, realw* b_deltatsqover2_F, realw* b_deltatover2_F) {} +void FC_FUNC_(kernel_3_a_cuda, + KERNEL_3_A_CUDA)(long* Mesh_pointer, + int* size_F, + realw* deltatover2_F, + realw* b_deltatover2_F, + int* OCEANS) {} + +void FC_FUNC_(kernel_3_b_cuda, + KERNEL_3_B_CUDA)(long* Mesh_pointer, + int* size_F, + realw* deltatover2_F, + realw* b_deltatover2_F) {} + +void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( + long* Mesh_pointer, + int* size_F) {} + +void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)( + long* Mesh_pointer, + int* size_F, + realw* deltatover2_F, + realw* b_deltatover2_F) {} + // // src/cuda/noise_tomography_cuda.cu @@ -475,7 +456,6 @@ void FC_FUNC_(prepare_fields_acoustic_device, int* num_free_surface_faces, int* free_surface_ispec, int* free_surface_ijk, - int* ABSORBING_CONDITIONS, int* b_reclen_potential, realw* b_absorb_potential, int* ELASTIC_SIMULATION, @@ -490,22 +470,22 @@ void FC_FUNC_(prepare_fields_acoustic_device, void FC_FUNC_(prepare_fields_acoustic_adj_dev, PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f, - int* SIMULATION_TYPE, int* APPROXIMATE_HESS_KL) {} void FC_FUNC_(prepare_fields_elastic_device, PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f, int* size, - realw* rmass, + realw* rmassx, + realw* rmassy, + realw* rmassz, realw* rho_vp, realw* rho_vs, int* num_phase_ispec_elastic, int* phase_ispec_inner_elastic, int* ispec_is_elastic, - int* ABSORBING_CONDITIONS, realw* h_b_absorb_field, int* h_b_reclen_field, - int* SIMULATION_TYPE,int* SAVE_FORWARD, + int* SAVE_FORWARD, int* COMPUTE_AND_STORE_STRAIN, realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy, realw* epsilondev_xz,realw* epsilondev_yz, @@ -551,7 +531,6 @@ void FC_FUNC_(prepare_fields_elastic_device, void FC_FUNC_(prepare_fields_elastic_adj_dev, PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f, int* size, - int* SIMULATION_TYPE, int* COMPUTE_AND_STORE_STRAIN, realw* epsilon_trace_over_3, realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy, @@ -578,7 +557,6 @@ void FC_FUNC_(prepare_fields_noise_device, int* free_surface_ispec, int* free_surface_ijk, int* num_free_surface_faces, - int* SIMULATION_TYPE, int* NOISE_TOMOGRAPHY, int* NSTEP, realw* noise_sourcearray, @@ -602,7 +580,6 @@ void FC_FUNC_(prepare_seismogram_fields, void FC_FUNC_(prepare_cleanup_device, PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f, - int* SIMULATION_TYPE, int* SAVE_FORWARD, int* ACOUSTIC_SIMULATION, int* ELASTIC_SIMULATION, @@ -774,7 +751,6 @@ void FC_FUNC_(transfer_kernels_hess_ac_tohost, void FC_FUNC_(transfer_seismograms_el_from_d, TRANSFER_SEISMOGRAMS_EL_FROM_D)(int* nrec_local, long* Mesh_pointer_f, - int* SIMULATION_TYPEf, realw* seismograms_d, realw* seismograms_v, realw* seismograms_a, @@ -785,7 +761,7 @@ void FC_FUNC_(transfer_station_el_from_device, realw* b_displ, realw* b_veloc, realw* b_accel, long* Mesh_pointer_f,int* number_receiver_global, int* ispec_selected_rec,int* ispec_selected_source, - int* ibool,int* SIMULATION_TYPEf) {} + int* ibool) {} void FC_FUNC_(transfer_station_ac_from_device, TRANSFER_STATION_AC_FROM_DEVICE)( @@ -799,6 +775,5 @@ void FC_FUNC_(transfer_station_ac_from_device, int* 
number_receiver_global, int* ispec_selected_rec, int* ispec_selected_source, - int* ibool, - int* SIMULATION_TYPEf) {} + int* ibool) {} diff --git a/src/cuda/write_seismograms_cuda.cu b/src/cuda/write_seismograms_cuda.cu index 95163d0d3..8bba41905 100644 --- a/src/cuda/write_seismograms_cuda.cu +++ b/src/cuda/write_seismograms_cuda.cu @@ -195,7 +195,6 @@ extern "C" void FC_FUNC_(transfer_seismograms_el_from_d, TRANSFER_SEISMOGRAMS_EL_FROM_D)(int* nrec_local, long* Mesh_pointer_f, - int* SIMULATION_TYPEf, realw* seismograms_d, realw* seismograms_v, realw* seismograms_a, @@ -345,16 +344,15 @@ void FC_FUNC_(transfer_station_el_from_device, realw* b_displ, realw* b_veloc, realw* b_accel, long* Mesh_pointer_f,int* number_receiver_global, int* ispec_selected_rec,int* ispec_selected_source, - int* ibool,int* SIMULATION_TYPEf) { + int* ibool) { TRACE("transfer_station_el_from_device"); Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper + // checks if anything to do if( mp->nrec_local == 0 ) return; - int SIMULATION_TYPE = *SIMULATION_TYPEf; - - if(SIMULATION_TYPE == 1) { + if(mp->simulation_type == 1) { transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global, mp->d_ispec_selected_rec, ispec_selected_rec, ibool); transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global, @@ -362,7 +360,7 @@ TRACE("transfer_station_el_from_device"); transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global, mp->d_ispec_selected_rec, ispec_selected_rec, ibool); } - else if(SIMULATION_TYPE == 2) { + else if(mp->simulation_type == 2) { transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global, mp->d_ispec_selected_source, ispec_selected_source, ibool); transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global, @@ -370,7 +368,7 @@ TRACE("transfer_station_el_from_device"); transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global, mp->d_ispec_selected_source, ispec_selected_source, ibool); } - else if(SIMULATION_TYPE == 3) { + else if(mp->simulation_type == 3) { transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global, mp->d_ispec_selected_rec, ispec_selected_rec, ibool); transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global, @@ -420,7 +418,7 @@ TRACE("transfer_field_acoustic_from_device"); int irec_local,irec,ispec,iglob,j; // checks if anything to do - if( mp->nrec_local == 0 ) return; + if( mp->nrec_local < 1 ) return; // sets up kernel dimensions int blocksize = NGLL3; @@ -442,8 +440,12 @@ TRACE("transfer_field_acoustic_from_device"); d_potential); +#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING + exit_on_cuda_error("transfer_field_acoustic_from_device kernel"); +#endif + print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential, - mp->nrec_local*NGLL3*sizeof(realw),cudaMemcpyDeviceToHost),500); + mp->nrec_local*NGLL3*sizeof(realw),cudaMemcpyDeviceToHost),55000); //printf("copy local receivers: %i \n",mp->nrec_local); @@ -483,19 +485,17 @@ void FC_FUNC_(transfer_station_ac_from_device, int* number_receiver_global, int* ispec_selected_rec, int* ispec_selected_source, - int* ibool, - int* SIMULATION_TYPEf) { + int* ibool) { TRACE("transfer_station_ac_from_device"); //double start_time = get_time(); Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper + // checks if anything to do if( mp->nrec_local == 0 ) return; - int SIMULATION_TYPE = *SIMULATION_TYPEf; - - if(SIMULATION_TYPE == 1) { + if(mp->simulation_type == 
1) { transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic, number_receiver_global, mp->d_ispec_selected_rec, ispec_selected_rec, ibool); @@ -506,7 +506,7 @@ TRACE("transfer_station_ac_from_device"); number_receiver_global, mp->d_ispec_selected_rec, ispec_selected_rec, ibool); } - else if(SIMULATION_TYPE == 2) { + else if(mp->simulation_type == 2) { transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic, number_receiver_global, mp->d_ispec_selected_source, ispec_selected_source, ibool); @@ -517,7 +517,7 @@ TRACE("transfer_station_ac_from_device"); number_receiver_global, mp->d_ispec_selected_source, ispec_selected_source, ibool); } - else if(SIMULATION_TYPE == 3) { + else if(mp->simulation_type == 3) { transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic, number_receiver_global, mp->d_ispec_selected_rec, ispec_selected_rec, ibool); diff --git a/src/generate_databases/create_mass_matrices.f90 b/src/generate_databases/create_mass_matrices.f90 index 47e976a13..c2ff99ccc 100644 --- a/src/generate_databases/create_mass_matrices.f90 +++ b/src/generate_databases/create_mass_matrices.f90 @@ -44,83 +44,125 @@ subroutine create_mass_matrices(nglob,nspec,ibool) integer :: ispec,i,j,k,iglob,ier real(kind=CUSTOM_REAL) :: rho_s, rho_f, rho_bar, phi, tort -! allocates memory - allocate(rmass(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass' - allocate(rmass_acoustic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass_acoustic' - allocate(rmass_solid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate' - allocate(rmass_fluid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate' - -! creates mass matrix - rmass(:) = 0._CUSTOM_REAL - rmass_acoustic(:) = 0._CUSTOM_REAL - rmass_solid_poroelastic(:) = 0._CUSTOM_REAL - rmass_fluid_poroelastic(:) = 0._CUSTOM_REAL - - do ispec=1,nspec - do k=1,NGLLZ - do j=1,NGLLY - do i=1,NGLLX - iglob = ibool(i,j,k,ispec) - - weight = wxgll(i)*wygll(j)*wzgll(k) - jacobianl = jacobianstore(i,j,k,ispec) - -! acoustic mass matrix - if( ispec_is_acoustic(ispec) ) then - ! distinguish between single and double precision for reals - if(CUSTOM_REAL == SIZE_REAL) then - rmass_acoustic(iglob) = rmass_acoustic(iglob) + & - sngl( dble(jacobianl) * weight / dble(kappastore(i,j,k,ispec)) ) - else - rmass_acoustic(iglob) = rmass_acoustic(iglob) + & - jacobianl * weight / kappastore(i,j,k,ispec) - endif - endif + ! elastic domains + if( ELASTIC_SIMULATION ) then + ! allocates memory + allocate(rmass(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass' + rmass(:) = 0._CUSTOM_REAL + + ! elastic mass matrix + do ispec=1,nspec + if( ispec_is_elastic(ispec) ) then + do k=1,NGLLZ + do j=1,NGLLY + do i=1,NGLLX + iglob = ibool(i,j,k,ispec) + + weight = wxgll(i)*wygll(j)*wzgll(k) + jacobianl = jacobianstore(i,j,k,ispec) + + if(CUSTOM_REAL == SIZE_REAL) then + rmass(iglob) = rmass(iglob) + & + sngl( dble(jacobianl) * weight * dble(rhostore(i,j,k,ispec)) ) + else + rmass(iglob) = rmass(iglob) + & + jacobianl * weight * rhostore(i,j,k,ispec) + endif + enddo + enddo + enddo + endif + enddo ! nspec + endif -! elastic mass matrix - if( ispec_is_elastic(ispec) ) then - if(CUSTOM_REAL == SIZE_REAL) then - rmass(iglob) = rmass(iglob) + & - sngl( dble(jacobianl) * weight * dble(rhostore(i,j,k,ispec)) ) - else - rmass(iglob) = rmass(iglob) + & - jacobianl * weight * rhostore(i,j,k,ispec) - endif - endif + ! acoustic domains + if( ACOUSTIC_SIMULATION) then + ! 
allocates memory + allocate(rmass_acoustic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass_acoustic' + rmass_acoustic(:) = 0._CUSTOM_REAL + + ! acoustic mass matrix + do ispec=1,nspec + if( ispec_is_acoustic(ispec) ) then + do k=1,NGLLZ + do j=1,NGLLY + do i=1,NGLLX + iglob = ibool(i,j,k,ispec) + + weight = wxgll(i)*wygll(j)*wzgll(k) + jacobianl = jacobianstore(i,j,k,ispec) + + ! distinguish between single and double precision for reals + if(CUSTOM_REAL == SIZE_REAL) then + rmass_acoustic(iglob) = rmass_acoustic(iglob) + & + sngl( dble(jacobianl) * weight / dble(kappastore(i,j,k,ispec)) ) + else + rmass_acoustic(iglob) = rmass_acoustic(iglob) + & + jacobianl * weight / kappastore(i,j,k,ispec) + endif + enddo + enddo + enddo + endif + enddo ! nspec + endif + + ! poroelastic domains + if( POROELASTIC_SIMULATION) then + ! allocates memory + allocate(rmass_solid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate' + allocate(rmass_fluid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate' + rmass_solid_poroelastic(:) = 0._CUSTOM_REAL + rmass_fluid_poroelastic(:) = 0._CUSTOM_REAL + + ! poroelastic mass matrices + do ispec=1,nspec + if( ispec_is_poroelastic(ispec) ) then + do k=1,NGLLZ + do j=1,NGLLY + do i=1,NGLLX + iglob = ibool(i,j,k,ispec) + + weight = wxgll(i)*wygll(j)*wzgll(k) + jacobianl = jacobianstore(i,j,k,ispec) + + rho_s = rhoarraystore(1,i,j,k,ispec) + rho_f = rhoarraystore(2,i,j,k,ispec) + phi = phistore(i,j,k,ispec) + tort = tortstore(i,j,k,ispec) + rho_bar = (1._CUSTOM_REAL-phi)*rho_s + phi*rho_f + + if(CUSTOM_REAL == SIZE_REAL) then + ! for the solid mass matrix + rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + & + sngl( dble(jacobianl) * weight * dble(rho_bar - phi*rho_f/tort) ) + + ! for the fluid mass matrix + rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + & + sngl( dble(jacobianl) * weight * dble(rho_bar*rho_f*tort - & + phi*rho_f*rho_f)/dble(rho_bar*phi) ) + else + rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + & + jacobianl * weight * (rho_bar - phi*rho_f/tort) + + rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + & + jacobianl * weight * (rho_bar*rho_f*tort - & + phi*rho_f*rho_f) / (rho_bar*phi) + endif + enddo + enddo + enddo + endif + enddo ! nspec + endif -! poroelastic mass matrices - if( ispec_is_poroelastic(ispec) ) then - - rho_s = rhoarraystore(1,i,j,k,ispec) - rho_f = rhoarraystore(2,i,j,k,ispec) - phi = phistore(i,j,k,ispec) - tort = tortstore(i,j,k,ispec) - rho_bar = (1._CUSTOM_REAL-phi)*rho_s + phi*rho_f - - if(CUSTOM_REAL == SIZE_REAL) then - ! for the solid mass matrix - rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + & - sngl( dble(jacobianl) * weight * dble(rho_bar - phi*rho_f/tort) ) - - ! for the fluid mass matrix - rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + & - sngl( dble(jacobianl) * weight * dble(rho_bar*rho_f*tort - & - phi*rho_f*rho_f)/dble(rho_bar*phi) ) - else - rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + & - jacobianl * weight * (rho_bar - phi*rho_f/tort) - - rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + & - jacobianl * weight * (rho_bar*rho_f*tort - & - phi*rho_f*rho_f) / (rho_bar*phi) - endif + ! extra C*deltat/2 contribution to the mass matrices on Stacey edges + ! for absorbing condition + call create_mass_matrices_Stacey(nglob,nspec,ibool) - endif + ! 
creates ocean load mass matrix + call create_mass_matrices_ocean_load(nglob,nspec,ibool) - enddo - enddo - enddo - enddo ! nspec end subroutine create_mass_matrices @@ -128,13 +170,16 @@ end subroutine create_mass_matrices !------------------------------------------------------------------------------------------------- ! - subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, & - UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, & - NX_TOPO,NY_TOPO,itopo_bathy) + subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool) ! returns precomputed mass matrix in rmass array + use generate_databases_par,only: & + OCEANS,TOPOGRAPHY,UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, & + NX_TOPO,NY_TOPO,itopo_bathy,myrank + use create_regions_mesh_ext_par + implicit none ! number of spectral elements in each block @@ -143,14 +188,6 @@ subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, ! arrays with the mesh global indices integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool - logical :: OCEANS,TOPOGRAPHY - - ! use integer array to store topography values - integer :: UTM_PROJECTION_ZONE - logical :: SUPPRESS_UTM_PROJECTION - - integer :: NX_TOPO,NY_TOPO - integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy ! local parameters double precision :: weight @@ -164,6 +201,10 @@ subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, ! creates ocean load mass matrix if(OCEANS) then + if( myrank == 0) then + write(IMAIN,*) ' ...creating ocean load mass matrix ' + endif + ! adding ocean load mass matrix at ocean bottom NGLOB_OCEAN = nglob allocate(rmass_ocean_load(NGLOB_OCEAN),stat=ier) @@ -244,3 +285,144 @@ subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, endif end subroutine create_mass_matrices_ocean_load + + +! +!------------------------------------------------------------------------------------------------- +! + + subroutine create_mass_matrices_Stacey(nglob,nspec,ibool) + +! in the case of stacey boundary conditions, add C*deltat/2 contribution to the mass matrix +! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region +! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix + + use generate_databases_par,only: & + DT + + use create_regions_mesh_ext_par + + implicit none + + integer :: nglob + integer :: nspec + integer,dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool + + ! local parameters + real(kind=CUSTOM_REAL) :: jacobianw + real(kind=CUSTOM_REAL) :: deltat,deltatover2 + real(kind=CUSTOM_REAL) :: tx,ty,tz,sn + real(kind=CUSTOM_REAL) :: nx,ny,nz,vn + integer :: ispec,iglob,i,j,k,iface,igll,ier + + ! checks if anything to do + if( num_abs_boundary_faces > 0 ) then + nglob_xy = nglob + else + nglob_xy = 1 + endif + + ! elastic domains + if( ELASTIC_SIMULATION ) then + allocate( rmassx(nglob_xy), & + rmassy(nglob_xy), & + rmassz(nglob_xy), & + stat=ier) + if(ier /= 0) stop 'error in allocate 21' + rmassx(:) = 0._CUSTOM_REAL + rmassy(:) = 0._CUSTOM_REAL + rmassz(:) = 0._CUSTOM_REAL + endif + + ! acoustic domains + if( ACOUSTIC_SIMULATION ) then + allocate( rmassz_acoustic(nglob_xy), & + stat=ier) + if(ier /= 0) stop 'error in allocate 22' + rmassz_acoustic(:) = 0._CUSTOM_REAL + endif + + ! use the non-dimensional time step to make the mass matrix correction + if(CUSTOM_REAL == SIZE_REAL) then + deltat = sngl(DT) + deltatover2 = sngl(0.5d0*deltat) + else + deltat = DT + deltatover2 = 0.5d0*deltat + endif + + ! 
adds contributions to mass matrix to stabilize stacey conditions + do iface=1,num_abs_boundary_faces + + ispec = abs_boundary_ispec(iface) + + ! elastic elements + if( ispec_is_elastic(ispec)) then + ! reference gll points on boundary face + do igll = 1,NGLLSQUARE + ! gets local indices for GLL point + i = abs_boundary_ijk(1,igll,iface) + j = abs_boundary_ijk(2,igll,iface) + k = abs_boundary_ijk(3,igll,iface) + + ! gets velocity + iglob = ibool(i,j,k,ispec) + + ! gets associated normal + nx = abs_boundary_normal(1,igll,iface) + ny = abs_boundary_normal(2,igll,iface) + nz = abs_boundary_normal(3,igll,iface) + + vn = deltatover2*(nx+ny+nz) + + ! C*deltat/2 contributions + tx = rho_vp(i,j,k,ispec)*vn*nx + rho_vs(i,j,k,ispec)*(deltatover2-vn*nx) + ty = rho_vp(i,j,k,ispec)*vn*ny + rho_vs(i,j,k,ispec)*(deltatover2-vn*ny) + tz = rho_vp(i,j,k,ispec)*vn*nz + rho_vs(i,j,k,ispec)*(deltatover2-vn*nz) + + ! gets associated, weighted jacobian + jacobianw = abs_boundary_jacobian2Dw(igll,iface) + + ! assembles mass matrix on global points + if(CUSTOM_REAL == SIZE_REAL) then + rmassx(iglob) = rmassx(iglob) + sngl(tx*jacobianw) + rmassy(iglob) = rmassy(iglob) + sngl(ty*jacobianw) + rmassz(iglob) = rmassz(iglob) + sngl(tz*jacobianw) + else + rmassx(iglob) = rmassx(iglob) + tx*jacobianw + rmassy(iglob) = rmassy(iglob) + ty*jacobianw + rmassz(iglob) = rmassz(iglob) + tz*jacobianw + endif + enddo + endif ! elastic + + ! acoustic element + if( ispec_is_acoustic(ispec) ) then + + ! reference gll points on boundary face + do igll = 1,NGLLSQUARE + ! gets local indices for GLL point + i = abs_boundary_ijk(1,igll,iface) + j = abs_boundary_ijk(2,igll,iface) + k = abs_boundary_ijk(3,igll,iface) + + ! gets global index + iglob=ibool(i,j,k,ispec) + + ! gets associated, weighted jacobian + jacobianw = abs_boundary_jacobian2Dw(igll,iface) + + ! C * DT/2 contribution + sn = deltatover2/rho_vp(i,j,k,ispec) + + if(CUSTOM_REAL == SIZE_REAL) then + rmassz_acoustic(iglob) = rmassz_acoustic(iglob) + sngl(jacobianw*sn) + else + rmassz_acoustic(iglob) = rmassz_acoustic(iglob) + jacobianw*sn + endif + enddo + endif ! acoustic + enddo + + end subroutine create_mass_matrices_Stacey + diff --git a/src/generate_databases/create_regions_mesh.f90 b/src/generate_databases/create_regions_mesh.f90 index b20426f04..85269e2b3 100644 --- a/src/generate_databases/create_regions_mesh.f90 +++ b/src/generate_databases/create_regions_mesh.f90 @@ -46,11 +46,10 @@ subroutine create_regions_mesh() nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,& nodes_ibelm_bottom,nodes_ibelm_top, & SAVE_MESH_FILES, & - ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, & + ANISOTROPY,NPROC,OCEANS, & ATTENUATION,USE_OLSEN_ATTENUATION, & - UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, & - NX_TOPO,NY_TOPO,itopo_bathy, & nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho + use create_regions_mesh_ext_par implicit none @@ -176,15 +175,6 @@ subroutine create_regions_mesh() endif call create_mass_matrices(nglob_dummy,nspec,ibool) -! creates ocean load mass matrix - call sync_all() - if( myrank == 0) then - write(IMAIN,*) ' ...creating ocean load mass matrix ' - endif - call create_mass_matrices_ocean_load(nglob_dummy,nspec,ibool,OCEANS,TOPOGRAPHY, & - UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, & - NX_TOPO,NY_TOPO,itopo_bathy) - ! saves the binary mesh files call sync_all() if( myrank == 0) then @@ -232,8 +222,7 @@ subroutine create_regions_mesh() ispec_is_elastic,min_resolved_period,prname) endif -! cleanup - if( .not. 
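For a single boundary GLL point, the C*deltat/2 Stacey contributions accumulated in the loop above reduce to the terms sketched below; argument names are illustrative and the snippet stands alone (it does not read DT or the boundary arrays, and keeps double precision only).

  subroutine stacey_mass_terms(deltat,nx,ny,nz,rho_vp_l,rho_vs_l,jacobianw, &
                               dmx,dmy,dmz,dm_ac)
  implicit none
  double precision, intent(in)  :: deltat              ! time step DT
  double precision, intent(in)  :: nx,ny,nz            ! outward boundary normal at the GLL point
  double precision, intent(in)  :: rho_vp_l,rho_vs_l   ! rho*vp and rho*vs at the GLL point
  double precision, intent(in)  :: jacobianw           ! weighted 2D boundary jacobian
  double precision, intent(out) :: dmx,dmy,dmz         ! elastic additions to rmassx/y/z(iglob)
  double precision, intent(out) :: dm_ac               ! acoustic addition to rmassz_acoustic(iglob)
  double precision :: deltatover2,vn
  deltatover2 = 0.5d0*deltat
  vn = deltatover2*(nx+ny+nz)
  ! elastic C*deltat/2 boundary damping terms
  dmx = ( rho_vp_l*vn*nx + rho_vs_l*(deltatover2 - vn*nx) ) * jacobianw
  dmy = ( rho_vp_l*vn*ny + rho_vs_l*(deltatover2 - vn*ny) ) * jacobianw
  dmz = ( rho_vp_l*vn*nz + rho_vs_l*(deltatover2 - vn*nz) ) * jacobianw
  ! acoustic C*deltat/2 boundary damping term
  dm_ac = jacobianw * deltatover2 / rho_vp_l
  end subroutine stacey_mass_terms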
SAVE_MOHO_MESH ) deallocate(xstore_dummy,ystore_dummy,zstore_dummy) + ! cleanup deallocate(xixstore,xiystore,xizstore,& etaxstore,etaystore,etazstore,& gammaxstore,gammaystore,gammazstore) @@ -242,6 +231,22 @@ subroutine create_regions_mesh() deallocate(rho_vpI,rho_vpII,rho_vsI) deallocate(rhoarraystore,kappaarraystore,etastore,phistore,tortstore,permstore) + if( .not. SAVE_MOHO_MESH ) then + deallocate(xstore_dummy,ystore_dummy,zstore_dummy) + endif + + if( ELASTIC_SIMULATION ) then + deallocate(rmass) + deallocate(rmassx,rmassy,rmassz) + endif + if( ACOUSTIC_SIMULATION) then + deallocate(rmass_acoustic) + deallocate(rmassz_acoustic) + endif + if( POROELASTIC_SIMULATION) then + deallocate(rmass_solid_poroelastic,rmass_fluid_poroelastic) + endif + end subroutine create_regions_mesh ! diff --git a/src/generate_databases/generate_databases.f90 b/src/generate_databases/generate_databases.f90 index 4b4e407e4..8a8906178 100644 --- a/src/generate_databases/generate_databases.f90 +++ b/src/generate_databases/generate_databases.f90 @@ -320,6 +320,8 @@ subroutine gd_read_parameters write(IMAIN,'(a)',advance='yes') ' external' case( IMODEL_IPATI ) write(IMAIN,'(a)',advance='yes') ' ipati' + case( IMODEL_IPATI_WATER ) + write(IMAIN,'(a)',advance='yes') ' ipati_water' end select write(IMAIN,*) diff --git a/src/generate_databases/generate_databases_par.f90 b/src/generate_databases/generate_databases_par.f90 index 2129cc049..af7a7f97f 100644 --- a/src/generate_databases/generate_databases_par.f90 +++ b/src/generate_databases/generate_databases_par.f90 @@ -177,6 +177,11 @@ module create_regions_mesh_ext_par real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass,rmass_acoustic,& rmass_solid_poroelastic,rmass_fluid_poroelastic +! mass matrix contributions + real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassx,rmassy,rmassz + real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassz_acoustic + integer :: nglob_xy + ! ocean load integer :: NGLOB_OCEAN real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load diff --git a/src/generate_databases/get_model.f90 b/src/generate_databases/get_model.f90 index 7dade46fd..9d1917aa0 100644 --- a/src/generate_databases/get_model.f90 +++ b/src/generate_databases/get_model.f90 @@ -346,6 +346,7 @@ subroutine get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, & enddo ! sets simulation domain flags + ! all processes will have e.g. acoustic_simulation flag set if any flag is .true. somewhere call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION ) call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION ) call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION ) @@ -409,7 +410,7 @@ subroutine get_model_values(materials_ext_mesh,nmat_ext_mesh, & ! selects chosen velocity model select case( IMODEL ) - case( IMODEL_DEFAULT,IMODEL_GLL,IMODEL_IPATI ) + case( IMODEL_DEFAULT,IMODEL_GLL,IMODEL_IPATI,IMODEL_IPATI_WATER ) ! material values determined by mesh properties call model_default(materials_ext_mesh,nmat_ext_mesh, & undef_mat_prop,nundefMat_ext_mesh, & @@ -485,7 +486,9 @@ subroutine get_model_binaries(myrank,nspec,LOCAL_PATH) ! reads in material parameters from external binary files use generate_databases_par,only: IMODEL + use create_regions_mesh_ext_par + implicit none ! number of spectral elements in each block @@ -505,9 +508,15 @@ subroutine get_model_binaries(myrank,nspec,LOCAL_PATH) ! import the model from files in SPECFEM format ! 
note that those those files should be saved in LOCAL_PATH call model_gll(myrank,nspec,LOCAL_PATH) + case( IMODEL_IPATI ) ! import the model from modified files in SPECFEM format call model_ipati(myrank,nspec,LOCAL_PATH) + + case( IMODEL_IPATI_WATER ) + ! import the model from modified files in SPECFEM format + call model_ipati_water(myrank,nspec,LOCAL_PATH) + end select end subroutine get_model_binaries diff --git a/src/generate_databases/memory_eval.f90 b/src/generate_databases/memory_eval.f90 index a1bdfdded..5c1d4d5a2 100644 --- a/src/generate_databases/memory_eval.f90 +++ b/src/generate_databases/memory_eval.f90 @@ -88,7 +88,7 @@ subroutine memory_eval(NSPEC_AB,NGLOB_AB,max_nibool_interfaces_ext_mesh,num_inte memory_size = memory_size + 3.d0*dble(NDIM)*NGLOB_AB*dble(CUSTOM_REAL) ! rmass - memory_size = memory_size + NGLOB_AB*dble(CUSTOM_REAL) + memory_size = memory_size + 3*NGLOB_AB*dble(CUSTOM_REAL) ! rho_vp,rho_vs memory_size = memory_size + 2.d0*dble(NGLLX)*dble(NGLLY)*dble(NGLLZ)*NSPEC_AB*dble(CUSTOM_REAL) diff --git a/src/generate_databases/model_1d_socal.f90 b/src/generate_databases/model_1d_socal.f90 index 4a799d3f9..8d6f95110 100644 --- a/src/generate_databases/model_1d_socal.f90 +++ b/src/generate_databases/model_1d_socal.f90 @@ -37,7 +37,6 @@ subroutine model_1D_socal(xmesh,ymesh,zmesh,rho,vp,vs,qmu_atten) ! given a GLL point, returns super-imposed velocity model values - use generate_databases_par,only: nspec => NSPEC_AB,ibool use create_regions_mesh_ext_par implicit none diff --git a/src/generate_databases/model_ipati.f90 b/src/generate_databases/model_ipati.f90 index 01e145a61..820837267 100644 --- a/src/generate_databases/model_ipati.f90 +++ b/src/generate_databases/model_ipati.f90 @@ -111,3 +111,97 @@ subroutine model_ipati(myrank,nspec,LOCAL_PATH) deallocate( rho_read,vp_read,vs_read) end subroutine model_ipati + +! +!------------------------------------------------------------------------------------------------- +! + + subroutine model_ipati_water(myrank,nspec,LOCAL_PATH) + + use create_regions_mesh_ext_par + implicit none + + integer, intent(in) :: myrank,nspec + character(len=256) :: LOCAL_PATH + + ! local parameters + real, dimension(:,:,:,:),allocatable :: vp_read,vs_read,rho_read + integer :: ispec,ier + character(len=256) :: prname_lp,filename + + ! ----------------------------------------------------------------------------- + + ! note: vp not vs structure is available (as is often the case in exploration seismology), + ! scaling factor + real, parameter :: SCALING_FACTOR = 1.0/1.8 + + ! ----------------------------------------------------------------------------- + + ! user output + if (myrank==0) then + write(IMAIN,*) + write(IMAIN,*) 'using external IPATI_WATER model from:',trim(LOCAL_PATH) + write(IMAIN,*) 'scaling factor: ',SCALING_FACTOR + write(IMAIN,*) + endif + + ! processors name + write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'proc',myrank,'_' + + ! density + allocate( rho_read(NGLLX,NGLLY,NGLLZ,nspec),stat=ier) + if( ier /= 0 ) stop 'error allocating array rho_read' + + filename = prname_lp(1:len_trim(prname_lp))//'rho.bin' + open(unit=28,file=trim(filename),status='old',action='read',form='unformatted',iostat=ier) + if( ier /= 0 ) then + print*,'error opening file: ',trim(filename) + stop 'error reading rho.bin file' + endif + + read(28) rho_read + close(28) + + ! 
vp + allocate( vp_read(NGLLX,NGLLY,NGLLZ,nspec),stat=ier) + if( ier /= 0 ) stop 'error allocating array vp_read' + + filename = prname_lp(1:len_trim(prname_lp))//'vp.bin' + open(unit=28,file=trim(filename),status='old',action='read',form='unformatted',iostat=ier) + if( ier /= 0 ) then + print*,'error opening file: ',trim(filename) + stop 'error reading vp.bin file' + endif + + read(28) vp_read + close(28) + + ! vs scaled from vp + allocate( vs_read(NGLLX,NGLLY,NGLLZ,nspec),stat=ier) + if( ier /= 0 ) stop 'error allocating array vs_read' + + ! scaling + vs_read = vp_read * SCALING_FACTOR + + ! overwrites only elastic elements + do ispec=1,nspec + ! assumes water layer with acoustic elements are set properly + ! only overwrites elastic elements + if( ispec_is_elastic(ispec)) then + ! isotropic model parameters + rhostore(:,:,:,ispec) = rho_read(:,:,:,ispec) + kappastore(:,:,:,ispec) = rhostore(:,:,:,ispec) * ( vp_read(:,:,:,ispec) * vp_read(:,:,:,ispec) & + - FOUR_THIRDS * vs_read(:,:,:,ispec) * vs_read(:,:,:,ispec) ) + mustore(:,:,:,ispec) = rhostore(:,:,:,ispec) * vs_read(:,:,:,ispec) * vs_read(:,:,:,ispec) + rho_vp(:,:,:,ispec) = rhostore(:,:,:,ispec) * vp_read(:,:,:,ispec) + rho_vs(:,:,:,ispec) = rhostore(:,:,:,ispec) * vs_read(:,:,:,ispec) + endif + enddo + + ! free memory + deallocate( rho_read,vp_read,vs_read) + + end subroutine model_ipati_water + + + diff --git a/src/generate_databases/save_arrays_solver.f90 b/src/generate_databases/save_arrays_solver.f90 index d47fcb892..e75f45745 100644 --- a/src/generate_databases/save_arrays_solver.f90 +++ b/src/generate_databases/save_arrays_solver.f90 @@ -91,30 +91,23 @@ subroutine save_arrays_solver_ext_mesh(nspec,nglob,OCEANS,ibool, & write(IOUT) ispec_is_poroelastic ! acoustic -! all processes will have acoustic_simulation set if any flag is .true. somewhere - call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION ) if( ACOUSTIC_SIMULATION ) then write(IOUT) rmass_acoustic write(IOUT) rhostore endif ! elastic - call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION ) if( ELASTIC_SIMULATION ) then write(IOUT) rmass - if( OCEANS) then write(IOUT) rmass_ocean_load endif - !pll Stacey write(IOUT) rho_vp write(IOUT) rho_vs - endif ! poroelastic - call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION ) if( POROELASTIC_SIMULATION ) then write(IOUT) rmass_solid_poroelastic write(IOUT) rmass_fluid_poroelastic @@ -136,6 +129,15 @@ subroutine save_arrays_solver_ext_mesh(nspec,nglob,OCEANS,ibool, & write(IOUT) abs_boundary_ijk write(IOUT) abs_boundary_jacobian2Dw write(IOUT) abs_boundary_normal + ! store mass matrix contributions + if(ELASTIC_SIMULATION) then + write(IOUT) rmassx + write(IOUT) rmassy + write(IOUT) rmassz + endif + if(ACOUSTIC_SIMULATION) then + write(IOUT) rmassz_acoustic + endif endif ! free surface diff --git a/src/shared/constants.h.in b/src/shared/constants.h.in index 6d118bbb6..6ee728818 100644 --- a/src/shared/constants.h.in +++ b/src/shared/constants.h.in @@ -428,3 +428,5 @@ integer, parameter :: IMODEL_SALTON_TROUGH = 8 integer, parameter :: IMODEL_1D_PREM_PB = 9 integer, parameter :: IMODEL_IPATI = 10 + integer, parameter :: IMODEL_IPATI_WATER = 11 + diff --git a/src/shared/get_shape2D.f90 b/src/shared/get_shape2D.f90 index f384ca9cf..1a32a80ce 100644 --- a/src/shared/get_shape2D.f90 +++ b/src/shared/get_shape2D.f90 @@ -46,8 +46,6 @@ subroutine get_shape2D(myrank,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB) ! 
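Per GLL value, the Vp-to-Vs scaling and the isotropic moduli used by model_ipati_water above amount to the small sketch below; names are illustrative and the 4/3 factor is written out so the snippet is self-contained.

  subroutine ipati_water_point(rho,vp,scaling,vs,kappa,mu)
  implicit none
  double precision, intent(in)  :: rho,vp      ! density and P velocity read from the *.bin files
  double precision, intent(in)  :: scaling     ! e.g. 1.d0/1.8d0 (SCALING_FACTOR)
  double precision, intent(out) :: vs,kappa,mu
  ! S velocity derived from the P velocity (no Vs model available)
  vs = vp * scaling
  ! isotropic bulk and shear moduli
  kappa = rho * ( vp*vp - (4.d0/3.d0)*vs*vs )
  mu    = rho * vs*vs
  end subroutine ipati_water_point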
location of the nodes of the 2D quadrilateral elements double precision xi,eta double precision xi_map,eta_map - double precision l1xi,l2xi,l3xi,l1eta,l2eta,l3eta - double precision l1pxi,l2pxi,l3pxi,l1peta,l2peta,l3peta ! for checking the 2D shape functions double precision sumshape,sumdershapexi,sumdershapeeta @@ -94,105 +92,146 @@ subroutine get_shape2D(myrank,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB) else - ! generate the 2D shape functions and their derivatives (9 nodes) - do i=1,NGLLA + ! note: put further initialization for ngnod2d == 9 into subroutine + ! to avoid compilation errors in case ngnod2d == 4 + call get_shape2D_9(NGNOD2D,NDIM2D,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB) - xi=xigll(i) + endif - l1xi=HALF*xi*(xi-ONE) - l2xi=ONE-xi**2 - l3xi=HALF*xi*(xi+ONE) +! check the 2D shape functions + do i=1,NGLLA + do j=1,NGLLB - l1pxi=xi-HALF - l2pxi=-TWO*xi - l3pxi=xi+HALF + sumshape=ZERO - do j=1,NGLLB + sumdershapexi=ZERO + sumdershapeeta=ZERO - eta=yigll(j) + do ia=1,NGNOD2D + sumshape=sumshape+shape2D(ia,i,j) - l1eta=HALF*eta*(eta-ONE) - l2eta=ONE-eta**2 - l3eta=HALF*eta*(eta+ONE) + sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j) + sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j) + enddo - l1peta=eta-HALF - l2peta=-TWO*eta - l3peta=eta+HALF +! the sum of the shape functions should be 1 + if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions') -! corner nodes +! the sum of the derivatives of the shape functions should be 0 + if(abs(sumdershapexi)>TINYVAL) & + call exit_MPI(myrank,'error in xi derivatives of 2D shape function') - shape2D(1,i,j)=l1xi*l1eta - shape2D(2,i,j)=l3xi*l1eta - shape2D(3,i,j)=l3xi*l3eta - shape2D(4,i,j)=l1xi*l3eta + if(abs(sumdershapeeta)>TINYVAL) & + call exit_MPI(myrank,'error in eta derivatives of 2D shape function') - dershape2D(1,1,i,j)=l1pxi*l1eta - dershape2D(1,2,i,j)=l3pxi*l1eta - dershape2D(1,3,i,j)=l3pxi*l3eta - dershape2D(1,4,i,j)=l1pxi*l3eta + enddo + enddo - dershape2D(2,1,i,j)=l1xi*l1peta - dershape2D(2,2,i,j)=l3xi*l1peta - dershape2D(2,3,i,j)=l3xi*l3peta - dershape2D(2,4,i,j)=l1xi*l3peta + end subroutine get_shape2D -! midside nodes +! +!------------------------------------------------------------------------------------------------- +! - shape2D(5,i,j)=l2xi*l1eta - shape2D(6,i,j)=l3xi*l2eta - shape2D(7,i,j)=l2xi*l3eta - shape2D(8,i,j)=l1xi*l2eta + subroutine get_shape2D_9(ngnod2d,ndim2d,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB) - dershape2D(1,5,i,j)=l2pxi*l1eta - dershape2D(1,6,i,j)=l3pxi*l2eta - dershape2D(1,7,i,j)=l2pxi*l3eta - dershape2D(1,8,i,j)=l1pxi*l2eta + implicit none - dershape2D(2,5,i,j)=l2xi*l1peta - dershape2D(2,6,i,j)=l3xi*l2peta - dershape2D(2,7,i,j)=l2xi*l3peta - dershape2D(2,8,i,j)=l1xi*l2peta +! generic routine that accepts any polynomial degree in each direction -! center node + integer :: ngnod2d,ndim2d + integer :: NGLLA,NGLLB - shape2D(9,i,j)=l2xi*l2eta + double precision xigll(NGLLA) + double precision yigll(NGLLB) - dershape2D(1,9,i,j)=l2pxi*l2eta - dershape2D(2,9,i,j)=l2xi*l2peta +! 2D shape functions and their derivatives + double precision shape2D(NGNOD2D,NGLLA,NGLLB) + double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB) - enddo - enddo + integer i,j - endif +! location of the nodes of the 2D quadrilateral elements + double precision xi,eta + double precision l1xi,l2xi,l3xi,l1eta,l2eta,l3eta + double precision l1pxi,l2pxi,l3pxi,l1peta,l2peta,l3peta -! 
check the 2D shape functions + double precision,parameter :: HALF = 0.5d0 + double precision,parameter :: ONE = 1.0d0 + double precision,parameter :: TWO = 2.0d0 + + ! check that the parameter file is correct + if( ngnod2d /= 9) stop 'surface elements should have 9 control nodes' + + ! generate the 2D shape functions and their derivatives (9 nodes) do i=1,NGLLA + + xi=xigll(i) + + l1xi=HALF*xi*(xi-ONE) + l2xi=ONE-xi**2 + l3xi=HALF*xi*(xi+ONE) + + l1pxi=xi-HALF + l2pxi=-TWO*xi + l3pxi=xi+HALF + do j=1,NGLLB - sumshape=ZERO + eta=yigll(j) - sumdershapexi=ZERO - sumdershapeeta=ZERO + l1eta=HALF*eta*(eta-ONE) + l2eta=ONE-eta**2 + l3eta=HALF*eta*(eta+ONE) - do ia=1,NGNOD2D - sumshape=sumshape+shape2D(ia,i,j) + l1peta=eta-HALF + l2peta=-TWO*eta + l3peta=eta+HALF - sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j) - sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j) - enddo +! corner nodes -! the sum of the shape functions should be 1 - if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions') + shape2D(1,i,j)=l1xi*l1eta + shape2D(2,i,j)=l3xi*l1eta + shape2D(3,i,j)=l3xi*l3eta + shape2D(4,i,j)=l1xi*l3eta -! the sum of the derivatives of the shape functions should be 0 - if(abs(sumdershapexi)>TINYVAL) & - call exit_MPI(myrank,'error in xi derivatives of 2D shape function') + dershape2D(1,1,i,j)=l1pxi*l1eta + dershape2D(1,2,i,j)=l3pxi*l1eta + dershape2D(1,3,i,j)=l3pxi*l3eta + dershape2D(1,4,i,j)=l1pxi*l3eta - if(abs(sumdershapeeta)>TINYVAL) & - call exit_MPI(myrank,'error in eta derivatives of 2D shape function') + dershape2D(2,1,i,j)=l1xi*l1peta + dershape2D(2,2,i,j)=l3xi*l1peta + dershape2D(2,3,i,j)=l3xi*l3peta + dershape2D(2,4,i,j)=l1xi*l3peta + +! midside nodes + + shape2D(5,i,j)=l2xi*l1eta + shape2D(6,i,j)=l3xi*l2eta + shape2D(7,i,j)=l2xi*l3eta + shape2D(8,i,j)=l1xi*l2eta + + dershape2D(1,5,i,j)=l2pxi*l1eta + dershape2D(1,6,i,j)=l3pxi*l2eta + dershape2D(1,7,i,j)=l2pxi*l3eta + dershape2D(1,8,i,j)=l1pxi*l2eta + + dershape2D(2,5,i,j)=l2xi*l1peta + dershape2D(2,6,i,j)=l3xi*l2peta + dershape2D(2,7,i,j)=l2xi*l3peta + dershape2D(2,8,i,j)=l1xi*l2peta + +! center node + + shape2D(9,i,j)=l2xi*l2eta + + dershape2D(1,9,i,j)=l2pxi*l2eta + dershape2D(2,9,i,j)=l2xi*l2peta enddo enddo - end subroutine get_shape2D + end subroutine get_shape2D_9 + diff --git a/src/shared/hex_nodes.f90 b/src/shared/hex_nodes.f90 index ec309b2b9..392eef13d 100644 --- a/src/shared/hex_nodes.f90 +++ b/src/shared/hex_nodes.f90 @@ -72,91 +72,110 @@ subroutine usual_hex_nodes(iaddx,iaddy,iaddz) if(NGNOD == 27) then - ! midside nodes (nodes located in the middle of an edge) + ! note: put further initialization into subroutine to avoid compilation errors + ! in case ngnod == 8 + call usual_hex_nodes_27(NGNOD,iaddx,iaddy,iaddz) - iaddx(9) = 1 - iaddy(9) = 0 - iaddz(9) = 0 + endif - iaddx(10) = 2 - iaddy(10) = 1 - iaddz(10) = 0 + end subroutine usual_hex_nodes - iaddx(11) = 1 - iaddy(11) = 2 - iaddz(11) = 0 +! +!------------------------------------------------------------------------------------------------- +! - iaddx(12) = 0 - iaddy(12) = 1 - iaddz(12) = 0 + subroutine usual_hex_nodes_27(ngnod,iaddx,iaddy,iaddz) - iaddx(13) = 0 - iaddy(13) = 0 - iaddz(13) = 1 + implicit none - iaddx(14) = 2 - iaddy(14) = 0 - iaddz(14) = 1 + integer :: ngnod + integer,dimension(ngnod) :: iaddx,iaddy,iaddz - iaddx(15) = 2 - iaddy(15) = 2 - iaddz(15) = 1 +! define the topology of the hexahedral elements - iaddx(16) = 0 - iaddy(16) = 2 - iaddz(16) = 1 + ! 
midside nodes (nodes located in the middle of an edge) - iaddx(17) = 1 - iaddy(17) = 0 - iaddz(17) = 2 + iaddx(9) = 1 + iaddy(9) = 0 + iaddz(9) = 0 - iaddx(18) = 2 - iaddy(18) = 1 - iaddz(18) = 2 + iaddx(10) = 2 + iaddy(10) = 1 + iaddz(10) = 0 - iaddx(19) = 1 - iaddy(19) = 2 - iaddz(19) = 2 + iaddx(11) = 1 + iaddy(11) = 2 + iaddz(11) = 0 - iaddx(20) = 0 - iaddy(20) = 1 - iaddz(20) = 2 + iaddx(12) = 0 + iaddy(12) = 1 + iaddz(12) = 0 - ! side center nodes (nodes located in the middle of a face) + iaddx(13) = 0 + iaddy(13) = 0 + iaddz(13) = 1 - iaddx(21) = 1 - iaddy(21) = 1 - iaddz(21) = 0 + iaddx(14) = 2 + iaddy(14) = 0 + iaddz(14) = 1 - iaddx(22) = 1 - iaddy(22) = 0 - iaddz(22) = 1 + iaddx(15) = 2 + iaddy(15) = 2 + iaddz(15) = 1 - iaddx(23) = 2 - iaddy(23) = 1 - iaddz(23) = 1 + iaddx(16) = 0 + iaddy(16) = 2 + iaddz(16) = 1 - iaddx(24) = 1 - iaddy(24) = 2 - iaddz(24) = 1 + iaddx(17) = 1 + iaddy(17) = 0 + iaddz(17) = 2 - iaddx(25) = 0 - iaddy(25) = 1 - iaddz(25) = 1 + iaddx(18) = 2 + iaddy(18) = 1 + iaddz(18) = 2 - iaddx(26) = 1 - iaddy(26) = 1 - iaddz(26) = 2 + iaddx(19) = 1 + iaddy(19) = 2 + iaddz(19) = 2 - ! center node (barycenter of the eight corners) + iaddx(20) = 0 + iaddy(20) = 1 + iaddz(20) = 2 - iaddx(27) = 1 - iaddy(27) = 1 - iaddz(27) = 1 + ! side center nodes (nodes located in the middle of a face) - endif + iaddx(21) = 1 + iaddy(21) = 1 + iaddz(21) = 0 - end subroutine usual_hex_nodes + iaddx(22) = 1 + iaddy(22) = 0 + iaddz(22) = 1 + + iaddx(23) = 2 + iaddy(23) = 1 + iaddz(23) = 1 + + iaddx(24) = 1 + iaddy(24) = 2 + iaddz(24) = 1 + + iaddx(25) = 0 + iaddy(25) = 1 + iaddz(25) = 1 + + iaddx(26) = 1 + iaddy(26) = 1 + iaddz(26) = 2 + + ! center node (barycenter of the eight corners) + + iaddx(27) = 1 + iaddy(27) = 1 + iaddz(27) = 1 + + end subroutine usual_hex_nodes_27 ! !------------------------------------------------------------------------------------------------- diff --git a/src/shared/read_parameter_file.f90 b/src/shared/read_parameter_file.f90 index 8efc947ac..87f2f252c 100644 --- a/src/shared/read_parameter_file.f90 +++ b/src/shared/read_parameter_file.f90 @@ -254,6 +254,8 @@ subroutine read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, & IMODEL = IMODEL_USER_EXTERNAL case( 'ipati' ) IMODEL = IMODEL_IPATI + case( 'ipati_water' ) + IMODEL = IMODEL_IPATI_WATER case( 'gll' ) IMODEL = IMODEL_GLL case( 'salton_trough') @@ -270,8 +272,8 @@ subroutine read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, & end select ! check - if( IMODEL == IMODEL_IPATI ) then - if( USE_RICKER_IPATI .eqv. .false. ) stop 'please set USE_RICKER_IPATI to true in shared/constants.h and recompile' + if( IMODEL == IMODEL_IPATI .or. IMODEL == IMODEL_IPATI_WATER ) then + if( USE_RICKER_IPATI .eqv. .false. ) stop 'please set USE_RICKER_IPATI to .true. in shared/constants.h and recompile' endif end subroutine read_parameter_file diff --git a/src/shared/recompute_jacobian.f90 b/src/shared/recompute_jacobian.f90 index b082daf2b..79180a935 100644 --- a/src/shared/recompute_jacobian.f90 +++ b/src/shared/recompute_jacobian.f90 @@ -48,13 +48,6 @@ subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, & double precision xgamma,ygamma,zgamma double precision ra1,ra2,rb1,rb2,rc1,rc2 - double precision l1xi,l2xi,l3xi - double precision l1eta,l2eta,l3eta - double precision l1gamma,l2gamma,l3gamma - double precision l1pxi,l2pxi,l3pxi - double precision l1peta,l2peta,l3peta - double precision l1pgamma,l2pgamma,l3pgamma - integer ia ! 
for 8-node element @@ -120,164 +113,9 @@ subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, & else -! *** -! *** create the 3D shape functions and the Jacobian for a 27-node element -! *** - - l1xi=HALF*xi*(xi-ONE) - l2xi=ONE-xi**2 - l3xi=HALF*xi*(xi+ONE) - - l1pxi=xi-HALF - l2pxi=-TWO*xi - l3pxi=xi+HALF - - l1eta=HALF*eta*(eta-ONE) - l2eta=ONE-eta**2 - l3eta=HALF*eta*(eta+ONE) - - l1peta=eta-HALF - l2peta=-TWO*eta - l3peta=eta+HALF - - l1gamma=HALF*gamma*(gamma-ONE) - l2gamma=ONE-gamma**2 - l3gamma=HALF*gamma*(gamma+ONE) - - l1pgamma=gamma-HALF - l2pgamma=-TWO*gamma - l3pgamma=gamma+HALF - -! corner nodes - - shape3D(1)=l1xi*l1eta*l1gamma - shape3D(2)=l3xi*l1eta*l1gamma - shape3D(3)=l3xi*l3eta*l1gamma - shape3D(4)=l1xi*l3eta*l1gamma - shape3D(5)=l1xi*l1eta*l3gamma - shape3D(6)=l3xi*l1eta*l3gamma - shape3D(7)=l3xi*l3eta*l3gamma - shape3D(8)=l1xi*l3eta*l3gamma - - dershape3D(1,1)=l1pxi*l1eta*l1gamma - dershape3D(1,2)=l3pxi*l1eta*l1gamma - dershape3D(1,3)=l3pxi*l3eta*l1gamma - dershape3D(1,4)=l1pxi*l3eta*l1gamma - dershape3D(1,5)=l1pxi*l1eta*l3gamma - dershape3D(1,6)=l3pxi*l1eta*l3gamma - dershape3D(1,7)=l3pxi*l3eta*l3gamma - dershape3D(1,8)=l1pxi*l3eta*l3gamma - - dershape3D(2,1)=l1xi*l1peta*l1gamma - dershape3D(2,2)=l3xi*l1peta*l1gamma - dershape3D(2,3)=l3xi*l3peta*l1gamma - dershape3D(2,4)=l1xi*l3peta*l1gamma - dershape3D(2,5)=l1xi*l1peta*l3gamma - dershape3D(2,6)=l3xi*l1peta*l3gamma - dershape3D(2,7)=l3xi*l3peta*l3gamma - dershape3D(2,8)=l1xi*l3peta*l3gamma - - dershape3D(3,1)=l1xi*l1eta*l1pgamma - dershape3D(3,2)=l3xi*l1eta*l1pgamma - dershape3D(3,3)=l3xi*l3eta*l1pgamma - dershape3D(3,4)=l1xi*l3eta*l1pgamma - dershape3D(3,5)=l1xi*l1eta*l3pgamma - dershape3D(3,6)=l3xi*l1eta*l3pgamma - dershape3D(3,7)=l3xi*l3eta*l3pgamma - dershape3D(3,8)=l1xi*l3eta*l3pgamma - -! 
midside nodes - - shape3D(9)=l2xi*l1eta*l1gamma - shape3D(10)=l3xi*l2eta*l1gamma - shape3D(11)=l2xi*l3eta*l1gamma - shape3D(12)=l1xi*l2eta*l1gamma - shape3D(13)=l1xi*l1eta*l2gamma - shape3D(14)=l3xi*l1eta*l2gamma - shape3D(15)=l3xi*l3eta*l2gamma - shape3D(16)=l1xi*l3eta*l2gamma - shape3D(17)=l2xi*l1eta*l3gamma - shape3D(18)=l3xi*l2eta*l3gamma - shape3D(19)=l2xi*l3eta*l3gamma - shape3D(20)=l1xi*l2eta*l3gamma - - dershape3D(1,9)=l2pxi*l1eta*l1gamma - dershape3D(1,10)=l3pxi*l2eta*l1gamma - dershape3D(1,11)=l2pxi*l3eta*l1gamma - dershape3D(1,12)=l1pxi*l2eta*l1gamma - dershape3D(1,13)=l1pxi*l1eta*l2gamma - dershape3D(1,14)=l3pxi*l1eta*l2gamma - dershape3D(1,15)=l3pxi*l3eta*l2gamma - dershape3D(1,16)=l1pxi*l3eta*l2gamma - dershape3D(1,17)=l2pxi*l1eta*l3gamma - dershape3D(1,18)=l3pxi*l2eta*l3gamma - dershape3D(1,19)=l2pxi*l3eta*l3gamma - dershape3D(1,20)=l1pxi*l2eta*l3gamma - - dershape3D(2,9)=l2xi*l1peta*l1gamma - dershape3D(2,10)=l3xi*l2peta*l1gamma - dershape3D(2,11)=l2xi*l3peta*l1gamma - dershape3D(2,12)=l1xi*l2peta*l1gamma - dershape3D(2,13)=l1xi*l1peta*l2gamma - dershape3D(2,14)=l3xi*l1peta*l2gamma - dershape3D(2,15)=l3xi*l3peta*l2gamma - dershape3D(2,16)=l1xi*l3peta*l2gamma - dershape3D(2,17)=l2xi*l1peta*l3gamma - dershape3D(2,18)=l3xi*l2peta*l3gamma - dershape3D(2,19)=l2xi*l3peta*l3gamma - dershape3D(2,20)=l1xi*l2peta*l3gamma - - dershape3D(3,9)=l2xi*l1eta*l1pgamma - dershape3D(3,10)=l3xi*l2eta*l1pgamma - dershape3D(3,11)=l2xi*l3eta*l1pgamma - dershape3D(3,12)=l1xi*l2eta*l1pgamma - dershape3D(3,13)=l1xi*l1eta*l2pgamma - dershape3D(3,14)=l3xi*l1eta*l2pgamma - dershape3D(3,15)=l3xi*l3eta*l2pgamma - dershape3D(3,16)=l1xi*l3eta*l2pgamma - dershape3D(3,17)=l2xi*l1eta*l3pgamma - dershape3D(3,18)=l3xi*l2eta*l3pgamma - dershape3D(3,19)=l2xi*l3eta*l3pgamma - dershape3D(3,20)=l1xi*l2eta*l3pgamma - -! side center nodes - - shape3D(21)=l2xi*l2eta*l1gamma - shape3D(22)=l2xi*l1eta*l2gamma - shape3D(23)=l3xi*l2eta*l2gamma - shape3D(24)=l2xi*l3eta*l2gamma - shape3D(25)=l1xi*l2eta*l2gamma - shape3D(26)=l2xi*l2eta*l3gamma - - dershape3D(1,21)=l2pxi*l2eta*l1gamma - dershape3D(1,22)=l2pxi*l1eta*l2gamma - dershape3D(1,23)=l3pxi*l2eta*l2gamma - dershape3D(1,24)=l2pxi*l3eta*l2gamma - dershape3D(1,25)=l1pxi*l2eta*l2gamma - dershape3D(1,26)=l2pxi*l2eta*l3gamma - - dershape3D(2,21)=l2xi*l2peta*l1gamma - dershape3D(2,22)=l2xi*l1peta*l2gamma - dershape3D(2,23)=l3xi*l2peta*l2gamma - dershape3D(2,24)=l2xi*l3peta*l2gamma - dershape3D(2,25)=l1xi*l2peta*l2gamma - dershape3D(2,26)=l2xi*l2peta*l3gamma - - dershape3D(3,21)=l2xi*l2eta*l1pgamma - dershape3D(3,22)=l2xi*l1eta*l2pgamma - dershape3D(3,23)=l3xi*l2eta*l2pgamma - dershape3D(3,24)=l2xi*l3eta*l2pgamma - dershape3D(3,25)=l1xi*l2eta*l2pgamma - dershape3D(3,26)=l2xi*l2eta*l3pgamma - -! center node - - shape3D(27)=l2xi*l2eta*l2gamma - - dershape3D(1,27)=l2pxi*l2eta*l2gamma - dershape3D(2,27)=l2xi*l2peta*l2gamma - dershape3D(3,27)=l2xi*l2eta*l2pgamma - + ! note: put further setup for ngnod == 27 into subroutine + ! to avoid compilation errors in case ngnod == 8 + call recompute_jacobian_27(NGNOD,NDIM,xi,eta,gamma,shape3D,dershape3D) endif ! compute coordinates and jacobian matrix @@ -327,3 +165,196 @@ subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, & end subroutine recompute_jacobian +! +!------------------------------------------------------------------------------------------------- +! + + subroutine recompute_jacobian_27(ngnod,ndim,xi,eta,gamma,shape3D,dershape3D) + + implicit none + + integer :: ngnod,ndim + + double precision :: xi,eta,gamma + +! 
3D shape functions and their derivatives at receiver + double precision,dimension(ngnod) :: shape3D + double precision,dimension(ndim,ngnod) :: dershape3D + + ! local parameters + double precision l1xi,l2xi,l3xi + double precision l1eta,l2eta,l3eta + double precision l1gamma,l2gamma,l3gamma + double precision l1pxi,l2pxi,l3pxi + double precision l1peta,l2peta,l3peta + double precision l1pgamma,l2pgamma,l3pgamma + + double precision, parameter :: HALF = 0.5d0 + double precision, parameter :: ONE = 1.0d0 + double precision, parameter :: TWO = 2.0d0 + +! recompute jacobian for any (xi,eta,gamma) point, not necessarily a GLL point + + +! *** +! *** create the 3D shape functions and the Jacobian for a 27-node element +! *** + + l1xi=HALF*xi*(xi-ONE) + l2xi=ONE-xi**2 + l3xi=HALF*xi*(xi+ONE) + + l1pxi=xi-HALF + l2pxi=-TWO*xi + l3pxi=xi+HALF + + l1eta=HALF*eta*(eta-ONE) + l2eta=ONE-eta**2 + l3eta=HALF*eta*(eta+ONE) + + l1peta=eta-HALF + l2peta=-TWO*eta + l3peta=eta+HALF + + l1gamma=HALF*gamma*(gamma-ONE) + l2gamma=ONE-gamma**2 + l3gamma=HALF*gamma*(gamma+ONE) + + l1pgamma=gamma-HALF + l2pgamma=-TWO*gamma + l3pgamma=gamma+HALF + +! corner nodes + + shape3D(1)=l1xi*l1eta*l1gamma + shape3D(2)=l3xi*l1eta*l1gamma + shape3D(3)=l3xi*l3eta*l1gamma + shape3D(4)=l1xi*l3eta*l1gamma + shape3D(5)=l1xi*l1eta*l3gamma + shape3D(6)=l3xi*l1eta*l3gamma + shape3D(7)=l3xi*l3eta*l3gamma + shape3D(8)=l1xi*l3eta*l3gamma + + dershape3D(1,1)=l1pxi*l1eta*l1gamma + dershape3D(1,2)=l3pxi*l1eta*l1gamma + dershape3D(1,3)=l3pxi*l3eta*l1gamma + dershape3D(1,4)=l1pxi*l3eta*l1gamma + dershape3D(1,5)=l1pxi*l1eta*l3gamma + dershape3D(1,6)=l3pxi*l1eta*l3gamma + dershape3D(1,7)=l3pxi*l3eta*l3gamma + dershape3D(1,8)=l1pxi*l3eta*l3gamma + + dershape3D(2,1)=l1xi*l1peta*l1gamma + dershape3D(2,2)=l3xi*l1peta*l1gamma + dershape3D(2,3)=l3xi*l3peta*l1gamma + dershape3D(2,4)=l1xi*l3peta*l1gamma + dershape3D(2,5)=l1xi*l1peta*l3gamma + dershape3D(2,6)=l3xi*l1peta*l3gamma + dershape3D(2,7)=l3xi*l3peta*l3gamma + dershape3D(2,8)=l1xi*l3peta*l3gamma + + dershape3D(3,1)=l1xi*l1eta*l1pgamma + dershape3D(3,2)=l3xi*l1eta*l1pgamma + dershape3D(3,3)=l3xi*l3eta*l1pgamma + dershape3D(3,4)=l1xi*l3eta*l1pgamma + dershape3D(3,5)=l1xi*l1eta*l3pgamma + dershape3D(3,6)=l3xi*l1eta*l3pgamma + dershape3D(3,7)=l3xi*l3eta*l3pgamma + dershape3D(3,8)=l1xi*l3eta*l3pgamma + +! 
midside nodes + + shape3D(9)=l2xi*l1eta*l1gamma + shape3D(10)=l3xi*l2eta*l1gamma + shape3D(11)=l2xi*l3eta*l1gamma + shape3D(12)=l1xi*l2eta*l1gamma + shape3D(13)=l1xi*l1eta*l2gamma + shape3D(14)=l3xi*l1eta*l2gamma + shape3D(15)=l3xi*l3eta*l2gamma + shape3D(16)=l1xi*l3eta*l2gamma + shape3D(17)=l2xi*l1eta*l3gamma + shape3D(18)=l3xi*l2eta*l3gamma + shape3D(19)=l2xi*l3eta*l3gamma + shape3D(20)=l1xi*l2eta*l3gamma + + dershape3D(1,9)=l2pxi*l1eta*l1gamma + dershape3D(1,10)=l3pxi*l2eta*l1gamma + dershape3D(1,11)=l2pxi*l3eta*l1gamma + dershape3D(1,12)=l1pxi*l2eta*l1gamma + dershape3D(1,13)=l1pxi*l1eta*l2gamma + dershape3D(1,14)=l3pxi*l1eta*l2gamma + dershape3D(1,15)=l3pxi*l3eta*l2gamma + dershape3D(1,16)=l1pxi*l3eta*l2gamma + dershape3D(1,17)=l2pxi*l1eta*l3gamma + dershape3D(1,18)=l3pxi*l2eta*l3gamma + dershape3D(1,19)=l2pxi*l3eta*l3gamma + dershape3D(1,20)=l1pxi*l2eta*l3gamma + + dershape3D(2,9)=l2xi*l1peta*l1gamma + dershape3D(2,10)=l3xi*l2peta*l1gamma + dershape3D(2,11)=l2xi*l3peta*l1gamma + dershape3D(2,12)=l1xi*l2peta*l1gamma + dershape3D(2,13)=l1xi*l1peta*l2gamma + dershape3D(2,14)=l3xi*l1peta*l2gamma + dershape3D(2,15)=l3xi*l3peta*l2gamma + dershape3D(2,16)=l1xi*l3peta*l2gamma + dershape3D(2,17)=l2xi*l1peta*l3gamma + dershape3D(2,18)=l3xi*l2peta*l3gamma + dershape3D(2,19)=l2xi*l3peta*l3gamma + dershape3D(2,20)=l1xi*l2peta*l3gamma + + dershape3D(3,9)=l2xi*l1eta*l1pgamma + dershape3D(3,10)=l3xi*l2eta*l1pgamma + dershape3D(3,11)=l2xi*l3eta*l1pgamma + dershape3D(3,12)=l1xi*l2eta*l1pgamma + dershape3D(3,13)=l1xi*l1eta*l2pgamma + dershape3D(3,14)=l3xi*l1eta*l2pgamma + dershape3D(3,15)=l3xi*l3eta*l2pgamma + dershape3D(3,16)=l1xi*l3eta*l2pgamma + dershape3D(3,17)=l2xi*l1eta*l3pgamma + dershape3D(3,18)=l3xi*l2eta*l3pgamma + dershape3D(3,19)=l2xi*l3eta*l3pgamma + dershape3D(3,20)=l1xi*l2eta*l3pgamma + +! side center nodes + + shape3D(21)=l2xi*l2eta*l1gamma + shape3D(22)=l2xi*l1eta*l2gamma + shape3D(23)=l3xi*l2eta*l2gamma + shape3D(24)=l2xi*l3eta*l2gamma + shape3D(25)=l1xi*l2eta*l2gamma + shape3D(26)=l2xi*l2eta*l3gamma + + dershape3D(1,21)=l2pxi*l2eta*l1gamma + dershape3D(1,22)=l2pxi*l1eta*l2gamma + dershape3D(1,23)=l3pxi*l2eta*l2gamma + dershape3D(1,24)=l2pxi*l3eta*l2gamma + dershape3D(1,25)=l1pxi*l2eta*l2gamma + dershape3D(1,26)=l2pxi*l2eta*l3gamma + + dershape3D(2,21)=l2xi*l2peta*l1gamma + dershape3D(2,22)=l2xi*l1peta*l2gamma + dershape3D(2,23)=l3xi*l2peta*l2gamma + dershape3D(2,24)=l2xi*l3peta*l2gamma + dershape3D(2,25)=l1xi*l2peta*l2gamma + dershape3D(2,26)=l2xi*l2peta*l3gamma + + dershape3D(3,21)=l2xi*l2eta*l1pgamma + dershape3D(3,22)=l2xi*l1eta*l2pgamma + dershape3D(3,23)=l3xi*l2eta*l2pgamma + dershape3D(3,24)=l2xi*l3eta*l2pgamma + dershape3D(3,25)=l1xi*l2eta*l2pgamma + dershape3D(3,26)=l2xi*l2eta*l3pgamma + +! center node + + shape3D(27)=l2xi*l2eta*l2gamma + + dershape3D(1,27)=l2pxi*l2eta*l2gamma + dershape3D(2,27)=l2xi*l2peta*l2gamma + dershape3D(3,27)=l2xi*l2eta*l2pgamma + + + end subroutine recompute_jacobian_27 + + diff --git a/src/specfem3D/PML_init.f90 b/src/specfem3D/PML_init.f90 index af675c089..f7d628546 100644 --- a/src/specfem3D/PML_init.f90 +++ b/src/specfem3D/PML_init.f90 @@ -127,7 +127,6 @@ end subroutine PML_damping_profile_l subroutine PML_initialize() use specfem_par,only: NGLOB_AB,NSPEC_AB,myrank, & - ibool,xstore,ystore,zstore,& model_speed_max,hdur use PML_par use PML_par_acoustic @@ -303,9 +302,9 @@ subroutine PML_set_firstlayer() ! 
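As a quick sanity check in the spirit of the 2D test in get_shape2D, the 27-node shape functions returned by recompute_jacobian_27 should sum to one and their derivatives to zero at any (xi,eta,gamma); the sketch below assumes that routine is linked in and uses an arbitrary test point.

  program check_shape3D_27
  implicit none
  integer, parameter :: NGNOD = 27, NDIM = 3
  double precision :: shape3D(NGNOD), dershape3D(NDIM,NGNOD)
  double precision :: xi,eta,gamma
  double precision, parameter :: TINYVAL = 1.d-9
  ! arbitrary point inside the reference element, not necessarily a GLL point
  xi = 0.3d0; eta = -0.7d0; gamma = 0.1d0
  call recompute_jacobian_27(NGNOD,NDIM,xi,eta,gamma,shape3D,dershape3D)
  ! partition of unity: sum of shape functions equals one
  if( abs(sum(shape3D) - 1.d0) > TINYVAL ) stop 'error in 3D shape functions'
  ! derivatives of a constant: each directional sum vanishes
  if( any( abs(sum(dershape3D,dim=2)) > TINYVAL ) ) stop 'error in 3D shape function derivatives'
  end program check_shape3D_27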
sets ispec occurrences for first element layer in PML region based on absorbing boundary elements use PML_par - use specfem_par,only: NSPEC_AB,NGLOB_AB, & + use specfem_par,only: NSPEC_AB, & abs_boundary_ispec,abs_boundary_normal,num_abs_boundary_faces,& - abs_boundary_ijk,ibool,myrank + abs_boundary_ijk,ibool use constants,only: NDIM,TINYVAL,NGNOD,NGLLX,NGLLY,NGLLZ,NGLLSQUARE implicit none ! local parameters @@ -479,14 +478,13 @@ subroutine PML_determine_interfacePoints() ! finds global interface points of PML region to "regular" domain - use specfem_par,only: ibool,myrank,NGLOB_AB,NSPEC_AB, & + use specfem_par,only: ibool,NGLOB_AB,NSPEC_AB, & num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & my_neighbours_ext_mesh,NPROC use PML_par use PML_par_acoustic use constants,only: NGLLX,NGLLY,NGLLZ - use specfem_par_acoustic,only: ispec_is_acoustic implicit none ! local parameters @@ -848,7 +846,6 @@ subroutine PML_add_layer() use PML_par use specfem_par,only: NSPEC_AB,NGLOB_AB, & - abs_boundary_ispec,num_abs_boundary_faces,& ibool,myrank,& num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & diff --git a/src/specfem3D/compute_add_sources_acoustic.f90 b/src/specfem3D/compute_add_sources_acoustic.f90 index b68cdb002..5e0f55eef 100644 --- a/src/specfem3D/compute_add_sources_acoustic.f90 +++ b/src/specfem3D/compute_add_sources_acoustic.f90 @@ -139,8 +139,7 @@ subroutine compute_add_sources_acoustic(NSPEC_AB,NGLOB_AB,potential_dot_dot_acou ! write(*,*) "fortran dt = ", dt ! change dt -> DT call compute_add_sources_ac_cuda(Mesh_pointer, phase_is_inner, & - NSOURCES, SIMULATION_TYPE, & - stf_pre_compute, myrank) + NSOURCES,stf_pre_compute, myrank) endif else ! .NOT. GPU_MODE @@ -435,8 +434,7 @@ subroutine compute_add_sources_acoustic(NSPEC_AB,NGLOB_AB,potential_dot_dot_acou ! only implements SIMTYPE=3 call compute_add_sources_ac_s3_cuda(Mesh_pointer, phase_is_inner, & - NSOURCES, SIMULATION_TYPE, & - stf_pre_compute, myrank) + NSOURCES,stf_pre_compute, myrank) endif else ! .NOT. 
GPU_MODE diff --git a/src/specfem3D/compute_add_sources_elastic.f90 b/src/specfem3D/compute_add_sources_elastic.f90 index 73d9435e9..63ed10d0a 100644 --- a/src/specfem3D/compute_add_sources_elastic.f90 +++ b/src/specfem3D/compute_add_sources_elastic.f90 @@ -39,7 +39,6 @@ subroutine compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, & use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, & xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,& station_name,network_name,adj_source_file, & - LOCAL_PATH,wgllwgll_xy, & num_free_surface_faces,free_surface_ispec, & free_surface_ijk,free_surface_jacobian2Dw, & noise_sourcearray,irec_master_noise, & @@ -48,10 +47,6 @@ subroutine compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, & nrec_local,number_receiver_global, & nsources_local - use specfem_par_movie,only: & - store_val_ux_external_mesh,store_val_uy_external_mesh, & - store_val_uz_external_mesh - implicit none include "constants.h" diff --git a/src/specfem3D/compute_add_sources_poroelastic.f90 b/src/specfem3D/compute_add_sources_poroelastic.f90 index 8d93ef600..e13166243 100644 --- a/src/specfem3D/compute_add_sources_poroelastic.f90 +++ b/src/specfem3D/compute_add_sources_poroelastic.f90 @@ -41,7 +41,7 @@ subroutine compute_add_sources_poroelastic( NSPEC_AB,NGLOB_AB, & use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, & xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,& - station_name,network_name,adj_source_file,nrec_local,number_receiver_global + station_name,network_name,adj_source_file implicit none include "constants.h" diff --git a/src/specfem3D/compute_coupling_elastic_ac.f90 b/src/specfem3D/compute_coupling_elastic_ac.f90 index 43c233a7e..f391bb022 100644 --- a/src/specfem3D/compute_coupling_elastic_ac.f90 +++ b/src/specfem3D/compute_coupling_elastic_ac.f90 @@ -120,3 +120,110 @@ subroutine compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, & end subroutine compute_coupling_elastic_ac +! +!------------------------------------------------------------------------------------------------- +! + + subroutine compute_coupling_ocean(NSPEC_AB,NGLOB_AB, & + ibool,rmassx,rmassy,rmassz, & + rmass_ocean_load,accel, & + free_surface_normal,free_surface_ijk,free_surface_ispec, & + num_free_surface_faces,SIMULATION_TYPE, & + NGLOB_ADJOINT,b_accel) + +! updates acceleration with ocean load term: +! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ), +! assuming incompressible fluid column above bathymetry ocean bottom + + implicit none + + include 'constants.h' + + integer :: NSPEC_AB,NGLOB_AB + + real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel + real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmassx,rmassy,rmassz + real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass_ocean_load + + integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool + + ! free surface + integer :: num_free_surface_faces + real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces) + integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces) + integer :: free_surface_ispec(num_free_surface_faces) + + ! adjoint simulations + integer :: SIMULATION_TYPE,NGLOB_ADJOINT + real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel + +! local parameters + real(kind=CUSTOM_REAL) :: nx,ny,nz + real(kind=CUSTOM_REAL) :: force_normal_comp + integer :: i,j,k,ispec,iglob + integer :: igll,iface + logical,dimension(NGLOB_AB) :: updated_dof_ocean_load + ! 
adjoint locals + real(kind=CUSTOM_REAL) :: b_force_normal_comp + + ! initialize the updates + updated_dof_ocean_load(:) = .false. + + ! for surface elements exactly at the top of the model (ocean bottom) + do iface = 1,num_free_surface_faces + + ispec = free_surface_ispec(iface) + do igll = 1, NGLLSQUARE + i = free_surface_ijk(1,igll,iface) + j = free_surface_ijk(2,igll,iface) + k = free_surface_ijk(3,igll,iface) + + ! get global point number + iglob = ibool(i,j,k,ispec) + + ! only update once + if(.not. updated_dof_ocean_load(iglob)) then + + ! get normal + nx = free_surface_normal(1,igll,iface) + ny = free_surface_normal(2,igll,iface) + nz = free_surface_normal(3,igll,iface) + + ! make updated component of right-hand side + ! we divide by rmass() which is 1 / M + ! we use the total force which includes the Coriolis term above + force_normal_comp = accel(1,iglob)*nx / rmassx(iglob) & + + accel(2,iglob)*ny / rmassy(iglob) & + + accel(3,iglob)*nz / rmassz(iglob) + + accel(1,iglob) = accel(1,iglob) & + + (rmass_ocean_load(iglob) - rmassx(iglob)) * force_normal_comp * nx + accel(2,iglob) = accel(2,iglob) & + + (rmass_ocean_load(iglob) - rmassy(iglob)) * force_normal_comp * ny + accel(3,iglob) = accel(3,iglob) & + + (rmass_ocean_load(iglob) - rmassz(iglob)) * force_normal_comp * nz + + ! adjoint simulations + if (SIMULATION_TYPE == 3) then + b_force_normal_comp = b_accel(1,iglob)*nx / rmassx(iglob) & + + b_accel(2,iglob)*ny / rmassy(iglob) & + + b_accel(3,iglob)*nz / rmassz(iglob) + + b_accel(1,iglob) = b_accel(1,iglob) & + + (rmass_ocean_load(iglob) - rmassx(iglob)) * b_force_normal_comp * nx + b_accel(2,iglob) = b_accel(2,iglob) & + + (rmass_ocean_load(iglob) - rmassy(iglob)) * b_force_normal_comp * ny + b_accel(3,iglob) = b_accel(3,iglob) & + + (rmass_ocean_load(iglob) - rmassz(iglob)) * b_force_normal_comp * nz + endif !adjoint + + ! done with this point + updated_dof_ocean_load(iglob) = .true. + + endif + + enddo ! igll + enddo ! iface + + end subroutine compute_coupling_ocean + diff --git a/src/specfem3D/compute_forces_acoustic.f90 b/src/specfem3D/compute_forces_acoustic.f90 index e43ca8d00..e5cda1850 100644 --- a/src/specfem3D/compute_forces_acoustic.f90 +++ b/src/specfem3D/compute_forces_acoustic.f90 @@ -81,7 +81,7 @@ subroutine compute_forces_acoustic() num_free_surface_faces,ispec_is_acoustic) else ! on GPU - call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE) + call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_FREE_SURFACE) endif if(ABSORB_USE_PML .and. ABSORBING_CONDITIONS) then @@ -148,8 +148,7 @@ subroutine compute_forces_acoustic() ! on GPU ! includes code for SIMULATION_TYPE==3 call compute_forces_acoustic_cuda(Mesh_pointer, iphase, & - nspec_outer_acoustic, nspec_inner_acoustic, & - SIMULATION_TYPE) + nspec_outer_acoustic, nspec_inner_acoustic) endif if(ABSORB_USE_PML .and. ABSORBING_CONDITIONS) then @@ -253,7 +252,7 @@ subroutine compute_forces_acoustic() else ! on GPU call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, & - num_coupling_ac_el_faces,SIMULATION_TYPE) + num_coupling_ac_el_faces) endif ! GPU_MODE endif endif @@ -401,7 +400,7 @@ subroutine compute_forces_acoustic() b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:) else ! 
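For a single free-surface point, the ocean load correction applied above reduces to the scalar update sketched below; all arguments are scalars, names are illustrative, and the mass matrix entries are passed in already inverted (1/M), as they are stored in the solver.

  subroutine ocean_load_point(nx,ny,nz,rmassx_i,rmassy_i,rmassz_i,rmass_ocean_i,ax,ay,az)
  implicit none
  double precision, intent(in)    :: nx,ny,nz                    ! free-surface normal
  double precision, intent(in)    :: rmassx_i,rmassy_i,rmassz_i  ! inverted mass matrix entries 1/M
  double precision, intent(in)    :: rmass_ocean_i               ! inverted mass entry including ocean load
  double precision, intent(inout) :: ax,ay,az                    ! acceleration components at the point
  double precision :: force_normal_comp
  ! normal component of the total force (accel has already been multiplied by 1/M)
  force_normal_comp = ax*nx/rmassx_i + ay*ny/rmassy_i + az*nz/rmassz_i
  ! replace the normal part of the response by the ocean-loaded one
  ax = ax + (rmass_ocean_i - rmassx_i) * force_normal_comp * nx
  ay = ay + (rmass_ocean_i - rmassy_i) * force_normal_comp * ny
  az = az + (rmass_ocean_i - rmassz_i) * force_normal_comp * nz
  end subroutine ocean_load_point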
on GPU - call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB,SIMULATION_TYPE) + call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB) endif @@ -448,7 +447,7 @@ subroutine compute_forces_acoustic() b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:) else ! on GPU - call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2) + call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,b_deltatover2) endif ! updates potential_dot_acoustic and potential_dot_dot_acoustic inside PML region for plotting seismograms/movies @@ -497,7 +496,7 @@ subroutine compute_forces_acoustic() num_free_surface_faces,ispec_is_acoustic) else ! on GPU - call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE) + call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_FREE_SURFACE) endif diff --git a/src/specfem3D/compute_forces_elastic.F90 b/src/specfem3D/compute_forces_elastic.F90 index 53ced024c..6f00373fb 100644 --- a/src/specfem3D/compute_forces_elastic.F90 +++ b/src/specfem3D/compute_forces_elastic.F90 @@ -124,7 +124,6 @@ subroutine compute_forces_elastic() call compute_forces_elastic_cuda(Mesh_pointer, iphase, & nspec_outer_elastic, & nspec_inner_elastic, & - SIMULATION_TYPE, & COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY) if(phase_is_inner .eqv. .true.) then @@ -203,7 +202,7 @@ subroutine compute_forces_elastic() else ! on GPU call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, & - num_coupling_ac_el_faces,SIMULATION_TYPE) + num_coupling_ac_el_faces) endif ! GPU_MODE endif ! num_coupling_ac_el_faces endif @@ -329,30 +328,31 @@ subroutine compute_forces_elastic() ! multiplies with inverse of mass matrix (note: rmass has been inverted already) if(.NOT. GPU_MODE) then - accel(1,:) = accel(1,:)*rmass(:) - accel(2,:) = accel(2,:)*rmass(:) - accel(3,:) = accel(3,:)*rmass(:) + accel(1,:) = accel(1,:)*rmassx(:) + accel(2,:) = accel(2,:)*rmassy(:) + accel(3,:) = accel(3,:)*rmassz(:) ! adjoint simulations if (SIMULATION_TYPE == 3) then - b_accel(1,:) = b_accel(1,:)*rmass(:) - b_accel(2,:) = b_accel(2,:)*rmass(:) - b_accel(3,:) = b_accel(3,:)*rmass(:) + b_accel(1,:) = b_accel(1,:)*rmassx(:) + b_accel(2,:) = b_accel(2,:)*rmassy(:) + b_accel(3,:) = b_accel(3,:)*rmassz(:) endif !adjoint else ! GPU_MODE == 1 - call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS) + call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2,OCEANS) endif ! updates acceleration with ocean load term if(OCEANS) then if( .NOT. GPU_MODE ) then - call elastic_ocean_load(NSPEC_AB,NGLOB_AB, & - ibool,rmass,rmass_ocean_load,accel, & - free_surface_normal,free_surface_ijk,free_surface_ispec, & - num_free_surface_faces,SIMULATION_TYPE, & - NGLOB_ADJOINT,b_accel) + call compute_coupling_ocean(NSPEC_AB,NGLOB_AB, & + ibool,rmassx,rmassy,rmassz, & + rmass_ocean_load,accel, & + free_surface_normal,free_surface_ijk,free_surface_ispec, & + num_free_surface_faces,SIMULATION_TYPE, & + NGLOB_ADJOINT,b_accel) else ! on GPU - call elastic_ocean_load_cuda(Mesh_pointer,SIMULATION_TYPE) + call compute_coupling_ocean_cuda(Mesh_pointer) endif endif @@ -378,115 +378,13 @@ subroutine compute_forces_elastic() ! adjoint simulations if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:) else ! 
GPU_MODE == 1 - if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2) + if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2) endif end subroutine compute_forces_elastic -! -!------------------------------------------------------------------------------------------------- -! - -subroutine elastic_ocean_load(NSPEC_AB,NGLOB_AB, & - ibool,rmass,rmass_ocean_load,accel, & - free_surface_normal,free_surface_ijk,free_surface_ispec, & - num_free_surface_faces,SIMULATION_TYPE, & - NGLOB_ADJOINT,b_accel) - -! updates acceleration with ocean load term: -! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ), -! assuming incompressible fluid column above bathymetry ocean bottom - - implicit none - - include 'constants.h' - - integer :: NSPEC_AB,NGLOB_AB - - real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel - real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass,rmass_ocean_load - - integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool - - ! free surface - integer :: num_free_surface_faces - real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces) - integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces) - integer :: free_surface_ispec(num_free_surface_faces) - - ! adjoint simulations - integer :: SIMULATION_TYPE,NGLOB_ADJOINT - real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel - -! local parameters - real(kind=CUSTOM_REAL) :: nx,ny,nz - real(kind=CUSTOM_REAL) :: additional_term,force_normal_comp - integer :: i,j,k,ispec,iglob - integer :: igll,iface - logical,dimension(NGLOB_AB) :: updated_dof_ocean_load - ! adjoint locals - real(kind=CUSTOM_REAL) :: b_additional_term,b_force_normal_comp - - ! initialize the updates - updated_dof_ocean_load(:) = .false. - - ! for surface elements exactly at the top of the model (ocean bottom) - do iface = 1,num_free_surface_faces - - ispec = free_surface_ispec(iface) - do igll = 1, NGLLSQUARE - i = free_surface_ijk(1,igll,iface) - j = free_surface_ijk(2,igll,iface) - k = free_surface_ijk(3,igll,iface) - - ! get global point number - iglob = ibool(i,j,k,ispec) - - ! only update once - if(.not. updated_dof_ocean_load(iglob)) then - - ! get normal - nx = free_surface_normal(1,igll,iface) - ny = free_surface_normal(2,igll,iface) - nz = free_surface_normal(3,igll,iface) - - ! make updated component of right-hand side - ! we divide by rmass() which is 1 / M - ! we use the total force which includes the Coriolis term above - force_normal_comp = ( accel(1,iglob)*nx + & - accel(2,iglob)*ny + & - accel(3,iglob)*nz ) / rmass(iglob) - - additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * force_normal_comp - - accel(1,iglob) = accel(1,iglob) + additional_term * nx - accel(2,iglob) = accel(2,iglob) + additional_term * ny - accel(3,iglob) = accel(3,iglob) + additional_term * nz - - ! adjoint simulations - if (SIMULATION_TYPE == 3) then - b_force_normal_comp = ( b_accel(1,iglob)*nx + & - b_accel(2,iglob)*ny + & - b_accel(3,iglob)*nz) / rmass(iglob) - b_additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * b_force_normal_comp - - b_accel(1,iglob) = b_accel(1,iglob) + b_additional_term * nx - b_accel(2,iglob) = b_accel(2,iglob) + b_additional_term * ny - b_accel(3,iglob) = b_accel(3,iglob) + b_additional_term * nz - endif !adjoint - - ! done with this point - updated_dof_ocean_load(iglob) = .true. - - endif - - enddo ! igll - enddo ! 
iface - -end subroutine elastic_ocean_load - ! !------------------------------------------------------------------------------------------------- ! diff --git a/src/specfem3D/compute_stacey_acoustic.f90 b/src/specfem3D/compute_stacey_acoustic.f90 index 60aa2ab30..63f44dd1c 100644 --- a/src/specfem3D/compute_stacey_acoustic.f90 +++ b/src/specfem3D/compute_stacey_acoustic.f90 @@ -147,7 +147,7 @@ subroutine compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, & ! GPU_MODE == .true. if( num_abs_boundary_faces > 0 ) & call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, & - SIMULATION_TYPE,SAVE_FORWARD,b_absorb_potential) + SAVE_FORWARD,b_absorb_potential) endif diff --git a/src/specfem3D/compute_stacey_elastic.f90 b/src/specfem3D/compute_stacey_elastic.f90 index 3a7c1d853..826e83352 100644 --- a/src/specfem3D/compute_stacey_elastic.f90 +++ b/src/specfem3D/compute_stacey_elastic.f90 @@ -166,7 +166,7 @@ subroutine compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, & ! GPU_MODE == .true. if( num_abs_boundary_faces > 0 ) & call compute_stacey_elastic_cuda(Mesh_pointer,phase_is_inner, & - SIMULATION_TYPE,SAVE_FORWARD,b_absorb_field) + SAVE_FORWARD,b_absorb_field) endif ! adjoint simulations: stores absorbed wavefield part diff --git a/src/specfem3D/create_color_image.f90 b/src/specfem3D/create_color_image.f90 index c4edac58a..961eeb2ad 100644 --- a/src/specfem3D/create_color_image.f90 +++ b/src/specfem3D/create_color_image.f90 @@ -103,7 +103,7 @@ subroutine write_PNM_GIF_initialize() use specfem_par,only: NGLOB_AB,NSPEC_AB,ibool,xstore,ystore,zstore,& num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & nibool_interfaces_ext_mesh,my_neighbours_ext_mesh, & - ibool_interfaces_ext_mesh,prname + ibool_interfaces_ext_mesh !,prname use constants,only: HUGEVAL,NGLLX,NGLLY,NGLLZ implicit none ! local parameters @@ -816,7 +816,7 @@ end subroutine write_PNM_GIF_data subroutine get_iglob_vp(iglob,ispec,vp) use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,FOUR_THIRDS - use specfem_par,only: mustore,kappastore,ibool,myrank,NSPEC_AB + use specfem_par,only: mustore,kappastore,ibool,myrank use specfem_par_acoustic,only: ACOUSTIC_SIMULATION,rhostore use specfem_par_elastic,only: ELASTIC_SIMULATION,rho_vp implicit none @@ -857,7 +857,7 @@ subroutine get_iglob_veloc(iglob,ispec,val_vector) rhostore,ispec_is_acoustic, & b_potential_acoustic,b_potential_dot_acoustic use specfem_par_elastic,only: ELASTIC_SIMULATION,displ,veloc, & - ispec_is_elastic,b_displ,b_veloc + ispec_is_elastic ! ,b_displ,b_veloc use specfem_par,only: NSPEC_AB,NGLOB_AB,hprime_xx,hprime_yy,hprime_zz, & xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, & ibool,SIMULATION_TYPE,GRAVITY diff --git a/src/specfem3D/finalize_simulation.f90 b/src/specfem3D/finalize_simulation.f90 index f18adfef9..e4499d11e 100644 --- a/src/specfem3D/finalize_simulation.f90 +++ b/src/specfem3D/finalize_simulation.f90 @@ -140,6 +140,18 @@ subroutine finalize_simulation() endif endif + ! frees dynamically allocated memory + + ! mass matrices + if( ELASTIC_SIMULATION ) then + deallocate(rmassx) + deallocate(rmassy) + deallocate(rmassz) + endif + if( ACOUSTIC_SIMULATION ) then + deallocate(rmass_acoustic) + endif + ! 
   if(myrank == 0) then
     write(IMAIN,*)
diff --git a/src/specfem3D/initialize_simulation.f90 b/src/specfem3D/initialize_simulation.f90
index d5be225f4..b2a6d8483 100644
--- a/src/specfem3D/initialize_simulation.f90
+++ b/src/specfem3D/initialize_simulation.f90
@@ -116,6 +116,8 @@ subroutine initialize_simulation()
       write(IMAIN,'(a)',advance='yes') ' external'
     case( IMODEL_IPATI )
       write(IMAIN,'(a)',advance='yes') ' ipati'
+    case( IMODEL_IPATI_WATER )
+      write(IMAIN,'(a)',advance='yes') ' ipati_water'
     end select
     write(IMAIN,*)
diff --git a/src/specfem3D/iterate_time.f90 b/src/specfem3D/iterate_time.f90
index 9db2ab79e..12e61cfe4 100644
--- a/src/specfem3D/iterate_time.f90
+++ b/src/specfem3D/iterate_time.f90
@@ -416,8 +416,8 @@ subroutine it_update_displacement_scheme()
     else
       ! on GPU
       call it_update_displacement_ac_cuda(Mesh_pointer, NGLOB_AB, &
-                                          deltat, deltatsqover2, deltatover2, &
-                                          SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
+                                          deltat, deltatsqover2, deltatover2, &
+                                          b_deltat, b_deltatsqover2, b_deltatover2)
     endif

   ! time marching potentials
@@ -457,7 +457,7 @@ subroutine it_update_displacement_scheme()
       ! on GPU
       ! Includes SIM_TYPE 1 & 3 (for noise tomography)
       call it_update_displacement_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
-                                       deltatover2, SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
+                                       deltatover2, b_deltat, b_deltatsqover2, b_deltatover2)
     endif
   endif

@@ -821,7 +821,7 @@ subroutine it_transfer_from_GPU()

   ! frees allocated memory on GPU
   call prepare_cleanup_device(Mesh_pointer, &
-                              SIMULATION_TYPE,SAVE_FORWARD, &
+                              SAVE_FORWARD, &
                               ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
                               ABSORBING_CONDITIONS,NOISE_TOMOGRAPHY,COMPUTE_AND_STORE_STRAIN, &
                               ATTENUATION,ANISOTROPY,OCEANS, &
diff --git a/src/specfem3D/model_update.f90 b/src/specfem3D/model_update.f90
index ba7e78771..0723f27cd 100644
--- a/src/specfem3D/model_update.f90
+++ b/src/specfem3D/model_update.f90
@@ -827,47 +827,47 @@ program model_update

   rmass_new(:) = 0._CUSTOM_REAL

+  ! note: this does not update the absorbing boundary contributions to the mass matrix
+
+  ! elastic mass matrix
   do ispec=1,nspec
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-          iglob = ibool(i,j,k,ispec)
-
-          weight = wxgll(i)*wygll(j)*wzgll(k)
-          jacobianl = jacobianstore(i,j,k,ispec)
-
-          if( myrank == 0) then
-            print*, 'weight', weight
-            print*, 'jacobianl', jacobianl
-          endif
+    if( ispec_is_elastic(ispec) ) then
+      do k=1,NGLLZ
+        do j=1,NGLLY
+          do i=1,NGLLX
+            iglob = ibool(i,j,k,ispec)
+
+            weight = wxgll(i)*wygll(j)*wzgll(k)
+            jacobianl = jacobianstore(i,j,k,ispec)
+
+            if( myrank == 0) then
+              print*, 'weight', weight
+              print*, 'jacobianl', jacobianl
+            endif

-! elastic mass matrix
-          if( ispec_is_elastic(ispec) ) then
             if(CUSTOM_REAL == SIZE_REAL) then
               rmass_new(iglob) = rmass_new(iglob) + &
-                      sngl( dble(jacobianl) * weight * dble(rhostore_new(i,j,k,ispec)) )
+                      sngl( dble(jacobianl) * weight * dble(rhostore_new(i,j,k,ispec)) )
             else
-              rmass_new(iglob) = rmass_new(iglob) + &
-                      jacobianl * weight * rhostore_new(i,j,k,ispec)
+              rmass_new(iglob) = rmass_new(iglob) + &
+                      jacobianl * weight * rhostore_new(i,j,k,ispec)
             endif
-          endif
+
           enddo
         enddo
       enddo
-  enddo
+    endif
   enddo ! nspec

   call sync_all()
-
+
   ! dummy allocations, arrays are not needed since the update here only works for elastic models
   allocate(rmass_acoustic_new(NGLOB))
   allocate(rmass_solid_poroelastic_new(NGLOB))
   allocate(rmass_fluid_poroelastic_new(NGLOB))
   rmass_acoustic_new = 0._CUSTOM_REAL
   rmass_solid_poroelastic_new = 0._CUSTOM_REAL
   rmass_fluid_poroelastic_new = 0._CUSTOM_REAL
-
-
+
 !-------- attenuation -------
 ! read the proc*attenuation.vtk for the old model in LOCAL_PATH and store qmu_attenuation_store
@@ -979,8 +979,6 @@ program model_update
                         abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
                         free_surface_normal,free_surface_jacobian2Dw, &
                         free_surface_ijk,free_surface_ispec,num_free_surface_faces, &
-                        coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-                        coupling_ac_el_ijk,coupling_ac_el_ispec,num_coupling_ac_el_faces, &
                         num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
                         max_nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         prname_new,SAVE_MESH_FILES,ANISOTROPY,NSPEC_ANISO, &
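The rebuilt elastic mass matrix above is a standard diagonal GLL-quadrature assembly: each global point accumulates w_i*w_j*w_k*|J|*rho over the elements that touch it. A compact, self-contained sketch of that step follows; the argument names mirror the variables used in the hunk above but the interface itself (assemble_mass, is_elastic) is hypothetical and the kinds are simplified.

  ! Hedged sketch of the diagonal mass-matrix assembly performed in model_update (illustrative only).
  subroutine assemble_mass(nspec,nglob,ngllx,nglly,ngllz,ibool,wxgll,wygll,wzgll, &
                           jacobianstore,rhostore_new,is_elastic,rmass_new)
    implicit none
    integer, intent(in) :: nspec,nglob,ngllx,nglly,ngllz
    integer, intent(in) :: ibool(ngllx,nglly,ngllz,nspec)
    double precision, intent(in) :: wxgll(ngllx),wygll(nglly),wzgll(ngllz)
    real, intent(in) :: jacobianstore(ngllx,nglly,ngllz,nspec)
    real, intent(in) :: rhostore_new(ngllx,nglly,ngllz,nspec)
    logical, intent(in) :: is_elastic(nspec)
    real, intent(out) :: rmass_new(nglob)
    integer :: ispec,i,j,k,iglob

    rmass_new(:) = 0.0
    do ispec = 1,nspec
      if( .not. is_elastic(ispec) ) cycle   ! only elastic elements contribute here
      do k = 1,ngllz
        do j = 1,nglly
          do i = 1,ngllx
            iglob = ibool(i,j,k,ispec)
            ! M(iglob) = sum over elements and GLL points of w_i w_j w_k |J| rho
            rmass_new(iglob) = rmass_new(iglob) + &
              real( wxgll(i)*wygll(j)*wzgll(k) * dble(jacobianstore(i,j,k,ispec)) &
                    * dble(rhostore_new(i,j,k,ispec)) )
          enddo
        enddo
      enddo
    enddo
  end subroutine assemble_mass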
diff --git a/src/specfem3D/prepare_timerun.F90 b/src/specfem3D/prepare_timerun.F90
index 00516328f..a1cbdf9a5 100644
--- a/src/specfem3D/prepare_timerun.F90
+++ b/src/specfem3D/prepare_timerun.F90
@@ -219,6 +219,14 @@ subroutine prepare_timerun_mass_matrices()

   ! the mass matrix needs to be assembled with MPI here once and for all
   if(ACOUSTIC_SIMULATION) then
+
+    ! adds contributions
+    if( ABSORBING_CONDITIONS ) then
+      rmass_acoustic(:) = rmass_acoustic(:) + rmassz_acoustic(:)
+      ! not needed anymore
+      deallocate(rmassz_acoustic)
+    endif
+
     call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_acoustic, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
                         my_neighbours_ext_mesh)
@@ -231,21 +239,52 @@ subroutine prepare_timerun_mass_matrices()
   endif

   if(ELASTIC_SIMULATION) then
-    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass, &
+
+    ! switches to three-component mass matrix
+    if( ABSORBING_CONDITIONS ) then
+      ! adds boundary contributions
+      rmassx(:) = rmass(:) + rmassx(:)
+      rmassy(:) = rmass(:) + rmassy(:)
+      rmassz(:) = rmass(:) + rmassz(:)
+    else
+      rmassx(:) = rmass(:)
+      rmassy(:) = rmass(:)
+      rmassz(:) = rmass(:)
+    endif
+    ! not needed anymore
+    deallocate(rmass)
+
+    ! assemble mass matrix
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmassx, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmassy, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmassz, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         my_neighbours_ext_mesh)

     ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
-    where(rmass <= 0._CUSTOM_REAL) rmass = 1._CUSTOM_REAL
-    rmass(:) = 1._CUSTOM_REAL / rmass(:)
-
+    where(rmassx <= 0._CUSTOM_REAL) rmassx = 1._CUSTOM_REAL
+    where(rmassy <= 0._CUSTOM_REAL) rmassy = 1._CUSTOM_REAL
+    where(rmassz <= 0._CUSTOM_REAL) rmassz = 1._CUSTOM_REAL
+    rmassx(:) = 1._CUSTOM_REAL / rmassx(:)
+    rmassy(:) = 1._CUSTOM_REAL / rmassy(:)
+    rmassz(:) = 1._CUSTOM_REAL / rmassz(:)
+
+    ! ocean load
     if(OCEANS ) then
-      if( minval(rmass_ocean_load(:)) <= 0._CUSTOM_REAL) &
-        call exit_MPI(myrank,'negative ocean load mass matrix term')
-      rmass_ocean_load(:) = 1. / rmass_ocean_load(:)
+      call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_ocean_load, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+      where(rmass_ocean_load <= 0._CUSTOM_REAL) rmass_ocean_load = 1._CUSTOM_REAL
+      rmass_ocean_load(:) = 1._CUSTOM_REAL / rmass_ocean_load(:)
     endif
-
   endif

   if(POROELASTIC_SIMULATION) then
@@ -1077,7 +1116,7 @@ subroutine prepare_timerun_GPU()
                                   ispec_is_acoustic, &
                                   NOISE_TOMOGRAPHY,num_free_surface_faces, &
                                   free_surface_ispec,free_surface_ijk, &
-                                  ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
+                                  b_reclen_potential,b_absorb_potential, &
                                   ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
                                   coupling_ac_el_ispec,coupling_ac_el_ijk, &
                                   coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
@@ -1086,19 +1125,19 @@ subroutine prepare_timerun_GPU()

     if( SIMULATION_TYPE == 3 ) &
       call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
-                                  SIMULATION_TYPE, &
-                                  APPROXIMATE_HESS_KL)
+                                  APPROXIMATE_HESS_KL)

   endif

   ! prepares fields on GPU for elastic simulations
   if( ELASTIC_SIMULATION ) then
     call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
-                                  rmass,rho_vp,rho_vs, &
+                                  rmassx,rmassy,rmassz, &
+                                  rho_vp,rho_vs, &
                                   num_phase_ispec_elastic,phase_ispec_inner_elastic, &
                                   ispec_is_elastic, &
-                                  ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
-                                  SIMULATION_TYPE,SAVE_FORWARD, &
+                                  b_absorb_field,b_reclen_field, &
+                                  SAVE_FORWARD, &
                                   COMPUTE_AND_STORE_STRAIN, &
                                   epsilondev_xx,epsilondev_yy,epsilondev_xy, &
                                   epsilondev_xz,epsilondev_yz, &
@@ -1122,7 +1161,6 @@ subroutine prepare_timerun_GPU()

     if( SIMULATION_TYPE == 3 ) &
       call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
-                                  SIMULATION_TYPE, &
                                   COMPUTE_AND_STORE_STRAIN, &
                                   epsilon_trace_over_3, &
                                   b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
@@ -1155,7 +1193,7 @@ subroutine prepare_timerun_GPU()
                                   free_surface_ispec, &
                                   free_surface_ijk, &
                                   num_free_surface_faces, &
-                                  SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
+                                  NOISE_TOMOGRAPHY, &
                                   NSTEP,noise_sourcearray, &
                                   normal_x_noise,normal_y_noise,normal_z_noise, &
                                   mask_noise,free_surface_jacobian2Dw)
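The guard-then-invert pattern above (set non-positive entries to 1, then take the reciprocal) is applied identically to rmassx, rmassy, rmassz and rmass_ocean_load after MPI assembly. A minimal sketch of that step, with a hypothetical helper name and plain REAL kind, follows.

  ! Hedged sketch of the guard-and-invert step used for each assembled diagonal mass vector.
  subroutine invert_mass(n,rmass)
    implicit none
    integer, intent(in) :: n
    real, intent(inout) :: rmass(n)

    ! points that received no contribution would otherwise divide by zero,
    ! so give them a fictitious value of 1 before inverting
    where( rmass <= 0.0 ) rmass = 1.0
    rmass(:) = 1.0 / rmass(:)
  end subroutine invert_mass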
diff --git a/src/specfem3D/read_mesh_databases.f90 b/src/specfem3D/read_mesh_databases.f90
index d79d21f70..24b590601 100644
--- a/src/specfem3D/read_mesh_databases.f90
+++ b/src/specfem3D/read_mesh_databases.f90
@@ -95,6 +95,12 @@ subroutine read_mesh_databases()
     ! mass matrix, density
     allocate(rmass_acoustic(NGLOB_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array rmass_acoustic'
+
+    ! initializes mass matrix contribution
+    allocate(rmassz_acoustic(NGLOB_AB),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array rmassz_acoustic'
+    rmassz_acoustic(:) = 0._CUSTOM_REAL
+
     allocate(rhostore(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array rhostore'
@@ -120,8 +126,20 @@ subroutine read_mesh_databases()
       allocate(accel_adj_coupling(NDIM,NGLOB_AB),stat=ier)
       if( ier /= 0 ) stop 'error allocating array accel_adj_coupling'
     endif
+
+    ! allocates mass matrix
     allocate(rmass(NGLOB_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array rmass'
+    ! initializes mass matrix contributions
+    allocate(rmassx(NGLOB_AB), &
+             rmassy(NGLOB_AB), &
+             rmassz(NGLOB_AB), &
+             stat=ier)
+    if( ier /= 0 ) stop 'error allocating array rmassx,rmassy,rmassz'
+    rmassx(:) = 0._CUSTOM_REAL
+    rmassy(:) = 0._CUSTOM_REAL
+    rmassz(:) = 0._CUSTOM_REAL
+
     allocate(rho_vp(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array rho_vp'
     allocate(rho_vs(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
@@ -312,6 +330,15 @@ subroutine read_mesh_databases()
     read(27) abs_boundary_ijk
     read(27) abs_boundary_jacobian2Dw
     read(27) abs_boundary_normal
+    ! reads mass matrix contributions on boundaries
+    if( ELASTIC_SIMULATION ) then
+      read(27) rmassx
+      read(27) rmassy
+      read(27) rmassz
+    endif
+    if( ACOUSTIC_SIMULATION ) then
+      read(27) rmassz_acoustic
+    endif
   endif

   ! free surface
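The new rmassx/rmassy/rmassz and rmassz_acoustic records are read immediately after the absorbing-boundary arrays, so the reader and the database writer must consume records in exactly the same order. A tiny standalone demonstration of that positional behaviour of Fortran unformatted sequential records follows; it is not SPECFEM3D code, and the file name and variables are made up for illustration.

  ! Minimal demo: records come back in the order they were written.
  program record_order_demo
    implicit none
    integer, parameter :: n = 4
    real :: rmassx(n), rmassy(n), rx(n), ry(n)
    rmassx = 1.0
    rmassy = 2.0
    open(27,file='demo.bin',form='unformatted',status='replace')
    write(27) rmassx          ! first record
    write(27) rmassy          ! second record
    close(27)
    open(27,file='demo.bin',form='unformatted',status='old')
    read(27) rx               ! must be read first
    read(27) ry               ! then second
    close(27)
    print *, rx(1), ry(1)     ! prints 1.0 and 2.0
  end program record_order_demo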
diff --git a/src/specfem3D/save_external_bin_m_up.f90 b/src/specfem3D/save_external_bin_m_up.f90
index 71e2e85f0..6730a2cfa 100644
--- a/src/specfem3D/save_external_bin_m_up.f90
+++ b/src/specfem3D/save_external_bin_m_up.f90
@@ -45,9 +45,6 @@ subroutine save_external_bin_m_up(nspec,nglob, &
                         free_surface_normal,free_surface_jacobian2Dw, &
                         free_surface_ijk,free_surface_ispec, &
                         num_free_surface_faces, &
-                        coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-                        coupling_ac_el_ijk,coupling_ac_el_ispec, &
-                        num_coupling_ac_el_faces, &
                         num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
                         max_nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         prname,SAVE_MESH_FILES, &
@@ -58,6 +55,24 @@ subroutine save_external_bin_m_up(nspec,nglob, &
                         c55store,c56store,c66store, &
                         ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)

+  use specfem_par,only: &
+    ispec_is_inner
+
+  use specfem_par_elastic,only: &
+    rmassx,rmassy,rmassz, &
+    nspec_inner_elastic,nspec_outer_elastic,num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+    num_colors_outer_elastic,num_colors_inner_elastic,num_elem_colors_elastic
+
+  use specfem_par_acoustic,only: &
+    rmassz_acoustic,num_coupling_ac_po_faces, &
+    num_coupling_ac_el_faces,coupling_ac_el_ijk,coupling_ac_el_ispec, &
+    nspec_inner_acoustic,nspec_outer_acoustic,num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+    num_colors_outer_acoustic,num_colors_inner_acoustic,num_elem_colors_acoustic
+
+  use specfem_par_poroelastic,only: &
+    num_coupling_el_po_faces, &
+    nspec_inner_poroelastic,nspec_outer_poroelastic,num_phase_ispec_poroelastic,phase_ispec_inner_poroelastic
+
   implicit none

   include "constants.h"
@@ -76,6 +91,7 @@ subroutine save_external_bin_m_up(nspec,nglob, &
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
   real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
                         rmass_solid_poroelastic,rmass_fluid_poroelastic
+  ! ocean load
   logical :: OCEANS
   integer :: NGLOB_OCEAN
@@ -99,13 +115,6 @@ subroutine save_external_bin_m_up(nspec,nglob, &
   integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
   integer :: free_surface_ispec(num_free_surface_faces)

-! acoustic-elastic coupling surface
-  integer :: num_coupling_ac_el_faces
-  real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
-  real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
-  integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
-  integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
-
! MPI interfaces
   integer :: num_interfaces_ext_mesh
   integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
@@ -192,7 +201,6 @@ subroutine save_external_bin_m_up(nspec,nglob, &
     !pll Stacey
     write(IOUT) rho_vp
     write(IOUT) rho_vs
-
   endif

! poroelastic
@@ -200,39 +208,65 @@ subroutine save_external_bin_m_up(nspec,nglob, &
   if( POROELASTIC_SIMULATION ) then
     write(IOUT) rmass_solid_poroelastic
     write(IOUT) rmass_fluid_poroelastic
+    stop 'model update with poroelastic domains not supported yet'
   endif

! absorbing boundary surface
   write(IOUT) num_abs_boundary_faces
-  write(IOUT) abs_boundary_ispec
-  write(IOUT) abs_boundary_ijk
-  write(IOUT) abs_boundary_jacobian2Dw
-  write(IOUT) abs_boundary_normal
-
+  if(num_abs_boundary_faces > 0 ) then
+    write(IOUT) abs_boundary_ispec
+    write(IOUT) abs_boundary_ijk
+    write(IOUT) abs_boundary_jacobian2Dw
+    write(IOUT) abs_boundary_normal
+    ! store mass matrix contributions
+    if(ELASTIC_SIMULATION) then
+      write(IOUT) rmassx
+      write(IOUT) rmassy
+      write(IOUT) rmassz
+    endif
+    if(ACOUSTIC_SIMULATION) then
+      write(IOUT) rmassz_acoustic
+    endif
+  endif
+
! free surface
   write(IOUT) num_free_surface_faces
-  write(IOUT) free_surface_ispec
-  write(IOUT) free_surface_ijk
-  write(IOUT) free_surface_jacobian2Dw
-  write(IOUT) free_surface_normal
+  if( num_free_surface_faces > 0 ) then
+    write(IOUT) free_surface_ispec
+    write(IOUT) free_surface_ijk
+    write(IOUT) free_surface_jacobian2Dw
+    write(IOUT) free_surface_normal
+  endif

! acoustic-elastic coupling surface
   write(IOUT) num_coupling_ac_el_faces
-  write(IOUT) coupling_ac_el_ispec
-  write(IOUT) coupling_ac_el_ijk
-  write(IOUT) coupling_ac_el_jacobian2Dw
-  write(IOUT) coupling_ac_el_normal
+  if( num_coupling_ac_el_faces > 0 ) then
+    stop 'coupling ac_el not updated yet'
+  endif
+
+! acoustic-poroelastic coupling surface
+  write(IOUT) num_coupling_ac_po_faces
+  if( num_coupling_ac_po_faces > 0 ) then
+    stop 'coupling ac_po not updated yet'
+  endif
+
+! elastic-poroelastic coupling surface
+  write(IOUT) num_coupling_el_po_faces
+  if( num_coupling_el_po_faces > 0 ) then
+    stop 'coupling el_po not updated yet'
+  endif

!MPI interfaces
   write(IOUT) num_interfaces_ext_mesh
-  write(IOUT) max_nibool_interfaces_ext_mesh !magnoni
-  write(IOUT) my_neighbours_ext_mesh
-  write(IOUT) nibool_interfaces_ext_mesh !magnoni
-  write(IOUT) ibool_interfaces_ext_mesh !magnoni
-
+  if( num_interfaces_ext_mesh > 0 ) then
+    write(IOUT) max_nibool_interfaces_ext_mesh
+    write(IOUT) my_neighbours_ext_mesh
+    write(IOUT) nibool_interfaces_ext_mesh
+    write(IOUT) ibool_interfaces_ext_mesh !magnoni
+  endif

! anisotropy
-  if( ANISOTROPY ) then
+  if( ELASTIC_SIMULATION .and. ANISOTROPY ) then
     write(IOUT) c11store
     write(IOUT) c12store
     write(IOUT) c13store
@@ -256,6 +290,39 @@ subroutine save_external_bin_m_up(nspec,nglob, &
     write(IOUT) c66store
   endif

+! inner/outer elements
+  write(IOUT) ispec_is_inner
+
+  if( ACOUSTIC_SIMULATION ) then
+    write(IOUT) nspec_inner_acoustic,nspec_outer_acoustic
+    write(IOUT) num_phase_ispec_acoustic
+    if(num_phase_ispec_acoustic > 0 ) write(IOUT) phase_ispec_inner_acoustic
+  endif
+
+  if( ELASTIC_SIMULATION ) then
+    write(IOUT) nspec_inner_elastic,nspec_outer_elastic
+    write(IOUT) num_phase_ispec_elastic
+    if(num_phase_ispec_elastic > 0 ) write(IOUT) phase_ispec_inner_elastic
+  endif
+
+  if( POROELASTIC_SIMULATION ) then
+    write(IOUT) nspec_inner_poroelastic,nspec_outer_poroelastic
+    write(IOUT) num_phase_ispec_poroelastic
+    if(num_phase_ispec_poroelastic > 0 ) write(IOUT) phase_ispec_inner_poroelastic
+  endif
+
+  ! mesh coloring
+  if( USE_MESH_COLORING_GPU ) then
+    if( ACOUSTIC_SIMULATION ) then
+      write(IOUT) num_colors_outer_acoustic,num_colors_inner_acoustic
+      write(IOUT) num_elem_colors_acoustic
+    endif
+    if( ELASTIC_SIMULATION ) then
+      write(IOUT) num_colors_outer_elastic,num_colors_inner_elastic
+      write(IOUT) num_elem_colors_elastic
+    endif
+  endif
+
   close(IOUT)

diff --git a/src/specfem3D/specfem3D_par.f90 b/src/specfem3D/specfem3D_par.f90
index 02b07cae2..113961028 100644
--- a/src/specfem3D/specfem3D_par.f90
+++ b/src/specfem3D/specfem3D_par.f90
@@ -297,6 +297,7 @@ module specfem_par_elastic
   real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass

   ! Stacey
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassx,rmassy,rmassz
   real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs

   ! anisotropic
@@ -381,6 +382,7 @@ module specfem_par_acoustic

   ! mass matrix
   real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_acoustic
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassz_acoustic

   ! acoustic-elastic coupling surface
   real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: coupling_ac_el_normal
diff --git a/src/specfem3D/write_output_SU.f90 b/src/specfem3D/write_output_SU.f90
index 932f06ca5..f9fa24edf 100644
--- a/src/specfem3D/write_output_SU.f90
+++ b/src/specfem3D/write_output_SU.f90
@@ -38,17 +38,12 @@ subroutine write_output_SU()
   character(len=256) procname,final_LOCAL_PATH
   integer :: irec_local,irec

-  ! headers
-  integer,parameter :: nheader=240 ! 240 bytes
-!! DK DK integer(kind=2) :: i2head(nheader/2) ! 2-byte-integer
-  integer(kind=4) :: i4head(nheader/4) ! 4-byte-integer
-!! DK DK equivalence (i2head,i4head) ! share the same 240-byte-memory
-
   double precision, allocatable, dimension(:) :: x_found,y_found,z_found
   double precision :: x_found_source,y_found_source,z_found_source

   real(kind=4),dimension(:),allocatable :: rtmpseis
-  integer :: ier
+  real :: dx
+  integer :: i,ier

   allocate(x_found(nrec),y_found(nrec),z_found(nrec),stat=ier)
   if( ier /= 0 ) stop 'error allocating array x_found y_found z_found'
@@ -83,87 +78,141 @@ subroutine write_output_SU()

   ! write seismograms (dx)
   open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dx_SU' ,&
-       form='unformatted', access='direct', recl=240+4*NSTEP, iostat=ier)
+       status='unknown', access='direct', recl=4, iostat=ier)
+  if( ier /= 0 ) stop 'error opening ***_dx_SU file'

+  ! receiver interval
+  if( nrec > 1) then
+    dx = SNGL(x_found(2)-x_found(1))
+  else
+    dx = 0.0
+  endif
+
   do irec_local = 1,nrec_local
-    irec = number_receiver_global(irec_local)
-    i4head(1) =irec
-    i4head(11) =z_found(irec)
-    i4head(13) =z_found_source
-    i4head(19) =x_found_source !utm_x_source(1)
-    i4head(20) =y_found_source !utm_y_source(1)
-    i4head(21) =x_found(irec) !stutm_x(irec)
-    i4head(22) =y_found(irec) !stutm_y(irec)
-!! DK DK i2head(58) =NSTEP
-!! DK DK i2head(59) =DT*1.0d6
-!! DK DK almost all modern processors are little-endian, thus we do not need an equivalence statement any more
-!! DK DK (some compilers give a warning message for the equivalent statement that we used to have above)
-    i4head(58/2) = NSTEP + 65536*int(DT*1.0d6)
-
-    ! convert to real 4-byte
-    rtmpseis(1:NSTEP) = seismograms_d(1,irec_local,1:NSTEP)
-
-    ! write record
-    write(IOUT_SU,rec=irec_local) i4head, rtmpseis
+    irec = number_receiver_global(irec_local)
+
+    ! write section header
+    call write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+                         x_found(irec),y_found(irec),z_found(irec),x_found_source,y_found_source,z_found_source)
+
+    ! convert trace to real 4-byte
+    if( CUSTOM_REAL == SIZE_REAL) then
+      rtmpseis(1:NSTEP) = seismograms_d(1,irec_local,1:NSTEP)
+    else
+      rtmpseis(1:NSTEP) = sngl(seismograms_d(1,irec_local,1:NSTEP))
+    endif
+    do i=1,NSTEP
+      write(IOUT_SU,rec=irec_local*60+(irec_local-1)*NSTEP+i) rtmpseis(i)
+    enddo
+
   enddo
   close(IOUT_SU)

   ! write seismograms (dy)
   open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dy_SU' ,&
-       form='unformatted', access='direct', recl=240+4*NSTEP, iostat=ier)
+       status='unknown', access='direct', recl=4, iostat=ier)
+  if( ier /= 0 ) stop 'error opening ***_dy_SU file'

   do irec_local = 1,nrec_local
-    irec = number_receiver_global(irec_local)
-    i4head(1) =irec
-    i4head(11) =z_found(irec)
-    i4head(13) =z_found_source
-    i4head(19) =x_found_source !utm_x_source(1)
-    i4head(20) =y_found_source !utm_y_source(1)
-    i4head(21) =x_found(irec) !stutm_x(irec)
-    i4head(22) =y_found(irec) !stutm_y(irec)
-!! DK DK i2head(58) =NSTEP
-!! DK DK i2head(59) =DT*1.0d6
-!! DK DK almost all modern processors are little-endian, thus we do not need an equivalence statement any more
-!! DK DK (some compilers give a warning message for the equivalent statement that we used to have above)
-    i4head(58/2) = NSTEP + 65536*int(DT*1.0d6)
-
-    ! convert to real 4-byte
-    rtmpseis(1:NSTEP) = seismograms_d(2,irec_local,1:NSTEP)
-
-    ! write record
-    write(IOUT_SU,rec=irec_local) i4head, rtmpseis
+    irec = number_receiver_global(irec_local)
+
+    ! write section header
+    call write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+                         x_found(irec),y_found(irec),z_found(irec),x_found_source,y_found_source,z_found_source)
+
+    ! convert trace to real 4-byte
+    if( CUSTOM_REAL == SIZE_REAL) then
+      rtmpseis(1:NSTEP) = seismograms_d(2,irec_local,1:NSTEP)
+    else
+      rtmpseis(1:NSTEP) = sngl(seismograms_d(2,irec_local,1:NSTEP))
+    endif
+    do i=1,NSTEP
+      write(IOUT_SU,rec=irec_local*60+(irec_local-1)*NSTEP+i) rtmpseis(i)
+    enddo
   enddo
   close(IOUT_SU)

   ! write seismograms (dz)
   open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dz_SU' ,&
-       form='unformatted', access='direct', recl=240+4*NSTEP, iostat=ier)
+       status='unknown', access='direct', recl=4, iostat=ier)
+  if( ier /= 0 ) stop 'error opening ***_dz_SU file'

   do irec_local = 1,nrec_local
-    irec = number_receiver_global(irec_local)
-    i4head(1) =irec
-    i4head(11) =z_found(irec)
-    i4head(13) =z_found_source
-    i4head(19) =x_found_source !utm_x_source(1)
-    i4head(20) =y_found_source !utm_y_source(1)
-    i4head(21) =x_found(irec) !stutm_x(irec)
-    i4head(22) =y_found(irec) !stutm_y(irec)
-!! DK DK i2head(58) =NSTEP
-!! DK DK i2head(59) =DT*1.0d6
-!! DK DK almost all modern processors are little-endian, thus we do not need an equivalence statement any more
-!! DK DK (some compilers give a warning message for the equivalent statement that we used to have above)
-    i4head(58/2) = NSTEP + 65536*int(DT*1.0d6)
-
-    ! convert to real 4-byte
-    rtmpseis(1:NSTEP) = seismograms_d(3,irec_local,1:NSTEP)
-
-    ! write record
-    write(IOUT_SU,rec=irec_local) i4head, rtmpseis
+    irec = number_receiver_global(irec_local)
+
+    ! write section header
+    call write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+                         x_found(irec),y_found(irec),z_found(irec),x_found_source,y_found_source,z_found_source)
+
+    ! convert trace to real 4-byte
+    if( CUSTOM_REAL == SIZE_REAL) then
+      rtmpseis(1:NSTEP) = seismograms_d(3,irec_local,1:NSTEP)
+    else
+      rtmpseis(1:NSTEP) = sngl(seismograms_d(3,irec_local,1:NSTEP))
+    endif
+    do i=1,NSTEP
+      write(IOUT_SU,rec=irec_local*60+(irec_local-1)*NSTEP+i) rtmpseis(i)
+    enddo
+
   enddo
   close(IOUT_SU)

 end subroutine write_output_SU

+!
+!------------------------------------------------------------------------------------------------------
+!
+
+  subroutine write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+                             x_found,y_found,z_found,x_found_source,y_found_source,z_found_source)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: irec_local,irec,nrec
+  integer :: NSTEP
+  double precision :: x_found,y_found,z_found
+  double precision :: x_found_source,y_found_source,z_found_source
+  double precision :: DT
+  real :: dx
+
+  ! local parameters
+  integer(kind=2) :: header2(2)
+
+  ! write SU headers (refer to Seismic Unix for details)
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+1)  irec ! receiver ID
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+10) NINT(x_found-x_found_source) ! offset
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+11) NINT(z_found)
+
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+13) NINT(z_found_source) ! source location
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+19) NINT(x_found_source) ! source location
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+20) NINT(y_found_source) ! source location
+
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+21) NINT(x_found) ! receiver location xr
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+22) NINT(y_found) ! receiver location zr
+
+  if (nrec>1) write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+48) dx ! receiver interval
+
+  ! time steps
+  header2(1)=0 ! dummy
+  header2(2)=NSTEP
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+29) header2
+
+  ! time increment
+  if( NINT(DT*1.0d6) < 65536 ) then
+    header2(1)=NINT(DT*1.0d6) ! deltat (unit: 10^{-6} second)
+  else if( NINT(DT*1.0d3) < 65536 ) then
+    header2(1)=NINT(DT*1.0d3) ! deltat (unit: 10^{-3} second)
+  else
+    header2(1)=NINT(DT) ! deltat (unit: 10^{0} second)
+  endif
+  header2(2)=0 ! dummy
+  write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+30) header2
+
+  end subroutine write_SU_header
+
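The direct-access addressing used above assumes each trace occupies a 240-byte SU header (60 four-byte words) followed by NSTEP four-byte samples, with recl=4. A short sketch of that record arithmetic, using a hypothetical helper module rather than anything in the patch, may help when reading the offsets back:

  ! Hedged sketch of the SU record layout implied by the writes above (illustrative only).
  module su_layout
    implicit none
  contains
    ! record number of header word iword (1..60) of local trace irec_local
    integer function su_header_rec(irec_local,nstep,iword)
      integer, intent(in) :: irec_local,nstep,iword
      su_header_rec = (irec_local-1)*60 + (irec_local-1)*nstep + iword
    end function su_header_rec
    ! record number of sample it (1..nstep) of local trace irec_local
    integer function su_sample_rec(irec_local,nstep,it)
      integer, intent(in) :: irec_local,nstep,it
      su_sample_rec = irec_local*60 + (irec_local-1)*nstep + it
    end function su_sample_rec
  end module su_layout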
diff --git a/src/specfem3D/write_seismograms.f90 b/src/specfem3D/write_seismograms.f90
index d9767719a..5040410a9 100644
--- a/src/specfem3D/write_seismograms.f90
+++ b/src/specfem3D/write_seismograms.f90
@@ -55,11 +55,10 @@ subroutine write_seismograms()
       if( ACOUSTIC_SIMULATION ) then
         ! only copy corresponding elements to CPU host
         ! timing: Elapsed time: 5.230904e-04
-        call transfer_station_ac_from_device( &
-                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+        call transfer_station_ac_from_device(potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
                         b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
                         Mesh_pointer,number_receiver_global, &
-                        ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
+                        ispec_selected_rec,ispec_selected_source,ibool)

         ! alternative: transfers whole fields
         ! timing: Elapsed time: 4.138947e-03
@@ -71,7 +70,6 @@ subroutine write_seismograms()
       if( ELASTIC_SIMULATION ) then
         if(USE_CUDA_SEISMOGRAMS) then
           call transfer_seismograms_el_from_d(nrec_local,Mesh_pointer, &
-                                              SIMULATION_TYPE,&
                                               seismograms_d,seismograms_v,seismograms_a,&
                                               it)
         else
@@ -79,7 +77,7 @@ subroutine write_seismograms()
                                               b_displ,b_veloc,b_accel, &
                                               Mesh_pointer,number_receiver_global, &
                                               ispec_selected_rec,ispec_selected_source, &
-                                              ibool,SIMULATION_TYPE)
+                                              ibool)
         endif
         ! alternative: transfers whole fields
         ! call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
diff --git a/utils/update_headers_change_word_f90.pl b/utils/update_headers_change_word_f90.pl
index d5eb2da77..b473c2e1c 100755
--- a/utils/update_headers_change_word_f90.pl
+++ b/utils/update_headers_change_word_f90.pl
@@ -16,7 +16,7 @@
 # in the code (which is dangerous, but really easier to program...)
 #

-@objects = `ls ../src/*/*.f90 ../src/*/*.F90 ../src/*/*.c ../src/*/*.cu ../src/*/*.h.in ../src/*/*.h`;
+@objects = `ls src/*/*.f90 src/*/*.F90 src/*/*.c src/*/*.cu src/*/*.h.in src/*/*.h`;

 foreach $name (@objects) {
   chop $name;