From 68c7a7720fcee072fbb6cb93ec3d1580563274b0 Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Thu, 25 May 2023 22:29:45 +0200 Subject: [PATCH] adds LTS to solver --- setup/constants.h.in | 9 +- src/decompose_mesh/lts_helper.F90 | 34 +- src/decompose_mesh/lts_setup_elements.F90 | 38 +- .../part_decompose_mesh_hdf5.F90 | 2 +- .../write_mesh_databases_hdf5.F90 | 2 +- .../lts_generate_databases.F90 | 70 +- src/inverse_problem_for_model/rules.mk | 4 + .../specfem_interface_mod.F90 | 7 +- src/shared/check_mesh_resolution.f90 | 10 +- src/shared/read_parameter_file.F90 | 91 ++ src/shared/write_VTK_data.f90 | 96 ++ src/specfem3D/assemble_MPI_vector.f90 | 12 +- .../compute_add_sources_acoustic.f90 | 20 +- .../compute_add_sources_viscoelastic.F90 | 100 ++- src/specfem3D/compute_element_att_memory.f90 | 128 ++- ...ompute_forces_acoustic_calling_routine.f90 | 10 +- ...ute_forces_poroelastic_calling_routine.f90 | 40 +- src/specfem3D/compute_forces_viscoelastic.F90 | 67 +- ...te_forces_viscoelastic_calling_routine.F90 | 26 +- src/specfem3D/fault_solver_common.f90 | 242 ++++- src/specfem3D/fault_solver_dynamic.f90 | 843 ++++++++++++++++-- src/specfem3D/fault_solver_kinematic.f90 | 297 +++++- src/specfem3D/finalize_simulation.f90 | 15 + src/specfem3D/iterate_time.F90 | 11 + src/specfem3D/iterate_time_undoatt.F90 | 2 + src/specfem3D/locate_source.F90 | 69 +- src/specfem3D/lts_setup.F90 | 582 ++++++++---- src/specfem3D/noise_tomography.f90 | 6 +- src/specfem3D/read_mesh_databases_hdf5.F90 | 2 +- src/specfem3D/rules.mk | 9 + src/specfem3D/setup_sources_receivers.f90 | 8 +- src/specfem3D/specfem3D_par.F90 | 54 +- src/specfem3D/write_movie_output_HDF5.F90 | 5 +- tests/decompose_mesh/test_partitioning.f90 | 1 + .../decompose_mesh/test_partitioning.makefile | 6 +- tests/decompose_mesh/test_read.makefile | 6 +- tests/decompose_mesh/test_valence.makefile | 6 +- 37 files changed, 2360 insertions(+), 570 deletions(-) diff --git a/setup/constants.h.in b/setup/constants.h.in index 3fafa8aee..ab1e216cf 100644 --- a/setup/constants.h.in +++ b/setup/constants.h.in @@ -525,12 +525,17 @@ ! adds overlap elements logical,parameter :: LTS_OVERLAP_REGION = .false. -! uses only 1 single p-level (for debugging) +! uses only 1 single p-level logical,parameter :: LTS_SINGLE_P_LEVEL = .false. -! uses only 2 p-levels (for debugging) +! uses only 2 p-levels logical,parameter :: LTS_TWO_P_LEVEL = .false. ! decreases final coarsest time step for stability +! note: LTS must decrease time step for stability depending on p depth; +! to avoid this decrease, one can try to overlap the fine region by one/two elements. +! by default, we add an LTS_SAFETY_MARGIN to the suggested LTS time steps and LTS element binning; +! where the coarsest time step is calculated as a multiple of the user DT input in Par_file. +! here, we can further decrease this coarsest step for LTS simulations to add additional stability. logical,parameter :: LTS_DECREASE_DT = .false. ! time step margin to use for decrease double precision,parameter :: LTS_STABILITY_MARGIN_DT = 0.6d0 diff --git a/src/decompose_mesh/lts_helper.F90 b/src/decompose_mesh/lts_helper.F90 index 8398e0a9b..cb13c2e79 100644 --- a/src/decompose_mesh/lts_helper.F90 +++ b/src/decompose_mesh/lts_helper.F90 @@ -37,38 +37,18 @@ subroutine lts_add_overlap_elements(nglob,nspec,iglob_p_refine,ispec_p_refine) integer,dimension(nspec),intent(inout) :: ispec_p_refine ! local parameters - integer :: ispec,iglob,i,p + integer :: ispec,iglob,i,p,icycle + ! number of cycles to add overlap by one element layer + integer, parameter :: NUMBER_OF_OVERLAP_CYCLES = 1 ! user output print *,' adding overlap region' + print *,' number of overlap cycles = ', NUMBER_OF_OVERLAP_CYCLES print * ! adds overlap by one element - ! 1. overlap - do ispec = 1,NSPEC - ! sets refinement on all points - ! this enlarges the finer region by one element - p = ispec_p_refine(ispec) - do i = 1,NGNOD - iglob = elmnts(i,ispec) - if ( p > iglob_p_refine(iglob) ) iglob_p_refine(iglob) = p - enddo - enddo - ! re-sets p refinement for element due to overlap addition above - ! (adds also elements touching global points with higher refinement) - ispec_p_refine(:) = 0 - do ispec = 1,NSPEC - do i = 1,NGNOD - iglob = elmnts(i,ispec) - p = iglob_p_refine(iglob) - if ( p > ispec_p_refine(ispec)) ispec_p_refine(ispec) = p - enddo - enddo - ! checks that all elements have a valid p-value assigned - if ( minval(ispec_p_refine(:)) == 0 ) stop 'error ispec_p_refine has zero entry' - - ! 2. overlap - if ( .false. ) then + do icycle = 1,NUMBER_OF_OVERLAP_CYCLES + ! flags all shared nodes according to the highest element p-level do ispec = 1,NSPEC ! sets refinement on all points ! this enlarges the finer region by one element @@ -90,7 +70,7 @@ subroutine lts_add_overlap_elements(nglob,nspec,iglob_p_refine,ispec_p_refine) enddo ! checks that all elements have a valid p-value assigned if ( minval(ispec_p_refine(:)) == 0 ) stop 'error ispec_p_refine has zero entry' - endif + enddo end subroutine lts_add_overlap_elements diff --git a/src/decompose_mesh/lts_setup_elements.F90 b/src/decompose_mesh/lts_setup_elements.F90 index 15406916d..66c5fc8a3 100644 --- a/src/decompose_mesh/lts_setup_elements.F90 +++ b/src/decompose_mesh/lts_setup_elements.F90 @@ -50,8 +50,8 @@ subroutine lts_setup_elements() ! time steps double precision :: dtmin,dtmax - double precision :: time_step double precision :: deltat_lts + double precision :: dt_suggested real(kind=CUSTOM_REAL),dimension(:),allocatable :: ispec_time_step @@ -155,35 +155,39 @@ subroutine lts_setup_elements() distance_min = elemsize * percent_GLL(NGLLX) ! estimated time step based on CFL condition - time_step = COURANT_SUGGESTED * distance_min / max( vp,vs ) + dt_suggested = COURANT_SUGGESTED * distance_min / max( vp,vs ) + + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 -> 0.0730 + call get_timestep_limit_significant_digit_dp(dt_suggested) ! stores value - ispec_time_step(ispec) = time_step + ispec_time_step(ispec) = dt_suggested ! determines coarsest time step - if (dtmax < time_step) then - dtmax = time_step + if (dtmax < dt_suggested) then + dtmax = dt_suggested endif ! determines finest time step - if (dtmin > time_step) then - dtmin = time_step + if (dtmin > dt_suggested) then + dtmin = dt_suggested endif enddo - print *,' estimated time step min = ',sngl(dtmin),' seconds' - print *,' estimated time step max = ',sngl(dtmax),' seconds' - print *,' estimated time step ratio = ',sngl(dtmax/dtmin) + print *,' estimated time step min = ',dtmin,' seconds' + print *,' estimated time step max = ',dtmax,' seconds' + print *,' estimated time step ratio = ',dtmax/dtmin print * ! users sets DT = .. in Par_file, we use this as a threshold limit for the minimum time step size ! (useful in case CFL is underestimating size) if (dtmin > DT) then - print *,' DT time step size set by Par_file: DT = ',sngl(DT),' limits coarsest time step size' + print *,' DT time step size set by Par_file: DT = ',DT,' limits coarsest time step size' if (dtmin / DT >= 2.0) then - print *,' Please set DT to higher value : ',sngl(dtmin),' otherwise LTS will not be optimal' + print *,' Please set DT to higher value : ',dtmin,' otherwise LTS will not be optimal' endif print * endif - print *,' suggested minimum DT time step = ',sngl(dtmin) + print *,' suggested minimum DT time step = ',dtmin ! we will use now the smallest time step estimate and put elements ! into bins with increasing time step size (power of 2 of smallest time step) @@ -193,17 +197,17 @@ subroutine lts_setup_elements() do ispec = 1,nspec ! gets estimated time step based on CFL condition - time_step = ispec_time_step(ispec) + dt_suggested = ispec_time_step(ispec) ! uses a 5% safety margin for binning - time_step = ( 1.d0 - LTS_SAFETY_MARGIN ) * time_step + dt_suggested = ( 1.d0 - LTS_SAFETY_MARGIN ) * dt_suggested ! local time step level compared to global time step size - p = floor( time_step / dtmin ) + p = floor( dt_suggested / dtmin ) if (p < 1) p = 1 ! debug - !print *,'ispec',ispec,'time step=',time_step,dtmin,' p refinement = ',p + !print *,'ispec',ispec,'time step=',dt_suggested,dtmin,' p refinement = ',p ! debug ! uniform distribution for testing diff --git a/src/decompose_mesh/part_decompose_mesh_hdf5.F90 b/src/decompose_mesh/part_decompose_mesh_hdf5.F90 index 908e16375..f87a3d215 100644 --- a/src/decompose_mesh/part_decompose_mesh_hdf5.F90 +++ b/src/decompose_mesh/part_decompose_mesh_hdf5.F90 @@ -1042,7 +1042,7 @@ subroutine write_moho_surface_database_h5(iproc, nspec, & endif enddo - !#TODO: check if this works... creates a single group per process + !#TODO: HDF5 moho check if this works... creates a single group per process ! create group name for the database of the current proc write(gname_proc,"('proc',i6.6,'_')") iproc diff --git a/src/decompose_mesh/write_mesh_databases_hdf5.F90 b/src/decompose_mesh/write_mesh_databases_hdf5.F90 index 3a63ff6ac..ee9408c38 100644 --- a/src/decompose_mesh/write_mesh_databases_hdf5.F90 +++ b/src/decompose_mesh/write_mesh_databases_hdf5.F90 @@ -240,7 +240,7 @@ subroutine write_mesh_databases_hdf5() if (COUPLE_WITH_INJECTION_TECHNIQUE .or. MESH_A_CHUNK_OF_THE_EARTH) close(124) - ! #TODO: hdf5 support for fault simulation not implemented yet + ! #TODO: HDF5 support for fault simulation not implemented yet ! writes fault database if (ANY_FAULT) then do ipart = 0, nparts-1 diff --git a/src/generate_databases/lts_generate_databases.F90 b/src/generate_databases/lts_generate_databases.F90 index c15467638..753b310c1 100644 --- a/src/generate_databases/lts_generate_databases.F90 +++ b/src/generate_databases/lts_generate_databases.F90 @@ -98,7 +98,7 @@ subroutine lts_generate_databases() ! time steps double precision :: dtmin,dtmax double precision :: dtmin_glob,dtmax_glob - double precision :: time_step + double precision :: dt_suggested real(kind=CUSTOM_REAL),dimension(:),allocatable :: ispec_time_step @@ -215,16 +215,20 @@ subroutine lts_generate_databases() ! NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore) ! estimated time step based on CFL condition - time_step = COURANT_SUGGESTED * distance_min / max( vpmax,vsmax ) + dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vsmax ) + + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 -> 0.0730 + call get_timestep_limit_significant_digit_dp(dt_suggested) ! stores value - ispec_time_step(ispec) = time_step + ispec_time_step(ispec) = dt_suggested ! determines coarsest time step - if (dtmax < time_step) dtmax = time_step + if (dtmax < dt_suggested) dtmax = dt_suggested ! determines finest time step - if (dtmin > time_step) dtmin = time_step + if (dtmin > dt_suggested) dtmin = dt_suggested enddo ! gets min/max values over all processes @@ -258,7 +262,7 @@ subroutine lts_generate_databases() ! user output if (myrank == 0) then - write(IMAIN,*) ' used minimum DT time step = ',dtmin + write(IMAIN,*) ' suggested minimum DT time step = ',dtmin call flush_IMAIN() endif @@ -270,22 +274,22 @@ subroutine lts_generate_databases() do ispec = 1,NSPEC_AB ! gets estimated time step based on CFL condition - time_step = ispec_time_step(ispec) + dt_suggested = ispec_time_step(ispec) ! uses a user-defined safety margin for binning - time_step = ( 1.d0 - LTS_SAFETY_MARGIN ) * time_step + dt_suggested = ( 1.d0 - LTS_SAFETY_MARGIN ) * dt_suggested ! absorbing conditions need slightly smaller time steps ! if (ABSORBING_CONDITIONS) then ! ! uses a 60% safety margin for binning - ! time_step = ( 1.d0 - 0.3d0 ) * time_step + ! dt_suggested = ( 1.d0 - 0.3d0 ) * dt_suggested ! endif ! local time step level compared to global time step size - p = floor( time_step / dtmin ) + p = floor( dt_suggested / dtmin ) if (p < 1) p = 1 ! debug - !print *,'ispec',ispec,'time step=',time_step,dtmin,' p refinement = ',p + !print *,'ispec',ispec,'time step=',dt_suggested,dtmin,' p refinement = ',p ! debug ! uniform distribution for testing @@ -352,14 +356,14 @@ subroutine lts_generate_databases() ! user output if (myrank == 0) then - write(IMAIN,*) ' final lts time step = ',deltat_lts + write(IMAIN,*) ' suggested global coarsest time step = ',deltat_lts write(IMAIN,*) call flush_IMAIN() endif call synchronize_all() ! re-orders p' -> p = 1 / p' - ! note: 1 == coarsest, p_max == smallest such that local time step == dt / p + ! note: 1 == coarsest, p_max == finest level such that local time step == dt / p do iglob = 1,NGLOB_AB p = iglob_p_refine(iglob) iglob_p_refine(iglob) = p_max / p @@ -1251,47 +1255,21 @@ subroutine lts_add_overlap_elements_databases(iglob_p_refine,ispec_p_refine) integer,dimension(NSPEC_AB),intent(inout) :: ispec_p_refine ! local parameters - integer :: ispec,iglob,i,j,k,p + integer :: ispec,iglob,i,j,k,p,icycle + ! number of cycles to add overlap by one element layer + integer, parameter :: NUMBER_OF_OVERLAP_CYCLES = 1 ! user output if (myrank == 0) then write(IMAIN,*) ' adding overlap region' + write(IMAIN,*) ' number of overlap cycles = ', NUMBER_OF_OVERLAP_CYCLES write(IMAIN,*) call flush_IMAIN() endif ! adds overlap by one element - ! 1. overlap - do ispec = 1,NSPEC_AB - ! sets refinement on all points - ! this enlarges the finer region by one element - p = ispec_p_refine(ispec) - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX - iglob = ibool(i,j,k,ispec) - if (p > iglob_p_refine(iglob)) iglob_p_refine(iglob) = p - enddo - enddo - enddo - enddo - ! re-sets p refinement for element due to overlap addition above - ! (adds also elements touching global points with higher refinement) - ispec_p_refine(:) = 0 - do ispec = 1,NSPEC_AB - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX - iglob = ibool(i,j,k,ispec) - p = iglob_p_refine(iglob) - if (p > ispec_p_refine(ispec)) ispec_p_refine(ispec) = p - enddo - enddo - enddo - enddo - - ! 2. overlap - if (.false.) then + do icycle = 1,NUMBER_OF_OVERLAP_CYCLES + ! flags all shared nodes according to the highest element p-level do ispec = 1,NSPEC_AB ! sets refinement on all points ! this enlarges the finer region by one element @@ -1319,7 +1297,7 @@ subroutine lts_add_overlap_elements_databases(iglob_p_refine,ispec_p_refine) enddo enddo enddo - endif + enddo end subroutine lts_add_overlap_elements_databases diff --git a/src/inverse_problem_for_model/rules.mk b/src/inverse_problem_for_model/rules.mk index a77b5f5a1..c6de580b1 100644 --- a/src/inverse_problem_for_model/rules.mk +++ b/src/inverse_problem_for_model/rules.mk @@ -163,6 +163,10 @@ inverse_problem_for_model_OBJECTS += \ $O/locate_point.spec.o \ $O/locate_receivers.spec.o \ $O/locate_source.spec.o \ + $O/lts_assemble_MPI_vector.spec.o \ + $O/lts_global_step.spec.o \ + $O/lts_iterate_time.spec.o \ + $O/lts_newmark_update.spec.o \ $O/lts_setup.spec.o \ $O/make_gravity.spec.o \ $O/noise_tomography.spec.o \ diff --git a/src/inverse_problem_for_model/specfem_interface/specfem_interface_mod.F90 b/src/inverse_problem_for_model/specfem_interface/specfem_interface_mod.F90 index 77b1fdd08..d6f61fb36 100644 --- a/src/inverse_problem_for_model/specfem_interface/specfem_interface_mod.F90 +++ b/src/inverse_problem_for_model/specfem_interface/specfem_interface_mod.F90 @@ -1570,7 +1570,12 @@ subroutine CheckModelSuitabilityForModeling(ModelIsSuitable) ! suggested timestep vel_max = max( vpmax,vsmax ) dt_suggested = COURANT_SUGGESTED * distance_min / vel_max - dt_suggested_glob = min( dt_suggested_glob, dt_suggested) + + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 -> 0.0730 + call get_timestep_limit_significant_digit(dt_suggested) + + dt_suggested_glob = min(dt_suggested_glob, dt_suggested) enddo diff --git a/src/shared/check_mesh_resolution.f90 b/src/shared/check_mesh_resolution.f90 index 7328dbd7c..52443608f 100644 --- a/src/shared/check_mesh_resolution.f90 +++ b/src/shared/check_mesh_resolution.f90 @@ -294,7 +294,13 @@ subroutine check_mesh_resolution(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore, & ! suggested timestep dt_suggested = COURANT_SUGGESTED * distance_min / vel_max - dt_suggested_glob = min( dt_suggested_glob, dt_suggested) + + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 -> 0.0730 + call get_timestep_limit_significant_digit(dt_suggested) + + ! global suggested timestep + dt_suggested_glob = min(dt_suggested_glob, dt_suggested) ! debug: for vtk output if (SAVE_MESH_FILES) tmp2(ispec) = pmax @@ -736,7 +742,7 @@ end subroutine check_mesh_resolution ! ! ! suggested timestep ! dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vp2max,vsmax ) -! dt_suggested_glob = min( dt_suggested_glob, dt_suggested) +! dt_suggested_glob = min(dt_suggested_glob, dt_suggested) ! ! ! debug: for vtk output ! if (SAVE_MESH_FILES) tmp2(ispec) = pmax diff --git a/src/shared/read_parameter_file.F90 b/src/shared/read_parameter_file.F90 index c93452ee6..3762d3526 100644 --- a/src/shared/read_parameter_file.F90 +++ b/src/shared/read_parameter_file.F90 @@ -1438,3 +1438,94 @@ subroutine broadcast_computed_parameters() end subroutine broadcast_computed_parameters +! +!------------------------------------------------------------------------------------------------- +! + + subroutine get_timestep_limit_significant_digit(time_step) + + ! cut at a significant number of digits (e.g., 2 digits) using 1/2 rounding + ! example: 0.0734815 -> 0.0730 + ! and 0.0737777 -> 0.0735 + ! + ! also works with different magnitudes of time step sizes (0.118, 0.00523, ..). always cut of after 2 significant digits: + ! example: 0.118749999 -> 0.115 + + use constants, only: CUSTOM_REAL + + implicit none + + real(kind=CUSTOM_REAL),intent(inout) :: time_step ! CUSTOM_REAL + + ! rounding + integer :: lpow,ival + double precision :: fac_pow,dt_cut + + ! initializes + dt_cut = time_step + + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 + lpow = int(2.d0 - log10(dt_cut)) + + ! example: -> factor 10**3 + fac_pow = 10.d0**(lpow) + + ! example: -> 73 + ival = int(fac_pow * dt_cut) + + ! adds .5-digit (in case): 73.0 -> 0.073 + if ( (fac_pow * dt_cut - ival) >= 0.5 ) then + dt_cut = (dble(ival) + 0.5d0) / fac_pow + else + dt_cut = dble(ival) / fac_pow + endif + + time_step = real(dt_cut,kind=CUSTOM_REAL) + + end subroutine get_timestep_limit_significant_digit + +! +!------------------------------------------------------------------------------------------------- +! + + subroutine get_timestep_limit_significant_digit_dp(time_step) + + ! cut at a significant number of digits (e.g., 2 digits) using 1/2 rounding + ! example: 0.0734815 -> 0.0730 + ! and 0.0737777 -> 0.0735 + ! + ! also works with different magnitudes of time step sizes (0.118, 0.00523, ..). always cut of after 2 significant digits: + ! example: 0.118749999 -> 0.115 + + implicit none + + double precision,intent(inout) :: time_step ! double precision + + ! rounding + integer :: lpow,ival + double precision :: fac_pow,dt_cut + + ! initializes + dt_cut = time_step + + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 + lpow = int(2.d0 - log10(dt_cut)) + + ! example: -> factor 10**3 + fac_pow = 10.d0**(lpow) + + ! example: -> 73 + ival = int(fac_pow * dt_cut) + + ! adds .5-digit (in case): 73.0 -> 0.073 + if ( (fac_pow * dt_cut - ival) >= 0.5 ) then + dt_cut = (dble(ival) + 0.5d0) / fac_pow + else + dt_cut = dble(ival) / fac_pow + endif + + time_step = dt_cut + + end subroutine get_timestep_limit_significant_digit_dp diff --git a/src/shared/write_VTK_data.f90 b/src/shared/write_VTK_data.f90 index 87d3ee8b4..8c78a6b4b 100644 --- a/src/shared/write_VTK_data.f90 +++ b/src/shared/write_VTK_data.f90 @@ -1536,9 +1536,105 @@ end subroutine write_VTK_movie_data_elemental ! ! end subroutine write_VTK_movie_data_binary ! + +! +!------------------------------------------------------------------------------------------------- ! + + subroutine write_VTK_wavefield(nspec,nglob,xstore,ystore,zstore,ibool, & + field,prname_file) + + use constants, only: CUSTOM_REAL,MAX_STRING_LEN,IOUT_VTK,NDIM,NGLLX,NGLLY,NGLLZ + + implicit none + + integer,intent(in) :: nspec,nglob + + ! global coordinates + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool + real(kind=CUSTOM_REAL), dimension(nglob),intent(in) :: xstore,ystore,zstore + + ! GLL data values array + real(kind=CUSTOM_REAL), dimension(NDIM,nglob),intent(in) :: field + + ! file name + character(len=MAX_STRING_LEN),intent(in) :: prname_file + + ! local parameters + integer :: ispec,i + + open(IOUT_VTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown') + write(IOUT_VTK,'(a)') '# vtk DataFile Version 3.1' + write(IOUT_VTK,'(a)') 'material model VTK file' + write(IOUT_VTK,'(a)') 'ASCII' + write(IOUT_VTK,'(a)') 'DATASET UNSTRUCTURED_GRID' + write(IOUT_VTK, '(a,i12,a)') 'POINTS ', nglob, ' float' + do i=1,nglob + write(IOUT_VTK,'(3e18.6)') xstore(i),ystore(i),zstore(i) + enddo + write(IOUT_VTK,*) "" + + ! note: indices for vtk start at 0 + write(IOUT_VTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9 + do ispec=1,nspec + write(IOUT_VTK,'(9i12)') 8, & + ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1, & + ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1, & + ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1, & + ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1 + enddo + write(IOUT_VTK,*) "" + + ! type: hexahedrons + write(IOUT_VTK,'(a,i12)') "CELL_TYPES ",nspec + write(IOUT_VTK,'(6i12)') (12,ispec=1,nspec) + write(IOUT_VTK,*) "" + + write(IOUT_VTK,'(a,i12)') "POINT_DATA ",nglob + + ! single wavefield components + if (.true.) then + write(IOUT_VTK,'(a)') "SCALARS field_x float" + write(IOUT_VTK,'(a)') "LOOKUP_TABLE default" + do i = 1,nglob + write(IOUT_VTK,*) field(1,i) + enddo + write(IOUT_VTK,*) "" + + write(IOUT_VTK,'(a)') "SCALARS field_y float" + write(IOUT_VTK,'(a)') "LOOKUP_TABLE default" + do i = 1,nglob + write(IOUT_VTK,*) field(2,i) + enddo + write(IOUT_VTK,*) "" + + write(IOUT_VTK,'(a)') "SCALARS field_z float" + write(IOUT_VTK,'(a)') "LOOKUP_TABLE default" + do i = 1,nglob + write(IOUT_VTK,*) field(3,i) + enddo + write(IOUT_VTK,*) "" + endif + + ! vector wavefield + if (.true.) then + write(IOUT_VTK,'(a)') "VECTORS field_vector float" + do i = 1,nglob + write(IOUT_VTK,*) field(1,i),field(2,i),field(3,i) + enddo + write(IOUT_VTK,*) "" + endif + + close(IOUT_VTK) + + end subroutine write_VTK_wavefield + + !------------------------------------------------------------------------------------------------- ! +! VTU binary formats +! +!------------------------------------------------------------------------------------------------- subroutine write_VTU_movie_data(ne,np,total_dat_xyz,total_dat_con,total_dat,mesh_file,var_name) diff --git a/src/specfem3D/assemble_MPI_vector.f90 b/src/specfem3D/assemble_MPI_vector.f90 index 6d92558ac..0281e0637 100644 --- a/src/specfem3D/assemble_MPI_vector.f90 +++ b/src/specfem3D/assemble_MPI_vector.f90 @@ -249,7 +249,7 @@ subroutine assemble_MPI_vector_async_send(NPROC,NGLOB_AB,array_val, & ! sends data - use constants, only: NDIM,CUSTOM_REAL,itag + use constants, only: NDIM,CUSTOM_REAL,itag,ASSEMBLE_MPI_OFF implicit none @@ -269,10 +269,13 @@ subroutine assemble_MPI_vector_async_send(NPROC,NGLOB_AB,array_val, & integer, dimension(num_interfaces_ext_mesh),intent(inout) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh ! local parameters - integer ipoin,iinterface + integer :: ipoin,iinterface ! here we have to assemble all the contributions between partitions using MPI + ! debug: no mpi + if (ASSEMBLE_MPI_OFF) return + ! assemble only if more than one partition if (NPROC > 1) then @@ -315,7 +318,7 @@ subroutine assemble_MPI_vector_async_recv(NPROC,NGLOB_AB,array_val, & ! waits for send/receiver to be completed and assembles contributions - use constants, only: NDIM,CUSTOM_REAL + use constants, only: NDIM,CUSTOM_REAL,ASSEMBLE_MPI_OFF use specfem_par, only: FAULT_SIMULATION implicit none @@ -341,6 +344,9 @@ subroutine assemble_MPI_vector_async_recv(NPROC,NGLOB_AB,array_val, & ! here we have to assemble all the contributions between partitions using MPI + ! debug: no mpi + if (ASSEMBLE_MPI_OFF) return + ! assemble only if more than one partition if (NPROC == 1) return diff --git a/src/specfem3D/compute_add_sources_acoustic.f90 b/src/specfem3D/compute_add_sources_acoustic.f90 index e96cdafee..c9e1de25b 100644 --- a/src/specfem3D/compute_add_sources_acoustic.f90 +++ b/src/specfem3D/compute_add_sources_acoustic.f90 @@ -27,7 +27,7 @@ ! for acoustic solver - subroutine compute_add_sources_acoustic() + subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) use constants use specfem_par, only: station_name,network_name, & @@ -35,11 +35,11 @@ subroutine compute_add_sources_acoustic() hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global,nadj_rec_local, & USE_BINARY_FOR_SEISMOGRAMS, & ibool,NSOURCES,myrank,it,ispec_selected_source,islice_selected_source, & - sourcearrays,kappastore,SIMULATION_TYPE,NSTEP, & + sourcearrays,kappastore,SIMULATION_TYPE,NSTEP,NGLOB_AB, & ispec_selected_rec, & nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC, INVERSE_FWI_FULL_PROBLEM - use specfem_par_acoustic, only: potential_dot_dot_acoustic,ispec_is_acoustic + use specfem_par_acoustic, only: ispec_is_acoustic ! coupling use shared_parameters, only: COUPLE_WITH_INJECTION_TECHNIQUE @@ -49,7 +49,9 @@ subroutine compute_add_sources_acoustic() implicit none -! local parameters + real(kind=CUSTOM_REAL), dimension(NGLOB_AB), intent(inout) :: potential_dot_dot_acoustic + + ! local parameters real(kind=CUSTOM_REAL) :: stf_used,hlagrange logical :: ibool_read_adj_arrays double precision :: stf,time_source_dble,time_t @@ -246,18 +248,18 @@ end subroutine compute_add_sources_acoustic ! for acoustic solver for back propagation wave field - subroutine compute_add_sources_acoustic_backward() + subroutine compute_add_sources_acoustic_backward(b_potential_dot_dot_acoustic) use constants use specfem_par, only: nsources_local,tshift_src,DT,t0,USE_LDDRK,istage, & ibool,NSOURCES,myrank,it,islice_selected_source,ispec_selected_source, & - sourcearrays,kappastore,SIMULATION_TYPE,NSTEP + sourcearrays,kappastore,SIMULATION_TYPE,NSTEP,NGLOB_AB ! undo_att use specfem_par, only: UNDO_ATTENUATION_AND_OR_PML,NSUBSET_ITERATIONS,NT_DUMP_ATTENUATION, & iteration_on_subset,it_of_this_subset - use specfem_par_acoustic, only: ispec_is_acoustic,b_potential_dot_dot_acoustic + use specfem_par_acoustic, only: ispec_is_acoustic ! coupling use shared_parameters, only: COUPLE_WITH_INJECTION_TECHNIQUE @@ -267,7 +269,9 @@ subroutine compute_add_sources_acoustic_backward() implicit none -! local parameters + real(kind=CUSTOM_REAL), dimension(NGLOB_AB), intent(inout) :: b_potential_dot_dot_acoustic + + ! local parameters real(kind=CUSTOM_REAL) :: stf_used double precision :: stf,time_source_dble,time_t diff --git a/src/specfem3D/compute_add_sources_viscoelastic.F90 b/src/specfem3D/compute_add_sources_viscoelastic.F90 index 10655e5f5..4311eaebf 100644 --- a/src/specfem3D/compute_add_sources_viscoelastic.F90 +++ b/src/specfem3D/compute_add_sources_viscoelastic.F90 @@ -27,21 +27,25 @@ ! for elastic solver - subroutine compute_add_sources_viscoelastic() + subroutine compute_add_sources_viscoelastic(accel) use constants - use specfem_par, only: station_name,network_name,num_free_surface_faces,free_surface_ispec, & - free_surface_ijk,free_surface_jacobian2Dw, & - nsources_local,tshift_src,dt,t0,SU_FORMAT, & - USE_LDDRK,istage, & - USE_BINARY_FOR_SEISMOGRAMS,NSPEC_AB,NGLOB_AB,ibool,NSOURCES,myrank,it,islice_selected_source, & - ispec_selected_source,sourcearrays,SIMULATION_TYPE,NSTEP,READ_ADJSRC_ASDF, & - nrec,islice_selected_rec,ispec_selected_rec, & - NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, & - hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global,nadj_rec_local, & - INVERSE_FWI_FULL_PROBLEM - use specfem_par_elastic, only: accel,ispec_is_elastic + use shared_parameters, only: DT, & + SIMULATION_TYPE,NOISE_TOMOGRAPHY,INVERSE_FWI_FULL_PROBLEM, & + USE_LDDRK,LTS_MODE, & + SU_FORMAT,USE_BINARY_FOR_SEISMOGRAMS, & + NSTEP,READ_ADJSRC_ASDF,NTSTEP_BETWEEN_READ_ADJSRC + + use specfem_par, only: station_name,network_name, & + NSPEC_AB,NGLOB_AB,ibool, & + tshift_src,t0,istage,it, & + num_free_surface_faces,free_surface_ispec,free_surface_ijk,free_surface_jacobian2Dw, & + nsources_local,NSOURCES,islice_selected_source,ispec_selected_source,sourcearrays, & + nrec,islice_selected_rec,ispec_selected_rec, & + nadj_rec_local,hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global + + use specfem_par_elastic, only: ispec_is_elastic ! noise use specfem_par_noise, only: noise_sourcearray,irec_main_noise, & @@ -53,9 +57,14 @@ subroutine compute_add_sources_viscoelastic() ! faults use specfem_par, only: FAULT_SIMULATION + ! LTS + use specfem_par_lts, only: current_lts_time, current_lts_elem + implicit none -! local parameters + real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB), intent(inout) :: accel + + ! local parameters real(kind=CUSTOM_REAL) :: stf_used,hlagrange logical :: ibool_read_adj_arrays double precision :: stf,time_source_dble,time_t @@ -74,6 +83,9 @@ subroutine compute_add_sources_viscoelastic() ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else if (LTS_MODE) then + ! current local time + time_t = current_lts_time else time_t = dble(it-1)*DT - t0 endif @@ -102,6 +114,14 @@ subroutine compute_add_sources_viscoelastic() ispec = ispec_selected_source(isource) if (ispec_is_elastic(ispec)) then + ! LTS + if (LTS_MODE) then + ! checks if source lies in this p-level LTS element + if (.not. current_lts_elem(ispec)) cycle + ! debug + !if (it==1) print *,'debug: lts add source time',time_t,it,isource + endif + ! current time time_source_dble = time_t - tshift_src(isource) @@ -271,7 +291,7 @@ end subroutine compute_add_sources_viscoelastic !===================================================================== ! for elastic solver - subroutine compute_add_sources_viscoelastic_backward() + subroutine compute_add_sources_viscoelastic_backward(b_accel) use constants use specfem_par, only: num_free_surface_faces,free_surface_ispec, & @@ -282,7 +302,7 @@ subroutine compute_add_sources_viscoelastic_backward() NSOURCES,myrank,it,islice_selected_source,ispec_selected_source, & sourcearrays,SIMULATION_TYPE,NSTEP,NOISE_TOMOGRAPHY - use specfem_par_elastic, only: b_accel,ispec_is_elastic + use specfem_par_elastic, only: ispec_is_elastic ! noise use specfem_par_noise, only: normal_x_noise,normal_y_noise,normal_z_noise, & @@ -300,6 +320,8 @@ subroutine compute_add_sources_viscoelastic_backward() implicit none + real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB), intent(inout) :: b_accel + ! local parameters real(kind=CUSTOM_REAL) stf_used @@ -447,17 +469,21 @@ end subroutine compute_add_sources_viscoelastic_backward subroutine compute_add_sources_viscoelastic_GPU() use constants + + use shared_parameters, only: DT, & + SIMULATION_TYPE,NOISE_TOMOGRAPHY,INVERSE_FWI_FULL_PROBLEM, & + USE_LDDRK,LTS_MODE,GPU_MODE,UNDO_ATTENUATION_AND_OR_PML, & + SU_FORMAT,USE_BINARY_FOR_SEISMOGRAMS, & + NSTEP,NTSTEP_BETWEEN_READ_ADJSRC + use specfem_par, only: station_name,network_name, & - num_free_surface_faces, & - nsources_local,tshift_src,dt,t0,SU_FORMAT, & - USE_LDDRK,istage,USE_BINARY_FOR_SEISMOGRAMS, & - UNDO_ATTENUATION_AND_OR_PML, & - NSOURCES,it,SIMULATION_TYPE,NSTEP, & - nrec,islice_selected_rec, & - nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, & - Mesh_pointer, & - source_adjoint,nadj_rec_local,number_adjsources_global, & - INVERSE_FWI_FULL_PROBLEM,GPU_MODE + num_free_surface_faces, & + tshift_src,t0, & + istage,it, & + NSOURCES,nsources_local,ispec_selected_source, & + nrec,islice_selected_rec, & + nadj_rec_local,source_adjoint,nadj_rec_local,number_adjsources_global, & + Mesh_pointer use specfem_par_noise, only: irec_main_noise,noise_surface_movie @@ -467,6 +493,9 @@ subroutine compute_add_sources_viscoelastic_GPU() ! faults use specfem_par, only: FAULT_SIMULATION + ! LTS + use specfem_par_lts, only: current_lts_time,current_lts_elem + implicit none ! local parameters @@ -502,10 +531,19 @@ subroutine compute_add_sources_viscoelastic_GPU() ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else if (LTS_MODE) then + ! current LTS time + time_t = current_lts_time else time_t = dble(it-1)*DT - t0 endif + ! LTS + if (LTS_MODE) then + ! checks if anything to do + if (NSOURCES == 1 .and. (.not. current_lts_elem(ispec_selected_source(1)))) return + endif + do isource = 1,NSOURCES ! current time time_source_dble = time_t - tshift_src(isource) @@ -513,10 +551,22 @@ subroutine compute_add_sources_viscoelastic_GPU() ! determines source time function value stf = get_stf_viscoelastic(time_source_dble,isource,it) + ! LTS + if (LTS_MODE) then + ! only sources in elements contribute which are in current p-level + if (.not. current_lts_elem(ispec_selected_source(isource))) stf = 0.d0 + endif + ! stores precomputed source time function factor stf_pre_compute(isource) = stf enddo + ! LTS + if (LTS_MODE) then + ! checks if anything to do + if (maxval(stf_pre_compute(:)) == 0.d0) return + endif + ! only implements SIMTYPE=1 and NOISE_TOM=0 ! write(*,*) "Fortran dt = ", dt ! change dt -> DT diff --git a/src/specfem3D/compute_element_att_memory.f90 b/src/specfem3D/compute_element_att_memory.f90 index e15627213..ca0a7720a 100644 --- a/src/specfem3D/compute_element_att_memory.f90 +++ b/src/specfem3D/compute_element_att_memory.f90 @@ -25,7 +25,7 @@ ! !===================================================================== -subroutine compute_element_att_memory_second_order_rk(ispec,alphaval,betaval,gammaval,NSPEC_AB,kappastore,mustore, & + subroutine compute_element_att_memory_second_order_rk(ispec,alphaval,betaval,gammaval,NSPEC_AB,kappastore,mustore, & NSPEC_ATTENUATION_AB, & factor_common_kappa, & R_trace,epsilondev_trace,epsilondev_trace_loc, & @@ -35,6 +35,10 @@ subroutine compute_element_att_memory_second_order_rk(ispec,alphaval,betaval,gam use constants, only: CUSTOM_REAL,N_SLS,NGLLX,NGLLY,NGLLZ + ! LTS + use shared_parameters, only: LTS_MODE + use specfem_par_lts, only: lts_type_compute_pelem + implicit none integer,intent(in) :: ispec,NSPEC_AB,NSPEC_ATTENUATION_AB,NSPEC_STRAIN_ONLY @@ -53,65 +57,109 @@ subroutine compute_element_att_memory_second_order_rk(ispec,alphaval,betaval,gam real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: epsilondev_trace_loc, epsilondev_xx_loc, & epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc -! local parameters + ! local parameters integer :: i,j,k real(kind=CUSTOM_REAL) :: Sn,Snp1 real(kind=CUSTOM_REAL), dimension(N_SLS) :: factor_loc - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX + ! LTS + if (LTS_MODE .and. .not. lts_type_compute_pelem) then + ! note: we first call the compute_forces routine for p-elements only, then in a second call for boundary-elements. + ! to update the memory variables, we will in the first compute them as by default, but in the second call + ! only add to the existing values. + + ! boundary-element call + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + ! bulk attenuation + ! term in trace + factor_loc(:) = kappastore(i,j,k,ispec) * factor_common_kappa(:,i,j,k,ispec) + ! adds only gamma term + Snp1 = epsilondev_trace_loc(i,j,k) + R_trace(:,i,j,k,ispec) = R_trace(:,i,j,k,ispec) + factor_loc(:) * gammaval(:) * Snp1 + + ! shear attenuation + ! term in xx yy zz xy xz yz + factor_loc(:) = mustore(i,j,k,ispec) * factor_common(:,i,j,k,ispec) + ! adds only gamma term + ! term in xx + Snp1 = epsilondev_xx_loc(i,j,k) + R_xx(:,i,j,k,ispec) = R_xx(:,i,j,k,ispec) + factor_loc(:) * gammaval(:) * Snp1 + ! term in yy + Snp1 = epsilondev_yy_loc(i,j,k) + R_yy(:,i,j,k,ispec) = R_yy(:,i,j,k,ispec) + factor_loc(:) * gammaval(:) * Snp1 + ! term in zz not computed since zero trace + ! term in xy + Snp1 = epsilondev_xy_loc(i,j,k) + R_xy(:,i,j,k,ispec) = R_xy(:,i,j,k,ispec) + factor_loc(:) * gammaval(:) * Snp1 + ! term in xz + Snp1 = epsilondev_xz_loc(i,j,k) + R_xz(:,i,j,k,ispec) = R_xz(:,i,j,k,ispec) + factor_loc(:) * gammaval(:) * Snp1 + ! term in yz + Snp1 = epsilondev_yz_loc(i,j,k) + R_yz(:,i,j,k,ispec) = R_yz(:,i,j,k,ispec) + factor_loc(:) * gammaval(:) * Snp1 + enddo + enddo + enddo - ! bulk attenuation - ! term in trace - factor_loc(:) = kappastore(i,j,k,ispec)* factor_common_kappa(:,i,j,k,ispec) + else + ! default + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX - Sn = epsilondev_trace(i,j,k,ispec) - Snp1 = epsilondev_trace_loc(i,j,k) - R_trace(:,i,j,k,ispec) = alphaval(:) * R_trace(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! bulk attenuation + ! term in trace + factor_loc(:) = kappastore(i,j,k,ispec)* factor_common_kappa(:,i,j,k,ispec) - ! shear attenuation - ! term in xx yy zz xy xz yz - factor_loc(:) = mustore(i,j,k,ispec) * factor_common(:,i,j,k,ispec) + Sn = epsilondev_trace(i,j,k,ispec) + Snp1 = epsilondev_trace_loc(i,j,k) + R_trace(:,i,j,k,ispec) = alphaval(:) * R_trace(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) - ! term in xx - Sn = epsilondev_xx(i,j,k,ispec) - Snp1 = epsilondev_xx_loc(i,j,k) - R_xx(:,i,j,k,ispec) = alphaval(:) * R_xx(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! shear attenuation + ! term in xx yy zz xy xz yz + factor_loc(:) = mustore(i,j,k,ispec) * factor_common(:,i,j,k,ispec) - ! term in yy - Sn = epsilondev_yy(i,j,k,ispec) - Snp1 = epsilondev_yy_loc(i,j,k) - R_yy(:,i,j,k,ispec) = alphaval(:) * R_yy(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! term in xx + Sn = epsilondev_xx(i,j,k,ispec) + Snp1 = epsilondev_xx_loc(i,j,k) + R_xx(:,i,j,k,ispec) = alphaval(:) * R_xx(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) - ! term in zz not computed since zero trace + ! term in yy + Sn = epsilondev_yy(i,j,k,ispec) + Snp1 = epsilondev_yy_loc(i,j,k) + R_yy(:,i,j,k,ispec) = alphaval(:) * R_yy(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) - ! term in xy - Sn = epsilondev_xy(i,j,k,ispec) - Snp1 = epsilondev_xy_loc(i,j,k) - R_xy(:,i,j,k,ispec) = alphaval(:) * R_xy(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! term in zz not computed since zero trace - ! term in xz - Sn = epsilondev_xz(i,j,k,ispec) - Snp1 = epsilondev_xz_loc(i,j,k) - R_xz(:,i,j,k,ispec) = alphaval(:) * R_xz(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! term in xy + Sn = epsilondev_xy(i,j,k,ispec) + Snp1 = epsilondev_xy_loc(i,j,k) + R_xy(:,i,j,k,ispec) = alphaval(:) * R_xy(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) - ! term in yz - Sn = epsilondev_yz(i,j,k,ispec) - Snp1 = epsilondev_yz_loc(i,j,k) - R_yz(:,i,j,k,ispec) = alphaval(:) * R_yz(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! term in xz + Sn = epsilondev_xz(i,j,k,ispec) + Snp1 = epsilondev_xz_loc(i,j,k) + R_xz(:,i,j,k,ispec) = alphaval(:) * R_xz(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + ! term in yz + Sn = epsilondev_yz(i,j,k,ispec) + Snp1 = epsilondev_yz_loc(i,j,k) + R_yz(:,i,j,k,ispec) = alphaval(:) * R_yz(:,i,j,k,ispec) + factor_loc(:) * (betaval(:) * Sn + gammaval(:) * Snp1) + + enddo enddo enddo - enddo + endif -end subroutine compute_element_att_memory_second_order_rk + end subroutine compute_element_att_memory_second_order_rk ! !-------------------------------------------------------------------------------------------- ! -subroutine compute_element_att_memory_lddrk(ispec,deltat,NSPEC_AB,kappastore,mustore, & + subroutine compute_element_att_memory_lddrk(ispec,deltat,NSPEC_AB,kappastore,mustore, & NSPEC_ATTENUATION_AB, & factor_common_kappa, & R_trace,epsilondev_trace_loc, & @@ -143,7 +191,7 @@ subroutine compute_element_att_memory_lddrk(ispec,deltat,NSPEC_AB,kappastore,mus real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: epsilondev_trace_loc, epsilondev_xx_loc, & epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc -! local parameters + ! local parameters integer :: i,j,k real(kind=CUSTOM_REAL) :: Snp1 real(kind=CUSTOM_REAL), dimension(N_SLS) :: factor_loc @@ -199,4 +247,4 @@ subroutine compute_element_att_memory_lddrk(ispec,deltat,NSPEC_AB,kappastore,mus enddo enddo -end subroutine compute_element_att_memory_lddrk + end subroutine compute_element_att_memory_lddrk diff --git a/src/specfem3D/compute_forces_acoustic_calling_routine.f90 b/src/specfem3D/compute_forces_acoustic_calling_routine.f90 index 7c2d65bb7..198e83479 100644 --- a/src/specfem3D/compute_forces_acoustic_calling_routine.f90 +++ b/src/specfem3D/compute_forces_acoustic_calling_routine.f90 @@ -213,7 +213,7 @@ subroutine compute_forces_acoustic_forward_calling() ! if (.not. GPU_MODE) then ! on CPU - call compute_add_sources_acoustic() + call compute_add_sources_acoustic(potential_dot_dot_acoustic) else ! on GPU call compute_add_sources_acoustic_GPU() @@ -331,9 +331,11 @@ subroutine compute_forces_acoustic_forward_calling() ! updates the chi_dot term which requires chi_dot_dot(t+delta) ! corrector if (USE_LDDRK) then + ! LDDRK call update_potential_dot_acoustic_lddrk() else - potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:) + ! Newmark time scheme + call update_potential_dot_acoustic() endif else ! on GPU @@ -514,7 +516,7 @@ subroutine compute_forces_acoustic_backward_calling() ! if (.not. GPU_MODE) then ! on CPU - call compute_add_sources_acoustic_backward() + call compute_add_sources_acoustic_backward(b_potential_dot_dot_acoustic) else ! on GPU call compute_add_sources_acoustic_backward_GPU() @@ -597,7 +599,7 @@ subroutine compute_forces_acoustic_backward_calling() stop 'LDDRK scheme for backward propagation not implemented yet' else ! adjoint simulations - b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:) + call update_potential_dot_acoustic_backward() endif else ! on GPU diff --git a/src/specfem3D/compute_forces_poroelastic_calling_routine.f90 b/src/specfem3D/compute_forces_poroelastic_calling_routine.f90 index 3f2802773..0f5ad3960 100644 --- a/src/specfem3D/compute_forces_poroelastic_calling_routine.f90 +++ b/src/specfem3D/compute_forces_poroelastic_calling_routine.f90 @@ -204,24 +204,24 @@ subroutine compute_forces_poroelastic_calling() ! multiplies with inverse of mass matrix (note: rmass has been inverted already) ! solid phase - accels_poroelastic(1,:) = accels_poroelastic(1,:)*rmass_solid_poroelastic(:) - accels_poroelastic(2,:) = accels_poroelastic(2,:)*rmass_solid_poroelastic(:) - accels_poroelastic(3,:) = accels_poroelastic(3,:)*rmass_solid_poroelastic(:) + accels_poroelastic(1,:) = accels_poroelastic(1,:) * rmass_solid_poroelastic(:) + accels_poroelastic(2,:) = accels_poroelastic(2,:) * rmass_solid_poroelastic(:) + accels_poroelastic(3,:) = accels_poroelastic(3,:) * rmass_solid_poroelastic(:) ! fluid phase - accelw_poroelastic(1,:) = accelw_poroelastic(1,:)*rmass_fluid_poroelastic(:) - accelw_poroelastic(2,:) = accelw_poroelastic(2,:)*rmass_fluid_poroelastic(:) - accelw_poroelastic(3,:) = accelw_poroelastic(3,:)*rmass_fluid_poroelastic(:) + accelw_poroelastic(1,:) = accelw_poroelastic(1,:) * rmass_fluid_poroelastic(:) + accelw_poroelastic(2,:) = accelw_poroelastic(2,:) * rmass_fluid_poroelastic(:) + accelw_poroelastic(3,:) = accelw_poroelastic(3,:) * rmass_fluid_poroelastic(:) ! adjoint simulations if (SIMULATION_TYPE == 3) then ! solid - b_accels_poroelastic(1,:) = b_accels_poroelastic(1,:)*rmass_solid_poroelastic(:) - b_accels_poroelastic(2,:) = b_accels_poroelastic(2,:)*rmass_solid_poroelastic(:) - b_accels_poroelastic(3,:) = b_accels_poroelastic(3,:)*rmass_solid_poroelastic(:) + b_accels_poroelastic(1,:) = b_accels_poroelastic(1,:) * rmass_solid_poroelastic(:) + b_accels_poroelastic(2,:) = b_accels_poroelastic(2,:) * rmass_solid_poroelastic(:) + b_accels_poroelastic(3,:) = b_accels_poroelastic(3,:) * rmass_solid_poroelastic(:) ! fluid - b_accelw_poroelastic(1,:) = b_accelw_poroelastic(1,:)*rmass_fluid_poroelastic(:) - b_accelw_poroelastic(2,:) = b_accelw_poroelastic(2,:)*rmass_fluid_poroelastic(:) - b_accelw_poroelastic(3,:) = b_accelw_poroelastic(3,:)*rmass_fluid_poroelastic(:) + b_accelw_poroelastic(1,:) = b_accelw_poroelastic(1,:) * rmass_fluid_poroelastic(:) + b_accelw_poroelastic(2,:) = b_accelw_poroelastic(2,:) * rmass_fluid_poroelastic(:) + b_accelw_poroelastic(3,:) = b_accelw_poroelastic(3,:) * rmass_fluid_poroelastic(:) endif ! updates velocities @@ -240,19 +240,7 @@ subroutine compute_forces_poroelastic_calling() ! ! corrector: ! updates the velocity term which requires a(t+delta) - - ! solid phase - velocs_poroelastic(:,:) = velocs_poroelastic(:,:) + deltatover2*accels_poroelastic(:,:) - ! fluid phase - velocw_poroelastic(:,:) = velocw_poroelastic(:,:) + deltatover2*accelw_poroelastic(:,:) - - ! adjoint simulations - if (SIMULATION_TYPE == 3) then - ! solid phase - b_velocs_poroelastic(:,:) = b_velocs_poroelastic(:,:) + b_deltatover2*b_accels_poroelastic(:,:) - ! fluid phase - b_velocw_poroelastic(:,:) = b_velocw_poroelastic(:,:) + b_deltatover2*b_accelw_poroelastic(:,:) - endif + call update_veloc_poroelastic_forward_and_backward() ! elastic coupling if (ELASTIC_SIMULATION) & @@ -347,6 +335,7 @@ subroutine compute_continuity_disp_po_el(NSPEC_AB,NGLOB_AB,ibool, & velocs_poroelastic(1,iglob) = velocs_poroelastic(1,iglob) - deltatover2*accels_poroelastic(1,iglob) velocs_poroelastic(2,iglob) = velocs_poroelastic(2,iglob) - deltatover2*accels_poroelastic(2,iglob) velocs_poroelastic(3,iglob) = velocs_poroelastic(3,iglob) - deltatover2*accels_poroelastic(3,iglob) + accels_poroelastic(1,iglob) = accels_poroelastic(1,iglob) / rmass_solid_poroelastic(iglob) accels_poroelastic(2,iglob) = accels_poroelastic(2,iglob) / rmass_solid_poroelastic(iglob) accels_poroelastic(3,iglob) = accels_poroelastic(3,iglob) / rmass_solid_poroelastic(iglob) @@ -379,6 +368,7 @@ subroutine compute_continuity_disp_po_el(NSPEC_AB,NGLOB_AB,ibool, & accelw_poroelastic(1,iglob) = 0.d0 accelw_poroelastic(2,iglob) = 0.d0 accelw_poroelastic(3,iglob) = 0.d0 + velocw_poroelastic(1,iglob) = 0.d0 velocw_poroelastic(2,iglob) = 0.d0 velocw_poroelastic(3,iglob) = 0.d0 diff --git a/src/specfem3D/compute_forces_viscoelastic.F90 b/src/specfem3D/compute_forces_viscoelastic.F90 index b3c7277ab..cacedd4d4 100644 --- a/src/specfem3D/compute_forces_viscoelastic.F90 +++ b/src/specfem3D/compute_forces_viscoelastic.F90 @@ -43,10 +43,13 @@ subroutine compute_forces_viscoelastic(iphase,deltat, & use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM,N_SLS,ONE_THIRD,FOUR_THIRDS, & m1,m2 + use shared_parameters, only: SIMULATION_TYPE, & + USE_LDDRK,LTS_MODE,SAVE_MOHO_MESH, & + ATTENUATION,ANISOTROPY + use fault_solver_common, only: Kelvin_Voigt_eta,USE_KELVIN_VOIGT_DAMPING - use specfem_par, only: SAVE_MOHO_MESH,USE_LDDRK, & - xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, & + use specfem_par, only: xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, & gammaxstore,gammaystore,gammazstore,jacobianstore, & NSPEC_AB,NGLOB_AB, & hprime_xx,hprime_xxT, & @@ -55,10 +58,9 @@ subroutine compute_forces_viscoelastic(iphase,deltat, & hprimewgll_xx,hprimewgll_xxT, & hprimewgll_yy,hprimewgll_zz, & kappastore,mustore,ibool, & - ATTENUATION, & NSPEC_ATTENUATION_AB,NSPEC_ATTENUATION_AB_LDDRK, & - ANISOTROPY,SIMULATION_TYPE, & - NSPEC_ADJOINT,is_moho_top,is_moho_bot, & + NSPEC_ADJOINT, & + is_moho_top,is_moho_bot, & irregular_element_number,xix_regular,jacobian_regular use specfem_par, only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D @@ -73,8 +75,12 @@ subroutine compute_forces_viscoelastic(iphase,deltat, & ispec2D_moho_top,ispec2D_moho_bot, & nspec_inner_elastic,nspec_outer_elastic,phase_ispec_inner_elastic + ! PML use pml_par, only: is_CPML,NSPEC_CPML + ! LTS + use specfem_par_lts, only: lts_type_compute_pelem,current_lts_elem,current_lts_boundary_elem + #ifdef FORCE_VECTORIZATION use constants, only: NGLLCUBE #endif @@ -199,7 +205,8 @@ subroutine compute_forces_viscoelastic(iphase,deltat, & !$OMP epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz,epsilondev_trace,epsilon_trace_over_3, & !$OMP USE_LDDRK,R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk,R_trace_lddrk, & !$OMP NSPEC_AB,NSPEC_ATTENUATION_AB,NSPEC_ATTENUATION_AB_LDDRK,NSPEC_STRAIN_ONLY, & -!$OMP SAVE_MOHO_MESH,dsdx_top,dsdx_bot,ispec2D_moho_top,ispec2D_moho_bot,is_moho_top,is_moho_bot & +!$OMP SAVE_MOHO_MESH,dsdx_top,dsdx_bot,ispec2D_moho_top,ispec2D_moho_bot,is_moho_top,is_moho_bot, & +!$OMP LTS_MODE,lts_type_compute_pelem,current_lts_elem,current_lts_boundary_elem & !$OMP ) & !$OMP PRIVATE( & !$OMP ispec_p,ispec,ispec_irreg,i,j,k,l,iglob,ispec2D, & @@ -241,6 +248,22 @@ subroutine compute_forces_viscoelastic(iphase,deltat, & ispec = phase_ispec_inner_elastic(ispec_p,iphase) ! selects element contribution + + ! LTS + if (LTS_MODE) then + if (lts_type_compute_pelem) then + ! p-element call + ! if not in this p-level, skip + if (.not. current_lts_elem(ispec)) cycle + ! only work on elements _strictly_ in this level (all 125 nodes in P) + if (current_lts_boundary_elem(ispec)) cycle + else + ! boundary-element call + ! only for boundary elements + if (.not. current_lts_boundary_elem(ispec)) cycle + endif + endif + ! PML elements will be computed later if (is_CPML(ispec) .and. .not. backward_simulation) cycle @@ -788,13 +811,31 @@ subroutine compute_forces_viscoelastic(iphase,deltat, & ! save deviatoric strain for Runge-Kutta scheme if (COMPUTE_AND_STORE_STRAIN) then - if (ATTENUATION .and. .not. is_CPML(ispec)) epsilondev_trace(:,:,:,ispec) = epsilondev_trace_loc(:,:,:) - ! already stored epsilon_trace_over_3(:,:,:,ispec) above for SIMULATION_TYPE == 3 - epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:) - epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:) - epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:) - epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:) - epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:) + ! LTS + if (LTS_MODE .and. .not. lts_type_compute_pelem) then + ! note: we first call this compute_forces routine for p-elements only, then in a second call for boundary-elements. + ! to store strains, we will in the first call re-set the strain as by default, but in the second call + ! only add to the existing values. + ! boundary-element call + ! adds contributions + if (ATTENUATION .and. .not. is_CPML(ispec)) & + epsilondev_trace(:,:,:,ispec) = epsilondev_trace(:,:,:,ispec) + epsilondev_trace_loc(:,:,:) + ! already stored epsilon_trace_over_3(:,:,:,ispec) above for SIMULATION_TYPE == 3 + epsilondev_xx(:,:,:,ispec) = epsilondev_xx(:,:,:,ispec) + epsilondev_xx_loc(:,:,:) + epsilondev_yy(:,:,:,ispec) = epsilondev_yy(:,:,:,ispec) + epsilondev_yy_loc(:,:,:) + epsilondev_xy(:,:,:,ispec) = epsilondev_xy(:,:,:,ispec) + epsilondev_xy_loc(:,:,:) + epsilondev_xz(:,:,:,ispec) = epsilondev_xz(:,:,:,ispec) + epsilondev_xz_loc(:,:,:) + epsilondev_yz(:,:,:,ispec) = epsilondev_yz(:,:,:,ispec) + epsilondev_yz_loc(:,:,:) + else + ! default + if (ATTENUATION .and. .not. is_CPML(ispec)) epsilondev_trace(:,:,:,ispec) = epsilondev_trace_loc(:,:,:) + ! already stored epsilon_trace_over_3(:,:,:,ispec) above for SIMULATION_TYPE == 3 + epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:) + epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:) + epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:) + epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:) + epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:) + endif endif enddo ! spectral element loop diff --git a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 index 1d7af5d1c..674bd7c83 100644 --- a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 +++ b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 @@ -261,7 +261,7 @@ subroutine compute_forces_viscoelastic_calling() ! adds source term (single-force/moment-tensor solution) if (.not. GPU_MODE) then ! on CPU - call compute_add_sources_viscoelastic() + call compute_add_sources_viscoelastic(accel) else ! on GPU call compute_add_sources_viscoelastic_GPU() @@ -362,9 +362,9 @@ subroutine compute_forces_viscoelastic_calling() if (.not. GPU_MODE) then ! on CPU do iglob = 1,NGLOB_AB - accel(1,iglob) = accel(1,iglob)*rmassx(iglob) - accel(2,iglob) = accel(2,iglob)*rmassy(iglob) - accel(3,iglob) = accel(3,iglob)*rmassz(iglob) + accel(1,iglob) = accel(1,iglob) * rmassx(iglob) + accel(2,iglob) = accel(2,iglob) * rmassy(iglob) + accel(3,iglob) = accel(3,iglob) * rmassz(iglob) enddo else ! on GPU @@ -435,7 +435,7 @@ subroutine compute_forces_viscoelastic_calling() if (USE_LDDRK) then call update_veloc_elastic_lddrk() else - veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:) + call update_veloc_elastic() endif else ! on GPU @@ -591,7 +591,7 @@ subroutine compute_forces_viscoelastic_backward_calling() ! to avoid calling the same routine twice and to check if the source element is an inner/outer element if (.not. GPU_MODE) then ! on CPU - call compute_add_sources_viscoelastic_backward() + call compute_add_sources_viscoelastic_backward(b_accel) else ! on GPU call compute_add_sources_viscoelastic_backward_GPU() @@ -651,9 +651,9 @@ subroutine compute_forces_viscoelastic_backward_calling() if (.not. GPU_MODE) then ! on CPU ! adjoint simulations - b_accel(1,:) = b_accel(1,:)*rmassx(:) - b_accel(2,:) = b_accel(2,:)*rmassy(:) - b_accel(3,:) = b_accel(3,:)*rmassz(:) + b_accel(1,:) = b_accel(1,:) * rmassx(:) + b_accel(2,:) = b_accel(2,:) * rmassy(:) + b_accel(3,:) = b_accel(3,:) * rmassz(:) else ! on GPU call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,3) ! 3 == backward @@ -695,8 +695,12 @@ subroutine compute_forces_viscoelastic_backward_calling() ! updates the velocity term which requires a(t+delta) if (.not. GPU_MODE) then ! on CPU - ! adjoint simulations - b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:) + if (USE_LDDRK) then + stop 'USE_LDDRK not implemented for backward viscoelastic simulations' + else + ! adjoint simulations + call update_veloc_elastic_backward() + endif else ! on GPU ! only call in case of ocean load, otherwise already done in kernel 3 a diff --git a/src/specfem3D/fault_solver_common.f90 b/src/specfem3D/fault_solver_common.f90 index bf9831634..6d9e9e94c 100644 --- a/src/specfem3D/fault_solver_common.f90 +++ b/src/specfem3D/fault_solver_common.f90 @@ -105,6 +105,11 @@ module fault_solver_common real(kind=CUSTOM_REAL) :: kin_dt integer :: kin_it real(kind=CUSTOM_REAL), dimension(:,:), pointer :: v_kin_t1,v_kin_t2 + + ! LTS + logical,dimension(:),pointer :: fault_use_p_level => null() + integer,dimension(:,:),pointer :: fault_ibool_local_p => null() + integer,dimension(:),pointer :: fault_num_p_local => null() end type bc_dynandkinflt_type ! fault array @@ -126,17 +131,23 @@ module fault_solver_common !--------------------------------------------------------------------- - subroutine initialize_fault (bc,IIN_BIN) + subroutine initialize_fault(bc,IIN_BIN) + + use constants, only: PARALLEL_FAULT,NDIM,NGLLSQUARE,myrank - use constants, only: PARALLEL_FAULT,NDIM,NGLLSQUARE + use shared_parameters, only: NPROC,DT,LTS_MODE - use specfem_par, only: NPROC,DT,NGLOB_AB, & + use specfem_par, only: NGLOB_AB, & num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & my_neighbors_ext_mesh use specfem_par_elastic, only: rmassx + ! LTS + use specfem_par_lts, only: iglob_p_refine,num_p_level,p_level,p_lookup + use specfem_par, only: xstore,ystore,zstore + implicit none !! DK DK use type(bc_dynandkinflt_type) instead of class(fault_type) for compatibility with some current compilers type(bc_dynandkinflt_type), intent(inout) :: bc @@ -146,12 +157,34 @@ subroutine initialize_fault (bc,IIN_BIN) real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: jacobian2Dw real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: normal real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: nxyz + real(kind=CUSTOM_REAL) :: deltat integer, dimension(:,:), allocatable :: ibool1 integer :: ij,k,e,ier + ! LTS + integer :: i,p,ilevel + integer :: p_min,p_max + integer :: p_min_glob,p_max_glob + + ! explicit initialization + bc%nspec = 0 + bc%nglob = 0 ! number of elements and global points on fault read(IIN_BIN) bc%nspec,bc%nglob + ! makes sure nglob value is also zero in case no fault elements are in this partition + if (bc%nspec <= 0) then + ! checks + if (bc%nglob > 0) print *,'Warning: rank',myrank,'has no fault elements but a non-zero nglob:',bc%nspec,bc%nglob + ! re-sets nspec/nglob + bc%nspec = 0 + bc%nglob = 0 + endif + + ! time step on fault + ! sets simulation time step size + bc%dt = real(DT,kind=CUSTOM_REAL) + if (bc%nspec > 0) then ! checks nglob if (bc%nglob < 1) then @@ -221,9 +254,6 @@ subroutine initialize_fault (bc,IIN_BIN) ! done reading faults_db.bin file, no more records in read(IIN_BIN).. from here on - ! sets simulation time step size - bc%dt = real(DT,kind=CUSTOM_REAL) - ! normal vector allocate(nxyz(NDIM,bc%nglob),stat=ier) if (ier /= 0) call exit_MPI_without_rank('error allocating array 2170') @@ -234,6 +264,7 @@ subroutine initialize_fault (bc,IIN_BIN) do e = 1,bc%nspec do ij = 1,NGLLSQUARE k = ibool1(ij,e) + if (k < 1 .or. k > bc%nglob) call exit_MPI(myrank,'Error: fault ibool1 value out of bounds') nxyz(:,k) = nxyz(:,k) + normal(:,ij,e) bc%B(k) = bc%B(k) + jacobian2Dw(ij,e) enddo @@ -250,6 +281,12 @@ subroutine initialize_fault (bc,IIN_BIN) bc%R(NDIM,NDIM,1)) endif + ! checks fault iglob values + do i = 1,bc%nglob + if (bc%ibulk1(i) < 1 .or. bc%ibulk1(i) > NGLOB_AB) call exit_MPI(myrank,'Error ibulk1 has invalid iglob value') + if (bc%ibulk2(i) < 1 .or. bc%ibulk2(i) > NGLOB_AB) call exit_MPI(myrank,'Error ibulk2 has invalid iglob value') + enddo + ! fault parallelization across multiple MPI processes if (PARALLEL_FAULT) then ! assembles fault boundary matrix B @@ -277,6 +314,74 @@ subroutine initialize_fault (bc,IIN_BIN) if (bc%nspec > 0) nxyz(:,:) = tmp_vec(:,bc%ibulk1(:)) endif + ! LTS + if (LTS_MODE) then + ! checks + if (size(p_level(:)) /= num_p_level) call exit_MPI(myrank,'Error num_p_level in initialize_fault') + + ! uses flags to determine whether there are p-level nodes on fault surfaces + allocate(bc%fault_use_p_level(num_p_level), & + bc%fault_num_p_local(num_p_level), & + bc%fault_ibool_local_p(bc%nglob,num_p_level),stat=ier) + if (ier /= 0) stop 'Error allocating arrays fault_ibool_local_p,..' + + bc%fault_use_p_level(:) = .false. + bc%fault_num_p_local(:) = 0 + bc%fault_ibool_local_p(:,:) = 0 + + p_min = 1000000000 + p_max = 0 + do i = 1,bc%nglob + ! p-value on 1. split node + p = iglob_p_refine(bc%ibulk1(i)) + + ! opposite split nodes must lie in same p-level region + ! LTS check + if (p /= iglob_p_refine(bc%ibulk2(i))) then + print *,'Error fault p-values: rank ',myrank + print *,' fault ibulk1: iglob = ',bc%ibulk1(i),'p = ',iglob_p_refine(bc%ibulk1(i)) + print *,' x/y/z: ',xstore(bc%ibulk1(i)),ystore(bc%ibulk1(i)),zstore(bc%ibulk1(i)) + print *,' fault ibulk2: iglob = ',bc%ibulk2(i),'p = ',iglob_p_refine(bc%ibulk2(i)) + print *,' x/y/z: ',xstore(bc%ibulk2(i)),ystore(bc%ibulk2(i)),zstore(bc%ibulk2(i)) + print *,'fault split nodes MUST belong to the same p-level, please check your mesh...' + call exit_MPI(myrank,'error fault p-values on split nodes') + endif + ! statistics + if (p < p_min) p_min = p + if (p > p_max) p_max = p + + ilevel = p_lookup(p) + + ! sets flag if fault has p-nodes + bc%fault_use_p_level(ilevel) = .true. + + ! counts number of p-nodes + bc%fault_num_p_local(ilevel) = bc%fault_num_p_local(ilevel) + 1 + + ! sets up local array with p-nodes + bc%fault_ibool_local_p(bc%fault_num_p_local(ilevel),ilevel) = i + enddo + + ! gather info + call min_all_all_i(p_min,p_min_glob) + call max_all_all_i(p_max,p_max_glob) + p_min = p_min_glob + p_max = p_max_glob + + ! LTS sets fault time step to minimum LTS time step for fault nodes + if (p_max_glob <= 0) then + call exit_MPI(myrank,'Error p_max_glob invalid') + endif + + ! user output + if (myrank == 0) then + write(IMAIN,*) " LTS fault p-values min/max :",p_min_glob,p_max_glob + write(IMAIN,*) " LTS minimum local time step on fault:",bc%dt / p_max_glob + call flush_IMAIN() + endif + call synchronize_all() + endif ! LTS_MODE + if (bc%nspec > 0) then ! normalizes fault normal vector call normalize_3d_vector(nxyz) @@ -284,7 +389,7 @@ subroutine initialize_fault (bc,IIN_BIN) call compute_R(bc%R,bc%nglob,nxyz) ! inverse mass matrix - !SURENDRA : WARNING! Assuming rmassx=rmassy=rmassz + ! WARNING: Assuming rmassx=rmassy=rmassz ! Needed in dA_Free = -K2*d2/M2 + K1*d1/M1 bc%invM1(:) = rmassx(bc%ibulk1(:)) bc%invM2(:) = rmassx(bc%ibulk2(:)) @@ -294,8 +399,19 @@ subroutine initialize_fault (bc,IIN_BIN) ! Z = 1/( B1/M1 + B2/M2 ) / (0.5*dt) ! T_stick = Z*Vfree traction as if the fault was stuck (no displ discontinuity) ! NOTE: same Bi on both sides, see note above - - bc%Z(:) = 1.0_CUSTOM_REAL/(0.5_CUSTOM_REAL * bc%dt * bc%B(:) *( bc%invM1(:) + bc%invM2(:) )) + if (LTS_MODE) then + do i = 1,bc%nglob + ! p-value on 1. split node (should be the same as on opposite node) + p = iglob_p_refine(bc%ibulk1(i)) + if (p <= 0) call exit_MPI(myrank,'Error p-value invalid in fault impedance setup') + ! local time step + deltat = bc%dt / p + bc%Z(i) = 1.0_CUSTOM_REAL/(0.5_CUSTOM_REAL * deltat * bc%B(i) *( bc%invM1(i) + bc%invM2(i) )) + enddo + else + deltat = bc%dt + bc%Z(:) = 1.0_CUSTOM_REAL/(0.5_CUSTOM_REAL * deltat * bc%B(:) *( bc%invM1(:) + bc%invM2(:) )) + endif ! WARNING: In non-split nodes at fault edges M is assembled across the fault. ! hence invM1+invM2=2/(M1+M2) instead of 1/M1+1/M2 @@ -408,9 +524,9 @@ function get_weighted_jump (bc,f) result(da) real(kind=CUSTOM_REAL) :: da(3,bc%nglob) ! difference between side 2 and side 1 of fault nodes. M-1 * F - da(1,:) = bc%invM2(:)*f(1,bc%ibulk2(:)) - bc%invM1(:)*f(1,bc%ibulk1(:)) - da(2,:) = bc%invM2(:)*f(2,bc%ibulk2(:)) - bc%invM1(:)*f(2,bc%ibulk1(:)) - da(3,:) = bc%invM2(:)*f(3,bc%ibulk2(:)) - bc%invM1(:)*f(3,bc%ibulk1(:)) + da(1,:) = bc%invM2(:) * f(1,bc%ibulk2(:)) - bc%invM1(:) * f(1,bc%ibulk1(:)) + da(2,:) = bc%invM2(:) * f(2,bc%ibulk2(:)) - bc%invM1(:) * f(2,bc%ibulk1(:)) + da(3,:) = bc%invM2(:) * f(3,bc%ibulk2(:)) - bc%invM1(:) * f(3,bc%ibulk1(:)) ! NOTE: In non-split nodes at fault edges M and f are assembled across the fault. ! Hence, f1=f2, invM1=invM2=1/(M1+M2) instead of invMi=1/Mi, and da=0. @@ -455,16 +571,42 @@ subroutine add_BT(bc,MxA,T) real(kind=CUSTOM_REAL), intent(inout) :: MxA(:,:) real(kind=CUSTOM_REAL), dimension(3,bc%nglob), intent(in) :: T - MxA(1,bc%ibulk1(:)) = MxA(1,bc%ibulk1(:)) + bc%B(:)*T(1,:) - MxA(2,bc%ibulk1(:)) = MxA(2,bc%ibulk1(:)) + bc%B(:)*T(2,:) - MxA(3,bc%ibulk1(:)) = MxA(3,bc%ibulk1(:)) + bc%B(:)*T(3,:) + MxA(1,bc%ibulk1(:)) = MxA(1,bc%ibulk1(:)) + bc%B(:) * T(1,:) + MxA(2,bc%ibulk1(:)) = MxA(2,bc%ibulk1(:)) + bc%B(:) * T(2,:) + MxA(3,bc%ibulk1(:)) = MxA(3,bc%ibulk1(:)) + bc%B(:) * T(3,:) - MxA(1,bc%ibulk2(:)) = MxA(1,bc%ibulk2(:)) - bc%B(:)*T(1,:) - MxA(2,bc%ibulk2(:)) = MxA(2,bc%ibulk2(:)) - bc%B(:)*T(2,:) - MxA(3,bc%ibulk2(:)) = MxA(3,bc%ibulk2(:)) - bc%B(:)*T(3,:) + MxA(1,bc%ibulk2(:)) = MxA(1,bc%ibulk2(:)) - bc%B(:) * T(1,:) + MxA(2,bc%ibulk2(:)) = MxA(2,bc%ibulk2(:)) - bc%B(:) * T(2,:) + MxA(3,bc%ibulk2(:)) = MxA(3,bc%ibulk2(:)) - bc%B(:) * T(3,:) end subroutine add_BT +!---------------------------------------------------------------------- + + subroutine add_BT_single(bc,MxA,T,i) + +! LTS version for a single fault point + + implicit none + type(bc_dynandkinflt_type), intent(in) :: bc + real(kind=CUSTOM_REAL), intent(inout) :: MxA(:,:) + real(kind=CUSTOM_REAL), dimension(3,bc%nglob), intent(in) :: T + integer, intent(in) :: i + + ! local parameters + integer :: iglob1,iglob2 + + iglob1 = bc%ibulk1(i) + MxA(1,iglob1) = MxA(1,iglob1) + bc%B(i) * T(1,i) + MxA(2,iglob1) = MxA(2,iglob1) + bc%B(i) * T(2,i) + MxA(3,iglob1) = MxA(3,iglob1) + bc%B(i) * T(3,i) + + iglob2 = bc%ibulk2(i) + MxA(1,iglob2) = MxA(1,iglob2) - bc%B(i) * T(1,i) + MxA(2,iglob2) = MxA(2,iglob2) - bc%B(i) * T(2,i) + MxA(3,iglob2) = MxA(3,iglob2) - bc%B(i) * T(3,i) + +end subroutine add_BT_single !=============================================================== ! dataT outputs @@ -487,17 +629,18 @@ subroutine init_dataT(dataT,coord,nglob,NT,DT,ndat,iflt) integer, dimension(:,:), allocatable :: iglob_all integer, dimension(:), allocatable :: iproc,iglob_tmp,glob_indx real(kind=CUSTOM_REAL) :: xtarget,ytarget,ztarget,dist,distkeep - integer :: i, iglob , IIN, ier, jflt, np, k + integer :: i, iglob, ier, jflt, np, k character(len=MAX_STRING_LEN) :: tmpname character(len=MAX_STRING_LEN), dimension(:), allocatable :: name_tmp integer :: ipoin, ipoin_local, npoin_local + ! WARNING: not safe, should check that unit is not aleady opened + integer, parameter :: IIN_FAULT_STATIONS = 251 + ! 1. read fault output coordinates from user file, ! 2. define iglob: the fault global index of the node nearest to user ! requested coordinate - IIN = 251 ! WARNING: not safe, should check that unit is not aleady opened - ! initializes dataT dataT%ndat = ndat dataT%nt = NT @@ -505,7 +648,8 @@ subroutine init_dataT(dataT,coord,nglob,NT,DT,ndat,iflt) dataT%element_size = 0.0 ! count the number of output points on the current fault (#iflt) - open(IIN,file=IN_DATA_FILES(1:len_trim(IN_DATA_FILES))//'FAULT_STATIONS',status='old',action='read',iostat=ier) + open(IIN_FAULT_STATIONS,file=IN_DATA_FILES(1:len_trim(IN_DATA_FILES))//'FAULT_STATIONS', & + status='old',action='read',iostat=ier) if (ier /= 0) then print *,'Error opening file ',IN_DATA_FILES(1:len_trim(IN_DATA_FILES))//'FAULT_STATIONS' if (myrank == 0) write(IMAIN,*) 'Fatal error opening FAULT_STATIONS file. Abort.' @@ -513,18 +657,18 @@ subroutine init_dataT(dataT,coord,nglob,NT,DT,ndat,iflt) endif ! number of points - read(IIN,*) np + read(IIN_FAULT_STATIONS,*) np ! counts fault points on specified fault dataT%npoin = 0 do i = 1,np ! format : #x #y #z #name #fault_id ! example: -4500.0 0.0 0.0 faultst-045dp000 1 - read(IIN,*) xtarget,ytarget,ztarget,tmpname,jflt + read(IIN_FAULT_STATIONS,*) xtarget,ytarget,ztarget,tmpname,jflt ! only points on this fault if (jflt == iflt) dataT%npoin = dataT%npoin + 1 enddo - close(IIN) + close(IIN_FAULT_STATIONS) ! allocates fault point arrays if (dataT%npoin > 0) then @@ -552,26 +696,29 @@ subroutine init_dataT(dataT,coord,nglob,NT,DT,ndat,iflt) if (dataT%npoin == 0) return ! opens in fault stations - open(IIN,file=IN_DATA_FILES(1:len_trim(IN_DATA_FILES))//'FAULT_STATIONS',status='old',action='read',iostat=ier) + open(IIN_FAULT_STATIONS,file=IN_DATA_FILES(1:len_trim(IN_DATA_FILES))//'FAULT_STATIONS', & + status='old',action='read',iostat=ier) if (ier /= 0) then print *,'Error opening file ',IN_DATA_FILES(1:len_trim(IN_DATA_FILES))//'FAULT_STATIONS' stop 'Error opening file FAULT_STATIONS' endif ! number of points - read(IIN,*) np + read(IIN_FAULT_STATIONS,*) np ! reads in fault point positions k = 0 do i = 1,np ! format : #x #y #z #name #fault_id ! example: -4500.0 0.0 0.0 faultst-045dp000 1 - read(IIN,*) xtarget,ytarget,ztarget,tmpname,jflt + read(IIN_FAULT_STATIONS,*) xtarget,ytarget,ztarget,tmpname,jflt ! only points on this fault if (jflt /= iflt) cycle k = k+1 + if (k < 1 .or. k > dataT%npoin) stop 'Error bounds in init_dataT' + dataT%name(k) = tmpname ! search nearest node @@ -588,7 +735,7 @@ subroutine init_dataT(dataT,coord,nglob,NT,DT,ndat,iflt) enddo dist_loc(k) = distkeep enddo - close(IIN) + close(IIN_FAULT_STATIONS) if (PARALLEL_FAULT) then ! For each output point, find the processor that contains the nearest node @@ -614,7 +761,7 @@ subroutine init_dataT(dataT,coord,nglob,NT,DT,ndat,iflt) call bcast_all_i(dataT%iglob,dataT%npoin) ! Number of output points contained in the current processor - npoin_local = count( iproc == myrank ) + npoin_local = count( iproc(:) == myrank ) if (npoin_local > 0) then ! Make a list of output points contained in the current processor @@ -736,6 +883,9 @@ subroutine SCEC_write_dataT(dataT) integer, dimension(8) :: time_values integer, parameter :: IOUT_SC = 121 !WARNING: not very robust. Could instead look for an available ID + ! checks if anything to do + if (dataT%npoin == 0) return + call date_and_time(VALUES=time_values) ! element size in m @@ -782,6 +932,33 @@ subroutine SCEC_write_dataT(dataT) end subroutine SCEC_write_dataT +!------------------------------------------------------------------------ + + subroutine free_fault_memory(bc) + + ! LTS + use specfem_par, only: LTS_MODE + + implicit none + type(bc_dynandkinflt_type), intent(inout) :: bc + + if (bc%nspec > 0) then + deallocate(bc%ibulk1, & + bc%ibulk2, & + bc%R, & + bc%invM1, & + bc%invM2, & + bc%B, & + bc%Z) + endif + deallocate(bc%coord) + + if (LTS_MODE) then + deallocate(bc%fault_use_p_level,bc%fault_num_p_local,bc%fault_ibool_local_p) + endif + + end subroutine free_fault_memory + !--------------------------------------------------------------------- subroutine fault_check_mesh_resolution() @@ -908,9 +1085,14 @@ subroutine fault_check_mesh_resolution() if (myrank == 0) then ! average distance between GLL points within this element avg_distance = elemsize_max_glob / ( NGLLX - 1) ! since NGLLX = NGLLY = NGLLZ + ! rough estimate of time step dt_suggested = COURANT_SUGGESTED * avg_distance / vpmax_glob + ! cut at a significant number of digits (2 digits) + ! example: 0.0734815 -> lpow = (2 - (-1) = 3 -> 0.0730 + call get_timestep_limit_significant_digit(dt_suggested) + ! critical time step estimation ! see: utils/critical_timestep.m NGLL = max(NGLLX,NGLLY,NGLLZ) diff --git a/src/specfem3D/fault_solver_dynamic.f90 b/src/specfem3D/fault_solver_dynamic.f90 index 79e10d3a5..1e5c8c7ad 100644 --- a/src/specfem3D/fault_solver_dynamic.f90 +++ b/src/specfem3D/fault_solver_dynamic.f90 @@ -78,12 +78,11 @@ module fault_solver_dynamic integer, save :: NT_RECORD_LENGTH = 500 ! default 500, i.e., every 500 time step synchronizes dataT%dat records. ! will adapt in case needed to NSNAP number of time steps for snapshots - public :: BC_DYNFLT_init, BC_DYNFLT_set3d_all, & + public :: BC_DYNFLT_init, BC_DYNFLT_free, BC_DYNFLT_set3d_all, & SIMULATION_TYPE_DYN, NT_RECORD_LENGTH, & fault_transfer_data_GPU, fault_rsf_swf_init_GPU, fault_output_synchronize_GPU, & fault_check_mesh_resolution - contains !===================================================================== @@ -94,7 +93,7 @@ module fault_solver_dynamic subroutine BC_DYNFLT_init(prname) use specfem_par, only: nt => NSTEP,DTglobal => DT - use constants, only: IMAIN,myrank,IIN_PAR,IIN_BIN + use constants, only: IMAIN,myrank,IIN_PAR,IIN_BIN,PARALLEL_FAULT implicit none @@ -168,6 +167,12 @@ subroutine BC_DYNFLT_init(prname) if (myrank == 0) then write(IMAIN,*) ' incorporating dynamic rupture simulation' write(IMAIN,*) ' found ', nbfaults, ' fault(s) in file DATA/Par_file_faults' + if (PARALLEL_FAULT) then + write(IMAIN,*) " using parallel partitions" + else + write(IMAIN,*) " using single partition" + endif + call flush_IMAIN() endif ! sets dynamic rupture flag @@ -183,6 +188,13 @@ subroutine BC_DYNFLT_init(prname) read(IIN_PAR,*) V_HEALING read(IIN_PAR,*) V_RUPT + ! user output + if (myrank == 0) then + write(IMAIN,*) " number of iterations per output : ",NTOUT + write(IMAIN,*) " number of iterations per snapshot: ",NSNAP + call flush_IMAIN() + endif + ! from binary fault file read(IIN_BIN) nbfaults_bin ! should be the same as in IIN_PAR @@ -222,6 +234,12 @@ subroutine BC_DYNFLT_init(prname) close(IIN_BIN) close(IIN_PAR) + ! user output + if (myrank == 0) then + write(IMAIN,*) " reading in Kelvin_voigt_eta arrays" + call flush_IMAIN() + endif + ! reads Kelvin-Voigt parameters filename = prname(1:len_trim(prname))//'Kelvin_voigt_eta.bin' open(unit=IIN_BIN,file=trim(filename),status='old',action='read',form='unformatted',iostat=ier) @@ -399,14 +417,29 @@ subroutine init_one_fault(bc,IIN_BIN,IIN_PAR,dt_real,NT,iflt,myrank) real(kind=CUSTOM_REAL) :: S1,S2,S3,Sigma(6) integer :: n1,n2,n3,ier,recordlength + integer :: nspec_all,nglob_all logical :: LOAD_STRESSDROP = .false. NAMELIST / INIT_STRESS / S1,S2,S3,n1,n2,n3 NAMELIST /STRESS_TENSOR / Sigma + ! user output + if (myrank == 0) then + write(IMAIN,*) " initializing fault",iflt,":" + call flush_IMAIN() + endif + ! reads in fault_db binary file and initializes fault arrays call initialize_fault(bc,IIN_BIN) + ! user output + call sum_all_i(bc%nspec,nspec_all) + call sum_all_i(bc%nglob,nglob_all) + if (myrank == 0) then + write(IMAIN,*) " total number of elements = ",nspec_all," global points = ",nglob_all + call flush_IMAIN() + endif + ! sets up initial fault state if (bc%nspec > 0) then allocate(bc%T(3,bc%nglob),stat=ier) @@ -528,6 +561,12 @@ subroutine init_one_fault(bc,IIN_BIN,IIN_PAR,dt_real,NT,iflt,myrank) call init_dataXZ(bc%dataXZ,bc) + ! user output + if (myrank == 0) then + write(IMAIN,*) " writing out fault snapshot: at t=0" + call flush_IMAIN() + endif + ! output a fault snapshot at t=0 if (PARALLEL_FAULT) then call gather_dataXZ(bc) @@ -894,7 +933,10 @@ end function heaviside ! NOTE: On non-split nodes at fault edges, dD=dV=dA=0 ! and the net contribution of B*T is =0 ! - subroutine bc_dynflt_set3d_all(F,V,D) + subroutine bc_dynflt_set3d_all(F,V,D,p_value,ilevel) + + ! LTS + use specfem_par, only: LTS_MODE implicit none @@ -905,6 +947,8 @@ subroutine bc_dynflt_set3d_all(F,V,D) real(kind=CUSTOM_REAL), dimension(:,:), intent(in) :: V,D real(kind=CUSTOM_REAL), dimension(:,:), intent(inout) :: F + ! LTS + integer,intent(in),optional :: p_value,ilevel ! local parameters integer :: iflt @@ -916,7 +960,11 @@ subroutine bc_dynflt_set3d_all(F,V,D) do iflt = 1,Nfaults ! note: this routine should be called by all processes, regardless if they contain no fault elements, ! for managing MPI calls and file outputs - call BC_DYNFLT_set3d(faults(iflt),F,V,D,iflt) + if (LTS_MODE) then + call BC_DYNFLT_set3d_lts(faults(iflt),F,V,D,iflt,p_value,ilevel) + else + call BC_DYNFLT_set3d(faults(iflt),F,V,D,iflt) + endif enddo end subroutine bc_dynflt_set3d_all @@ -944,8 +992,9 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) real(kind=CUSTOM_REAL), dimension(bc%nglob) :: Tstick,Tnew real(kind=CUSTOM_REAL), dimension(bc%nglob) :: strength, theta_old, theta_new, dc real(kind=CUSTOM_REAL), dimension(bc%nglob) :: Vf_old, Vf_new, TxExt, tmp_Vf - real(kind=CUSTOM_REAL) :: half_dt,TLoad,DTau0,GLoad,timeval - integer :: i,ipoin,iglob + real(kind=CUSTOM_REAL) :: TLoad,DTau0,GLoad + real(kind=CUSTOM_REAL) :: deltat,half_dt,timeval + integer :: i,ipoin,iglob,ipass real(kind=CUSTOM_REAL) :: nuc_x, nuc_y, nuc_z, nuc_r, nuc_t0, nuc_v, dist, tw_r, coh_size ! note: this implementation follows the description in: @@ -965,10 +1014,18 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! fault opening, fault healing, time-weakening, smooth loading, ! and TPV16 benchmark specific friction handling + ! global time step + deltat = bc%dt + + ! half time step + half_dt = 0.5_CUSTOM_REAL * deltat + + ! time will never be zero. it starts from 1 + timeval = it * deltat + ! for parallel faults if (bc%nspec > 0) then - half_dt = 0.5_CUSTOM_REAL * bc%dt Vf_old(:) = sqrt(bc%V(1,:)*bc%V(1,:) + bc%V(2,:)*bc%V(2,:)) ! get predicted values @@ -998,7 +1055,9 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! Solve for normal stress (negative is compressive) ! Opening implies free stress - if (bc%allow_opening) T(3,:) = min(T(3,:),0.0_CUSTOM_REAL) + if (bc%allow_opening) then + T(3,:) = min(T(3,:),0.0_CUSTOM_REAL) + endif ! smooth loading within nucleation patch !WARNING : ad hoc for SCEC benchmark TPV10x @@ -1008,9 +1067,6 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) TLoad = 1.0_CUSTOM_REAL ! T_ini DTau0 = 1.0_CUSTOM_REAL ! \Delta \tau_0 - ! time will never be zero. it starts from 1 - timeval = it*bc%dt - ! function G(t) ! see Kaneko (2008), equation (B3): ! if 0 < t < T_ini @@ -1046,7 +1102,6 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! combined with time-weakening for nucleation if (TWF) then - timeval = it*bc%dt nuc_x = bc%twf%nuc_x nuc_y = bc%twf%nuc_y nuc_z = bc%twf%nuc_z @@ -1070,12 +1125,11 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! TPV16 benchmark if (TPV16) then ! fixes friction coefficient to be dynamic friction coefficient when rupture time is over - where (bc%swf%T(:) <= it*bc%dt) bc%mu(:) = bc%swf%mud(:) + where (bc%swf%T(:) <= timeval) bc%mu(:) = bc%swf%mud(:) endif ! updates fault strength strength(:) = -bc%mu(:) * min(T(3,:),0.0_CUSTOM_REAL) + bc%swf%C(:) - ! solves for shear stress Tnew(:) = min(Tstick(:),strength(:)) @@ -1087,25 +1141,31 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! see: Kaneko (2008), explanation of steps 4 and 6 in section 2.3, ! and compare against the alternative first-order expansion given by equation (24) - ! first pass - theta_old(:) = bc%rsf%theta(:) - call rsf_update_state(Vf_old,bc%dt,bc%rsf) - - do i = 1,bc%nglob - Vf_new(i) = rtsafe(0.0_CUSTOM_REAL,Vf_old(i)+5.0_CUSTOM_REAL,1e-5_CUSTOM_REAL,Tstick(i),-T(3,i),bc%Z(i),bc%rsf%f0(i), & - bc%rsf%V0(i),bc%rsf%a(i),bc%rsf%b(i),bc%rsf%L(i),bc%rsf%theta(i),bc%rsf%StateLaw) - enddo - - ! second pass - bc%rsf%theta(:) = theta_old(:) - tmp_Vf(:) = 0.5_CUSTOM_REAL*(Vf_old(:) + Vf_new(:)) - call rsf_update_state(tmp_Vf,bc%dt,bc%rsf) - - do i = 1,bc%nglob - Vf_new(i) = rtsafe(0.0_CUSTOM_REAL,Vf_old(i)+5.0_CUSTOM_REAL,1e-5_CUSTOM_REAL,Tstick(i),-T(3,i),bc%Z(i),bc%rsf%f0(i), & - bc%rsf%V0(i),bc%rsf%a(i),bc%rsf%b(i),bc%rsf%L(i),bc%rsf%theta(i),bc%rsf%StateLaw) - enddo + do ipass = 1,2 + ! updates state + select case(ipass) + case (1) + ! first pass + theta_old(:) = bc%rsf%theta(:) + call rsf_update_state(Vf_old,deltat,bc%rsf) + case (2) + ! second pass + bc%rsf%theta(:) = theta_old(:) + tmp_Vf(:) = 0.5_CUSTOM_REAL*(Vf_old(:) + Vf_new(:)) + call rsf_update_state(tmp_Vf,deltat,bc%rsf) + case default + stop 'Error invalid pass in rate and state friction update' + end select + + ! updates Vf_new + do i = 1,bc%nglob + Vf_new(i) = rtsafe(0.0_CUSTOM_REAL,Vf_old(i)+5.0_CUSTOM_REAL,1e-5_CUSTOM_REAL, & + Tstick(i),-T(3,i),bc%Z(i),bc%rsf%f0(i), & + bc%rsf%V0(i),bc%rsf%a(i),bc%rsf%b(i),bc%rsf%L(i),bc%rsf%theta(i),bc%rsf%StateLaw) + enddo + enddo ! two passes + ! updates new traction Tnew(:) = Tstick(:) - bc%Z(:) * Vf_new(:) endif @@ -1120,7 +1180,9 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! Subtract initial stress (to have relative stress on the fault) T(:,:) = T(:,:) - bc%T0(:,:) - if (RATE_AND_STATE) T(1,:) = T(1,:) - TxExt(:) + if (RATE_AND_STATE) then + T(1,:) = T(1,:) - TxExt(:) + endif !JPA: this eliminates the effect of TxExt on the equations of motion. Why is it needed? ! Update slip acceleration da=da_free-T/(0.5*dt*Z) @@ -1130,7 +1192,7 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) ! Update slip and slip rate, in fault frame bc%D(:,:) = dD(:,:) - bc%V(:,:) = dV(:,:) + half_dt*dA(:,:) + bc%V(:,:) = dV(:,:) + half_dt * dA(:,:) ! Rotate tractions back to (x,y,z) frame T(:,:) = rotate(bc,T,-1) @@ -1149,22 +1211,572 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) endif call store_dataXZ(bc%dataXZ, strength, theta_old, theta_new, dc, & - Vf_old, Vf_new, it*bc%dt,bc%dt) + Vf_old, Vf_new, timeval, deltat) call store_dataT(bc%dataT,bc%D,bc%V,bc%T,it) if (RATE_AND_STATE) then ! adds storage of state - do ipoin = 1,bc%dataT%npoin - iglob = bc%dataT%iglob(ipoin) - if (bc%rsf%StateLaw == 1) then - ! ageing law + if (bc%rsf%StateLaw == 1) then + ! ageing law + do ipoin = 1,bc%dataT%npoin + iglob = bc%dataT%iglob(ipoin) bc%dataT%dat(8,ipoin,it) = log10(theta_new(iglob)) - else - ! slip law + enddo + else + ! slip law + do ipoin = 1,bc%dataT%npoin + iglob = bc%dataT%iglob(ipoin) bc%dataT%dat(8,ipoin,it) = theta_new(iglob) + enddo + endif + endif + + !-- outputs -- + ! write dataT every NTOUT time step or at the end of simulation + if (mod(it,NTOUT) == 0 .or. it == NSTEP) call SCEC_write_dataT(bc%dataT) + endif + + ! note: this stage of the routine must be reached by all processes, + ! otherwise the MPI gather calls won't succeed and the run gets stuck. + + ! write dataXZ every NSNAP time step + if (mod(it,NSNAP) == 0) then + if (PARALLEL_FAULT) then + ! collects data from all processes + call gather_dataXZ(bc) + ! main process writes output file + if (myrank == 0) call write_dataXZ(bc%dataXZ_all,it,iflt) + else + ! fault in single slice + if (bc%nspec > 0) call write_dataXZ(bc%dataXZ,it,iflt) + endif + endif + + ! final output + if (it == NSTEP) then + if (.not. PARALLEL_FAULT) then + call SCEC_Write_RuptureTime(bc%dataXZ,iflt) + else + if (myrank == 0) call SCEC_Write_RuptureTime(bc%dataXZ_all,iflt) + endif + endif + + end subroutine BC_DYNFLT_set3d + +!--------------------------------------------------------------------- + + subroutine BC_DYNFLT_set3d_lts(bc,MxA,V,D,iflt,p_value,ilevel) + +! LTS version of BC_DYNFLT_set3d to calculate only for fault nodes on a given p-level + + use constants, only: PARALLEL_FAULT + use specfem_par, only: it,NSTEP,myrank + + ! LTS + use specfem_par, only: LTS_MODE + use specfem_par_lts, only: num_p_level,p_level,lts_current_m + + implicit none + + ! fault + type(bc_dynandkinflt_type), intent(inout) :: bc + ! force/accel + real(kind=CUSTOM_REAL), intent(inout) :: MxA(:,:) + ! velocity,displacement + real(kind=CUSTOM_REAL), intent(in) :: V(:,:),D(:,:) + ! fault id + integer, intent(in) :: iflt + ! LTS + integer,intent(in),optional :: p_value,ilevel + + ! local parameters + real(kind=CUSTOM_REAL), dimension(3,bc%nglob) :: T,dD,dV,dA + real(kind=CUSTOM_REAL), dimension(bc%nglob) :: Tstick,Tnew + real(kind=CUSTOM_REAL), dimension(bc%nglob) :: strength, theta_old, theta_new, dc + real(kind=CUSTOM_REAL), dimension(bc%nglob) :: Vf_old, Vf_new, TxExt, tmp_Vf + real(kind=CUSTOM_REAL) :: TLoad,DTau0,GLoad + real(kind=CUSTOM_REAL) :: deltat,half_dt,timeval + integer :: i,ipoin,iglob,ipass + real(kind=CUSTOM_REAL) :: nuc_x, nuc_y, nuc_z, nuc_r, nuc_t0, nuc_v, dist, tw_r, coh_size + ! LTS + integer :: inum,num_p_local,iilevel + ! only update p-level nodes (when called for each ilevel separately) set this to .true., otherwise + ! update all nodes (when called once on for last ilevel) + logical, parameter :: UPDATE_LTS_ONLY_P = .false. + +! note: this implementation follows the description in: +! - rate and state friction: +! Kaneko, Y., N. Lapusta, J.-P. Ampuero (2008) +! Spectral element modeling of spontaneous earthquake rupture on rate and state faults: Effect of +! velocity-strengthening friction at shallow depths, +! JGR, 113, B09317, doi:10.1029/2007JB005553 +! +! - slip weakening friction: +! Galvez, P., J.-P. Ampuero, L.A. Dalguer, S.N. Somala, T. Nissen-Meyer (2014) +! Dynamic earthquake rupture modelled with an unstructured 3-D spectral element method applied to +! the 2011 M9 Tohoku earthquake, +! Geophys. J. Int., 198, 1222-1240. +! +! More features have been added, including: +! fault opening, fault healing, time-weakening, smooth loading, +! and TPV16 benchmark specific friction handling + + ! safety check + if (.not. LTS_MODE) return + + ! LTS checks if fault has any p-nodes to compute + ! fault flag: only compute fault elements if fault elements in this p-level + if (UPDATE_LTS_ONLY_P) then + if (.not. bc%fault_use_p_level(ilevel)) return + endif + + ! local time step + deltat = bc%dt / p_value + + ! half time step + half_dt = 0.5_CUSTOM_REAL * deltat + + ! time will never be zero. it starts from 1 + ! calculate time for this level + timeval = it * bc%dt ! coarse level + ! adds fine level increments + do iilevel = ilevel,(num_p_level-1) + timeval = timeval + dble(lts_current_m(iilevel)-1) * bc%dt/dble(p_level(iilevel)) + enddo + + ! for parallel faults + if (bc%nspec > 0) then + + ! sets up local p-nodes on fault surface + num_p_local = bc%fault_num_p_local(ilevel) + + ! LTS: note that T,dD,.. are all temporary arrays which we don't have to split into p-values necessarily + ! however, computing them on p-nodes only would make it faster probably + ! for now, we only take care to update arrays stored by fault arrays bc%.. on p-nodes only + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! ! LTS on global p-nodes only + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! Vf_old(i) = sqrt( bc%V(1,i)*bc%V(1,i) + bc%V(2,i)*bc%V(2,i) ) + ! enddo + !else + Vf_old(:) = sqrt(bc%V(1,:)*bc%V(1,:) + bc%V(2,:)*bc%V(2,:)) + + ! get predicted values + dD(:,:) = get_jump(bc,D) ! dD_predictor + dV(:,:) = get_jump(bc,V) ! dV_predictor + dA(:,:) = get_weighted_jump(bc,MxA) ! dA_free + + ! rotate to fault frame (tangent,normal) + ! component 3 is normal to the fault + dD(:,:) = rotate(bc,dD,1) + dV(:,:) = rotate(bc,dV,1) + dA(:,:) = rotate(bc,dA,1) + + ! T_stick "stick traction" + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! ! LTS on global p-nodes only + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! T(:,i) = bc%Z(i) * ( dV(:,i) + half_dt*dA(:,i) ) + ! enddo + !else + T(1,:) = bc%Z(:) * ( dV(1,:) + half_dt*dA(1,:) ) + T(2,:) = bc%Z(:) * ( dV(2,:) + half_dt*dA(2,:) ) + T(3,:) = bc%Z(:) * ( dV(3,:) + half_dt*dA(3,:) ) + + !Warning : dirty particular free surface condition z = 0. + ! where (bc%zcoord(:) > - SMALLVAL) T(2,:) = 0 + ! do k = 1,bc%nglob + ! if (abs(bc%zcoord(k)-0.e0_CUSTOM_REAL) < SMALLVAL) T(2,k) = 0.e0_CUSTOM_REAL + ! enddo + + ! add initial stress + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! ! LTS on global p-nodes only + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! T(:,i) = T(:,i) + bc%T0(:,i) + ! enddo + !else + T(:,:) = T(:,:) + bc%T0(:,:) + + ! Solve for normal stress (negative is compressive) + ! Opening implies free stress + if (bc%allow_opening) then + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! ! LTS on global p-nodes only + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! T(3,i) = min(T(3,i),0.e0_CUSTOM_REAL) + ! enddo + !else + T(3,:) = min(T(3,:),0.0_CUSTOM_REAL) + endif + + ! smooth loading within nucleation patch + !WARNING : ad hoc for SCEC benchmark TPV10x + if (RATE_AND_STATE) then + ! see: Kaneko (2008), appendix B "Rupture Initiation Procedure" + + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! TLoad = 1.0_CUSTOM_REAL + ! DTau0 = 1.0_CUSTOM_REAL + ! if (timeval <= TLoad) then + ! GLoad = exp( (timeval-TLoad)*(timeval-Tload) / (timeval*(timeval-2.0_CUSTOM_REAL*TLoad)) ) + ! else + ! GLoad = 1.0_CUSTOM_REAL + ! endif + ! ! LTS on p-nodes only + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! TxExt(i) = DTau0 * bc%Fload(i) * GLoad + ! T(1,i) = T(1,i) + TxExt(i) + ! enddo + !else + TxExt(:) = 0.0_CUSTOM_REAL + TLoad = 1.0_CUSTOM_REAL ! T_ini + DTau0 = 1.0_CUSTOM_REAL ! \Delta \tau_0 + + ! function G(t) + ! see Kaneko (2008), equation (B3): + ! if 0 < t < T_ini + ! G(t) = exp( (t - T_ini)^2 / (t^2 - 2 t T_ini) + ! else + ! G(t) = 1 + if (timeval <= TLoad) then + GLoad = exp( (timeval-TLoad)*(timeval-Tload) / (timeval*(timeval-2.0_CUSTOM_REAL*TLoad)) ) + else + GLoad = 1.0_CUSTOM_REAL + endif + ! Kaneko (2008), equation (B1): \Delta \tau = \Delta \tau_0 * F(x,z) * G(t) + ! the geometry and values of function F(x,z) have been pre-computed for a case specific nucleation patch + TxExt(:) = DTau0 * bc%Fload(:) * GLoad + + ! adds horizontal shear traction perturbation + T(1,:) = T(1,:) + TxExt(:) + endif + + ! norm of shear fault traction + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! ! LTS on p-nodes only + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! tStick(i) = sqrt( T(1,i)*T(1,i) + T(2,i)*T(2,i) ) + ! enddo + !else + Tstick(:) = sqrt( T(1,:)*T(1,:) + T(2,:)*T(2,:)) + + if (.not. RATE_AND_STATE) then + ! slip weakening friction + if (UPDATE_LTS_ONLY_P) then + ! update slip state variable + ! WARNING: during opening the friction state variable should not evolve + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + theta_old(i) = bc%swf%theta(i) + enddo + call swf_update_state_lts(bc%D,dD,bc%V,bc%swf,p_value,bc) ! LTS version + + ! update friction coefficient (using slip weakening friction law) + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%MU(i) = swf_mu_lts(bc%swf,i) ! LTS version + enddo + else + ! update slip state variable + ! WARNING: during opening the friction state variable should not evolve + theta_old(:) = bc%swf%theta(:) + call swf_update_state(bc%D,dD,bc%V,bc%swf) + + ! update friction coefficient (using slip weakening friction law) + bc%mu(:) = swf_mu(bc%swf) + endif + + ! combined with time-weakening for nucleation + if (TWF) then + nuc_x = bc%twf%nuc_x + nuc_y = bc%twf%nuc_y + nuc_z = bc%twf%nuc_z + nuc_r = bc%twf%nuc_r + nuc_t0 = bc%twf%nuc_t0 + nuc_v = bc%twf%nuc_v + do i = 1,bc%nglob + dist = ((bc%coord(1,i)-nuc_x)**2 + (bc%coord(2,i)-nuc_y)**2 + (bc%coord(3,i)-nuc_z)**2)**0.5 + if (dist <= nuc_r) then + tw_r = timeval * nuc_v + coh_size = nuc_t0 * nuc_v + if (dist <= tw_r - coh_size) then + bc%mu(i) = min(bc%mu(i), bc%swf%mud(i)) + else if (dist > tw_r - coh_size .and. dist <= tw_r ) then + bc%mu(i) = min(bc%mu(i), bc%swf%mud(i) + (dist-(tw_r-coh_size))/coh_size*(bc%swf%mus(i)-bc%swf%mud(i))) + endif + endif + enddo + endif + + ! TPV16 benchmark + if (TPV16) then + ! fixes friction coefficient to be dynamic friction coefficient when rupture time is over + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + if (bc%swf%T(i) <= timeval) bc%mu(i) = bc%swf%mud(i) + enddo + else + where (bc%swf%T(:) <= timeval) bc%mu(:) = bc%swf%mud(:) + endif + endif + + ! Update strength & shear stress + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + ! Update strength + strength(i) = -bc%MU(i) * min(T(3,i),0.0_CUSTOM_REAL) + bc%swf%C(i) + ! solves for shear stress + Tnew(i) = min(Tstick(i),strength(i)) + enddo + else + ! updates fault strength + strength(:) = -bc%mu(:) * min(T(3,:),0.0_CUSTOM_REAL) + bc%swf%C(:) + ! solves for shear stress + Tnew(:) = min(Tstick(:),strength(:)) + endif + else + ! rate and state friction + + !JPA the solver below can be refactored into a loop with two passes + ! + ! see: Kaneko (2008), explanation of steps 4 and 6 in section 2.3, + ! and compare against the alternative first-order expansion given by equation (24) + + do ipass = 1,2 + ! updates state + select case(ipass) + case (1) + ! first pass + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + theta_old(i) = bc%rsf%theta(i) + call rsf_update_state_lts(Vf_old,deltat,bc%rsf,i) + enddo + else + theta_old(:) = bc%rsf%theta(:) + call rsf_update_state(Vf_old,deltat,bc%rsf) + endif + case (2) + ! second pass + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%rsf%theta(i) = theta_old(i) + tmp_Vf(:) = 0.5_CUSTOM_REAL*(Vf_old(:) + Vf_new(:)) + call rsf_update_state_lts(tmp_Vf,deltat,bc%rsf,i) + enddo + else + bc%rsf%theta(:) = theta_old(:) + tmp_Vf(:) = 0.5_CUSTOM_REAL*(Vf_old(:) + Vf_new(:)) + call rsf_update_state(tmp_Vf,deltat,bc%rsf) + endif + case default + stop 'Error invalid pass in rate and state friction update' + end select + + ! updates Vf_new + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + Vf_new(i) = rtsafe(0.0_CUSTOM_REAL,Vf_old(i)+5.0_CUSTOM_REAL,1e-5_CUSTOM_REAL, & + Tstick(i),-T(3,i),bc%Z(i),bc%rsf%f0(i), & + bc%rsf%V0(i),bc%rsf%a(i),bc%rsf%b(i),bc%rsf%L(i),bc%rsf%theta(i),bc%rsf%StateLaw) + enddo + else + do i = 1,bc%nglob + Vf_new(i) = rtsafe(0.0_CUSTOM_REAL,Vf_old(i)+5.0_CUSTOM_REAL,1e-5_CUSTOM_REAL, & + Tstick(i),-T(3,i),bc%Z(i),bc%rsf%f0(i), & + bc%rsf%V0(i),bc%rsf%a(i),bc%rsf%b(i),bc%rsf%L(i),bc%rsf%theta(i),bc%rsf%StateLaw) + enddo endif + enddo ! two passes + + ! updates new stress + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + Tnew(i) = Tstick(i) - bc%Z(i) * Vf_new(i) + enddo + else + ! updates new traction + Tnew(:) = Tstick(:) - bc%Z(:) * Vf_new(:) + endif + + endif + + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + Tstick(i) = max(Tstick(i),1.0_CUSTOM_REAL) ! to avoid division by zero + T(1,i) = Tnew(i) * T(1,i)/Tstick(i) + T(2,i) = Tnew(i) * T(2,i)/Tstick(i) enddo + + ! Save total tractions + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%T(:,i) = T(:,i) + enddo + else + Tstick(:) = max(Tstick(:),1.0_CUSTOM_REAL) ! to avoid division by zero + + T(1,:) = Tnew(:) * T(1,:)/Tstick(:) + T(2,:) = Tnew(:) * T(2,:)/Tstick(:) + + ! Save total tractions + bc%T(:,:) = T(:,:) + endif + + ! Subtract initial stress (to have relative stress on the fault) + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! T(:,i) = T(:,i) - bc%T0(:,i) + ! enddo + !else + T(:,:) = T(:,:) - bc%T0(:,:) + + if (RATE_AND_STATE) then + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! T(1,i) = T(1,i) - TxExt(i) + ! enddo + !else + T(1,:) = T(1,:) - TxExt(:) + endif + !JPA: this eliminates the effect of TxExt on the equations of motion. Why is it needed? + + ! Update slip acceleration da=da_free-T/(0.5*dt*Z) + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! dA(:,i) = dA(:,i) - T(:,i)/(bc%Z(i) * half_dt) + ! enddo + !else + dA(1,:) = dA(1,:) - T(1,:)/(bc%Z(:) * half_dt) + dA(2,:) = dA(2,:) - T(2,:)/(bc%Z(:) * half_dt) + dA(3,:) = dA(3,:) - T(3,:)/(bc%Z(:) * half_dt) + + ! Update slip and slip rate, in fault frame + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%D(:,i) = dD(:,i) + bc%V(:,i) = dV(:,i) + half_dt*dA(:,i) + enddo + else + bc%D(:,:) = dD(:,:) + bc%V(:,:) = dV(:,:) + half_dt * dA(:,:) + endif + + ! Rotate tractions back to (x,y,z) frame + T(:,:) = rotate(bc,T,-1) + + ! Add boundary term B*T to M*a + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + call add_BT_single(bc,MxA,T,i) ! LTS version + enddo + else + call add_BT(bc,MxA,T) + endif + + !-- intermediate storage of outputs -- + ! LTS - todo optimization + !if (USE_OPTIMIZATION) then + ! do inum = 1,num_p_local + ! i = bc%fault_ibool_local_p(inum,ilevel) + ! Vf_new(i) = sqrt(bc%V(1,i)*bc%V(1,i) + bc%V(2,i)*bc%V(2,i)) + ! if (.not. RATE_AND_STATE) then + ! theta_new(i) = bc%swf%theta(i) + ! dc(i) = bc%swf%dc(i) + ! else + ! theta_new(i) = bc%rsf%theta(i) + ! dc(i) = bc%rsf%L(i) + ! endif + ! enddo + !else + Vf_new = sqrt(bc%V(1,:)*bc%V(1,:) + bc%V(2,:)*bc%V(2,:)) + if (.not. RATE_AND_STATE) then + theta_new(:) = bc%swf%theta(:) + dc(:) = bc%swf%Dc(:) + else + theta_new(:) = bc%rsf%theta(:) + dc(:) = bc%rsf%L(:) + endif + + call store_dataXZ(bc%dataXZ, strength, theta_old, theta_new, dc, & + Vf_old, Vf_new, timeval, deltat) + + call store_dataT(bc%dataT,bc%D,bc%V,bc%T,it) + + if (RATE_AND_STATE) then + ! adds storage of state + if (bc%rsf%StateLaw == 1) then + ! ageing law + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + ipoin = bc%fault_ibool_local_p(inum,ilevel) + iglob = bc%dataT%iglob(ipoin) + bc%dataT%dat(8,ipoin,it) = log10(theta_new(iglob)) + enddo + else + do ipoin = 1,bc%dataT%npoin + iglob = bc%dataT%iglob(ipoin) + bc%dataT%dat(8,ipoin,it) = log10(theta_new(iglob)) + enddo + endif + else + ! slip law + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + ipoin = bc%fault_ibool_local_p(inum,ilevel) + iglob = bc%dataT%iglob(ipoin) + bc%dataT%dat(8,ipoin,it) = theta_new(iglob) + enddo + else + do ipoin = 1,bc%dataT%npoin + iglob = bc%dataT%iglob(ipoin) + bc%dataT%dat(8,ipoin,it) = theta_new(iglob) + enddo + endif + endif endif !-- outputs -- @@ -1197,7 +1809,7 @@ subroutine BC_DYNFLT_set3d(bc,MxA,V,D,iflt) endif endif - end subroutine BC_DYNFLT_set3d + end subroutine BC_DYNFLT_set3d_lts !=============================================================== @@ -1339,6 +1951,45 @@ subroutine swf_update_state(dold,dnew,vold,f) end subroutine swf_update_state +!--------------------------------------------------------------------- + + subroutine swf_update_state_lts(dold,dnew,vold,f,p_value,bc) + + use specfem_par_lts, only: iglob_p_refine + implicit none + + real(kind=CUSTOM_REAL), dimension(:,:), intent(in) :: vold,dold,dnew + type(swf_type), intent(inout) :: f + + integer,intent(in) :: p_value + type(bc_dynandkinflt_type), intent(in) :: bc + + ! local parameters + real(kind=CUSTOM_REAL) :: vnorm + integer :: i,iglob + + ! fault state variable theta (magnitude of accumulated slip on fault) + ! accumulates fault slip + do i = 1,bc%nglob + iglob = bc%ibulk1(i) + if ( iglob_p_refine(iglob) == p_value ) then + f%theta(i) = f%theta(i) + sqrt( (dold(1,i)-dnew(1,i))**2 + (dold(2,i)-dnew(2,i))**2 ) + endif + enddo + + ! fault healing + if (f%healing) then + do i = 1,bc%nglob + iglob = bc%ibulk1(i) + if (iglob_p_refine(iglob) == p_value) then + vnorm = sqrt(vold(1,i)**2 + vold(2,i)**2) + if (vnorm < V_HEALING) f%theta(i) = 0.0_CUSTOM_REAL + endif + enddo + endif + + end subroutine swf_update_state_lts + !--------------------------------------------------------------------- function swf_mu(f) result(mu) @@ -1349,19 +2000,42 @@ function swf_mu(f) result(mu) real(kind=CUSTOM_REAL) :: mu(size(f%theta)) if (f%kind == 1) then - ! slip weakening law - ! - ! for example: Galvez, 2014, eq. (8) - ! also Ida, 1973; Palmer & Rice 1973; Andrews 1976; .. - mu(:) = f%mus(:) - (f%mus(:)-f%mud(:)) * min(f%theta(:)/f%Dc(:), 1.0_CUSTOM_REAL) + ! slip weakening law + ! + ! for example: Galvez, 2014, eq. (8) + ! also Ida, 1973; Palmer & Rice 1973; Andrews 1976; .. + mu(:) = f%mus(:) - (f%mus(:)-f%mud(:)) * min(f%theta(:)/f%Dc(:), 1.0_CUSTOM_REAL) else - !-- exponential slip weakening: - mu(:) = f%mud(:) - (f%mud(:)-f%mus(:)) * exp(-f%theta(:)/f%Dc(:)) + !-- exponential slip weakening: + mu(:) = f%mud(:) - (f%mud(:)-f%mus(:)) * exp(-f%theta(:)/f%Dc(:)) endif end function swf_mu +!--------------------------------------------------------------------- + + function swf_mu_lts(f,i) result(mu) + + implicit none + + type(swf_type), intent(in) :: f + integer, intent(in) :: i + real(kind=CUSTOM_REAL) :: mu + + if (f%kind == 1) then + ! slip weakening law + ! + ! for example: Galvez, 2014, eq. (8) + ! also Ida, 1973; Palmer & Rice 1973; Andrews 1976; .. + mu = f%mus(i) - (f%mus(i)-f%mud(i)) * min(f%theta(i)/f%Dc(i), 1.0_CUSTOM_REAL) + else + !-- exponential slip weakening: + mu = f%mud(i) - (f%mud(i)-f%mus(i)) * exp(-f%theta(i)/f%Dc(i)) + endif + + end function swf_mu_lts + !===================================================================== subroutine fault_rsf_swf_init_GPU() @@ -1888,6 +2562,56 @@ subroutine rsf_update_state(V,dt_real,f) end subroutine rsf_update_state +!--------------------------------------------------------------------- + + subroutine rsf_update_state_lts(V,dt_real,f,i) + + use constants, only: ONE,HALF,TWO + implicit none + real(kind=CUSTOM_REAL), dimension(:), intent(in) :: V + type(rsf_type), intent(inout) :: f + real(kind=CUSTOM_REAL), intent(in) :: dt_real + + ! LTS + integer,intent(in) :: i + + ! local parameters + real(kind=CUSTOM_REAL) :: vDtL + real(kind=CUSTOM_REAL) :: f_ss,theta_ss,f_LV + + ! LTS on p-nodes only + vDtL = V(i) * dt_real / f%L(i) + + ! note: assumes that vDTL is strictly positive ( >= 0), since V >= 0, dt > 0 and L > 0 + if (vDtL < 0.0_CUSTOM_REAL) stop 'Invalid negative factor found in rate and state friction law' + + ! state update + if (f%StateLaw == 1) then + ! ageing law + if (vDtL > 1.e-5_CUSTOM_REAL) then + f%theta(i) = f%theta(i) * exp(-vDtL) + f%L(i)/V(i) * (ONE - exp(-vDtL)) + else + f%theta(i) = f%theta(i) * exp(-vDtL) + dt_real * ( ONE - HALF*vDtL ) + endif + else + ! slip law + if (RSF_SLIP_LAW_TYPE == 1) then + ! default, strong rate-weakening: + if (V(i) /= 0.0_CUSTOM_REAL) then + f_LV = f%f0(i) - (f%b(i) - f%a(i))*log(V(i)/f%V0(i)) + f_ss = f%fw(i) + (f_LV - f%fw(i))/(ONE + (V(i)/f%Vw(i))**8)**0.125 + theta_ss = f%a(i) * log( TWO*f%V0(i)/V(i) * sinh(f_ss/f%a(i)) ) + f%theta(i) = theta_ss + (f%theta(i) - theta_ss) * exp(-vDtL) + else + f%theta(i) = f%theta(i) + endif + else + ! Kaneko (2008) slip law: + f%theta(i) = f%L(i)/V(i) * (f%theta(i)*V(i)/f%L(i))**(exp(-vDtL)) + endif + endif + + end subroutine rsf_update_state_lts !=============================================================== ! OUTPUTS @@ -1905,7 +2629,8 @@ subroutine SCEC_Write_RuptureTime(dataXZ,iflt) integer :: i character(len=MAX_STRING_LEN) :: filename integer, dimension(8) :: time_values - integer, parameter :: IOUT_RUP = 121 !WARNING: not very robust. Could instead look for an available ID + ! WARNING: not very robust. Could instead look for an available ID + integer, parameter :: IOUT_RUP = 121 call date_and_time(VALUES=time_values) @@ -2563,5 +3288,21 @@ subroutine fault_output_synchronize_GPU(it) end subroutine fault_output_synchronize_GPU +!--------------------------------------------------------------- + + subroutine BC_DYNFLT_free() + + implicit none + integer :: iflt + + do iflt = 1,size(faults) + call free_fault_memory(faults(iflt)) + enddo + + deallocate(faults) + if (allocated(Kelvin_Voigt_eta)) deallocate(Kelvin_Voigt_eta) + + end subroutine BC_DYNFLT_free + end module fault_solver_dynamic diff --git a/src/specfem3D/fault_solver_kinematic.f90 b/src/specfem3D/fault_solver_kinematic.f90 index 0b7c24fe8..8c2e4c86a 100644 --- a/src/specfem3D/fault_solver_kinematic.f90 +++ b/src/specfem3D/fault_solver_kinematic.f90 @@ -43,7 +43,8 @@ module fault_solver_kinematic logical, save :: SIMULATION_TYPE_KIN = .false. - public :: BC_KINFLT_init, BC_KINFLT_set_all, SIMULATION_TYPE_KIN + public :: BC_KINFLT_init, BC_KINFLT_free, BC_KINFLT_set_all, & + SIMULATION_TYPE_KIN contains @@ -56,7 +57,7 @@ subroutine BC_KINFLT_init(prname) use specfem_par, only: nt => NSTEP, DTglobal => DT - use constants, only: myrank,IIN_PAR,IIN_BIN + use constants, only: myrank,IIN_PAR,IIN_BIN,PARALLEL_FAULT implicit none @@ -126,6 +127,12 @@ subroutine BC_KINFLT_init(prname) if (myrank == 0) then write(IMAIN,*) ' incorporating kinematic rupture simulation' write(IMAIN,*) ' found ', nbfaults, ' fault(s) in file DATA/Par_file_faults' + if (PARALLEL_FAULT) then + write(IMAIN,*) " using parallel partitions" + else + write(IMAIN,*) " using single partition" + endif + call flush_IMAIN() endif ! sets kinematic rupture flag @@ -217,6 +224,7 @@ end subroutine BC_KINFLT_init subroutine init_one_fault(bc,IIN_BIN,IIN_PAR,dt_real,NT,iflt) + use constants, only: myrank implicit none type(bc_dynandkinflt_type), intent(inout) :: bc @@ -230,6 +238,12 @@ subroutine init_one_fault(bc,IIN_BIN,IIN_PAR,dt_real,NT,iflt) NAMELIST / KINPAR / kindt + ! user output + if (myrank == 0) then + write(IMAIN,*) " initializing fault",iflt + call flush_IMAIN() + endif + ! reads in fault_db binary file and initializes fault arrays call initialize_fault(bc,IIN_BIN) @@ -273,17 +287,28 @@ subroutine init_one_fault(bc,IIN_BIN,IIN_PAR,dt_real,NT,iflt) call init_dataXZ(bc%dataXZ,bc) + ! user output + if (myrank == 0) then + write(IMAIN,*) " fault initialized successfully" + call flush_IMAIN() + endif + end subroutine init_one_fault !===================================================================== ! adds boundary term Bt to Force array for each fault. ! - subroutine BC_KINFLT_set_all(F,Vel,Dis) + subroutine BC_KINFLT_set_all(F,Vel,Dis,p_value,ilevel) + + ! LTS + use specfem_par, only: LTS_MODE implicit none real(kind=CUSTOM_REAL), dimension(:,:), intent(in) :: Vel,Dis real(kind=CUSTOM_REAL), dimension(:,:), intent(inout) :: F + ! LTS + integer,intent(in),optional :: p_value,ilevel ! local parameters integer :: iflt @@ -295,7 +320,11 @@ subroutine BC_KINFLT_set_all(F,Vel,Dis) do iflt = 1,Nfaults ! note: this routine should be called by all processes, regardless if they contain no fault elements, ! for managing MPI calls and file outputs - call BC_KINFLT_set_single(faults(iflt),F,Vel,Dis,iflt) + if (LTS_MODE) then + call BC_KINFLT_set_single_lts(faults(iflt),F,Vel,Dis,iflt,p_value,ilevel) + else + call BC_KINFLT_set_single(faults(iflt),F,Vel,Dis,iflt) + endif enddo end subroutine BC_KINFLT_set_all @@ -316,31 +345,38 @@ subroutine BC_KINFLT_set_single(bc,MxA,V,D,iflt) type(bc_dynandkinflt_type), intent(inout) :: bc real(kind=CUSTOM_REAL), intent(in) :: V(:,:),D(:,:) integer,intent(in) :: iflt + + ! local parameters integer :: it_kin,itime real(kind=CUSTOM_REAL), dimension(3,bc%nglob) :: T real(kind=CUSTOM_REAL), dimension(3,bc%nglob) :: dD,dV,dA,dV_free real(kind=CUSTOM_REAL) :: t1,t2 - real(kind=CUSTOM_REAL) :: half_dt,timeval + real(kind=CUSTOM_REAL) :: deltat,half_dt,timeval - if (bc%nspec > 0) then !Surendra : for parallel faults + ! global time step + deltat = bc%dt + + ! half time step + half_dt = 0.5_CUSTOM_REAL * deltat - half_dt = 0.5e0_CUSTOM_REAL*bc%dt + ! time will never be zero. it starts from 1 + timeval = it * deltat + + if (bc%nspec > 0) then !Surendra : for parallel faults ! get predicted values - dD = get_jump(bc,D) ! dD_predictor - dV = get_jump(bc,V) ! dV_predictor - dA = get_weighted_jump(bc,MxA) ! dA_free + dD(:,:) = get_jump(bc,D) ! dD_predictor + dV(:,:) = get_jump(bc,V) ! dV_predictor + dA(:,:) = get_weighted_jump(bc,MxA) ! dA_free ! rotate to fault frame (tangent,normal) ! component 3 is normal to the fault - dD = rotate(bc,dD,1) - dV = rotate(bc,dV,1) - dA = rotate(bc,dA,1) + dD(:,:) = rotate(bc,dD,1) + dV(:,:) = rotate(bc,dV,1) + dA(:,:) = rotate(bc,dA,1) - ! Time marching - timeval = it*bc%dt ! Slip_rate step "it_kin" - it_kin = bc%kin_it*nint(bc%kin_dt/bc%dt) + it_kin = bc%kin_it * nint(bc%kin_dt/bc%dt) ! (nint : Fortran round (nearest whole number) , ! if nint(a)=0.5 then "a" get upper bound ) @@ -348,27 +384,24 @@ subroutine BC_KINFLT_set_single(bc,MxA,V,D,iflt) ! This is done in case bc%kin_dt ! if (it_kin == it) it_kin=it_kin+1 ! - !NOTE : it and it_kin is being used due to integers are exact numbers. if (it > it_kin) then - print *, 'it :', it print *, 'it_kin :', it_kin bc%kin_it = bc%kin_it +1 bc%v_kin_t1 = bc%v_kin_t2 print *, 'loading v_kin_t2' - !Temporal : just for snapshots file names kin_dt=0.1 , dt=0.0001 - !snapshot(100=itime).. : itime=kin_it*(kin_dt/dt) - itime = bc%kin_it*nint(bc%kin_dt/bc%dt) + ! Temporal : just for snapshots file names kin_dt=0.1 , dt=0.0001 + ! snapshot(100=itime).. : itime=kin_it*(kin_dt/dt) + itime = bc%kin_it * nint(bc%kin_dt/bc%dt) call load_vslip_snapshots(bc%dataXZ,itime,iflt,myrank) -! loading slip rates - bc%v_kin_t2(1,:)=bc%dataXZ%v1 - bc%v_kin_t2(2,:)=bc%dataXZ%v2 + ! loading slip rates + bc%v_kin_t2(1,:) = bc%dataXZ%v1 + bc%v_kin_t2(2,:) = bc%dataXZ%v2 !linear interpolation in time between t1 and t2 !REMARK , bc%kin_dt is the delta "t" between two snapshots. - endif t1 = (bc%kin_it-1) * bc%kin_dt @@ -382,9 +415,9 @@ subroutine BC_KINFLT_set_single(bc,MxA,V,D,iflt) bc%V(2,:) = ( (t2 - timeval)*bc%v_kin_t1(2,:) + (timeval - t1)*bc%v_kin_t2(2,:) )/ bc%kin_dt !dV_free = dV_predictor + (dt/2)*dA_free - dV_free(1,:) = dV(1,:) + half_dt*dA(1,:) - dV_free(2,:) = dV(2,:) + half_dt*dA(2,:) - dV_free(3,:) = dV(3,:) + half_dt*dA(3,:) + dV_free(1,:) = dV(1,:) + half_dt * dA(1,:) + dV_free(2,:) = dV(2,:) + half_dt * dA(2,:) + dV_free(3,:) = dV(3,:) + half_dt * dA(3,:) ! T = Z*( dV_free - V) , V known apriori as input. ! CONVENTION : T(ibulk1) = T = -T(ibulk2) @@ -393,13 +426,13 @@ subroutine BC_KINFLT_set_single(bc,MxA,V,D,iflt) T(3,:) = bc%Z * ( dV_free(3,:) ) ! Save tractions - bc%T = T + bc%T(:,:) = T(:,:) ! Update slip in fault frame - bc%D = dD + bc%D(:,:) = dD(:,:) ! Rotate tractions back to (x,y,z) frame - T = rotate(bc,T,-1) + T(:,:) = rotate(bc,T,-1) ! Add boundary term B*T to M*a call add_BT(bc,MxA,T) @@ -417,6 +450,194 @@ subroutine BC_KINFLT_set_single(bc,MxA,V,D,iflt) end subroutine BC_KINFLT_set_single +!--------------------------------------------------------------------- + + subroutine BC_KINFLT_set_single_lts(bc,MxA,V,D,iflt,p_value,ilevel) + +! LTS version of BC_KINFLT_set_single to calculate only for fault nodes on a given p-level + + use specfem_par, only: it,NSTEP,myrank + + ! LTS + use specfem_par, only: LTS_MODE + use specfem_par_lts, only: num_p_level,p_level,lts_current_m + + implicit none + real(kind=CUSTOM_REAL), intent(inout) :: MxA(:,:) + type(bc_dynandkinflt_type), intent(inout) :: bc + real(kind=CUSTOM_REAL), intent(in) :: V(:,:),D(:,:) + integer,intent(in) :: iflt + ! LTS + integer,intent(in),optional :: p_value,ilevel + + ! local parameters + integer :: it_kin,itime + real(kind=CUSTOM_REAL), dimension(3,bc%nglob) :: T + real(kind=CUSTOM_REAL), dimension(3,bc%nglob) :: dD,dV,dA,dV_free + real(kind=CUSTOM_REAL) :: t1,t2 + real(kind=CUSTOM_REAL) :: deltat,half_dt,timeval + ! LTS + integer :: i,iglob1,iglob2,iilevel,inum,num_p_local + ! only update p-level nodes (when called for each ilevel separately) set this to .true., otherwise + ! update all nodes (when called once on for last ilevel) + logical, parameter :: UPDATE_LTS_ONLY_P = .false. + + ! safety check + if (.not. LTS_MODE) return + + ! LTS checks if fault has any p-nodes to compute + ! fault flag: only compute fault elements if fault elements in this p-level + if (UPDATE_LTS_ONLY_P) then + if (.not. bc%fault_use_p_level(ilevel)) return + endif + + ! local time step + deltat = bc%dt / p_value + + ! half time step + half_dt = 0.5_CUSTOM_REAL * deltat + + ! time will never be zero. it starts from 1 + ! calculate time for this level + timeval = it * bc%dt ! coarse level + ! adds fine level increments + do iilevel = ilevel,(num_p_level-1) + timeval = timeval + dble(lts_current_m(iilevel)-1) * bc%dt/dble(p_level(iilevel)) + enddo + + if (bc%nspec > 0) then !Surendra : for parallel faults + + ! sets up local p-nodes on fault surface + num_p_local = bc%fault_num_p_local(ilevel) + + ! get predicted values + dD(:,:) = get_jump(bc,D) ! dD_predictor + dV(:,:) = get_jump(bc,V) ! dV_predictor + dA(:,:) = get_weighted_jump(bc,MxA) ! dA_free + + ! rotate to fault frame (tangent,normal) + ! component 3 is normal to the fault + dD(:,:) = rotate(bc,dD,1) + dV(:,:) = rotate(bc,dV,1) + dA(:,:) = rotate(bc,dA,1) + + ! Slip_rate step "it_kin" + it_kin = bc%kin_it * nint(bc%kin_dt/bc%dt) + ! (nint : Fortran round (nearest whole number) , + ! if nint(a)=0.5 then "a" get upper bound ) + + ! Loading the next slip_rate one ahead it. + ! This is done in case bc%kin_dt + ! if (it_kin == it) it_kin=it_kin+1 ! + + !NOTE : it and it_kin is being used due to integers are exact numbers. + if (it > it_kin) then + print *, 'it :', it + print *, 'it_kin :', it_kin + + bc%kin_it = bc%kin_it +1 + bc%v_kin_t1 = bc%v_kin_t2 + print *, 'loading v_kin_t2' + ! Temporal : just for snapshots file names kin_dt=0.1 , dt=0.0001 + ! snapshot(100=itime).. : itime=kin_it*(kin_dt/dt) + itime = bc%kin_it*nint(bc%kin_dt/bc%dt) + call load_vslip_snapshots(bc%dataXZ,itime,iflt,myrank) + ! loading slip rates + bc%v_kin_t2(1,:) = bc%dataXZ%v1 + bc%v_kin_t2(2,:) = bc%dataXZ%v2 + + !linear interpolation in time between t1 and t2 + !REMARK , bc%kin_dt is the delta "t" between two snapshots. + endif + + t1 = (bc%kin_it-1) * bc%kin_dt + t2 = bc%kin_it * bc%kin_dt + + ! Kinematic velocity_rate + ! bc%V : Imposed a priori and read from slip rate snapshots (from time reversal) + ! Linear interpolation between consecutive kinematic time steps. + ! V will be given at each time step. + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%V(1,i) = ( (t2 - timeval)*bc%v_kin_t1(1,i) + (timeval - t1)*bc%v_kin_t2(1,i) )/ bc%kin_dt + bc%V(2,i) = ( (t2 - timeval)*bc%v_kin_t1(2,i) + (timeval - t1)*bc%v_kin_t2(2,i) )/ bc%kin_dt + enddo + else + bc%V(1,:) = ( (t2 - timeval)*bc%v_kin_t1(1,:) + (timeval - t1)*bc%v_kin_t2(1,:) )/ bc%kin_dt + bc%V(2,:) = ( (t2 - timeval)*bc%v_kin_t1(2,:) + (timeval - t1)*bc%v_kin_t2(2,:) )/ bc%kin_dt + endif + + !dV_free = dV_predictor + (dt/2)*dA_free + dV_free(1,:) = dV(1,:) + half_dt * dA(1,:) + dV_free(2,:) = dV(2,:) + half_dt * dA(2,:) + dV_free(3,:) = dV(3,:) + half_dt * dA(3,:) + + ! T = Z*( dV_free - V) , V known apriori as input. + ! CONVENTION : T(ibulk1) = T = -T(ibulk2) + T(1,:) = bc%Z * ( dV_free(1,:) - bc%V(1,:) ) + T(2,:) = bc%Z * ( dV_free(2,:) - bc%V(2,:) ) + T(3,:) = bc%Z * ( dV_free(3,:) ) + + if (UPDATE_LTS_ONLY_P) then + ! Save tractions + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%T(:,i) = T(:,i) + enddo + + ! Update slip in fault frame + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + bc%D(:,i) = dD(:,i) + enddo + else + ! Save tractions + bc%T(:,:) = T(:,:) + + ! Update slip in fault frame + bc%D(:,:) = dD(:,:) + endif + + ! Rotate tractions back to (x,y,z) frame + T(:,:) = rotate(bc,T,-1) + + ! Add boundary term B*T to M*a + if (UPDATE_LTS_ONLY_P) then + ! LTS on p-nodes only + do inum = 1,num_p_local + i = bc%fault_ibool_local_p(inum,ilevel) + iglob1 = bc%ibulk1(i) + MxA(1,iglob1) = MxA(1,iglob1) + bc%B(i) * T(1,i) + MxA(2,iglob1) = MxA(2,iglob1) + bc%B(i) * T(2,i) + MxA(3,iglob1) = MxA(3,iglob1) + bc%B(i) * T(3,i) + + iglob2 = bc%ibulk2(i) + MxA(1,iglob2) = MxA(1,iglob2) - bc%B(i) * T(1,i) + MxA(2,iglob2) = MxA(2,iglob2) - bc%B(i) * T(2,i) + MxA(3,iglob2) = MxA(3,iglob2) - bc%B(i) * T(3,i) + enddo + else + call add_BT(bc,MxA,T) + endif + + !-- intermediate storage of outputs -- + call store_dataT(bc%dataT,bc%D,bc%V,bc%T,it) + + !-- OUTPUTS -- + ! write dataT every NTOUT time steps or at the end of simulation + if (mod(it,NTOUT) == 0 .or. it == NSTEP) call SCEC_write_dataT(bc%dataT) + ! write dataXZ every NSNAP time steps + if (mod(it,NSNAP) == 0) call write_dataXZ(bc%dataXZ,it,iflt) + + endif + + end subroutine BC_KINFLT_set_single_lts + + !=============================================================== subroutine init_dataXZ(dataXZ,bc) @@ -540,5 +761,19 @@ subroutine load_vslip_snapshots(dataXZ,itime,iflt,myrank) end subroutine load_vslip_snapshots +!--------------------------------------------------------------- + + subroutine BC_KINFLT_free() + + implicit none + integer :: iflt + + do iflt = 1,size(faults) + call free_fault_memory(faults(iflt)) + enddo + + deallocate(faults) + + end subroutine BC_KINFLT_free end module fault_solver_kinematic diff --git a/src/specfem3D/finalize_simulation.f90 b/src/specfem3D/finalize_simulation.f90 index 038930564..62a9da55d 100644 --- a/src/specfem3D/finalize_simulation.f90 +++ b/src/specfem3D/finalize_simulation.f90 @@ -134,6 +134,9 @@ subroutine finalize_simulation_cleanup() use specfem_par_poroelastic use specfem_par_lts + use fault_solver_dynamic, only: SIMULATION_TYPE_DYN,BC_DYNFLT_free + use fault_solver_kinematic, only: SIMULATION_TYPE_KIN,BC_KINFLT_free + implicit none ! from here on, no gpu data is needed anymore @@ -153,6 +156,7 @@ subroutine finalize_simulation_cleanup() if (ACOUSTIC_SIMULATION) then deallocate(rmass_acoustic) endif + ! boundary surfaces deallocate(ibelm_xmin) deallocate(ibelm_xmax) @@ -160,6 +164,7 @@ subroutine finalize_simulation_cleanup() deallocate(ibelm_ymax) deallocate(ibelm_bottom) deallocate(ibelm_top) + ! sources if (NSOURCES > 0) then deallocate(islice_selected_source,ispec_selected_source) @@ -170,6 +175,7 @@ subroutine finalize_simulation_cleanup() deallocate(nu_source) deallocate(user_source_time_function) endif + ! receivers if (nrec > 0) then deallocate(islice_selected_rec,ispec_selected_rec) @@ -177,6 +183,7 @@ subroutine finalize_simulation_cleanup() deallocate(station_name,network_name) deallocate(nu_rec) endif + if (allocated(pm1_source_encoding)) deallocate(pm1_source_encoding) if (allocated(sourcearrays)) deallocate(sourcearrays) if (allocated(source_adjoint)) deallocate(source_adjoint) @@ -185,6 +192,7 @@ subroutine finalize_simulation_cleanup() if (allocated(number_receiver_global)) deallocate(number_receiver_global) if (allocated(hxir_store)) deallocate(hxir_store,hetar_store,hgammar_store) if (allocated(hpxir_store)) deallocate(hpxir_store,hpetar_store,hpgammar_store) + ! adjoint sources if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then if (nadj_rec_local > 0) then @@ -197,11 +205,14 @@ subroutine finalize_simulation_cleanup() endif endif endif + ! seismograms if (allocated(seismograms_d)) deallocate(seismograms_d,seismograms_v,seismograms_a,seismograms_p) if (allocated(seismograms_eps)) deallocate(seismograms_eps) + ! moment tensor derivatives if (allocated(Mxx_der)) deallocate(Mxx_der,Myy_der,Mzz_der,Mxy_der,Mxz_der,Myz_der,sloc_der) + ! mesh deallocate(ibool) deallocate(irregular_element_number) @@ -211,6 +222,10 @@ subroutine finalize_simulation_cleanup() deallocate(kappastore,mustore,rhostore) deallocate(ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic) + ! faults + if (SIMULATION_TYPE_DYN) call BC_DYNFLT_free() + if (SIMULATION_TYPE_KIN) call BC_KINFLT_free() + ! LTS if (LTS_MODE) then deallocate(p_level,p_level_loops,p_level_steps) diff --git a/src/specfem3D/iterate_time.F90 b/src/specfem3D/iterate_time.F90 index 512286def..5f16e06c0 100644 --- a/src/specfem3D/iterate_time.F90 +++ b/src/specfem3D/iterate_time.F90 @@ -207,6 +207,14 @@ subroutine iterate_time() ! get MPI starting time_start = wtime() + ! LTS + if (LTS_MODE) then + ! LTS steps through its own time iterations - for now + call lts_iterate_time() + ! all done, continue after time loop + goto 777 + endif + ! time loop do it = it_begin,it_end @@ -321,6 +329,9 @@ subroutine iterate_time() ! enddo ! end of main time loop +! goto point for LTS to finish time loop +777 continue + ! close the huge file that contains a dump of all the time steps to disk if (EXACT_UNDOING_TO_DISK) close(IFILE_FOR_EXACT_UNDOING) diff --git a/src/specfem3D/iterate_time_undoatt.F90 b/src/specfem3D/iterate_time_undoatt.F90 index 3ac61326b..5bef002fc 100644 --- a/src/specfem3D/iterate_time_undoatt.F90 +++ b/src/specfem3D/iterate_time_undoatt.F90 @@ -69,6 +69,8 @@ subroutine iterate_time_undoatt() call exit_MPI(myrank,'for undo_attenuation, POROELASTIC kernel simulation is not supported yet') ! not working yet... if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY /= 0) & call exit_MPI(myrank,'for undo_attenuation, noise kernel simulation is not supported yet') ! not working yet... + if (LTS_MODE) & + call exit_MPI(myrank,'LTS mode for undo_attenuation is not supported yet') ! hdf5 i/o server if (HDF5_IO_NODES > 0) then diff --git a/src/specfem3D/locate_source.F90 b/src/specfem3D/locate_source.F90 index bcef26c6e..44a86da5a 100644 --- a/src/specfem3D/locate_source.F90 +++ b/src/specfem3D/locate_source.F90 @@ -37,7 +37,7 @@ subroutine locate_source() USE_EXTERNAL_SOURCE_FILE, & USE_TRICK_FOR_BETTER_PRESSURE,COUPLE_WITH_INJECTION_TECHNIQUE, & myrank,NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,DT, & - NSOURCES,HAS_FINITE_FAULT_SOURCE + NSOURCES,HAS_FINITE_FAULT_SOURCE,LTS_MODE ! sources arrays use specfem_par, only: Mxx,Myy,Mzz,Mxy,Mxz,Myz,hdur, & @@ -49,6 +49,9 @@ subroutine locate_source() ! PML use pml_par, only: is_CPML + ! LTS + use specfem_par_lts, only: p_elem,ispec_p_refine,p_lookup,num_p_level + implicit none ! local parameters @@ -93,6 +96,12 @@ subroutine locate_source() integer :: ispec,ier logical :: is_done_sources + ! LTS + integer :: p_spec,ilevel + integer, dimension(:), allocatable :: source_p_value,source_p_ilevel + integer, dimension(:,:), allocatable :: source_p_elem + integer, dimension(:), allocatable :: sendbufv,recvbufv + ! timer MPI double precision :: tstart,tCPU double precision, external :: wtime @@ -308,6 +317,56 @@ subroutine locate_source() enddo call any_all_1Darray_l(is_CPML_source,is_CPML_source_all,NSOURCES) + ! LTS + if (LTS_MODE) then + allocate(source_p_value(NSOURCES),source_p_ilevel(NSOURCES), & + source_p_elem(num_p_level,NSOURCES), & + sendbufv(num_p_level),recvbufv(num_p_level), stat=ier) + if (ier /= 0) call exit_MPI(myrank,'Error allocating source_p arrays') + source_p_value(:) = 0 + source_p_ilevel(:) = 0 + source_p_elem(:,:) = 0 + ! collects p-level info of source element + do isource = 1,NSOURCES + if (myrank == islice_selected_source(isource)) then + ispec = ispec_selected_source(isource) + ! p_value + p_spec = ispec_p_refine(ispec) + ilevel = p_lookup(p_spec) + source_p_value(isource) = p_spec + source_p_ilevel(isource) = ilevel + ! here use integer values for P-elem since the send routine sendrecv_all_i() is only implemented for integers + ! explicit if-case to avoid a ternary operator like: s = (p_elem == .true.)? 1:0 + do ilevel = 1,num_p_level + if (p_elem(ispec,ilevel) .eqv. .true.) then + source_p_elem(ilevel,isource) = 1 + else + source_p_elem(ilevel,isource) = 0 + endif + enddo + endif + ! send to main process if needed + if (islice_selected_source(isource) /= 0) then + if (myrank == 0) then + ! main process gets p-value + call recv_singlei(source_p_value(isource),islice_selected_source(isource),itag) + ! ilevel-value + call recv_singlei(source_p_ilevel(isource),islice_selected_source(isource),itag) + ! p_elem as integer-value + call recv_i(recvbufv,num_p_level,islice_selected_source(isource),itag) + source_p_elem(:,isource) = recvbufv(:) + else if (myrank == islice_selected_source(isource)) then + ! slice with sources sents its values + call send_singlei(source_p_value(isource),0,itag) + ! ilevel-value + call send_singlei(source_p_ilevel(isource),0,itag) + ! p_elem as integer-value + sendbufv(:) = source_p_elem(:,isource) + call send_i(sendbufv,num_p_level,0,itag) + endif + endif + enddo + endif ! sets new utm coordinates for best locations utm_x_source(:) = x_found(:) @@ -362,6 +421,13 @@ subroutine locate_source() write(IMAIN,*) ' in unknown domain' endif write(IMAIN,*) + ! LTS info + if (LTS_MODE) then + write(IMAIN,*) ' in LTS ilevel : ',source_p_ilevel(isource) + write(IMAIN,*) ' p-value : ',source_p_value(isource) + write(IMAIN,*) ' p-elem : ',source_p_elem(:,isource) + write(IMAIN,*) + endif ! source location (reference element) if (USE_FORCE_POINT_SOURCE) then @@ -630,6 +696,7 @@ subroutine locate_source() deallocate(x_found,y_found,z_found,elevation,final_distance) deallocate(x_target,y_target,z_target,idomain) deallocate(is_CPML_source,is_CPML_source_all) + if (LTS_MODE) deallocate(source_p_value,source_p_ilevel,source_p_elem,sendbufv,recvbufv) end subroutine locate_source diff --git a/src/specfem3D/lts_setup.F90 b/src/specfem3D/lts_setup.F90 index ccffc4fd9..a723e7636 100644 --- a/src/specfem3D/lts_setup.F90 +++ b/src/specfem3D/lts_setup.F90 @@ -33,6 +33,12 @@ ! Newmark local time stepping on high-performance computing architectures, ! Journal of Comp. Physics, 334, p. 308-326. ! https://doi.org/10.1016/j.jcp.2016.11.012 +! +! Rietmann, M., B. Ucar, D. Peter, O. Schenk, M. Grote, 2015. +! Load-balanced local time stepping for large-scale wave propagation, +! in: Parallel Distributed Processing Symposium (IPDPS), IEEE International, May 2015. +! https://doi.org/10.1109/IPDPS.2015.10 + ! LTS setup routines ! @@ -42,10 +48,11 @@ subroutine lts_setup() use constants, only: NDIM,CUSTOM_REAL,VERYSMALLVAL,FIX_UNDERFLOW_PROBLEM,IMAIN,itag,myrank - use shared_parameters, only: DT,NSTEP,NPROC + use shared_parameters, only: DT,NSTEP,NPROC,SIMULATION_TYPE,LTS_MODE,GPU_MODE,USE_LDDRK, & + SAVE_SEISMOGRAMS_ACCELERATION,CREATE_SHAKEMAP use specfem_par, only: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION,POROELASTIC_SIMULATION, & - deltat,NGLOB_AB,NSPEC_AB + deltat,NGLOB_AB ! MPI interfaces use specfem_par, only: num_interfaces_ext_mesh,ibool_interfaces_ext_mesh,nibool_interfaces_ext_mesh, & @@ -62,25 +69,32 @@ subroutine lts_setup() ! local parameters integer :: ilevel,p,m,step - integer :: p_min,p_max,p_min_glob,p_max_glob - integer :: ipoin, iinterface, iglob, ier + integer :: p_min,p_max,p_min_glob,p_max_glob,p_node + integer :: i, ipoin, iinterface, iglob, ier real(kind=CUSTOM_REAL) :: duration integer, dimension(:), allocatable :: num_boundary_p, num_p_glob ! check integer, dimension(:,:), allocatable ::nibool_interface_p_refine_all integer, dimension(:), allocatable :: sendbuf,recvbuf - integer :: i,inum,ispec - integer,dimension(:),allocatable :: num_ispec_level - double precision :: total_work,total_work_lts,total_work_lts_b + double precision :: lts_speedup,lts_speedup_with_boundary double precision :: deltat_lts + double precision :: memory_size + integer :: is,ie,is0,ie0 ! debugging - integer :: is,ie integer :: iproc + ! checks if anything to do + if (.not. LTS_MODE) return + + ! safety checks ! checks simulation domain types - if (ACOUSTIC_SIMULATION) call exit_MPI(myrank,'Error LTS routines only implemented for purely ELASTIC simulations...') - if (POROELASTIC_SIMULATION) call exit_MPI(myrank,'Error LTS routines only implemented for purely ELASTIC simulations...') - if (.not. ELASTIC_SIMULATION) call exit_MPI(myrank,'Error LTS routines only implemented for purely ELASTIC simulations...') + if (ACOUSTIC_SIMULATION) call exit_MPI(myrank,'LTS routines only implemented for purely ELASTIC simulations') + if (POROELASTIC_SIMULATION) call exit_MPI(myrank,'LTS routines only implemented for purely ELASTIC simulations') + if (.not. ELASTIC_SIMULATION) call exit_MPI(myrank,'LTS routines only implemented for purely ELASTIC simulations') + + if (SIMULATION_TYPE /= 1) call exit_MPI(myrank,'LTS routines only implemented for forward simulations (SIMULATION_TYPE == 1)') + if (USE_LDDRK) call exit_MPI(myrank,'LTS routines only implemented for Newark time scheme (USE_LDDRK == .false.)') + if (GPU_MODE) call exit_MPI(myrank,'LTS routines only implemented for CPU-only runs (GPU_MODE == .false.)') ! user output if (myrank == 0) then @@ -104,6 +118,35 @@ subroutine lts_setup() endif call synchronize_all() + ! safety check + if (num_p_level == 0) stop 'Error LTS mode needs at least a single p-level' + + ! we assume contiguous range of p-level nodes + ! checks iglob range + if (p_level_iglob_start(1) /= 1) call exit_mpi(myrank,"ASSERT: p-level iglob should start at 1") + if (p_level_iglob_end(num_p_level) /= NGLOB_AB) call exit_mpi(myrank,"ASSERT: coarsest p-level must have all iglobs!") + ! checks levels + do ilevel = 1,num_p_level + ! start index of current p-level + is = p_level_iglob_start(ilevel) + + ! end index of current p-level + ie = p_level_iglob_end(ilevel) + + ! start index of finest level + is0 = p_level_iglob_start(1) + + ! end index of next finer p-level + if (ilevel > 1) then + ie0 = p_level_iglob_end(ilevel-1) + endif + + ! checks + if (ie < is) stop 'Error lts newmark update: end index of current level invalid' + if (ie < is0) stop 'Error lts newmark update: start/end index invalid' + if (ilevel > 1 .and. ie0 < is0) stop 'Error lts newmark update: end index of finer level invalid' + enddo + ! desired initial duration duration = sngl(NSTEP * DT) @@ -121,104 +164,27 @@ subroutine lts_setup() deltat = deltat_lts ! counts number of local time step evaluations - it_local = 0 + lts_it_local = 0 do step = 1,num_p_level_steps ilevel = p_level_steps(step) p = p_level(ilevel) do m = 1,p_level_loops(ilevel) - it_local = it_local + 1 + lts_it_local = lts_it_local + 1 enddo enddo - NSTEP_LOCAL = it_local + NSTEP_LOCAL = lts_it_local + + ! gets theoretical speed-up values (valid only for all elements belonging to same domain type) + call lts_get_theoretical_speedup(lts_speedup,lts_speedup_with_boundary) ! a map for p-level -> ilevel allocate(p_level_ilevel_map(p_level(1)),stat=ier) if (ier /= 0) call exit_MPI(myrank,'Error allocating working LTS fields p_level_ilevel_map') - p_level_ilevel_map(:) = 0 do ilevel = 1,num_p_level p_level_ilevel_map(p_level(ilevel)) = ilevel enddo - ! counts number of elements for each level - allocate(num_ispec_level(num_p_level),stat=ier) - if (ier /= 0) call exit_MPI(myrank,'Error allocating array num_ispec_level') - num_ispec_level(:) = 0 - do i = 1,num_p_level - p = p_level(i) - num_ispec_level(i) = count(ispec_p_refine(:) == p) - enddo - - ! note: some partitions might not contain any element within a certain p-level - ! collect on master - if (NPROC > 1) then - do ilevel = 1,num_p_level - call sum_all_all_i(num_ispec_level(ilevel),inum) - num_ispec_level(ilevel) = inum - enddo - endif - if (minval(num_ispec_level(:)) == 0) call exit_MPI(myrank,'Error p level without element found') - - ! user output - if (myrank == 0) then - write(IMAIN,*) ' p-level number of elements: ',num_ispec_level(:) - call flush_IMAIN() - endif - - ! theoretical speed-up values (valid only for all elements belonging to same domain type) - total_work = 0.0 - total_work_lts = 0.0 - if (myrank == 0) then - ! "pure" speed-up, without taking account of additional coarse/fine boundary contributions - do ilevel = 1,num_p_level - p = p_level(ilevel) - total_work_lts = total_work_lts + num_ispec_level(ilevel) * p - enddo - ! work without lts - total_work = sum( num_ispec_level(:)) * maxval(p_level(:)) - endif - - ! counts boundary elements - num_ispec_level(:) = 0 - do ilevel = 1,num_p_level - do ispec = 1,NSPEC_AB - if (boundary_elem(ispec,ilevel)) then - num_ispec_level(ilevel) = num_ispec_level(ilevel) + 1 - endif - enddo - enddo - - ! note: some partitions might not contain any element within a certain p-level - ! collect on master - if (NPROC > 1 ) then - do ilevel = 1,num_p_level - call sum_all_all_i(num_ispec_level(ilevel),inum) - num_ispec_level(ilevel) = inum - enddo - endif - ! work with additional coarse/fine boundary contributions - total_work_lts_b = total_work_lts - if (myrank == 0) then - do ilevel = 1,num_p_level - p = p_level(ilevel) - total_work_lts_b = total_work_lts_b + num_ispec_level(ilevel) * p - enddo - endif - - ! user output - call synchronize_all() - if (myrank == 0) then - write(IMAIN,*) ' p-level boundary elements : ',num_ispec_level(:) - write(IMAIN,*) - write(IMAIN,*) ' theoretical speed-up value: ',sngl(total_work / total_work_lts),'(without boundary contributions)' - write(IMAIN,*) ' theoretical speed-up value: ',sngl(total_work / total_work_lts_b),'(with boundary contributions)' - write(IMAIN,*) - call flush_IMAIN() - endif - call synchronize_all() - - deallocate(num_ispec_level) - ! p_lookup maps p -> ilevel allocate(p_lookup(maxval(p_level)),stat=ier) if (ier /= 0) call exit_MPI(myrank,'Error allocating p_lookup') @@ -250,17 +216,17 @@ subroutine lts_setup() do ipoin = 1, nibool_interfaces_ext_mesh(iinterface) ! gets p value for this interface points iglob = ibool_interfaces_ext_mesh(ipoin,iinterface) - p = iglob_p_refine(iglob) + p_node = iglob_p_refine(iglob) ! debug - ! if (myrank == 1) print *, "p=", p + ! if (myrank == 1) print *, "p_node=", p_node ! for each interface, stores for each ilevel its corresponding p value if p-nodes on this interface where found - interface_p_refine_all(iinterface,p_lookup(p)) = p - num_boundary_p(p_lookup(p)) = num_boundary_p(p_lookup(p)) + 1 + interface_p_refine_all(iinterface,p_lookup(p_node)) = p_node + num_boundary_p(p_lookup(p_node)) = num_boundary_p(p_lookup(p_node)) + 1 ! counts p-nodes on interface - nibool_interface_p_refine_all(iinterface,p_lookup(p)) = nibool_interface_p_refine_all(iinterface,p_lookup(p)) + 1 + nibool_interface_p_refine_all(iinterface,p_lookup(p_node)) = nibool_interface_p_refine_all(iinterface,p_lookup(p_node)) + 1 enddo ! debug !print *,'rank ',myrank,'interface neighbor',my_neighbors_ext_mesh(iinterface) @@ -277,8 +243,8 @@ subroutine lts_setup() ! call synchronize_all() ! if (myrank == iproc) then ! print *,'rank',myrank,' num boundary p:' - ! do ip = 1,num_p_level - ! print *,' level ',ip,' total number of p-nodes on MPI boundary = ',num_boundary_p(ip) + ! do ilevel = 1,num_p_level + ! print *,' level ',ilevel,' total number of p-nodes on MPI boundary = ',num_boundary_p(ilevel) ! enddo ! endif !enddo @@ -314,11 +280,12 @@ subroutine lts_setup() endif !debug + ! num_p_glob(:) = 0 ! do iglob = 1,NGLOB_AB - ! p = iglob_p_refine(iglob) - ! num_p_glob(p_lookup(p)) = num_p_glob(p_lookup(p)) + 1 + ! p_node = iglob_p_refine(iglob) + ! num_p_glob(p_lookup(p_node)) = num_p_glob(p_lookup(p_node)) + 1 ! enddo - ! if (myrank==0) print *, "Rank ,p,#dof" + ! if (myrank==0) print *, "Rank, p, #dof" ! do ilevel = 1,num_p_level ! if (num_boundary_p(ilevel) > 0) then ! print *, myrank, ",", p_level(ilevel), ",", num_p_glob(ilevel) @@ -336,10 +303,10 @@ subroutine lts_setup() ! loops over all global points do iglob = 1,NGLOB_AB ! gets p value for this node - p = iglob_p_refine(iglob) + p_node = iglob_p_refine(iglob) ! gets ilevel index for this p-value - ilevel = p_lookup(p) + ilevel = p_lookup(p_node) ! start/end index for this p-level is = p_level_iglob_start(ilevel) @@ -364,10 +331,10 @@ subroutine lts_setup() call exit_MPI(myrank,'Error iglob invalid in LTS level') endif ! checks associated p-level - p = iglob_p_refine(iglob) - i = p_lookup(p) + p_node = iglob_p_refine(iglob) + i = p_lookup(p_node) if (i /= ilevel) then - print *,'Error rank:',myrank,'has iglob value ',iglob,'in level',i,'range:',is,ie,'p-value:',p,'ilevel:',ilevel + print *,'Error rank:',myrank,'has iglob value ',iglob,'in level',i,'range:',is,ie,'p-value:',p_node,'ilevel:',ilevel call exit_MPI(myrank,'Error iglob level invalid in LTS level') endif enddo @@ -388,10 +355,18 @@ subroutine lts_setup() if (ier /= 0) stop 'Error allocating lts_current_m array' lts_current_m(:) = 0 + ! initializes current pointers + current_lts_elem => p_elem(:,1) ! current p-level elements + current_lts_boundary_elem => boundary_elem(:,1) ! current boundary elements + + ! current p-level time + current_lts_time = 0.d0 + ! user output if (myrank == 0) then - write(IMAIN,*) ' array size for working fields displ_p,veloc_p: ', & - dble(NDIM)*dble(NGLOB_AB)*dble(num_p_level)*dble(CUSTOM_REAL)/ 1024./1024.,'MB' + ! wavefield sizes displ_p,veloc_p + memory_size = dble(NDIM) * dble(NGLOB_AB) * dble(num_p_level) * dble(CUSTOM_REAL) + write(IMAIN,*) ' array size for working fields displ_p, veloc_p: ', sngl(memory_size) / 1024./ 1024.,'MB' write(IMAIN,*) call flush_IMAIN() endif @@ -406,23 +381,44 @@ subroutine lts_setup() ! put negligible initial value to avoid very slow underflow trapping if (FIX_UNDERFLOW_PROBLEM) displ_p(:,:,:) = VERYSMALLVAL - !#TODO: LTS reset DT and NSTEP - ! re-sets NSTEP - !NSTEP = ceiling( duration / deltat_lts ) + ! collected acceleration + ! only needed for seismograms and shakemaps + if (SAVE_SEISMOGRAMS_ACCELERATION .or. CREATE_SHAKEMAP) then + allocate(accel_collected(NDIM,NGLOB_AB),stat=ier) + if (ier /= 0) call exit_MPI(myrank,'Error allocating working LTS fields displ_p,veloc_p') + accel_collected(:,:) = 0.0_CUSTOM_REAL + endif - ! re-sets global time-step to be coarsest level - !DT = deltat_lts + ! user output + call synchronize_all() + if (myrank == 0) then + write(IMAIN,*) ' original time step : ',sngl(DT) + write(IMAIN,*) ' number of time steps : ',NSTEP + write(IMAIN,*) ' total duration : ',sngl(NSTEP * DT) + write(IMAIN,*) + call flush_IMAIN() + endif + + ! re-sets simulation NSTEP + NSTEP = ceiling( duration / deltat_lts ) + + ! re-sets simulation global time step to be coarsest-level time step + DT = deltat_lts ! user output call synchronize_all() if (myrank == 0) then - write(IMAIN,*) ' final lts time step: ',sngl(DT) - write(IMAIN,*) ' new number of time steps: ',NSTEP - write(IMAIN,*) ' local time steps: ',NSTEP_LOCAL - write(IMAIN,*) ' new duration: ',sngl(NSTEP * DT) + write(IMAIN,*) ' new LTS time step : ',sngl(DT) + write(IMAIN,*) ' number of time steps : ',NSTEP + write(IMAIN,*) ' total duration : ',sngl(NSTEP * DT) + write(IMAIN,*) ' number of local time steps: ',NSTEP_LOCAL + write(IMAIN,*) + write(IMAIN,*) ' theoretical speed-up value: ',sngl(lts_speedup),'(without boundary contributions)' + write(IMAIN,*) ' theoretical speed-up value: ',sngl(lts_speedup_with_boundary),'(with boundary contributions)' write(IMAIN,*) call flush_IMAIN() endif + call synchronize_all() ! user output call synchronize_all() @@ -537,6 +533,122 @@ subroutine lts_read_databases() end subroutine lts_read_databases +! +!-------------------------------------------------------------------------------------------- +! + + subroutine lts_get_theoretical_speedup(lts_speedup,lts_speedup_with_boundary) + + use constants, only: IMAIN,myrank + use shared_parameters, only: NPROC + use specfem_par, only: NSPEC_AB + + use specfem_par_lts + + implicit none + + double precision, intent(out) :: lts_speedup,lts_speedup_with_boundary + + ! local parameters + double precision :: total_work,total_work_lts,total_work_lts_b + integer,dimension(:),allocatable :: num_ispec_level + integer :: ispec,p,ilevel,inum,ier + + ! theoretical speed-up values (valid only for all elements belonging to same domain type) + total_work = 0.0 + total_work_lts = 0.0 + + ! counts number of elements for each level + allocate(num_ispec_level(num_p_level),stat=ier) + if (ier /= 0) call exit_MPI(myrank,'Error allocating array num_ispec_level') + num_ispec_level(:) = 0 + + do ilevel = 1,num_p_level + p = p_level(ilevel) + num_ispec_level(ilevel) = count(ispec_p_refine(:) == p) + enddo + + ! note: some partitions might not contain any element within a certain p-level + ! collect on master + if (NPROC > 1) then + do ilevel = 1,num_p_level + call sum_all_all_i(num_ispec_level(ilevel),inum) + num_ispec_level(ilevel) = inum + enddo + endif + if (minval(num_ispec_level(:)) == 0) call exit_MPI(myrank,'Error p level without element found') + + ! user output + if (myrank == 0) then + write(IMAIN,*) ' p-level number of elements: ',num_ispec_level(:) + call flush_IMAIN() + endif + + ! computes total work + if (myrank == 0) then + ! "pure" speed-up, without taking account of additional coarse/fine boundary contributions + do ilevel = 1,num_p_level + p = p_level(ilevel) + total_work_lts = total_work_lts + num_ispec_level(ilevel) * p + enddo + ! work without lts + total_work = sum( num_ispec_level(:)) * maxval(p_level(:)) + endif + + ! counts boundary elements + num_ispec_level(:) = 0 + do ilevel = 1,num_p_level + do ispec = 1,NSPEC_AB + if (boundary_elem(ispec,ilevel)) then + num_ispec_level(ilevel) = num_ispec_level(ilevel) + 1 + endif + enddo + enddo + + ! note: some partitions might not contain any element within a certain p-level + ! collect on master + if (NPROC > 1 ) then + do ilevel = 1,num_p_level + call sum_all_all_i(num_ispec_level(ilevel),inum) + num_ispec_level(ilevel) = inum + enddo + endif + + ! user output + if (myrank == 0) then + write(IMAIN,*) ' p-level boundary elements : ',num_ispec_level(:) + write(IMAIN,*) + call flush_IMAIN() + endif + + ! work with additional coarse/fine boundary contributions + total_work_lts_b = total_work_lts + if (myrank == 0) then + do ilevel = 1,num_p_level + p = p_level(ilevel) + total_work_lts_b = total_work_lts_b + num_ispec_level(ilevel) * p + enddo + endif + + ! speedup factors + if (total_work_lts /= 0.d0) then + lts_speedup = total_work / total_work_lts + else + lts_speedup = 0.d0 + endif + + ! speedup factor w/ boundary work + if (total_work_lts_b /= 0.d0) then + lts_speedup_with_boundary = total_work / total_work_lts_b + else + lts_speedup_with_boundary = 0.d0 + endif + + ! free temporary array + deallocate(num_ispec_level) + + end subroutine lts_get_theoretical_speedup + ! !-------------------------------------------------------------------------------------------- ! @@ -557,10 +669,13 @@ subroutine lts_setup_level_boundary() use specfem_par_lts, only: & p_level_ilevel_map, num_p_level, boundary_elem, iglob_p_refine, & - boundary_ispec, ispec_counter, ibool_from, ibool_counter, ilevel_from, & + num_p_level_boundary_ispec,num_p_level_boundary_nodes, p_level_boundary_node, & + p_level_boundary_ispec, & + p_level_boundary_ilevel_from, & num_p_level_coarser_to_update, p_level_coarser_to_update, & - num_interface_p_refine_ibool, interface_p_refine_ibool, p_level_iglob_end, & - p_level,p_level_m_loops + num_interface_p_refine_ibool, interface_p_refine_ibool, & + p_level_iglob_start, p_level_iglob_end, & + p_level, p_level_m_loops, current_lts_boundary_elem implicit none @@ -573,12 +688,10 @@ subroutine lts_setup_level_boundary() integer :: ilevel, iphase, ispec_p, ispec, num_elements, iglob integer :: jlevel, iglob_n, ipoin, iinterface, ipoin_n, ier, ipoin_old integer :: p,m - - logical, dimension(:), allocatable :: duplicate_check - - integer :: i,j,k,icounter + integer :: i,j,k,icounter,inum_poin,inum_spec integer :: coarser_counter, coarser_counter_new, num_ipoin_coarser_neighbor - + integer :: is,ie + integer :: p_node integer, dimension(:), allocatable :: interface_p_refine_ipoin integer, dimension(:), allocatable :: ipoin_coarser integer, dimension(:), allocatable :: recv_interface_p_refine_ipoin @@ -590,6 +703,8 @@ subroutine lts_setup_level_boundary() integer, dimension(:,:), allocatable :: tmp_p_level_coarser_nodes integer, dimension(:), allocatable :: num_p_level_coarser_nodes + logical, dimension(:), allocatable :: mask_ibool + ! timing logical, parameter :: DEBUG_TIMING = .false. double precision, external :: wtime @@ -604,7 +719,6 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then write(IMAIN,*) ' p-level boundary setup:' - write(IMAIN,*) call flush_IMAIN() endif call synchronize_all() @@ -612,29 +726,30 @@ subroutine lts_setup_level_boundary() ! get MPI starting time if (DEBUG_TIMING) time_start = wtime() - ! temporary arrays - allocate(boundary_ispec(NSPEC_AB,2,num_p_level),stat=ier) - if (ier /= 0) stop 'Error allocating arrays boundary_ispec,...' - boundary_ispec(:,:,:) = 0 + ! boundary arrays + allocate(p_level_boundary_ispec(NSPEC_AB,2,num_p_level),stat=ier) + if (ier /= 0) stop 'Error allocating arrays p_level_boundary_ispec,...' + p_level_boundary_ispec(:,:,:) = 0 - allocate(ibool_from(NGLOB_AB,2,num_p_level),stat=ier) - if (ier /= 0) stop 'Error allocating arrays ibool_from,...' - ibool_from(:,:,:) = 0 + allocate(p_level_boundary_node(NGLOB_AB,2,num_p_level),stat=ier) + if (ier /= 0) stop 'Error allocating arrays p_level_boundary_node,...' + p_level_boundary_node(:,:,:) = 0 - allocate(ilevel_from(NGLOB_AB,2,num_p_level),stat=ier) - if (ier /= 0) stop 'Error allocating arrays ilevel_from,...' - ilevel_from(:,:,:) = 0 + allocate(p_level_boundary_ilevel_from(NGLOB_AB,2,num_p_level),stat=ier) + if (ier /= 0) stop 'Error allocating arrays p_level_boundary_ilevel_from,...' + p_level_boundary_ilevel_from(:,:,:) = 0 - allocate(duplicate_check(NGLOB_AB),stat=ier) - if (ier /= 0) stop 'Error allocating arrays duplicate_check,...' - duplicate_check(:) = .true. + ! counters + allocate(num_p_level_boundary_nodes(2,num_p_level), & + num_p_level_boundary_ispec(2,num_p_level),stat=ier) + if (ier /= 0) stop 'Error allocating arrays num_p_level_boundary_nodes,...' + num_p_level_boundary_nodes(:,:) = 0 + num_p_level_boundary_ispec(:,:) = 0 - ! counters needed for GPU mode - allocate(ibool_counter(2,num_p_level), & - ispec_counter(2,num_p_level),stat=ier) - if (ier /= 0) stop 'Error allocating arrays ibool_counter,...' - ibool_counter(:,:) = 0 - ispec_counter(:,:) = 0 + ! temporary arrays + allocate(mask_ibool(NGLOB_AB),stat=ier) + if (ier /= 0) stop 'Error allocating arrays mask_ibool' + mask_ibool(:) = .false. ! temporary arrays for nodes in coarser p-levels allocate(p_level_coarser_nodes(NGLOB_AB,num_p_level),stat=ier) @@ -647,7 +762,7 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then - write(IMAIN,*) " determining coarser p-level nodes" + write(IMAIN,*) " determining coarser p-level nodes" call flush_IMAIN() endif call synchronize_all() @@ -658,10 +773,13 @@ subroutine lts_setup_level_boundary() do ilevel = 1,num_p_level + ! current boundary elements + current_lts_boundary_elem => boundary_elem(:,ilevel) + do iphase = 1,2 - ibool_counter(iphase,ilevel) = 0 - ispec_counter(iphase,ilevel) = 0 + num_p_level_boundary_nodes(iphase,ilevel) = 0 + num_p_level_boundary_ispec(iphase,ilevel) = 0 ! choses inner/outer elements if (iphase == 1) then @@ -670,29 +788,47 @@ subroutine lts_setup_level_boundary() num_elements = nspec_inner_elastic endif + ! counters + inum_spec = 0 + inum_poin = 0 + mask_ibool(:) = .false. + do ispec_p = 1,num_elements ispec = phase_ispec_inner_elastic(ispec_p,iphase) ! only elements belonging to a p-level boundary - if (boundary_elem(ispec,ilevel) .eqv. .true.) then - ispec_counter(iphase,ilevel) = ispec_counter(iphase,ilevel) + 1 - boundary_ispec(ispec_counter(iphase,ilevel),iphase,ilevel) = ispec + if (current_lts_boundary_elem(ispec)) then + ! boundary elements + inum_spec = inum_spec + 1 + + num_p_level_boundary_ispec(iphase,ilevel) = inum_spec + p_level_boundary_ispec(inum_spec,iphase,ilevel) = ispec do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX iglob = ibool(i,j,k,ispec) - p = iglob_p_refine(iglob) - ibool_counter(iphase,ilevel) = ibool_counter(iphase,ilevel) + 1 - ibool_from(ibool_counter(iphase,ilevel),iphase,ilevel) = iglob - ilevel_from(ibool_counter(iphase,ilevel),iphase,ilevel) = p_level_ilevel_map(p) + ! associated p-level value + p_node = iglob_p_refine(iglob) + + ! add global point only once + if (.not. mask_ibool(iglob)) then + mask_ibool(iglob) = .true. + ! counter + inum_poin = inum_poin + 1 - ! adds node - if (p_level_ilevel_map(p) /= ilevel) then - num_p_level_coarser_nodes(ilevel) = num_p_level_coarser_nodes(ilevel) + 1 - p_level_coarser_nodes(num_p_level_coarser_nodes(ilevel),ilevel) = iglob + num_p_level_boundary_nodes(iphase,ilevel) = inum_poin + p_level_boundary_node(inum_poin,iphase,ilevel) = iglob + + p_level_boundary_ilevel_from(inum_poin,iphase,ilevel) = p_level_ilevel_map(p_node) + + ! adds node to coarser nodes + if (p_level_ilevel_map(p_node) /= ilevel) then + num_p_level_coarser_nodes(ilevel) = num_p_level_coarser_nodes(ilevel) + 1 + p_level_coarser_nodes(num_p_level_coarser_nodes(ilevel),ilevel) = iglob + endif endif enddo enddo @@ -704,7 +840,7 @@ subroutine lts_setup_level_boundary() ! gets maximum number of coarser nodes if (myrank == 0) then - write(IMAIN,*) " maximum coarser nodes: ",maxval(num_p_level_coarser_nodes(:)) + write(IMAIN,*) " maximum coarser nodes: ",maxval(num_p_level_coarser_nodes(:)) call flush_IMAIN() endif call synchronize_all() @@ -736,15 +872,15 @@ subroutine lts_setup_level_boundary() ! loops over finer p-levels do ilevel = 1,(num_p_level-1) ! checks if nodes have been taken multiple times from coarser p-boundary already - duplicate_check(:) = .true. + mask_ibool(:) = .false. ! loops over all levels up to current one do jlevel = 1,ilevel do iglob_n = 1,num_p_level_coarser_nodes(jlevel) ! checks if node belongs to a coarser level iglob = tmp_p_level_coarser_nodes(iglob_n,jlevel) - p = iglob_p_refine(iglob) + p_node = iglob_p_refine(iglob) - if (p_level_ilevel_map(p) > ilevel .and. duplicate_check(iglob)) then + if (p_level_ilevel_map(p_node) > ilevel .and. (.not. mask_ibool(iglob))) then ! adds node icounter = num_p_level_coarser_to_update(ilevel) icounter = icounter + 1 @@ -752,7 +888,7 @@ subroutine lts_setup_level_boundary() num_p_level_coarser_to_update(ilevel) = icounter p_level_coarser_to_update(icounter,ilevel) = iglob - duplicate_check(iglob) = .false. + mask_ibool(iglob) = .true. endif enddo enddo @@ -761,13 +897,13 @@ subroutine lts_setup_level_boundary() ! frees temporary arrays deallocate(tmp_p_level_coarser_nodes) deallocate(num_p_level_coarser_nodes) - deallocate(duplicate_check) + deallocate(mask_ibool) ! user output if (DEBUG_TIMING) then if (myrank == 0 ) then tCPU = wtime() - time_start - write(IMAIN,*) " time in seconds = ", tCPU + write(IMAIN,*) " time in seconds = ", tCPU call flush_IMAIN() endif time_start = wtime() @@ -775,7 +911,7 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then - write(IMAIN,*) " building MPI arrays" + write(IMAIN,*) " building MPI arrays" call flush_IMAIN() endif call synchronize_all() @@ -801,14 +937,14 @@ subroutine lts_setup_level_boundary() if (TEST_ERROR) then ! user output if (myrank == 0) then - write(IMAIN,*) " testing LTS arrays" + write(IMAIN,*) " testing LTS initial MPI arrays" call flush_IMAIN() endif call synchronize_all() ! user output if (myrank == 0) then - write(IMAIN,*) " checking initial interface locations" + write(IMAIN,*) " checking initial interface locations" call flush_IMAIN() endif @@ -876,7 +1012,7 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then - write(IMAIN,*) " test result okay" + write(IMAIN,*) " test result okay" call flush_IMAIN() endif call synchronize_all() @@ -893,7 +1029,7 @@ subroutine lts_setup_level_boundary() do ilevel = 1,num_p_level-1 ! user output if (myrank == 0 ) then - write(IMAIN,*) " MPI interfaces for finer p-levels: level = ",ilevel," p-value = ",p_level(ilevel) + write(IMAIN,*) " MPI interfaces for finer p-levels: level = ",ilevel," p-value = ",p_level(ilevel) call flush_IMAIN() endif if (DEBUG_TIMING) time_start = wtime() @@ -1041,7 +1177,7 @@ subroutine lts_setup_level_boundary() " out of ", num_interfaces_ext_mesh,"interfaces" if (DEBUG_TIMING) then tCPU = wtime() - time_start - write(IMAIN,*) " time in seconds = ", tCPU,"s" + write(IMAIN,*) " time in seconds = ", tCPU,"s" endif ! flushes file buffer for main output file (IMAIN) call flush_IMAIN() @@ -1055,18 +1191,34 @@ subroutine lts_setup_level_boundary() ! frees temporary array deallocate(interface_p_refine_ipoin) + ! checks p_level_coarser_to_update + do ilevel = 1,num_p_level + ! gets start index of finest level and end index of current p-level + is = p_level_iglob_start(1) + ie = p_level_iglob_end(ilevel) + if (ilevel < num_p_level) then + ! considers contributions from coarser to finer p-levels + do iglob_n = 1,num_p_level_coarser_to_update(ilevel) + iglob = p_level_coarser_to_update(iglob_n,ilevel) + ! checks + if (iglob < ie) call exit_mpi(myrank,"ASSERT: coarser iglob should start in next coarser level") + if (iglob > NGLOB_AB) call exit_mpi(myrank,"ASSERT: coarser iglob index is out of bounds!") + enddo + endif + enddo + ! tests MPI interface arrays if (TEST_ERROR) then ! user output if (myrank == 0) then - write(IMAIN,*) " testing LTS arrays" + write(IMAIN,*) " testing LTS MPI arrays" call flush_IMAIN() endif call synchronize_all() ! user output if (myrank == 0) then - write(IMAIN,*) " checking iglob index range" + write(IMAIN,*) " checking iglob index range" call flush_IMAIN() endif @@ -1095,7 +1247,7 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then - write(IMAIN,*) " checking interface locations" + write(IMAIN,*) " checking interface locations" call flush_IMAIN() endif call synchronize_all() @@ -1162,11 +1314,11 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then + write(IMAIN,*) " test result okay" if (DEBUG_TIMING) then tCPU = wtime() - time_start - write(IMAIN,*) " time in seconds = ", tCPU + write(IMAIN,*) " time in seconds = ", tCPU endif - write(IMAIN,*) " test result okay" call flush_IMAIN() endif call synchronize_all() @@ -1193,6 +1345,51 @@ subroutine lts_setup_level_boundary() p_level_m_loops(m) = p_level(m)/p_level(m+1) enddo + ! checks boundary node p-values + do ilevel = 1,num_p_level + ! current boundary elements + current_lts_boundary_elem => boundary_elem(:,ilevel) + + ! p (dt/p) refinement number in this specified p-level + p = p_level(ilevel) + + ! inner/outer elements + do iphase = 1,2 + ! choses inner/outer elements + if (iphase == 1) then + num_elements = nspec_outer_elastic + else + num_elements = nspec_inner_elastic + endif + + do ispec_p = 1,num_elements + ! returns element id from stored element list + ispec = phase_ispec_inner_elastic(ispec_p,iphase) + + ! only elements belonging to p-level boundary + if (.not. current_lts_boundary_elem(ispec)) cycle + + ! checks element nodes + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + iglob = ibool(i,j,k,ispec) + ! checks if node belongs to this or coarser p-level + p_node = iglob_p_refine(iglob) + if (p_node > p) then + ! coarser p-levels have smaller p values, finer p-levels have larger p values, + ! such that local time step delta_lts==DT/p + print *,'Error: boundary node p value is invalid: ',p_node,' on level',ilevel,'p',p + print *,' iglob ',iglob,'i/j/k/ispec',i,j,k,ispec + call exit_mpi(myrank,"Error: invalid p-value node; Assert(boundary nodes are this level or coarser only)") + endif + enddo + enddo + enddo + enddo ! ispec + enddo ! iphase + enddo ! ilevel + ! computes true cost of MPI in terms of total elements sent mpi_cost = 0 call lts_mpi_cost(num_p_level,mpi_cost) @@ -1206,9 +1403,9 @@ subroutine lts_setup_level_boundary() ! user output if (myrank == 0) then - write(IMAIN,*) " Communication cost: sum(mpi_cost)=",sum(mpi_cost_gather), & - " avg(mpi_cost)=",sngl(sum(mpi_cost_gather)/dble(NPROC)) - write(IMAIN,*) " p-level boundary setup done" + write(IMAIN,*) " Communication cost: sum(mpi_cost) = ",sum(mpi_cost_gather) + write(IMAIN,*) " avg(mpi_cost per proc) = ",sngl(sum(mpi_cost_gather)/dble(NPROC)) + write(IMAIN,*) " p-level boundary setup done" write(IMAIN,*) call flush_IMAIN() endif @@ -1217,9 +1414,6 @@ subroutine lts_setup_level_boundary() ! frees temporary arrays deallocate(mpi_cost_gather) deallocate(ipoin_coarser) - deallocate(boundary_ispec) - deallocate(ibool_from) - deallocate(ilevel_from) contains @@ -1372,17 +1566,10 @@ subroutine lts_prepare_mass_matrices() rmassxyz_mod(2,:) = rmass(:) + rmassy(:) rmassxyz_mod(3,:) = rmass(:) + rmassz(:) - !#TODO: LTS reset mass matrix in Stacey case ! re-sets mass matrices without contributions - !rmassx(:) = rmass(:) - !rmassy(:) = rmass(:) - !rmassz(:) = rmass(:) - ! no LTS case - same result as original prepare_timerun_mass_matrices() routine - ! where final rmassx,.. contains both contributions from rmass + initial rmassx, .. - ! to be taken out and replaces with above outcommented lines when LTS is fully implemented: - rmassx(:) = rmass(:) + rmassx(:) - rmassy(:) = rmass(:) + rmassy(:) - rmassz(:) = rmass(:) + rmassz(:) + rmassx(:) = rmass(:) + rmassy(:) = rmass(:) + rmassz(:) = rmass(:) else ! no absorbing boundary contributions @@ -1648,7 +1835,8 @@ subroutine lts_prepare_gpu() ! call transfer_element_list_to_device(Mesh_pointer,element_list,num_element_list) ! ! ! transfer ibool and ilevel from CPU to GPU -! call transfer_boundary_element_list_to_device(Mesh_pointer,ibool_from,ilevel_from,boundary_ispec) +! call transfer_boundary_element_list_to_device(Mesh_pointer,p_level_boundary_node, & +! p_level_boundary_ilevel_from,p_level_boundary_ispec) ! ! ! sends list for coarser nodes to GPU ! call setup_r_boundaries_time_stepping(Mesh_pointer,p_level_coarser_to_update) diff --git a/src/specfem3D/noise_tomography.f90 b/src/specfem3D/noise_tomography.f90 index eb2206e20..0c803d9a1 100644 --- a/src/specfem3D/noise_tomography.f90 +++ b/src/specfem3D/noise_tomography.f90 @@ -709,9 +709,9 @@ end subroutine compute_arrays_source_noise ! step 1: calculate the "ensemble forward source" ! add noise spectrum to the location of main receiver subroutine add_source_main_rec_noise(nrec,NSTEP,accel,noise_sourcearray, & - ibool,islice_selected_rec,ispec_selected_rec, & - it,irec_main_noise, & - nspec,nglob) + ibool,islice_selected_rec,ispec_selected_rec, & + it,irec_main_noise, & + nspec,nglob) use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM,myrank diff --git a/src/specfem3D/read_mesh_databases_hdf5.F90 b/src/specfem3D/read_mesh_databases_hdf5.F90 index 939a29414..0f16f3622 100644 --- a/src/specfem3D/read_mesh_databases_hdf5.F90 +++ b/src/specfem3D/read_mesh_databases_hdf5.F90 @@ -1729,7 +1729,7 @@ subroutine read_mesh_databases_moho_hdf5() implicit none - !#TODO: hdf5 moho version not implemented yet + !#TODO: HDF5 moho version not implemented yet stop 'HDF5_ENABLED version of routine read_mesh_databases_moho_hdf5() not implemented yet' end subroutine read_mesh_databases_moho_hdf5 diff --git a/src/specfem3D/rules.mk b/src/specfem3D/rules.mk index 3270a48a2..76595154a 100644 --- a/src/specfem3D/rules.mk +++ b/src/specfem3D/rules.mk @@ -98,6 +98,10 @@ specfem3D_OBJECTS = \ $O/locate_point.spec.o \ $O/locate_receivers.spec.o \ $O/locate_source.spec.o \ + $O/lts_assemble_MPI_vector.spec.o \ + $O/lts_global_step.spec.o \ + $O/lts_iterate_time.spec.o \ + $O/lts_newmark_update.spec.o \ $O/lts_setup.spec.o \ $O/make_gravity.spec.o \ $O/noise_tomography.spec.o \ @@ -336,6 +340,7 @@ $O/fault_solver_dynamic.spec.o: $O/fault_solver_common.spec.o $O/fault_solver_kinematic.spec.o: $O/fault_solver_common.spec.o $O/compute_forces_viscoelastic.spec.o: $O/fault_solver_dynamic.spec.o $O/compute_forces_viscoelastic_calling_routine.spec.o: $O/fault_solver_dynamic.spec.o $O/fault_solver_kinematic.spec.o +$O/finalize_simulation.spec.o: $O/fault_solver_dynamic.spec.o $O/fault_solver_kinematic.spec.o $O/prepare_timerun.spec.o: $O/fault_solver_dynamic.spec.o $O/fault_solver_kinematic.spec.o $O/prepare_gpu.spec.o: $O/fault_solver_dynamic.spec.o $O/fault_solver_kinematic.spec.o @@ -368,6 +373,10 @@ $O/write_movie_output.spec.o: $O/hdf5_io_server.spec_hdf5.o $O/write_movie_output_HDF5.spec_hdf5.o: $O/hdf5_io_server.spec_hdf5.o $O/write_output_HDF5.spec_hdf5.o: $O/hdf5_io_server.spec_hdf5.o +## LTS +$O/lts_iterate_time.spec.o: $O/gravity_perturbation.spec.o $O/hdf5_io_server.spec_hdf5.o +$O/lts_global_step.spec.o: $O/fault_solver_dynamic.spec.o $O/fault_solver_kinematic.spec.o + #### #### rule to build each .o file below #### diff --git a/src/specfem3D/setup_sources_receivers.f90 b/src/specfem3D/setup_sources_receivers.f90 index 81e30e458..6769ca8cb 100644 --- a/src/specfem3D/setup_sources_receivers.f90 +++ b/src/specfem3D/setup_sources_receivers.f90 @@ -372,8 +372,10 @@ subroutine get_run_number_of_the_source() write(IMAIN,*) ! warning if (NSOURCES /= NB_RUNS_ACOUSTIC_GPU) then + write(IMAIN,*) ' ***' write(IMAIN,*) ' *** WARNING: number of sources ',NSOURCES, & ' does not match number of runs ',NB_RUNS_ACOUSTIC_GPU,' ***' + write(IMAIN,*) ' ***' write(IMAIN,*) endif call flush_IMAIN() @@ -1655,7 +1657,11 @@ subroutine setup_receivers_precompute_intp() enddo ! warning if (has_receiver_in_elastic_domain) then - print *, "Warning: Pressure seismogram output for receivers in elastic domains is valid only for non-attenuation case" + write(IMAIN,*) ' ***' + write(IMAIN,*) ' *** Warning: Pressure seismogram output for receivers in elastic domains& + &is valid only for non-attenuation case ***' + write(IMAIN,*) ' ***' + call flush_IMAIN() endif endif diff --git a/src/specfem3D/specfem3D_par.F90 b/src/specfem3D/specfem3D_par.F90 index b12c3dcfe..d39425574 100644 --- a/src/specfem3D/specfem3D_par.F90 +++ b/src/specfem3D/specfem3D_par.F90 @@ -782,12 +782,14 @@ module specfem_par_lts implicit none + ! current lts time + double precision :: current_lts_time + ! suggested coarsest time step for LTS (largest multiple p of smallest time step) double precision :: deltat_lts_suggested ! LTS intermediate arrays, one NGLOB_AB*3 per level - real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: displ_p,veloc_p,accel_p - real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: displ_ref,veloc_ref,accel_ref + real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: displ_p,veloc_p ! p-refinement level arrays integer :: num_p_level @@ -805,33 +807,43 @@ module specfem_par_lts integer, dimension(:), allocatable :: ispec_p_refine integer, dimension(:,:), allocatable :: interface_p_refine_all + ! lts call type indicator for compute forces: true == p-element call, false == boundary-element call + logical :: lts_type_compute_pelem + ! p_elem(ispec,p_level) = (p == ispec_p_refine(ispec,p)) - logical, dimension(:,:), allocatable :: p_elem + logical, dimension(:,:), allocatable, target :: p_elem + + ! element in current p-level (points to p_elem(:,ilevel) + logical, dimension(:), pointer :: current_lts_elem => null() ! boundary_elem = (p_elem(ispec,p_level) == .true.) .and. (some element nodes are in different level) ! Note: p-levels are fine-greedy. Finer levels take an element - ! for themselves when sharing element-boundary-nodes. - logical, dimension(:,:), allocatable :: boundary_elem + ! for themselves when sharing element-boundary-nodes. + logical, dimension(:,:), allocatable, target :: boundary_elem + + ! element in current boundary_elem (points to boundary_elem(:,ilevel) + logical, dimension(:), pointer :: current_lts_boundary_elem => null() ! dofs are grouped by p-level for efficiency of time-stepping vector additions. integer, dimension(:), allocatable :: p_level_iglob_start integer, dimension(:), allocatable :: p_level_iglob_end - ! Q-R; Coarse region minus halo from fine region - integer, dimension(:), allocatable :: p_level_iglob_inner_end - ! boundary counters/maps for GPU_MODE - integer, dimension(:,:,:), allocatable :: boundary_ispec - integer, dimension(:,:,:), allocatable :: ibool_from - integer, dimension(:,:,:), allocatable :: ilevel_from - integer, dimension(:,:), allocatable :: ibool_counter - integer, dimension(:,:), allocatable :: ispec_counter + ! Q-R; Coarse region minus halo from fine region + !integer, dimension(:), allocatable :: p_level_iglob_inner_end - integer :: it_local,NSTEP_LOCAL + integer :: lts_it_local + integer :: NSTEP_LOCAL ! boundary element nodes -- used to update the degrees of freedom on a p-level boundary ! equivalent to R and R* - integer, dimension(:), allocatable :: num_p_level_boundary_nodes - integer, dimension(:,:), allocatable :: p_level_boundary_node + ! boundary counters/maps + integer, dimension(:,:), allocatable :: num_p_level_boundary_nodes + integer, dimension(:,:), allocatable :: num_p_level_boundary_ispec + + integer, dimension(:,:,:), allocatable :: p_level_boundary_ispec + integer, dimension(:,:,:), allocatable :: p_level_boundary_node + integer, dimension(:,:,:), allocatable :: p_level_boundary_ilevel_from + integer, dimension(:), allocatable :: p_level_ilevel_map integer, dimension(:), allocatable :: p_level_m_loops @@ -848,9 +860,13 @@ module specfem_par_lts integer, dimension(:,:), allocatable :: interface_p_refine_boundary integer :: max_nibool_interfaces_boundary - ! adaptive error - real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: accel_adap_old - real(kind=CUSTOM_REAL) :: relative_error,error_norm + ! reference solution wavefields for debugging + real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: displ_ref,veloc_ref,accel_ref + ! global step reference element flags + logical,dimension(:), allocatable, target :: p_elem_ref,boundary_elem_ref + + ! collected acceleration wavefield + real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: accel_collected ! for stacey absorbing boundary conditions real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: cmassxyz, rmassxyz diff --git a/src/specfem3D/write_movie_output_HDF5.F90 b/src/specfem3D/write_movie_output_HDF5.F90 index 87548c0ef..a2ac4db43 100644 --- a/src/specfem3D/write_movie_output_HDF5.F90 +++ b/src/specfem3D/write_movie_output_HDF5.F90 @@ -725,10 +725,7 @@ subroutine write_xdmf_surface_body(it_io, num_nodes) ! checks if anything do, only main process writes out xdmf file if (myrank /= 0) return - !#TODO: hdf5 i/o server - ! redefinition for no ioserver case - !if (fname_xdmf_surf == '' ) fname_xdmf_surf = trim(OUTPUT_FILES)//"/movie_surface.xmf" - + ! append data section to xdmf file for surface movie fname_xdmf_surf = trim(OUTPUT_FILES)//"/movie_surface.xmf" fname_h5_data_surf_xdmf = "./movie_surface.h5" ! relative to movie_surface.xmf file ! this seems to point to a wrong directory: diff --git a/tests/decompose_mesh/test_partitioning.f90 b/tests/decompose_mesh/test_partitioning.f90 index 1cdc97012..434609825 100644 --- a/tests/decompose_mesh/test_partitioning.f90 +++ b/tests/decompose_mesh/test_partitioning.f90 @@ -6,6 +6,7 @@ program test_partitioning use constants, only: NDIM use decompose_mesh_par + use fault_scotch, only: ANY_FAULT implicit none integer :: i,ispec,inode,ier diff --git a/tests/decompose_mesh/test_partitioning.makefile b/tests/decompose_mesh/test_partitioning.makefile index 11f134b9c..1fce64755 100644 --- a/tests/decompose_mesh/test_partitioning.makefile +++ b/tests/decompose_mesh/test_partitioning.makefile @@ -22,11 +22,13 @@ OBJECTS = \ $O/wrap_patoh.o \ $O/wrap_metis.o \ $O/param_reader.cc.o \ - $O/exit_mpi.shared.o \ - $O/shared_par.shared_module.o \ $O/count_number_of_sources.shared.o \ + $O/exit_mpi.shared.o \ + $O/heap_sort.shared.o \ $O/read_parameter_file.shared.o \ $O/read_value_parameters.shared.o \ + $O/search_kdtree.shared.o \ + $O/shared_par.shared_module.o \ $O/sort_array_coordinates.shared.o \ $O/write_VTK_data.shared.o \ $(EMPTY_MACRO) diff --git a/tests/decompose_mesh/test_read.makefile b/tests/decompose_mesh/test_read.makefile index 25a402f7a..b667a465e 100644 --- a/tests/decompose_mesh/test_read.makefile +++ b/tests/decompose_mesh/test_read.makefile @@ -23,10 +23,12 @@ OBJECTS = \ $O/param_reader.cc.o \ $O/count_number_of_sources.shared.o \ $O/exit_mpi.shared.o \ - $O/shared_par.shared_module.o \ + $O/heap_sort.shared.o \ $O/read_parameter_file.shared.o \ $O/read_value_parameters.shared.o \ - $O/sort_array_coordinates.shared.o \ + $O/search_kdtree.shared.o \ + $O/shared_par.shared_module.o \ + $O/sort_array_coordinates.shared.o \ $O/write_VTK_data.shared.o \ $(EMPTY_MACRO) diff --git a/tests/decompose_mesh/test_valence.makefile b/tests/decompose_mesh/test_valence.makefile index 5051a5d1b..3a03ad267 100644 --- a/tests/decompose_mesh/test_valence.makefile +++ b/tests/decompose_mesh/test_valence.makefile @@ -22,11 +22,13 @@ OBJECTS = \ $O/wrap_patoh.o \ $O/wrap_metis.o \ $O/param_reader.cc.o \ - $O/exit_mpi.shared.o \ - $O/shared_par.shared_module.o \ $O/count_number_of_sources.shared.o \ + $O/exit_mpi.shared.o \ + $O/heap_sort.shared.o \ $O/read_parameter_file.shared.o \ $O/read_value_parameters.shared.o \ + $O/search_kdtree.shared.o \ + $O/shared_par.shared_module.o \ $O/sort_array_coordinates.shared.o \ $O/write_VTK_data.shared.o \ $(EMPTY_MACRO)