From bcef151f34fc8a031493afd0d21ad09e6fb15247 Mon Sep 17 00:00:00 2001 From: Jeff Candy Date: Thu, 19 Sep 2024 17:36:35 -0700 Subject: [PATCH] Old code --- cgyro/src/cgyro_step_gk_ck.f90 | 249 ++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 113 deletions(-) diff --git a/cgyro/src/cgyro_step_gk_ck.f90 b/cgyro/src/cgyro_step_gk_ck.f90 index 2a5abf915..d5e33805a 100644 --- a/cgyro/src/cgyro_step_gk_ck.f90 +++ b/cgyro/src/cgyro_step_gk_ck.f90 @@ -1,17 +1,15 @@ -! Cash-Karp 6:5(4) adaptive integrator +! Cash-Karp 6:5(4) adaptive integrator | multithreaded version subroutine cgyro_step_gk_ck - use timer_lib use mpi + use timer_lib use cgyro_globals - use cgyro_globals_math use cgyro_io use cgyro_step - + implicit none - ! ! z e vpar z e vperp^2 ! h = H - ----- G0 ( phi - ----- Apar ) + ----- ---------- Gperp Bpar ! T c T omega_a c @@ -23,17 +21,10 @@ subroutine cgyro_step_gk_ck ! phi -> field(1) ! Apar -> field(2) ! Bpar -> field(3) - - logical :: is_first - + tol = error_tol - ! total iteration counter itrk = 0 - - ! good step counter - istep = 0 - conv = 1 scale_x = 0.0 @@ -53,153 +44,189 @@ subroutine cgyro_step_gk_ck delta_t_last = deltah2 call timer_lib_in('str_mem') - call cgyro_vel_copy(h0_old, h_x) - call timer_lib_out('str_mem') - - is_first = .TRUE. - +!$omp parallel workshare + h0_old(:,:) = h_x(:,:) +!$omp end parallel workshare + call timer_lib_out('str_mem') + do while (delta_t_tot < delta_t) - + call timer_lib_in('str') if (delta_t_tot + deltah2 > delta_t) then deltah2 = delta_t-delta_t_tot delta_t_last_step = deltah2 else delta_t_last = deltah2 - deltah2_min = min(deltah2, deltah2_min) - deltah2_max = max(deltah2, deltah2_max) + deltah2_min = min(deltah2,deltah2_min) + deltah2_max = max(deltah2,deltah2_max) endif if (conv == 1) then - call cgyro_vel_copy(h0_x, h_x) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h0_x(ic,iv_loc) = h_x(ic,iv_loc) + enddo + enddo else - call cgyro_vel_copy2(h0_x, h_x, h0_old) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h0_x(ic,iv_loc) = h0_old(ic,iv_loc) + h_x(ic,iv_loc) = h0_old(ic,iv_loc) + enddo + enddo endif call timer_lib_out('str') - - call cgyro_rhs_comm_async_hx - if (is_first) then - ! fields already in good shape in the beginning - call cgyro_rhs(1,.FALSE.) - is_first = .FALSE. - else - call cgyro_field_c(.FALSE.) - call cgyro_rhs(1,.TRUE.) - endif + + call cgyro_field_c + call cgyro_rhs(1) call timer_lib_in('str') - call cgyro_vel_fma2(h_x, h0_x, 0.2d0*deltah2, rhs(:,:,:,1)) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h_x(ic, iv_loc) = h0_x(ic, iv_loc) & + + 0.2d0*deltah2*rhs(ic, iv_loc, 1) + enddo + enddo call timer_lib_out('str') - - call cgyro_rhs_comm_async_hx - call cgyro_field_c(.FALSE.) - call cgyro_rhs(2,.TRUE.) + + call cgyro_field_c + call cgyro_rhs(2) call timer_lib_in('str') - call cgyro_vel_fma3(h_x, & - h0_x, & - 1.0/40.0*deltah2*3.0, rhs(:,:,:,1), & - 1.0/40.0*deltah2*9.0, rhs(:,:,:,2)) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h_x(ic, iv_loc) = h0_x(ic,iv_loc) & + + 1.0/40.0*deltah2*(3.0*rhs(ic, iv_loc, 1) & + + 9.0*rhs(ic, iv_loc, 2)) + enddo + enddo call timer_lib_out('str') - call cgyro_rhs_comm_async_hx - call cgyro_field_c(.FALSE.) - call cgyro_rhs(3,.TRUE.) + call cgyro_field_c + call cgyro_rhs(3) call timer_lib_in('str') - call cgyro_vel_fmaN(3, h_x, & - h0_x, & - (/ deltah2*( 3.d0/10.d0), & - deltah2*(-9.d0/10.d0), & - deltah2*( 6.d0/ 5.d0) /), & - rhs(:,:,:,1:3)) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h_x(ic, iv_loc) = h0_x(ic,iv_loc) & + + deltah2*( 3.d0/10.d0*rhs(ic, iv_loc, 1) & + - 9.d0/10.d0*rhs(ic, iv_loc, 2) & + + 6.d0/5.d0*rhs(ic, iv_loc, 3)) + enddo + enddo call timer_lib_out('str') - - call cgyro_rhs_comm_async_hx - call cgyro_field_c(.FALSE.) - call cgyro_rhs(4,.TRUE.) + + call cgyro_field_c + call cgyro_rhs(4) call timer_lib_in('str') - call cgyro_vel_fmaN(4, h_x, & - h0_x, & - (/ deltah2*(-11.d0/54.d0), & - deltah2*( 5.d0/ 2.d0), & - deltah2*(-70.d0/27.d0), & - deltah2*( 35.d0/27.d0) /), & - rhs(:,:,:,1:4)) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h_x(ic, iv_loc) = h0_x(ic,iv_loc) & + + deltah2*(-11.d0/54.d0 *rhs(ic, iv_loc, 1) & + + 5.d0/2.d0*rhs(ic, iv_loc, 2) & + - 70.d0/27.d0*rhs(ic, iv_loc, 3) & + + 35.d0/27.d0*rhs(ic, iv_loc, 4)) + enddo + enddo call timer_lib_out('str') - - call cgyro_rhs_comm_async_hx - call cgyro_field_c(.FALSE.) - call cgyro_rhs(5,.TRUE.) + + call cgyro_field_c + call cgyro_rhs(5) call timer_lib_in('str') - call cgyro_vel_fmaN(5, h_x, & - h0_x, & - (/ deltah2*( 1631.d0/ 55296.d0), & - deltah2*( 175.d0/ 512.d0), & - deltah2*( 575.d0/ 13824.d0), & - deltah2*(44275.d0/110592.d0), & - deltah2*( 253.d0/ 4096.d0) /), & - rhs(:,:,:,1:5)) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h_x(ic,iv_loc) = h0_x(ic,iv_loc) & + + deltah2*(1631.d0/55296.d0*rhs(ic, iv_loc, 1) & + + 175.d0/512.d0*rhs(ic, iv_loc, 2) & + + 575.d0/13824.d0*rhs(ic, iv_loc, 3) & + + 44275.d0/110592.d0*rhs(ic, iv_loc, 4) & + + 253.d0/4096.d0*rhs(ic, iv_loc, 5)) + enddo + enddo call timer_lib_out('str') + + call cgyro_field_c + call cgyro_rhs(6) - call cgyro_rhs_comm_async_hx - call cgyro_field_c(.FALSE.) - call cgyro_rhs(6,.TRUE.) + !--------- + ! SOLUTION + !--------- - !------------------- - ! SOLUTION and ERROR - !------------------- + call timer_lib_in('str') +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h_x(ic, iv_loc) = h0_x(ic,iv_loc) & + + deltah2*( 37.d0/378.d0*rhs(ic, iv_loc, 1) & + + 250.d0/621.d0 * rhs(ic, iv_loc, 3) & + + 125.d0/594.d0*rhs(ic, iv_loc, 4) & + + 512.d0/1771.d0*rhs(ic, iv_loc, 6)) + enddo + enddo + call timer_lib_out('str') + !--------- + ! ERROR + !--------- + call timer_lib_in('str') - ! using a multiplication by 0 in one element is still efffienct, since the matrix element was read for the 2nd equation - call cgyro_vel_solution_werror(4, h_x, & - h0_x, & - deltah2*( 37.d0/ 378.d0), rhs(:,:,:,1), & - (/ deltah2*(250.d0/ 621.d0), & - deltah2*(125.d0/ 594.d0), 0.d0, & - deltah2*(512.d0/1771.d0) /), & - rhs(:,:,:,3:6), & - deltah2*( 37.d0/378.d0- 2825.d0/27648.d0), & - (/ deltah2*( 250.d0/621.d0-18575.d0/48384.d0), & - deltah2*( 125.d0/594.d0-13525.d0/55296.d0), & - deltah2*(-277.d0/14336.d0), & - deltah2*( 512.d0/1771.d0-1.d0/4.d0) /), & - error_hx, error_rhs) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + rhs(ic,iv_loc,1) = deltah2*(& + (37.d0/378.d0-2825.d0/27648.d0)*rhs(ic, iv_loc, 1) & + + (250.d0/621.d0-18575.d0/48384.d0)*rhs(ic, iv_loc, 3) & + + (125.d0/594.d0-13525.d0/55296.d0)*rhs(ic, iv_loc, 4) & + + (-277.d0/14336.d0)*rhs(ic, iv_loc, 5) & + + (512.d0/1771.d0-1.d0/4.d0)*rhs(ic, iv_loc, 6)) + enddo + enddo + + error_x(1) = sum(abs(rhs(:,:,1))) + error_x(2) = sum(abs(h_x)) call timer_lib_out('str') call timer_lib_in('str_comm') - error_x(1) = error_rhs - error_x(2) = error_hx - call MPI_ALLREDUCE(error_x,error_sum,2,MPI_DOUBLE_PRECISION,& MPI_SUM,CGYRO_COMM_WORLD,i_err) + call timer_lib_out('str_comm') error_x = error_sum delta_x = error_x(1)+eps + rel_error = error_x(1)/(error_x(2)+eps) var_error = sqrt(total_local_error+rel_error*rel_error) - call timer_lib_out('str_comm') - + if (var_error < tol) then + call cgyro_field_c - istep = istep+1 - deltah2_vec(istep) = deltah2 + delta_t_tot = delta_t_tot+deltah2 + total_local_error = total_local_error+rel_error*rel_error - delta_t_tot = delta_t_tot + deltah2 - total_local_error = total_local_error + rel_error*rel_error - - scale_x = max((tol/delta_x*1.0/delta_t)**0.2, & + scale_x = max((tol/delta_x*1.0/delta_t)**0.2,& (tol/delta_x*1.0/delta_t)**0.25) deltah2 = deltah2*max(1.0,min(6.0,scale_x)) - + local_max_error = max(local_max_error,rel_error) conv = 1 call timer_lib_in('str_mem') - call cgyro_vel_copy(h0_old, h0_x) +!$omp parallel do collapse(2) + do iv_loc=1,nv_loc + do ic=1,nc + h0_old(ic,iv_loc) = h0_x(ic,iv_loc) + enddo + enddo call timer_lib_out('str_mem') else @@ -211,7 +238,7 @@ subroutine cgyro_step_gk_ck deltah2 = max(delta_x_min,deltah2) itrk = itrk+1 - + if (itrk > itrk_max) then call cgyro_error('Cash-Carp step exceeded max iteration count') return @@ -219,9 +246,6 @@ subroutine cgyro_step_gk_ck enddo - ! caller expects the fields to be updated on exit - call cgyro_field_c(.TRUE.) - delta_t_gk = max(delta_t_last,deltah2) if (delta_t_last_step <= small) delta_t_last_step = delta_t_last @@ -235,7 +259,6 @@ subroutine cgyro_step_gk_ck endif delta_t_gk = min(delta_t,delta_t_gk) - total_local_error = var_error - + end subroutine cgyro_step_gk_ck