Skip to content

Commit

Permalink
Revert "Propagate triad changes to GPU code paths"
Browse files
  • Loading branch information
NFPCjiheon authored Jan 10, 2025
1 parent 81e0e1a commit 177ac14
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 520 deletions.
9 changes: 0 additions & 9 deletions cgyro/src/cgyro_check_memory.F90
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,6 @@ subroutine cgyro_check_memory(datafile)
call cgyro_alloc_add_4d(io,n_field,nc,nv_loc,nt_loc,16,'omega_ss')
call cgyro_alloc_add_3d(io,nc,nv_loc,nt_loc,16,'omega_cap_h')
call cgyro_alloc_add_3d(io,nc,nv_loc,nt_loc,16,'omega_h')
if (triad_print_flag == 1) then
call cgyro_alloc_add_3d(io,nc,nv_loc,nt_loc,16,'diss_r')
endif
call cgyro_alloc_add_3d(io,nc,nv_loc,nt_loc,16,'h_x')
call cgyro_alloc_add_3d(io,nc,nv_loc,nt_loc,16,'h0_x')
call cgyro_alloc_add_3d(io,nc,nv_loc,nt_loc,16,'g_x')
Expand Down Expand Up @@ -136,12 +133,6 @@ subroutine cgyro_check_memory(datafile)
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplitA*n_toroidal_procs,16,'fpackA')
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplitB*n_toroidal_procs,16,'fpackB')
call cgyro_alloc_add_4d(io,n_field,n_radial,n_jtheta,n_toroidal,16,'gpack')
if (triad_print_flag == 1) then
call cgyro_alloc_add_4d(io,n_radial,nt_loc,nsplitA,n_toroidal_procs,16,'eA_nl')
call cgyro_alloc_add_4d(io,n_radial,nt_loc,nsplitB,n_toroidal_procs,16,'eB_nl')
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplitA*n_toroidal_procs,16,'epackA')
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplitB*n_toroidal_procs,16,'epackB')
endif
endif

write(io,*)
Expand Down
57 changes: 5 additions & 52 deletions cgyro/src/cgyro_cleanup.F90
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,21 @@ subroutine cgyro_cleanup
ccl_del_device(vel2)
deallocate(vel2)
endif
if(allocated(w_e)) then
ccl_del_device(w_e)
deallocate(w_e)
endif
if(allocated(w_e)) deallocate(w_e)
if(allocated(e_deriv1_mat)) deallocate(e_deriv1_mat)
if(allocated(e_deriv1_rot_mat)) deallocate(e_deriv1_rot_mat)
if(allocated(xi)) then
ccl_del_device(xi)
deallocate(xi)
endif
if(allocated(w_xi)) then
ccl_del_device(w_xi)
deallocate(w_xi)
endif
if(allocated(w_exi)) then
ccl_del_device(w_exi)
deallocate(w_exi)
endif
if(allocated(w_xi)) deallocate(w_xi)
if(allocated(w_exi)) deallocate(w_exi)
if(allocated(xi_lor_mat)) deallocate(xi_lor_mat)
if(allocated(xi_deriv_mat)) deallocate(xi_deriv_mat)

if(allocated(theta)) deallocate(theta)
if(allocated(thetab)) deallocate(thetab)
if(allocated(w_theta)) then
ccl_del_device(w_theta)
deallocate(w_theta)
endif
if(allocated(w_theta)) deallocate(w_theta)
if(allocated(g_theta)) deallocate(g_theta)
if(allocated(g_theta_geo)) deallocate(g_theta_geo)
if(allocated(bmag)) deallocate(bmag)
Expand All @@ -95,10 +83,7 @@ subroutine cgyro_cleanup
if(allocated(lambda_rot)) deallocate(lambda_rot)
if(allocated(dlambda_rot)) deallocate(dlambda_rot)
if(allocated(dens_rot)) deallocate(dens_rot)
if(allocated(dens2_rot)) then
ccl_del_device(dens2_rot)
deallocate(dens2_rot)
endif
if(allocated(dens2_rot)) deallocate(dens2_rot)
if(allocated(dens_ele_rot)) deallocate(dens_ele_rot)
if(allocated(dens_avg_rot)) deallocate(dens_avg_rot)
if(allocated(dlnndr_avg_rot)) deallocate(dlnndr_avg_rot)
Expand Down Expand Up @@ -152,18 +137,6 @@ subroutine cgyro_cleanup
ccl_del_device(source)
deallocate(source)
endif
if(allocated(triad)) then
ccl_del_device(triad)
deallocate(triad)
endif
if(allocated(triad_loc)) then
ccl_del_device(triad_loc)
deallocate(triad_loc)
endif
if(allocated(triad_loc_old)) then
ccl_del_device(triad_loc_old)
deallocate(triad_loc_old)
endif
if(allocated(thfac_itor)) then
ccl_del_device(thfac_itor)
deallocate(thfac_itor)
Expand Down Expand Up @@ -228,10 +201,6 @@ subroutine cgyro_cleanup
ccl_del_device(omega_sbeta)
deallocate(omega_sbeta)
endif
if(allocated(diss_r)) then
ccl_del_device(diss_r)
deallocate(diss_r)
endif
if(allocated(jvec_c)) then
ccl_del_device(jvec_c)
deallocate(jvec_c)
Expand Down Expand Up @@ -292,14 +261,6 @@ subroutine cgyro_cleanup
ccl_del_device(g_nl)
deallocate(g_nl)
endif
if(allocated(eA_nl)) then
ccl_del_device(eA_nl)
deallocate(eA_nl)
endif
if(allocated(eB_nl)) then
ccl_del_device(eB_nl)
deallocate(eB_nl)
endif
if(allocated(fpackA)) then
ccl_del_device(fpackA)
deallocate(fpackA)
Expand All @@ -312,14 +273,6 @@ subroutine cgyro_cleanup
ccl_del_device(gpack)
deallocate(gpack)
endif
if(allocated(epackA)) then
ccl_del_device(epackA)
deallocate(epackA)
endif
if(allocated(epackB)) then
ccl_del_device(epackB)
deallocate(epackB)
endif
if (allocated(cmat)) then
ccl_del_bigdevice(cmat)
deallocate(cmat)
Expand Down
4 changes: 2 additions & 2 deletions cgyro/src/cgyro_equilibrium.F90
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ subroutine cgyro_equilibrium
enddo

#if defined(OMPGPU)
!$omp target enter data map(to:xi,omega_stream,w_theta,dens2_rot)
!$omp target enter data map(to:xi,omega_stream)
#elif defined(_OPENACC)
!$acc enter data copyin(xi,omega_stream,w_theta,dens2_rot)
!$acc enter data copyin(xi,omega_stream)
#endif

#if defined(OMPGPU) || defined(_OPENACC)
Expand Down
14 changes: 5 additions & 9 deletions cgyro/src/cgyro_flux.f90
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,8 @@ subroutine cgyro_flux
! 2-1. Compute Triad energy transfer
!-------------------------------------------------------------

if (triad_print_flag == 1) then
kx = 2*pi*rho/length
do is=1,n_species
kx = 2*pi*rho/length
do is=1,n_species
! Triad energy transfer : T_k
triad_loc_old(is,:,itor,1) = triad_loc(is,:,itor,1) *temp(is)/dlntdr(is_ele)
! From Nonzonal Triad energy transfer : T_k [NZ(k',k")->k]
Expand All @@ -215,8 +214,7 @@ subroutine cgyro_flux
triad_loc_old(is,:,itor,7) = triad_loc(is,:,itor,6) *temp(is)/dlntdr(is_ele)
! Diss. (Coll. )
triad_loc_old(is,:,itor,8) = triad_loc(is,:,itor,7) *temp(is)/dlntdr(is_ele)
enddo
endif
enddo


!-----------------------------------------------------
Expand Down Expand Up @@ -281,16 +279,14 @@ subroutine cgyro_flux
NEW_COMM_1, &
i_err)

if (triad_print_flag == 1) then
! Reduced complex triad(ns,kx), below, is still distributed over n
call MPI_ALLREDUCE(triad_loc_old(:,:,:,:), &
! Reduced complex triad(ns,kx), below, is still distributed over n
call MPI_ALLREDUCE(triad_loc_old(:,:,:,:), &
triad, &
size(triad), &
MPI_DOUBLE_COMPLEX, &
MPI_SUM, &
NEW_COMM_1, &
i_err)
endif


tave_step = tave_step + 1
Expand Down
21 changes: 6 additions & 15 deletions cgyro/src/cgyro_init_arrays.F90
Original file line number Diff line number Diff line change
Expand Up @@ -448,14 +448,12 @@ subroutine cgyro_init_arrays
+ abs(omega_rot_drift_r(it,is)) &
+ abs(omega_rot_edrift_r(it)))

if (triad_print_flag == 1) then
! (d/dr) upwind dissipation for triad energy transfer diagnostics
diss_r(ic,iv_loc,itor) = - (n_radial/length)*spectraldiss(u,nup_radial)*up_radial &
* (abs(omega_rdrift(it,is))*energy(ie)*(1.0+xi(ix)**2) &
+ abs(omega_cdrift_r(it,is)*xi(ix))*vel(ie) &
+ abs(omega_rot_drift_r(it,is)) &
+ abs(omega_rot_edrift_r(it)))
endif
! (d/dr) upwind dissipation for triad energy transfer diagnostics
diss_r(ic,iv_loc,itor) = - (n_radial/length)*spectraldiss(u,nup_radial)*up_radial &
* (abs(omega_rdrift(it,is))*energy(ie)*(1.0+xi(ix)**2) &
+ abs(omega_cdrift_r(it,is)*xi(ix))*vel(ie) &
+ abs(omega_rot_drift_r(it,is)) &
+ abs(omega_rot_edrift_r(it)))

! omega_star
carg = &
Expand Down Expand Up @@ -489,13 +487,6 @@ subroutine cgyro_init_arrays
#elif defined(_OPENACC)
!$acc enter data copyin(omega_cap_h,omega_h,omega_s,omega_ss,omega_sbeta)
#endif
if (triad_print_flag == 1) then
#if defined(OMPGPU)
!$omp target enter data map(to:diss_r)
#elif defined(_OPENACC)
!$acc enter data copyin(diss_r)
#endif
endif
!-------------------------------------------------------------------------

deallocate(gdlnndr,gdlntdr)
Expand Down
46 changes: 9 additions & 37 deletions cgyro/src/cgyro_init_manager.F90
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,6 @@ subroutine cgyro_init_manager
enddo
enddo

#if defined(OMPGPU)
!$omp target enter data map(to:w_exi,w_e,w_xi)
#elif defined(_OPENACC)
!$acc enter data copyin(w_exi,w_e,w_xi)
#endif

allocate(theta(n_theta))
allocate(thetab(n_theta,n_radial/box_size))
allocate(w_theta(n_theta))
Expand Down Expand Up @@ -185,6 +179,10 @@ subroutine cgyro_init_manager
allocate( gflux(0:n_global,n_species,4,n_field,nt1:nt2))
allocate(gflux_loc(0:n_global,n_species,4,n_field,nt1:nt2))

allocate( triad(n_species,n_radial,nt1:nt2,8))
allocate(triad_loc(n_species,n_radial,nt1:nt2,7))
allocate(triad_loc_old(n_species,n_radial,nt1:nt2,8))

allocate(cflux_tave(n_species,4))
allocate(gflux_tave(n_species,4))

Expand All @@ -197,16 +195,6 @@ subroutine cgyro_init_manager
#elif defined(_OPENACC)
!$acc enter data create(fcoef,gcoef,field,field_loc,source)
#endif
if (triad_print_flag == 1) then
allocate( triad(n_species,n_radial,nt1:nt2,8))
allocate(triad_loc(n_species,n_radial,nt1:nt2,7))
allocate(triad_loc_old(n_species,n_radial,nt1:nt2,8))
#if defined(OMPGPU)
!$omp target enter data map(alloc:triad,triad_loc,triad_loc_old)
#elif defined(_OPENACC)
!$acc enter data create(triad,triad_loc,triad_loc_old)
#endif
endif

if ((collision_model /= 5) .AND. (collision_field_model == 1)) then
! nc and nc_loc must be last, since it will be collated
Expand Down Expand Up @@ -274,9 +262,7 @@ subroutine cgyro_init_manager
allocate(cap_h_v(nc_loc,nt1:nt2,nv))
allocate(omega_cap_h(nc,nv_loc,nt1:nt2))
allocate(omega_h(nc,nv_loc,nt1:nt2))
if (triad_print_flag == 1) then
allocate(diss_r(nc,nv_loc,nt1:nt2))
endif
allocate(diss_r(nc,nv_loc,nt1:nt2))
allocate(omega_s(n_field,nc,nv_loc,nt1:nt2))
allocate(omega_ss(n_field,nc,nv_loc,nt1:nt2))
allocate(omega_sbeta(nc,nv_loc,nt1:nt2))
Expand Down Expand Up @@ -316,41 +302,27 @@ subroutine cgyro_init_manager
! Nonlinear arrays
if (nonlinear_flag == 1) then
allocate(fA_nl(n_radial,nt_loc,nsplitA,n_toroidal_procs))
allocate(eA_nl(n_radial,nt_loc,nsplitA,n_toroidal_procs))
allocate(g_nl(n_field,n_radial,n_jtheta,n_toroidal))
allocate(fpackA(n_radial,nt_loc,nsplitA*n_toroidal_procs))
allocate(epackA(n_radial,nt_loc,nsplitA*n_toroidal_procs))
allocate(gpack(n_field,n_radial,n_jtheta,n_toroidal))
allocate(jvec_c_nl(n_field,n_radial,n_jtheta,nv_loc,n_toroidal))
#if defined(OMPGPU)
!$omp target enter data map(alloc:fpackA,gpack,fA_nl,g_nl,jvec_c_nl)
#elif defined(_OPENACC)
!$acc enter data create(fpackA,gpack,fA_nl,g_nl,jvec_c_nl)
#endif
if (triad_print_flag == 1) then
allocate(eA_nl(n_radial,nt_loc,nsplitA,n_toroidal_procs))
allocate(epackA(n_radial,nt_loc,nsplitA*n_toroidal_procs))
#if defined(OMPGPU)
!$omp target enter data map(alloc:epackA,eA_nl)
#elif defined(_OPENACC)
!$acc enter data create(epackA,eA_nl)
#endif
endif
if (nsplitB > 0) then ! nsplitB can be zero at large MPI
allocate(fB_nl(n_radial,nt_loc,nsplitB,n_toroidal_procs))
allocate(fpackB(n_radial,nt_loc,nsplitB*n_toroidal_procs))
allocate(eB_nl(n_radial,nt_loc,nsplitB,n_toroidal_procs))
allocate(epackB(n_radial,nt_loc,nsplitB*n_toroidal_procs))
#if defined(OMPGPU)
!$omp target enter data map(alloc:fpackB,fB_nl)
#elif defined(_OPENACC)
!$acc enter data create(fpackB,fB_nl)
#endif
if (triad_print_flag == 1) then
allocate(epackB(n_radial,nt_loc,nsplitB*n_toroidal_procs))
allocate(eB_nl(n_radial,nt_loc,nsplitB,n_toroidal_procs))
#if defined(OMPGPU)
!$omp target enter data map(alloc:epackB,eB_nl)
#elif defined(_OPENACC)
!$acc enter data create(epackB,eB_nl)
#endif
endif
endif
endif

Expand Down
7 changes: 0 additions & 7 deletions cgyro/src/cgyro_kernel.F90
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,6 @@ subroutine cgyro_kernel
! wait for cap_h_c to be synched into system memory, used by cgyro_write_timedata
!$acc wait(4)
#endif
if (triad_print_flag == 1) then
#if defined(OMPGPU)
!$omp target update from(triad_loc)
#elif defined(_OPENACC)
!$acc update host(triad_loc)
#endif
endif
call timer_lib_out('coll_mem')

call timer_lib_in('io')
Expand Down
Loading

0 comments on commit 177ac14

Please sign in to comment.