Skip to content

Commit

Permalink
Put stress calc in separate subroutine and make GPU compatible
Browse files Browse the repository at this point in the history
  • Loading branch information
bpatel2107 committed Oct 29, 2024
1 parent 933ddb3 commit 8e37236
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 31 deletions.
1 change: 1 addition & 0 deletions cgyro/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ OBJECTS = cgyro_globals.o \
cgyro_step_gk_ck.o \
cgyro_step_gk_bs5.o \
cgyro_step_gk_v76.o \
cgyro_stress.o \
cgyro_write_initdata.o \
cgyro_write_timedata.o \
cgyro_write_hosts.o \
Expand Down
7 changes: 7 additions & 0 deletions cgyro/src/cgyro_cleanup.F90
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,16 @@ subroutine cgyro_cleanup
if(allocated(cflux_loc)) deallocate(cflux_loc)
if(allocated(gflux)) deallocate(gflux)
if(allocated(gflux_loc)) deallocate(gflux_loc)
if(allocated(stress_integrated)) deallocate(stress_integrated)
if(allocated(stress_integrated_loc)) deallocate(stress_integrated_loc)
if(allocated(cflux_tave)) deallocate(cflux_tave)
if(allocated(gflux_tave)) deallocate(gflux_tave)
if(allocated(recv_status)) deallocate(recv_status)
if(allocated(stress)) then
ccl_del_device(stress)
deallocate(stress)
endif

if(allocated(icd_c)) then
ccl_del_device(icd_c)
deallocate(icd_c)
Expand Down
23 changes: 5 additions & 18 deletions cgyro/src/cgyro_flux.f90
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ subroutine cgyro_flux

use mpi
use cgyro_globals
use cgyro_nl_comm

implicit none

Expand All @@ -48,7 +47,7 @@ subroutine cgyro_flux

!$omp parallel do private(iv_loc,iv,is,ix,ie,dv,vpar,ic,ir,it,erot,cprod,cn) &
!$omp& private(prod1,prod2,prod3,l,icl,dvr,u,flux_norm) &
!$omp& shared(moment_loc,gflux_loc,cflux_loc)
!$omp& shared(moment_loc,gflux_loc,cflux_loc,stress_integrated_loc)
do itor=nt1,nt2

!-----------------------------------------------------
Expand Down Expand Up @@ -219,20 +218,8 @@ subroutine cgyro_flux
cflux_loc(:,:,:,itor) = cflux_loc(:,:,:,itor)/rho**2

if (stress_print_flag .eq. 1) then
stress(:, :, :, :) = 0.0
if (i_time .ne. 0) then

! Set up stress for each field
do i_field=1, n_field
call cgyro_nl_fftw_comm1_async
call cgyro_nl_fftw_comm2_async_stress(i_field)
call cgyro_nl_fftw
call cgyro_nl_fftw_comm1_r_stress(i_field)
end do

end if
! stress(kx-theta, vel_coord, ky, phi)
stress_integrated_loc = 0.0
stress_integrated_loc(:, :, :, itor, :) = 0.0
! stress(kx-theta, vel_coord, ky, phi)
iv_loc = 0
do iv=nv1,nv2

Expand All @@ -248,9 +235,9 @@ subroutine cgyro_flux
ir = ir_c(ic)
it = it_c(ic)
stress_integrated_loc(ir, it, is, itor, :) = stress_integrated_loc(ir, it, is, itor, :) + stress(ic, iv_loc, itor, :) * dv
end do
enddo
enddo
end if
endif
endif
enddo

Expand Down
4 changes: 2 additions & 2 deletions cgyro/src/cgyro_init_manager.F90
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,9 @@ subroutine cgyro_init_manager
allocate(stress_integrated_loc(n_radial,n_theta,n_species,nt1:nt2,n_field))
allocate(stress_integrated(n_radial,n_theta,n_species,nt1:nt2,n_field))
#if defined(OMPGPU)
!$omp target enter data map(alloc:stress,stress_integrated,stress_integrated_loc)
!$omp target enter data map(alloc:stress)
#elif defined(_OPENACC)
!$acc enter data create(stress,stress_integrated,stress_integrated_loc)
!$acc enter data create(stress)
#endif
end if

Expand Down
4 changes: 4 additions & 0 deletions cgyro/src/cgyro_kernel.F90
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ subroutine cgyro_kernel
use cgyro_io
use cgyro_restart


implicit none

integer, parameter :: analysis = 0
integer :: i_field

if (test_flag == 1) return

Expand Down Expand Up @@ -139,6 +141,8 @@ subroutine cgyro_kernel

call timer_lib_in('io')

if (stress_print_flag .eq. 1) call cgyro_stress

! Write simulation data
call cgyro_write_timedata

Expand Down
22 changes: 11 additions & 11 deletions cgyro/src/cgyro_nl_comm.F90
Original file line number Diff line number Diff line change
Expand Up @@ -399,14 +399,15 @@ subroutine cgyro_nl_fftw_comm2_async_stress(itf)
call timer_lib_in('nl_mem')

#if defined(OMPGPU)
!$omp target teams distribute parallel do simd collapse(5) &
!$omp target teams distribute parallel do simd collapse(4) &
!$omp& private(itor,it,iltheta_min,mytor,gval)
#elif defined(_OPENACC)
!$acc parallel loop gang vector collapse(5) independent &
!$acc parallel loop gang vector collapse(4) independent &
!$acc& private(itor,it,iltheta_min,mytor,gval) &
!$acc& present(field,gpack) &
!$acc& present(n_toroidal_procs,nt_loc,n_jtheta,nv_loc,nt1) &
!$acc& present(n_theta,n_radial,n_field,nsplit) &
!$acc& copyin(itf) &
!$acc& default(none)
#else
!$omp parallel do collapse(3) &
Expand Down Expand Up @@ -443,9 +444,6 @@ subroutine cgyro_nl_fftw_comm2_async_stress(itf)
end subroutine cgyro_nl_fftw_comm2_async_stress


end module cgyro_nl_comm


subroutine cgyro_nl_fftw_comm1_r_stress(itf)
use timer_lib
use parallel_lib
Expand Down Expand Up @@ -485,7 +483,7 @@ subroutine cgyro_nl_fftw_comm1_r_stress(itf)
#elif defined(_OPENACC)
!$acc parallel loop collapse(4) gang vector independent private(ic_loc_m,my_psi) &
!$acc& private(iexch0,itor0,isplit0,iexch_base) &
!$acc& present(ic_c,px,rhs,fpackA,fpackB) copyin(psi_mul,zf_scale) &
!$acc& present(ic_c,px,stress,fpackA,fpackB) copyin(psi_mul,zf_scale) &
!$acc& present(nt1,nt2,nv_loc,n_theta,n_radial,nsplit,nsplitA,nsplitB) copyin(itf) default(none)
#else
!$omp parallel do collapse(2) private(ic_loc_m,my_psi) &
Expand Down Expand Up @@ -516,10 +514,9 @@ subroutine cgyro_nl_fftw_comm1_r_stress(itf)
endif
stress(ic_loc_m,iv_loc_m,itor,itf) = stress(ic_loc_m,iv_loc_m,itor,itf)+psi_mul*my_psi
enddo
enddo
enddo
enddo
enddo

enddo
else ! nsplitB==0

#if defined(OMPGPU)
Expand All @@ -529,8 +526,8 @@ subroutine cgyro_nl_fftw_comm1_r_stress(itf)
#elif defined(_OPENACC)
!$acc parallel loop collapse(4) gang vector independent private(ic_loc_m,my_psi) &
!$acc& private(iexch0,itor0,isplit0,iexch_base) &
!$acc& present(ic_c,px,rhs,fpackA) copyin(psi_mul,zf_scale) &
!$acc& present(nt1,nt2,nv_loc,n_theta,n_radial,nsplit,nsplitA) copyin(ij) default(none)
!$acc& present(ic_c,px,stress,fpackA) copyin(psi_mul,zf_scale) &
!$acc& present(nt1,nt2,nv_loc,n_theta,n_radial,nsplit,nsplitA) copyin(itf) default(none)
#else
!$omp parallel do collapse(2) private(ic_loc_m,my_psi) &
!$omp& private(iexch0,itor0,isplit0,iexch_base)
Expand Down Expand Up @@ -565,3 +562,6 @@ subroutine cgyro_nl_fftw_comm1_r_stress(itf)

end subroutine cgyro_nl_fftw_comm1_r_stress


end module cgyro_nl_comm

55 changes: 55 additions & 0 deletions cgyro/src/cgyro_stress.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
!---------------------------------------------------------------------------
! cgyro_stress.f90
!
! PURPOSE:
! Compute Reynolds/Maxwell stresses from the fields as a
! function of (kx,theta,species,ky,field).
!
! NOTES:
!
! The stress terms are just the nonlinear term evaluated for each
! field individually
!
! To trigger output of the stresses
! set STRESS_PRINT_FLAG=1

!---------------------------------------------------------------------------

subroutine cgyro_stress

use timer_lib
use mpi
use cgyro_globals
use cgyro_nl_comm

implicit none

integer :: i_field

! Set up stress for each field
! Not sure if this can be with async given the NL calc set up
do i_field=1, n_field
! Set up first half h_x
call cgyro_nl_fftw_comm1_async

! Set up fields
call cgyro_nl_fftw_comm2_async_stress(i_field)

! Set up second half h_x (if split)
call cgyro_nl_fftw_comm3_async

! Calculate nonlinear term
call cgyro_nl_fftw
call cgyro_nl_fftw_comm_test

! Set stress array from nonlinear output
call cgyro_nl_fftw_comm1_r_stress(i_field)
call cgyro_nl_fftw_comm_test
end do
#if defined(OMPGPU)
!$omp target update from(stress)
#elif defined(_OPENACC)
!$acc update host(stress)
#endif

end subroutine cgyro_stress

0 comments on commit 8e37236

Please sign in to comment.