Skip to content

Commit

Permalink
Minor tweak to OMP to improve vectorization on the CPU
Browse files Browse the repository at this point in the history
  • Loading branch information
sfiligoi committed Oct 4, 2024
1 parent 1b5f3ee commit 29d9fcf
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions cgyro/src/cgyro_advect_wavenumber.F90
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ subroutine cgyro_advect_wavenumber(ij)

integer, intent(in) :: ij
integer :: ir,l,ll,j,iccj,ivc,itor,llnt
complex :: rl,he1,he2
complex :: rh,rl,he1,he2

if (nonlinear_flag == 0) return

Expand All @@ -23,19 +23,21 @@ subroutine cgyro_advect_wavenumber(ij)

#if defined(OMPGPU)
!$omp target teams distribute parallel do simd collapse(4) &
!$omp& private(ivc,ir,l,iccj,j,ll,rl,llnt,he1,he2)
!$omp& private(ivc,ir,l,iccj,j,ll,rl,rh,llnt,he1,he2)
#elif defined(_OPENACC)
!$acc parallel loop collapse(4) gang vector &
!$acc& private(ivc,ir,l,iccj,j,ll,rl,llnt,he1,he2) &
!$acc& private(ivc,ir,l,iccj,j,ll,rl,rh,llnt,he1,he2) &
!$acc& present(rhs(:,:,:,ij),omega_ss,field,h_x,c_wave)
#else
!$omp parallel do collapse(4) private(ivc,ir,l,iccj,j,ll,rl,llnt,he1,he2)
!$omp parallel do collapse(3) private(ivc,ir,l,iccj,j,ll,rl,rh,llnt,he1,he2) &
!$omp& firstprivate(shear_method,profile_shear_flag)
#endif
do itor=nt1,nt2
do ivc=1,nv_loc
do ir=1,n_radial
do j=1,n_theta
iccj = (ir-1)*n_theta+j
rh = rhs(iccj,ivc,itor,ij)

! Wavenumber advection ExB shear
if (shear_method == 2) then
Expand All @@ -62,13 +64,12 @@ subroutine cgyro_advect_wavenumber(ij)
! Thus sign below has been checked and is correct
rl = rl+c_wave(l)*(he1-he2)
enddo
rhs(iccj,ivc,itor,ij) = rhs(iccj,ivc,itor,ij) + omega_eb_base*itor*rl
rh = rh + omega_eb_base*itor*rl
endif

! Wavenumber advection profile shear
if (profile_shear_flag == 1) then
iccj = (ir-1)*n_theta+j
rl = rhs(iccj,ivc,itor,ij)
#if (!defined(OMPGPU)) && defined(_OPENACC)
!$acc loop seq
#endif
Expand All @@ -88,10 +89,10 @@ subroutine cgyro_advect_wavenumber(ij)
he2 = 0.0
endif
! Note opposite sign to ExB shear
rl = rl-c_wave(l)*(he1-he2)
rh = rh-c_wave(l)*(he1-he2)
enddo
rhs(iccj,ivc,itor,ij) = rl
endif
rhs(iccj,ivc,itor,ij) = rh
enddo
enddo
enddo
Expand Down

0 comments on commit 29d9fcf

Please sign in to comment.