Modify how the summation loops over izp and irp are carried out in th…

…e gyroaverge operator. Introduce indexing arrays that record which elements of the gyromatrix are nonzero and sum only over these indices in attempt to benefit from the sparsity of the gyromatrix for rhostar << 1 and large problem sizes with many elements. The gyromatrix is expected to be sparse only if nelement is large and rhostar is small. The matrix may be sparse for the smallest vperp but dense for the maximum vperp is the rho(vperp) becomes comparable to the box size. Confirmation of a speedup requires further testing to confirm that the Julia compiler behaves well for this implementation.
mabarnes · Mar 17, 2024 · 9ad86b5 · 9ad86b5
1 parent b2fb26e
commit 9ad86b5
Showing 1 changed file with 96 additions and 18 deletions.
diff --git a/moment_kinetics/src/gyroaverages.jl b/moment_kinetics/src/gyroaverages.jl
@@ -9,18 +9,21 @@ export init_gyro_operators
 export gyroaverage_field!
 export gyroaverage_pdf!
 
-using ..type_definitions: mk_float
+using ..type_definitions: mk_float, mk_int
 using ..array_allocation: allocate_float, allocate_shared_float
 using ..array_allocation: allocate_int
 using ..looping
-using ..communication: MPISharedArray
-#using ..communication
+using ..communication: MPISharedArray, comm_block
+using MPI
 
 struct gyro_operators
     # matrix for applying a gyroaverage to a function F(r,vpa,vperp) at fixed r, with R = r - rhovec and rhovec = b x v / Omega
     # the matrix can also be used ring-averaging a function F(R,vpa,vperp) at fixed R, since F(R,vpa,vperp) is independent of gyrophase
     # other matrices are required for gyroaveraging functions that depend on gyrophase.
     gyromatrix::MPISharedArray{mk_float,6}
+    gyroloopsizes::Array{mk_int,4}
+    izpgyroindex::Array{Array{mk_int,1},4}
+    irpgyroindex::Array{Array{mk_int,1},4}
 end
 
 """
@@ -37,11 +40,16 @@ function init_gyro_operators(vperp,z,r,gyrophase,geometry,composition;print_info
     gkions = composition.gyrokinetic_ions
     if !gkions
         gyromatrix =  allocate_shared_float(1,1,1,1,1,1)
+        gyroloopsizes = allocate_int(1,1,1,1)
+        izpgyroindex = zeros.(mk_int,gyroloopsizes)
+        irpgyroindex = zeros.(mk_int,gyroloopsizes)
     else
        if print_info
            println("Begin: init_gyro_operators")
        end
        gyromatrix = allocate_shared_float(z.n,r.n,vperp.n,z.n,r.n,composition.n_ion_species)
+       # an array to store the value for the number of points in the z', r' sum for each gyroaveraged field index 
+       gyroloopsizes = allocate_int(vperp.n,z.n,r.n,composition.n_ion_species)
 
        # init the matrix!
        # the first two indices are to be summed over
@@ -131,13 +139,66 @@ function init_gyro_operators(vperp,z,r,gyrophase,geometry,composition;print_info
                    end
                    #println("counter: ",icounter)
                end
+              # count the number of nonzero (izp,irp) elements in gyromatrix for this ivperp, iz, ir, is
+              zero = 1.0e-14 
+              nsum = 0 
+              for irp in 1:r.n 
+                  for izp in 1:z.n
+                      if abs(gyromatrix[izp,irp,ivperp,iz,ir,is]) > zero
+                          nsum += 1
+                      end
+                  end                  
+              end
+              # assign this value to the gyroloopsizes array
+              gyroloopsizes[ivperp,iz,ir,is] = nsum
            end
-           if print_info
-               println("Finished: init_gyro_operators")
-           end
+        end
+
+        # Broadcast the values in gyroloopsizes across the shared-memory block
+        MPI.Bcast!(gyroloopsizes, 0, comm_block[])
+        # initialise the array of arrays containing the indexing information              
+        izpgyroindex = zeros.(mk_int,gyroloopsizes)
+        irpgyroindex = zeros.(mk_int,gyroloopsizes)
+        # compute the indices on the root process  
+        @serial_region begin
+            zero = 1.0e-14  
+            @loop_s_r_z_vperp is ir iz ivperp begin
+                # extract out the array of indices for this field point            
+                izpgyro = izpgyroindex[ivperp,iz,ir,is] 
+                irpgyro = irpgyroindex[ivperp,iz,ir,is]
+                # fill these arrays with the index locations using the same
+                # conditions as used to create the gyroloopsizes array
+                isum = 0 
+                for irp in 1:r.n 
+                    for izp in 1:z.n
+                        if abs(gyromatrix[izp,irp,ivperp,iz,ir,is]) > zero
+                            isum += 1
+                            izpgyro[isum] = izp
+                            irpgyro[isum] = irp
+                        end
+                    end                  
+                end
+            end
+        end
+        # Broadcast the results to the other processes in this shared-memory block
+        # Broadcast is not defined for the type Array{Array{mk_int,N},M} so Broadcast for each array separately
+        # avoid using shared-memory loop macro as we need to do this for every element of the array regardless
+        for is in 1:composition.n_ion_species
+            for ir in 1:r.n
+                for iz in 1:z.n
+                    for ivperp in 1:vperp.n
+                       MPI.Bcast!(izpgyroindex[ivperp,iz,ir,is], 0, comm_block[])
+                       MPI.Bcast!(irpgyroindex[ivperp,iz,ir,is], 0, comm_block[])
+                    end
+                end
+            end
+        end
+        if print_info
+            println("Finished: init_gyro_operators")
         end
     end
-    gyro = gyro_operators(gyromatrix)
+
+    gyro = gyro_operators(gyromatrix,gyroloopsizes,izpgyroindex,irpgyroindex)
     return gyro
 end
 
@@ -228,15 +289,23 @@ function gyroaverage_field!(gfield_out,field_in,gyro,vperp,z,r,composition)
     nr = r.n
     nz = z.n
     gyromatrix = gyro.gyromatrix
+    gyroloopsizes = gyro.gyroloopsizes
+    izpgyroindex = gyro.izpgyroindex
+    irpgyroindex = gyro.irpgyroindex
 
     begin_s_r_z_vperp_region()
     @loop_s_r_z_vperp is ir iz ivperp begin
+        nsum = gyroloopsizes[ivperp,iz,ir,is]
+        izplist = izpgyroindex[ivperp,iz,ir,is]        
+        irplist = irpgyroindex[ivperp,iz,ir,is]        
+
         gfield_out[ivperp,iz,ir] = 0.0
         # sum over all the contributions in the gyroaverage
-        for irp in 1:nr
-            for izp in 1:nz
-                gfield_out[ivperp,iz,ir,is] += gyromatrix[izp,irp,ivperp,iz,ir,is]*field_in[izp,irp]
-            end
+        # use list of indices here instead of simply summing over
+        # irp in 1:nr and izp in 1:nz to try to make use of the sparsity of the gyromatrix
+        for isum in 1:nsum
+            izp, irp = izplist[isum], irplist[isum]
+            gfield_out[ivperp,iz,ir,is] += gyromatrix[izp,irp,ivperp,iz,ir,is]*field_in[izp,irp]
         end
     end
 
@@ -257,15 +326,24 @@ function gyroaverage_pdf!(gpdf_out,pdf_in,gyro,vpa,vperp,z,r,composition)
     nr = r.n
     nz = z.n
     gyromatrix = gyro.gyromatrix
+    gyroloopsizes = gyro.gyroloopsizes
+    izpgyroindex = gyro.izpgyroindex
+    irpgyroindex = gyro.irpgyroindex
 
     begin_s_r_z_vperp_vpa_region()
-    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
-        gpdf_out[ivpa,ivperp,iz,ir,is] = 0.0
-        # sum over all the contributions in the gyroaverage
-        for irp in 1:nr
-            for izp in 1:nz
-                gpdf_out[ivpa,ivperp,iz,ir,is] += gyromatrix[izp,irp,ivperp,iz,ir,is]*pdf_in[ivpa,ivperp,izp,irp,is]
-            end
+    @loop_s_r_z_vperp is ir iz ivperp begin
+        nsum = gyroloopsizes[ivperp,iz,ir,is]
+        izplist = izpgyroindex[ivperp,iz,ir,is]        
+        irplist = irpgyroindex[ivperp,iz,ir,is]        
+        @loop_vpa ivpa begin 
+           gpdf_out[ivpa,ivperp,iz,ir,is] = 0.0
+           # sum over all the contributions in the gyroaverage
+           # use list of indices here instead of simply summing over
+           # irp in 1:nr and izp in 1:nz to try to make use of the sparsity of the gyromatrix
+           for isum in 1:nsum
+               izp, irp = izplist[isum], irplist[isum]
+               gpdf_out[ivpa,ivperp,iz,ir,is] += gyromatrix[izp,irp,ivperp,iz,ir,is]*pdf_in[ivpa,ivperp,izp,irp,is]
+           end
         end
     end