diff --git a/src/file_io.jl b/src/file_io.jl
index 7485eb7dc..58908356b 100644
--- a/src/file_io.jl
+++ b/src/file_io.jl
@@ -647,37 +647,37 @@ end
             append_to_dynamic_var(io_moments.time, t, t_idx)
 
             # add the electrostatic potential and electric field components at this time slice to the hdf5 file
-            append_to_dynamic_var(io_moments.phi.data, fields.phi, t_idx, z, r)
-            append_to_dynamic_var(io_moments.Er.data, fields.Er, t_idx, z, r)
-            append_to_dynamic_var(io_moments.Ez.data, fields.Ez, t_idx, z, r)
+            append_to_dynamic_var(io_moments.phi, fields.phi.data, t_idx, z, r)
+            append_to_dynamic_var(io_moments.Er, fields.Er.data, t_idx, z, r)
+            append_to_dynamic_var(io_moments.Ez, fields.Ez.data, t_idx, z, r)
             # add the density data at this time slice to the output file
-            append_to_dynamic_var(io_moments.density.data, moments.charged.dens, t_idx, z,
+            append_to_dynamic_var(io_moments.density, moments.charged.dens.data, t_idx, z,
                                   r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_flow.data, moments.charged.upar,
+            append_to_dynamic_var(io_moments.parallel_flow, moments.charged.upar.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_pressure.data, moments.charged.ppar,
+            append_to_dynamic_var(io_moments.parallel_pressure, moments.charged.ppar.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.perpendicular_pressure.data, moments.charged.pperp,
+            append_to_dynamic_var(io_moments.perpendicular_pressure, moments.charged.pperp.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_heat_flux.data,
-                                  moments.charged.qpar, t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.thermal_speed.data, moments.charged.vth,
+            append_to_dynamic_var(io_moments.parallel_heat_flux,
+                                  moments.charged.qpar.data, t_idx, z, r, n_ion_species)
+            append_to_dynamic_var(io_moments.thermal_speed, moments.charged.vth.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.entropy_production.data, moments.charged.dSdt,
+            append_to_dynamic_var(io_moments.entropy_production, moments.charged.dSdt.data,
                                   t_idx, z, r, n_ion_species)
             if n_neutral_species > 0
-                append_to_dynamic_var(io_moments.density_neutral.data,
-                                      moments.neutral.dens, t_idx, z, r,
+                append_to_dynamic_var(io_moments.density_neutral,
+                                      moments.neutral.dens.data, t_idx, z, r,
                                       n_neutral_species)
-                append_to_dynamic_var(io_moments.uz_neutral.data, moments.neutral.uz,
+                append_to_dynamic_var(io_moments.uz_neutral, moments.neutral.uz.data,
                                       t_idx, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.pz_neutral.data, moments.neutral.pz,
+                append_to_dynamic_var(io_moments.pz_neutral, moments.neutral.pz.data,
                                       t_idx, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.qz_neutral.data, moments.neutral.qz,
+                append_to_dynamic_var(io_moments.qz_neutral, moments.neutral.qz.data,
                                       t_idx, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.thermal_speed_neutral.data,
-                                      moments.neutral.vth, t_idx, z, r, n_neutral_species)
+                append_to_dynamic_var(io_moments.thermal_speed_neutral,
+                                      moments.neutral.vth.data, t_idx, z, r, n_neutral_species)
             end
         end
         return nothing
@@ -685,11 +685,8 @@ end
     end
 
     @debug_shared_array begin
-        # Special versions when using DebugMPISharedArray to avoid implicit conversion to
-        # Array, which is forbidden.
-        function write_dfns_data_to_binary(ff::DebugMPISharedArray, ff_neutral::DebugMPISharedArray,
-                                           t, n_ion_species, n_neutral_species, h5::hdf5_dfns_info, t_idx, r, z, vperp,
-                                           vpa, vzeta, vr, vz)
+        function write_dfns_data_to_binary(ff::DebugMPISharedArray, ff_neutral::DebugMPISharedArray, t, n_ion_species, n_neutral_species,
+                                           io_dfns, t_idx, r, z, vperp, vpa, vzeta, vr, vz)
            @serial_region begin
                # Only read/write from first process in each 'block'
 
@@ -697,12 +694,10 @@ end
                append_to_dynamic_var(io_dfns.time, t, t_idx)
 
                # add the distribution function data at this time slice to the output file
-               # add the distribution function data at this time slice to the output file
-               append_to_dynamic_var(io_dfns.f, ff.data, t_idx, vpa, vperp, z, r,
-                                     n_ion_species)
+               append_to_dynamic_var(io_dfns.f, ff.data, t_idx, vpa, vperp, z, r, n_ion_species)
                if n_neutral_species > 0
-                   append_to_dynamic_var(io_dfns.f_neutral, ff_neutral.data, t_idx, vz, vr,
-                                         vzeta, z, r, n_neutral_species)
+                   append_to_dynamic_var(io_dfns.f_neutral, ff_neutral.data, t_idx, vz, vr, vzeta, z,
+                                         r, n_neutral_species)
                end
            end
            return nothing
diff --git a/src/fokker_planck.jl b/src/fokker_planck.jl
index 2c2a632ca..6aef825f8 100644
--- a/src/fokker_planck.jl
+++ b/src/fokker_planck.jl
@@ -176,7 +176,7 @@ function explicit_fokker_planck_collisions_weak_form!(pdf_out,pdf_in,composition
     # N.B. parallelisation is only over vpa vperp
     # ensure s, r, z are local before initiating the s, r, z loop
-    begin_serial_region()
+    begin_vperp_vpa_region()
     @loop_s_r_z is ir iz begin
         # the functions within this loop will call
         # begin_vpa_region(), begin_vperp_region(), begin_vperp_vpa_region(), begin_serial_region() to synchronise the shared-memory arrays
@@ -295,16 +295,19 @@ function fokker_planck_collision_operator_weak_form!(ffs_in,ffsp_in,ms,msp,nussp
                                                      vpa,vperp,YY_arrays)
     end
     # solve the collision operator matrix eq
-    if impose_zero_gradient_BC
-        enforce_zero_bc!(rhsc,vpa,vperp,impose_BC_at_zero_vperp=true)
-        # invert mass matrix and fill fc
-        fc = lu_obj_MMZG \ rhsc
-    else
-        enforce_zero_bc!(rhsc,vpa,vperp)
-        # invert mass matrix and fill fc
-        fc = lu_obj_MM \ rhsc
+    begin_serial_region()
+    @serial_region begin
+        if impose_zero_gradient_BC
+            enforce_zero_bc!(rhsc,vpa,vperp,impose_BC_at_zero_vperp=true)
+            # invert mass matrix and fill fc
+            sc .= lu_obj_MMZG \ rhsc
+        else
+            enforce_zero_bc!(rhsc,vpa,vperp)
+            # invert mass matrix and fill fc
+            sc .= lu_obj_MM \ rhsc
+        end
     end
-    ravel_c_to_vpavperp_parallel!(CC,fc,vpa.n)
+    ravel_c_to_vpavperp_parallel!(CC,sc,vpa.n)
     return nothing
 end
diff --git a/src/fokker_planck_calculus.jl b/src/fokker_planck_calculus.jl
index 569e66b2a..1f592c8dc 100644
--- a/src/fokker_planck_calculus.jl
+++ b/src/fokker_planck_calculus.jl
@@ -959,9 +959,9 @@ function create_sparse_matrix(data::sparse_matrix_constructor)
 end
 
 function allocate_boundary_data(vpa,vperp)
-    lower_boundary_vpa = Array{mk_float,1}(undef,vperp.n)
-    upper_boundary_vpa = Array{mk_float,1}(undef,vperp.n)
-    upper_boundary_vperp = Array{mk_float,1}(undef,vpa.n)
+    lower_boundary_vpa = allocate_shared_float(vperp.n)
+    upper_boundary_vpa = allocate_shared_float(vperp.n)
+    upper_boundary_vperp = allocate_shared_float(vpa.n)
     return vpa_vperp_boundary_data(lower_boundary_vpa,
            upper_boundary_vpa,upper_boundary_vperp)
 end
@@ -1932,6 +1932,7 @@ function assemble_explicit_collision_operator_rhs_parallel!(rhsc,rhsvpavperp,pdf
     @loop_vperp_vpa ivperp ivpa begin
         rhsvpavperp[ivpa,ivperp] = 0.0
     end
+    # loop over collocation points to benefit from shared-memory parallelism
     ngrid_vpa, ngrid_vperp = vpa.ngrid, vperp.ngrid
     @loop_vperp_vpa ivperp_global ivpa_global begin
@@ -2002,11 +2003,13 @@ function elliptic_solve!(field,source,boundary_data::vpa_vperp_boundary_data,
     ravel_vpavperp_to_c_parallel!(sc,source,vpa.n)
     # assemble the rhs of the weak system
     begin_serial_region()
-    mul!(rhsc,matrix_rhs,sc)
-    # enforce the boundary conditions
-    enforce_dirichlet_bc!(rhsc,vpa,vperp,boundary_data)
-    # solve the linear system
-    sc = lu_object_lhs \ rhsc
+    @serial_region begin
+        mul!(rhsc,matrix_rhs,sc)
+        # enforce the boundary conditions
+        enforce_dirichlet_bc!(rhsc,vpa,vperp,boundary_data)
+        # solve the linear system
+        sc .= lu_object_lhs \ rhsc
+    end
     # get data into the vpa vperp indices format
     begin_vperp_vpa_region()
     ravel_c_to_vpavperp_parallel!(field,sc,vpa.n)
@@ -2023,16 +2026,22 @@ function elliptic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_bounda
 
     # assemble the rhs of the weak system
     begin_serial_region()
-    mul!(rhsc_1,matrix_rhs_1,sc_1)
-    mul!(rhsc_2,matrix_rhs_2,sc_2)
+    @serial_region begin
+        mul!(rhsc_1,matrix_rhs_1,sc_1)
+        mul!(rhsc_2,matrix_rhs_2,sc_2)
+    end
+    begin_vperp_vpa_region()
     @loop_vperp_vpa ivperp ivpa begin
         ic = ic_func(ivpa,ivperp,vpa.n)
         rhsc_1[ic] += rhsc_2[ic]
     end
-    # enforce the boundary conditions
-    enforce_dirichlet_bc!(rhsc_1,vpa,vperp,boundary_data)
-    # solve the linear system
-    sc_1 = lu_object_lhs \ rhsc_1
+    begin_serial_region()
+    @serial_region begin
+        # enforce the boundary conditions
+        enforce_dirichlet_bc!(rhsc_1,vpa,vperp,boundary_data)
+        # solve the linear system
+        sc_1 .= lu_object_lhs \ rhsc_1
+    end
     # get data into the vpa vperp indices format
     begin_vperp_vpa_region()
     ravel_c_to_vpavperp_parallel!(field,sc_1,vpa.n)
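
Note on the recurring pattern in this patch: each LU backsolve is moved inside a serial region, and the result is broadcast-assigned (`.=`) into a pre-allocated buffer (`sc`, `sc_1`) rather than bound to a fresh array (`fc = ...`), so that when the buffer is a shared-memory array the update happens in place and is visible to the other processes in the block once the parallel region resumes. Below is a minimal standalone sketch of that idea using plain Julia arrays and LinearAlgebra in place of the moment_kinetics shared-array machinery (begin_serial_region(), @serial_region, allocate_shared_float); the size `nc`, the matrix `MM`, and the right-hand side `rhsc` are hypothetical stand-ins, not taken from the repository.

using LinearAlgebra

nc = 16                       # stand-in for the vpa.n * vperp.n compound 'c' index size
sc = zeros(nc)                # pre-allocated solution buffer; in the real code this would be a
                              # shared array, so it must be filled in place, not rebound
MM = Matrix(1.0I, nc, nc) + 0.01 * rand(nc, nc)   # stand-in mass matrix
rhsc = rand(nc)               # stand-in assembled right-hand side

lu_obj_MM = lu(MM)            # factorise once, reuse for every solve (mirrors lu_obj_MM above)

# Serial section: in moment_kinetics this is wrapped in begin_serial_region() / @serial_region
# so that only one process per shared-memory block performs the backsolve.
sc .= lu_obj_MM \ rhsc        # .= updates the existing buffer instead of rebinding sc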