Corrected bugs in the usage of the shared-memory formalism.
Michael Hardman committed Oct 31, 2023
1 parent 3a53caf commit c0cc4a2
Showing 3 changed files with 59 additions and 52 deletions.
src/file_io.jl (51 changes: 23 additions & 28 deletions)
@@ -647,62 +647,57 @@ end
             append_to_dynamic_var(io_moments.time, t, t_idx)
 
             # add the electrostatic potential and electric field components at this time slice to the hdf5 file
-            append_to_dynamic_var(io_moments.phi.data, fields.phi, t_idx, z, r)
-            append_to_dynamic_var(io_moments.Er.data, fields.Er, t_idx, z, r)
-            append_to_dynamic_var(io_moments.Ez.data, fields.Ez, t_idx, z, r)
+            append_to_dynamic_var(io_moments.phi, fields.phi.data, t_idx, z, r)
+            append_to_dynamic_var(io_moments.Er, fields.Er.data, t_idx, z, r)
+            append_to_dynamic_var(io_moments.Ez, fields.Ez.data, t_idx, z, r)
 
             # add the density data at this time slice to the output file
-            append_to_dynamic_var(io_moments.density.data, moments.charged.dens, t_idx, z,
+            append_to_dynamic_var(io_moments.density, moments.charged.dens.data, t_idx, z,
                                   r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_flow.data, moments.charged.upar,
+            append_to_dynamic_var(io_moments.parallel_flow, moments.charged.upar.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_pressure.data, moments.charged.ppar,
+            append_to_dynamic_var(io_moments.parallel_pressure, moments.charged.ppar.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.perpendicular_pressure.data, moments.charged.pperp,
+            append_to_dynamic_var(io_moments.perpendicular_pressure, moments.charged.pperp.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_heat_flux.data,
-                                  moments.charged.qpar, t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.thermal_speed.data, moments.charged.vth,
+            append_to_dynamic_var(io_moments.parallel_heat_flux,
+                                  moments.charged.qpar.data, t_idx, z, r, n_ion_species)
+            append_to_dynamic_var(io_moments.thermal_speed, moments.charged.vth.data,
                                   t_idx, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.entropy_production.data, moments.charged.dSdt,
+            append_to_dynamic_var(io_moments.entropy_production, moments.charged.dSdt.data,
                                   t_idx, z, r, n_ion_species)
             if n_neutral_species > 0
-                append_to_dynamic_var(io_moments.density_neutral.data,
-                                      moments.neutral.dens, t_idx, z, r,
+                append_to_dynamic_var(io_moments.density_neutral,
+                                      moments.neutral.dens.data, t_idx, z, r,
                                       n_neutral_species)
-                append_to_dynamic_var(io_moments.uz_neutral.data, moments.neutral.uz,
+                append_to_dynamic_var(io_moments.uz_neutral, moments.neutral.uz.data,
                                       t_idx, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.pz_neutral.data, moments.neutral.pz,
+                append_to_dynamic_var(io_moments.pz_neutral, moments.neutral.pz.data,
                                       t_idx, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.qz_neutral.data, moments.neutral.qz,
+                append_to_dynamic_var(io_moments.qz_neutral, moments.neutral.qz.data,
                                       t_idx, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.thermal_speed_neutral.data,
-                                      moments.neutral.vth, t_idx, z, r, n_neutral_species)
+                append_to_dynamic_var(io_moments.thermal_speed_neutral,
+                                      moments.neutral.vth.data, t_idx, z, r, n_neutral_species)
             end
         end
         return nothing
     end
 end
 
 @debug_shared_array begin
     # Special versions when using DebugMPISharedArray to avoid implicit conversion to
     # Array, which is forbidden.
-    function write_dfns_data_to_binary(ff::DebugMPISharedArray, ff_neutral::DebugMPISharedArray,
-                                       t, n_ion_species, n_neutral_species, h5::hdf5_dfns_info, t_idx, r, z, vperp,
-                                       vpa, vzeta, vr, vz)
+    function write_dfns_data_to_binary(ff::DebugMPISharedArray, ff_neutral::DebugMPISharedArray, t, n_ion_species, n_neutral_species,
+                                       io_dfns, t_idx, r, z, vperp, vpa, vzeta, vr, vz)
         @serial_region begin
             # Only read/write from first process in each 'block'
 
             # add the time for this time slice to the hdf5 file
             append_to_dynamic_var(io_dfns.time, t, t_idx)
 
             # add the distribution function data at this time slice to the output file
-            append_to_dynamic_var(io_dfns.f, ff.data, t_idx, vpa, vperp, z, r,
-                                  n_ion_species)
+            append_to_dynamic_var(io_dfns.f, ff.data, t_idx, vpa, vperp, z, r, n_ion_species)
             if n_neutral_species > 0
-                append_to_dynamic_var(io_dfns.f_neutral, ff_neutral.data, t_idx, vz, vr,
-                                      vzeta, z, r, n_neutral_species)
+                append_to_dynamic_var(io_dfns.f_neutral, ff_neutral.data, t_idx, vz, vr, vzeta, z,
+                                      r, n_neutral_species)
             end
         end
         return nothing
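The bug in this file: `.data` was being taken from the output-file handles (`io_moments.phi.data`) while the DebugMPISharedArray-wrapped simulation arrays were passed through whole, which forces exactly the implicit conversion to Array that the debug wrapper forbids. The fix moves `.data` onto the wrapped arrays (`fields.phi.data`, `moments.charged.dens.data`, and so on). A minimal, package-free sketch of the idea, where `DebugWrapper` and `write_slice` are illustrative stand-ins rather than moment_kinetics API:

    # Toy stand-in for a debug wrapper around a shared-memory array. It behaves like
    # an AbstractArray, but callers are expected to unwrap `.data` explicitly rather
    # than rely on implicit conversion to Array.
    struct DebugWrapper{T,N} <: AbstractArray{T,N}
        data::Array{T,N}
    end
    Base.size(w::DebugWrapper) = size(w.data)
    Base.getindex(w::DebugWrapper, i...) = getindex(w.data, i...)

    # Stand-in for append_to_dynamic_var: the data argument must be a plain Array.
    write_slice(io_var, data::Array, t_idx) = println("wrote slice $t_idx to $io_var")

    phi = DebugWrapper(zeros(4, 3))   # simulation field, wrapped for debugging
    io_phi = "io_moments.phi"         # stand-in for the output-variable handle

    write_slice(io_phi, phi.data, 1)  # fixed pattern: unwrap the wrapped array explicitly
    # write_slice(io_phi, phi, 1)     # old pattern: fails here with a MethodError; in the
    #                                 # real code it is the conversion the debug type forbids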
src/fokker_planck.jl (23 changes: 13 additions & 10 deletions)
@@ -176,7 +176,7 @@ function explicit_fokker_planck_collisions_weak_form!(pdf_out,pdf_in,composition
 
     # N.B. parallelisation is only over vpa vperp
     # ensure s, r, z are local before initiating the s, r, z loop
-    begin_serial_region()
+    begin_vperp_vpa_region()
     @loop_s_r_z is ir iz begin
         # the functions within this loop will call
         # begin_vpa_region(), begin_vperp_region(), begin_vperp_vpa_region(), begin_serial_region() to synchronise the shared-memory arrays
@@ -295,16 +295,19 @@ function fokker_planck_collision_operator_weak_form!(ffs_in,ffsp_in,ms,msp,nussp
                                           vpa,vperp,YY_arrays)
     end
     # solve the collision operator matrix eq
-    if impose_zero_gradient_BC
-        enforce_zero_bc!(rhsc,vpa,vperp,impose_BC_at_zero_vperp=true)
-        # invert mass matrix and fill fc
-        fc = lu_obj_MMZG \ rhsc
-    else
-        enforce_zero_bc!(rhsc,vpa,vperp)
-        # invert mass matrix and fill fc
-        fc = lu_obj_MM \ rhsc
+    begin_serial_region()
+    @serial_region begin
+        if impose_zero_gradient_BC
+            enforce_zero_bc!(rhsc,vpa,vperp,impose_BC_at_zero_vperp=true)
+            # invert mass matrix and fill fc
+            sc .= lu_obj_MMZG \ rhsc
+        else
+            enforce_zero_bc!(rhsc,vpa,vperp)
+            # invert mass matrix and fill fc
+            sc .= lu_obj_MM \ rhsc
+        end
     end
-    ravel_c_to_vpavperp_parallel!(CC,fc,vpa.n)
+    ravel_c_to_vpavperp_parallel!(CC,sc,vpa.n)
     return nothing
 end

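Two shared-memory fixes in this file. First, the synchronisation call before the `@loop_s_r_z` loop becomes `begin_vperp_vpa_region()`, matching the note that parallelisation is only over vpa and vperp. Second, the mass-matrix solve is wrapped in `@serial_region` and the result is broadcast into the shared buffer `sc` instead of being bound to a fresh local `fc`: `fc = lu_obj_MM \ rhsc` only rebinds a local name, so the other processes in the block would never see the solution. A minimal, package-free sketch of that pitfall, with plain Arrays standing in for the shared buffers and illustrative function names:

    using LinearAlgebra

    function solve_rebinding!(sc, lu_obj, rhs)
        sc = lu_obj \ rhs   # rebinds the local name only; the caller's buffer is untouched
        return nothing
    end

    function solve_in_place!(sc, lu_obj, rhs)
        sc .= lu_obj \ rhs  # writes the solution through to the caller's buffer
        return nothing
    end

    lu_obj = lu([2.0 0.0; 0.0 4.0])
    rhs = [1.0, 2.0]

    sc = zeros(2)
    solve_rebinding!(sc, lu_obj, rhs)
    @show sc                # still [0.0, 0.0]: the result was lost
    solve_in_place!(sc, lu_obj, rhs)
    @show sc                # [0.5, 0.5]: the buffer now holds the solution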
src/fokker_planck_calculus.jl (37 changes: 23 additions & 14 deletions)
@@ -959,9 +959,9 @@ function create_sparse_matrix(data::sparse_matrix_constructor)
 end
 
 function allocate_boundary_data(vpa,vperp)
-    lower_boundary_vpa = Array{mk_float,1}(undef,vperp.n)
-    upper_boundary_vpa = Array{mk_float,1}(undef,vperp.n)
-    upper_boundary_vperp = Array{mk_float,1}(undef,vpa.n)
+    lower_boundary_vpa = allocate_shared_float(vperp.n)
+    upper_boundary_vpa = allocate_shared_float(vperp.n)
+    upper_boundary_vperp = allocate_shared_float(vpa.n)
     return vpa_vperp_boundary_data(lower_boundary_vpa,
                                    upper_boundary_vpa,upper_boundary_vperp)
 end
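The boundary-data buffers were plain `Array`s, which are private to each process, so values filled in by one process were invisible to the rest of the shared-memory block; `allocate_shared_float` (the package's shared-memory allocator, as used elsewhere in the code) fixes that. moment_kinetics uses MPI shared-memory windows, but the local-versus-shared distinction can be illustrated with the Distributed and SharedArrays standard libraries; the sketch below is an analogy only, not the mechanism the package uses:

    using Distributed
    addprocs(1)
    @everywhere using SharedArrays

    local_buf  = zeros(3)                 # ordinary Array: each process gets its own copy
    shared_buf = SharedArray{Float64}(3)  # backing storage visible to all local workers

    # Worker 2 fills both buffers.
    remotecall_fetch(2, local_buf, shared_buf) do l, s
        l .= 1.0    # fills worker 2's deserialised copy only
        s .= 1.0    # fills the shared storage
        return nothing
    end

    @show local_buf     # still zeros on the calling process
    @show shared_buf    # all ones: the worker's write is visible here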
@@ -1932,6 +1932,7 @@ function assemble_explicit_collision_operator_rhs_parallel!(rhsc,rhsvpavperp,pdf
     @loop_vperp_vpa ivperp ivpa begin
         rhsvpavperp[ivpa,ivperp] = 0.0
     end
+
     # loop over collocation points to benefit from shared-memory parallelism
     ngrid_vpa, ngrid_vperp = vpa.ngrid, vperp.ngrid
     @loop_vperp_vpa ivperp_global ivpa_global begin
@@ -2002,11 +2003,13 @@ function elliptic_solve!(field,source,boundary_data::vpa_vperp_boundary_data,
     ravel_vpavperp_to_c_parallel!(sc,source,vpa.n)
     # assemble the rhs of the weak system
     begin_serial_region()
-    mul!(rhsc,matrix_rhs,sc)
-    # enforce the boundary conditions
-    enforce_dirichlet_bc!(rhsc,vpa,vperp,boundary_data)
-    # solve the linear system
-    sc = lu_object_lhs \ rhsc
+    @serial_region begin
+        mul!(rhsc,matrix_rhs,sc)
+        # enforce the boundary conditions
+        enforce_dirichlet_bc!(rhsc,vpa,vperp,boundary_data)
+        # solve the linear system
+        sc .= lu_object_lhs \ rhsc
+    end
     # get data into the vpa vperp indices format
     begin_vperp_vpa_region()
     ravel_c_to_vpavperp_parallel!(field,sc,vpa.n)
@@ -2023,16 +2026,22 @@ function elliptic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
 
     # assemble the rhs of the weak system
     begin_serial_region()
-    mul!(rhsc_1,matrix_rhs_1,sc_1)
-    mul!(rhsc_2,matrix_rhs_2,sc_2)
+    @serial_region begin
+        mul!(rhsc_1,matrix_rhs_1,sc_1)
+        mul!(rhsc_2,matrix_rhs_2,sc_2)
+    end
     begin_vperp_vpa_region()
     @loop_vperp_vpa ivperp ivpa begin
         ic = ic_func(ivpa,ivperp,vpa.n)
         rhsc_1[ic] += rhsc_2[ic]
     end
-    # enforce the boundary conditions
-    enforce_dirichlet_bc!(rhsc_1,vpa,vperp,boundary_data)
-    # solve the linear system
-    sc_1 = lu_object_lhs \ rhsc_1
+    begin_serial_region()
+    @serial_region begin
+        # enforce the boundary conditions
+        enforce_dirichlet_bc!(rhsc_1,vpa,vperp,boundary_data)
+        # solve the linear system
+        sc_1 .= lu_object_lhs \ rhsc_1
+    end
     # get data into the vpa vperp indices format
     begin_vperp_vpa_region()
     ravel_c_to_vpavperp_parallel!(field,sc_1,vpa.n)
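Both `elliptic_solve!` variants now follow the same shape: ravel the source into the compound index in the vpa-vperp region, have a single process per block assemble the right-hand side, impose the boundary data and back-substitute through the LU object inside `@serial_region`, writing into the shared buffer with `.=`, then switch back to the vpa-vperp region to scatter the solution into `field`. The serial step in isolation looks roughly like the sketch below, with illustrative sizes and matrices standing in for the weak-form assembly:

    using LinearAlgebra

    n = 4
    matrix_rhs    = Matrix(1.0I, n, n)                          # stand-in for the weak-form rhs operator
    lu_object_lhs = lu([4.0 1 0 0; 1 4 1 0; 0 1 4 1; 0 0 1 4])  # stand-in for the factorised lhs
    sc   = ones(n)      # ravelled source / solution buffer (a shared array in the real code)
    rhsc = zeros(n)     # ravelled right-hand-side buffer

    # In the real code this block sits inside `@serial_region begin ... end`, so only
    # one process per shared-memory block executes it.
    mul!(rhsc, matrix_rhs, sc)      # rhsc = matrix_rhs * sc, computed in place
    rhsc[1] = 0.0; rhsc[end] = 0.0  # crude stand-in for enforce_dirichlet_bc!
    sc .= lu_object_lhs \ rhsc      # overwrite the shared buffer; do not rebind it
    @show sc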