Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor improvement #92

Merged
merged 1 commit into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/AMDGPUExt/update_halo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ let

rocstreams = Array{AMDGPU.HIPStream}(undef, NNEIGHBORS_PER_DIM, 0)

wait_iwrite(n::Integer, A::ROCField{T}, i::Integer) where T <: GGNumber = AMDGPU.synchronize(rocstreams[n,i]);
wait_iwrite(n::Integer, A::ROCField{T}, i::Integer) where T <: GGNumber = AMDGPU.synchronize(rocstreams[n,i]; blocking=true);

function allocate_rocstreams_iwrite(fields::GGField...)
if length(fields) > size(rocstreams,2) # Note: for simplicity, we create a stream for every field even if it is not a ROCField
Expand Down Expand Up @@ -169,7 +169,7 @@ let

rocstreams = Array{AMDGPU.HIPStream}(undef, NNEIGHBORS_PER_DIM, 0)

wait_iread(n::Integer, A::ROCField{T}, i::Integer) where T <: GGNumber = AMDGPU.synchronize(rocstreams[n,i]);
wait_iread(n::Integer, A::ROCField{T}, i::Integer) where T <: GGNumber = AMDGPU.synchronize(rocstreams[n,i]; blocking=true);

function allocate_rocstreams_iread(fields::GGField...)
if length(fields) > size(rocstreams,2) # Note: for simplicity, we create a stream for every field even if it is not a ROCField
Expand Down
4 changes: 2 additions & 2 deletions src/CUDAExt/update_halo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ let

custreams = Array{CuStream}(undef, NNEIGHBORS_PER_DIM, 0)

wait_iwrite(n::Integer, A::CuField{T}, i::Integer) where T <: GGNumber = CUDA.synchronize(custreams[n,i]);
wait_iwrite(n::Integer, A::CuField{T}, i::Integer) where T <: GGNumber = CUDA.synchronize(custreams[n,i]; blocking=true);

function allocate_custreams_iwrite(fields::GGField...)
if length(fields) > size(custreams,2) # Note: for simplicity, we create a stream for every field even if it is not a CuField
Expand Down Expand Up @@ -179,7 +179,7 @@ let

custreams = Array{CuStream}(undef, NNEIGHBORS_PER_DIM, 0)

wait_iread(n::Integer, A::CuField{T}, i::Integer) where T <: GGNumber = CUDA.synchronize(custreams[n,i]);
wait_iread(n::Integer, A::CuField{T}, i::Integer) where T <: GGNumber = CUDA.synchronize(custreams[n,i]; blocking=true);

function allocate_custreams_iread(fields::GGField...)
if length(fields) > size(custreams,2) # Note: for simplicity, we create a stream for every field even if it is not a CuField
Expand Down
8 changes: 4 additions & 4 deletions src/update_halo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,22 @@ Update the halo of the given GPU/CPU-array(s).
shell> export IGG_ROCMAWARE_MPI=1
```
"""
function update_halo!(A::Union{GGArray, GGField, GGFieldConvertible}...)
function update_halo!(A::Union{GGArray, GGField, GGFieldConvertible}...; dims=(NDIMS_MPI,(1:NDIMS_MPI-1)...))
check_initialized();
fields = wrap_field.(A);
check_fields(fields...);
_update_halo!(fields...); # Assignment of A to fields in the internal function _update_halo!() as vararg A can consist of multiple fields; A will be used for a single field in the following (The args of update_halo! must however be "A..." for maximal simplicity and elegance for the user).
_update_halo!(fields...; dims=dims); # Assignment of A to fields in the internal function _update_halo!() as vararg A can consist of multiple fields; A will be used for a single field in the following (The args of update_halo! must however be "A..." for maximal simplicity and elegance for the user).
return nothing
end
#
function _update_halo!(fields::GGField...)
function _update_halo!(fields::GGField...; dims=dims)
if (!cuda_enabled() && !amdgpu_enabled() && !all_arrays(fields...)) error("not all arrays are CPU arrays, but no GPU extension is loaded.") end #NOTE: in the following, it is only required to check for `cuda_enabled()`/`amdgpu_enabled()` when the context does not imply `any_cuarray(fields...)` or `is_cuarray(A)` or the corresponding for AMDGPU. # NOTE: the case where only one of the two extensions is loaded, but an array that belongs to the other extension is passed, is very unlikely and therefore not explicitly checked here (but could be added later).
allocate_bufs(fields...);
if any_array(fields...) allocate_tasks(fields...); end
if any_cuarray(fields...) allocate_custreams(fields...); end
if any_rocarray(fields...) allocate_rocstreams(fields...); end

for dim = 1:NDIMS_MPI # NOTE: this works for 1D-3D (e.g. if nx>1, ny>1 and nz=1, then for d=3, there will be no neighbors, i.e. nothing will be done as desired...).
for dim in dims # NOTE: this works for 1D-3D (e.g. if nx>1, ny>1 and nz=1, then for d=3, there will be no neighbors, i.e. nothing will be done as desired...).
for ns = 1:NNEIGHBORS_PER_DIM, i = 1:length(fields)
if has_neighbor(ns, dim) iwrite_sendbufs!(ns, dim, fields[i], i); end
end
Expand Down
Loading