Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly freeing MPI communicators after reading or writing restarts #3303

Merged
merged 1 commit into from
Jan 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Free MPI communicators after reading and/or writing of restarts
- Fixed the behavior of MAPL_MaxMin in presence of NaN
- Fixed bug with return codes and macros in udunits2f

Expand Down
59 changes: 51 additions & 8 deletions base/FileIOShared.F90
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ module FileIOSharedMod
public ArrDescrInit
public ArrDescrCreateReaderComm
public ArrDescrCreateWriterComm
public ArrDescrCommFree

! Global vars:
! ------------
Expand Down Expand Up @@ -89,9 +90,13 @@ module FileIOSharedMod
type ArrDescr
integer(kind=MPI_OFFSET_KIND) :: offset
character(len=MPI_MAX_INFO_VAL) :: romio_cb_read,cb_buffer_size,romio_cb_write
integer :: Xcomm, Ycomm, NX0, NY0
integer :: readers_comm, IOscattercomm
integer :: writers_comm, IOgathercomm
integer :: NX0, NY0
integer :: Xcomm = MPI_COMM_NULL
integer :: Ycomm = MPI_COMM_NULL
integer :: readers_comm = MPI_COMM_NULL
integer :: IOscattercomm = MPI_COMM_NULL
integer :: writers_comm = MPI_COMM_NULL
integer :: IOgathercomm = MPI_COMM_NULL
integer :: myrow
logical :: split_restart = .false.
logical :: split_checkpoint = .false.
Expand Down Expand Up @@ -586,12 +591,21 @@ subroutine ArrDescrSet(ArrDes, offset, &
integer, optional, intent(IN ) :: writers_comm, iogathercomm
integer, optional, target :: i1(:), in(:), j1(:), jn(:)
integer, optional, intent(IN ) :: im_world, jm_world, lm_world

integer :: status

if(present(offset )) ArrDes%offset = offset
if(present(readers_comm )) ArrDes%readers_comm = readers_comm
if(present(ioscattercomm)) ArrDes%ioscattercomm = ioscattercomm
if(present(writers_comm )) ArrDes%writers_comm = writers_comm
if(present(iogathercomm )) ArrDes%iogathercomm = iogathercomm
if(present(readers_comm )) then
call MPI_Comm_Dup(readers_comm, ArrDes%readers_comm, status)
end if
if(present(ioscattercomm)) then
call MPI_Comm_Dup(ioscattercomm, ArrDes%ioscattercomm, status)
end if
if(present(writers_comm )) then
call MPI_Comm_Dup(writers_comm, ArrDes%writers_comm, status)
end if
if(present(iogathercomm)) then
call MPI_Comm_Dup(iogathercomm, ArrDes%iogathercomm, status)
end if
if(present(i1 )) ArrDes%i1 => i1
if(present(in )) ArrDes%in => in
if(present(j1 )) ArrDes%j1 => j1
Expand Down Expand Up @@ -874,4 +888,33 @@ subroutine ArrayScatterShmR4D1(local_array, global_array, grid, mask, rc)
_RETURN(ESMF_SUCCESS)
end subroutine ArrayScatterShmR4D1

subroutine ArrDescrCommFree(arrdes, rc)
type(ArrDescr), intent(inout) :: arrdes
integer, optional, intent(out) :: rc

integer :: status

call MAPL_CommFree(arrdes%Xcomm, _RC)
call MAPL_CommFree(arrdes%Ycomm, _RC)
call MAPL_CommFree(arrdes%readers_comm, _RC)
call MAPL_CommFree(arrdes%writers_comm, _RC)
call MAPL_CommFree(arrdes%IOgathercomm, _RC)
call MAPL_CommFree(arrdes%IOscattercomm, _RC)

_RETURN(ESMF_SUCCESS)
end subroutine ArrDescrCommFree

subroutine MAPL_CommFree(comm, rc)
integer, intent(inout) :: comm
integer, optional, intent(out) :: rc

integer :: status

if(comm /= MPI_COMM_NULL) then
call MPI_Comm_Free(comm, status)
_VERIFY(status)
comm = MPI_COMM_NULL
end if
_RETURN(ESMF_SUCCESS)
end subroutine MAPL_CommFree
end module FileIOSharedMod
28 changes: 19 additions & 9 deletions generic/MAPL_Generic.F90
Original file line number Diff line number Diff line change
Expand Up @@ -5932,7 +5932,7 @@ subroutine MAPL_ESMFStateWriteToFile(STATE,CLOCK,FILENAME,FILETYPE,MPL,HDR, writ


if(filetype=='pbinary' ) then
arrdes%ycomm = mpl%grid%Ycomm
call MPI_Comm_Dup(mpl%grid%Ycomm,ArrDes%Ycomm, status)
call MAPL_VarWrite(UNIT=UNIT, STATE=STATE, arrdes=arrdes, rc=status)
_VERIFY(status)

Expand Down Expand Up @@ -5962,6 +5962,7 @@ subroutine MAPL_ESMFStateWriteToFile(STATE,CLOCK,FILENAME,FILETYPE,MPL,HDR, writ
_VERIFY(status)
endif

call ArrDescrCommFree(arrdes, _RC)
_RETURN(ESMF_SUCCESS)
end subroutine MAPL_ESMFStateWriteToFile

Expand Down Expand Up @@ -6254,7 +6255,7 @@ subroutine MAPL_ESMFStateReadFromFile(STATE,CLOCK,FILENAME,MPL,HDR,RC)

if(filetype=='pbinary') then
call ArrDescrSet(arrdes, offset)
arrdes%Ycomm = mpl%grid%Ycomm
call MPI_Comm_Dup(mpl%grid%Ycomm,ArrDes%Ycomm, status)
call MAPL_VarRead(UNIT=UNIT, STATE=STATE, arrdes=arrdes, RC=status)
_VERIFY(status)
if (AmReader) then
Expand Down Expand Up @@ -6285,6 +6286,7 @@ subroutine MAPL_ESMFStateReadFromFile(STATE,CLOCK,FILENAME,MPL,HDR,RC)
call MAPL_AttributeSet(STATE, NAME="MAPL_InitStatus", VALUE=MAPL_InitialRestart, RC=status)
_VERIFY(status)

call ArrDescrCommFree(arrdes, _RC)
_RETURN(ESMF_SUCCESS)

contains
Expand Down Expand Up @@ -9856,7 +9858,7 @@ subroutine READIT(WHICH)
im_world = mpl%grid%im_world, &
jm_world = mpl%grid%jm_world)
endif
arrdes%Ycomm = mpl%grid%Ycomm
call MPI_Comm_Dup(mpl%grid%Ycomm,ArrDes%Ycomm, status)

if (AmReader) then
call MPI_COMM_RANK(mpl%grid%readers_comm, io_rank, status)
Expand Down Expand Up @@ -11039,10 +11041,14 @@ subroutine ArrDescrSetNCPar(ArrDes, MPL, tile, offset, num_readers, num_writers,
arrdes%NX0 = mpl%grid%NX0
arrdes%tile=.true.
arrdes%grid=tilegrid
call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC)
call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC)
arrdes%iogathercomm = mpl%grid%comm
arrdes%ioscattercomm = mpl%grid%comm
if (present(num_writers)) then
call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC)
end if
if (present(num_readers)) then
call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC)
end if
call MPI_Comm_Dup(mpl%grid%comm,ArrDes%iogathercomm, status)
call MPI_Comm_Dup(mpl%grid%comm,ArrDes%ioscattercomm, status)
arrdes%split_restart = .false.
arrdes%split_checkpoint = .false.
else
Expand All @@ -11067,8 +11073,12 @@ subroutine ArrDescrSetNCPar(ArrDes, MPL, tile, offset, num_readers, num_writers,
arrdes%NX0 = mpl%grid%NX0
arrdes%tile=.false.
arrdes%grid=MPL%GRID%ESMFGRID
call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC)
call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC)
if (present(num_writers)) then
call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC)
end if
if (present(num_readers)) then
call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC)
end if
call mpi_comm_rank(arrdes%ycomm,arrdes%myrow,status)
_VERIFY(status)
arrdes%split_restart = mpl%grid%split_restart
Expand Down
Loading