From 6334057c274a8052bd415177846a2bd01de98464 Mon Sep 17 00:00:00 2001 From: Atanas Trayanov Date: Fri, 10 Jan 2025 16:00:43 -0500 Subject: [PATCH] Properly freeing MPI communicators after reading or writing restarts --- CHANGELOG.md | 1 + base/FileIOShared.F90 | 59 ++++++++++++++++++++++++++++++++++------ generic/MAPL_Generic.F90 | 28 +++++++++++++------ 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ec413d3ebe2..b62e2990fc25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Free MPI communicators after reading and/or writing of restarts - Fixed the behavior of MAPL_MaxMin in presence of NaN - Fixed bug with return codes and macros in udunits2f diff --git a/base/FileIOShared.F90 b/base/FileIOShared.F90 index 3854e48ddbdb..fba72dc619f8 100644 --- a/base/FileIOShared.F90 +++ b/base/FileIOShared.F90 @@ -44,6 +44,7 @@ module FileIOSharedMod public ArrDescrInit public ArrDescrCreateReaderComm public ArrDescrCreateWriterComm + public ArrDescrCommFree ! Global vars: ! ------------ @@ -89,9 +90,13 @@ module FileIOSharedMod type ArrDescr integer(kind=MPI_OFFSET_KIND) :: offset character(len=MPI_MAX_INFO_VAL) :: romio_cb_read,cb_buffer_size,romio_cb_write - integer :: Xcomm, Ycomm, NX0, NY0 - integer :: readers_comm, IOscattercomm - integer :: writers_comm, IOgathercomm + integer :: NX0, NY0 + integer :: Xcomm = MPI_COMM_NULL + integer :: Ycomm = MPI_COMM_NULL + integer :: readers_comm = MPI_COMM_NULL + integer :: IOscattercomm = MPI_COMM_NULL + integer :: writers_comm = MPI_COMM_NULL + integer :: IOgathercomm = MPI_COMM_NULL integer :: myrow logical :: split_restart = .false. logical :: split_checkpoint = .false. @@ -586,12 +591,21 @@ subroutine ArrDescrSet(ArrDes, offset, & integer, optional, intent(IN ) :: writers_comm, iogathercomm integer, optional, target :: i1(:), in(:), j1(:), jn(:) integer, optional, intent(IN ) :: im_world, jm_world, lm_world - + integer :: status + if(present(offset )) ArrDes%offset = offset - if(present(readers_comm )) ArrDes%readers_comm = readers_comm - if(present(ioscattercomm)) ArrDes%ioscattercomm = ioscattercomm - if(present(writers_comm )) ArrDes%writers_comm = writers_comm - if(present(iogathercomm )) ArrDes%iogathercomm = iogathercomm + if(present(readers_comm )) then + call MPI_Comm_Dup(readers_comm, ArrDes%readers_comm, status) + end if + if(present(ioscattercomm)) then + call MPI_Comm_Dup(ioscattercomm, ArrDes%ioscattercomm, status) + end if + if(present(writers_comm )) then + call MPI_Comm_Dup(writers_comm, ArrDes%writers_comm, status) + end if + if(present(iogathercomm)) then + call MPI_Comm_Dup(iogathercomm, ArrDes%iogathercomm, status) + end if if(present(i1 )) ArrDes%i1 => i1 if(present(in )) ArrDes%in => in if(present(j1 )) ArrDes%j1 => j1 @@ -874,4 +888,33 @@ subroutine ArrayScatterShmR4D1(local_array, global_array, grid, mask, rc) _RETURN(ESMF_SUCCESS) end subroutine ArrayScatterShmR4D1 + subroutine ArrDescrCommFree(arrdes, rc) + type(ArrDescr), intent(inout) :: arrdes + integer, optional, intent(out) :: rc + + integer :: status + + call MAPL_CommFree(arrdes%Xcomm, _RC) + call MAPL_CommFree(arrdes%Ycomm, _RC) + call MAPL_CommFree(arrdes%readers_comm, _RC) + call MAPL_CommFree(arrdes%writers_comm, _RC) + call MAPL_CommFree(arrdes%IOgathercomm, _RC) + call MAPL_CommFree(arrdes%IOscattercomm, _RC) + + _RETURN(ESMF_SUCCESS) + end subroutine ArrDescrCommFree + + subroutine MAPL_CommFree(comm, rc) + integer, intent(inout) :: comm + integer, optional, intent(out) :: rc + + integer :: status + + if(comm /= MPI_COMM_NULL) then + call MPI_Comm_Free(comm, status) + _VERIFY(status) + comm = MPI_COMM_NULL + end if + _RETURN(ESMF_SUCCESS) + end subroutine MAPL_CommFree end module FileIOSharedMod diff --git a/generic/MAPL_Generic.F90 b/generic/MAPL_Generic.F90 index beaab3718010..60019fc96746 100644 --- a/generic/MAPL_Generic.F90 +++ b/generic/MAPL_Generic.F90 @@ -5932,7 +5932,7 @@ subroutine MAPL_ESMFStateWriteToFile(STATE,CLOCK,FILENAME,FILETYPE,MPL,HDR, writ if(filetype=='pbinary' ) then - arrdes%ycomm = mpl%grid%Ycomm + call MPI_Comm_Dup(mpl%grid%Ycomm,ArrDes%Ycomm, status) call MAPL_VarWrite(UNIT=UNIT, STATE=STATE, arrdes=arrdes, rc=status) _VERIFY(status) @@ -5962,6 +5962,7 @@ subroutine MAPL_ESMFStateWriteToFile(STATE,CLOCK,FILENAME,FILETYPE,MPL,HDR, writ _VERIFY(status) endif + call ArrDescrCommFree(arrdes, _RC) _RETURN(ESMF_SUCCESS) end subroutine MAPL_ESMFStateWriteToFile @@ -6254,7 +6255,7 @@ subroutine MAPL_ESMFStateReadFromFile(STATE,CLOCK,FILENAME,MPL,HDR,RC) if(filetype=='pbinary') then call ArrDescrSet(arrdes, offset) - arrdes%Ycomm = mpl%grid%Ycomm + call MPI_Comm_Dup(mpl%grid%Ycomm,ArrDes%Ycomm, status) call MAPL_VarRead(UNIT=UNIT, STATE=STATE, arrdes=arrdes, RC=status) _VERIFY(status) if (AmReader) then @@ -6285,6 +6286,7 @@ subroutine MAPL_ESMFStateReadFromFile(STATE,CLOCK,FILENAME,MPL,HDR,RC) call MAPL_AttributeSet(STATE, NAME="MAPL_InitStatus", VALUE=MAPL_InitialRestart, RC=status) _VERIFY(status) + call ArrDescrCommFree(arrdes, _RC) _RETURN(ESMF_SUCCESS) contains @@ -9856,7 +9858,7 @@ subroutine READIT(WHICH) im_world = mpl%grid%im_world, & jm_world = mpl%grid%jm_world) endif - arrdes%Ycomm = mpl%grid%Ycomm + call MPI_Comm_Dup(mpl%grid%Ycomm,ArrDes%Ycomm, status) if (AmReader) then call MPI_COMM_RANK(mpl%grid%readers_comm, io_rank, status) @@ -11039,10 +11041,14 @@ subroutine ArrDescrSetNCPar(ArrDes, MPL, tile, offset, num_readers, num_writers, arrdes%NX0 = mpl%grid%NX0 arrdes%tile=.true. arrdes%grid=tilegrid - call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC) - call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC) - arrdes%iogathercomm = mpl%grid%comm - arrdes%ioscattercomm = mpl%grid%comm + if (present(num_writers)) then + call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC) + end if + if (present(num_readers)) then + call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC) + end if + call MPI_Comm_Dup(mpl%grid%comm,ArrDes%iogathercomm, status) + call MPI_Comm_Dup(mpl%grid%comm,ArrDes%ioscattercomm, status) arrdes%split_restart = .false. arrdes%split_checkpoint = .false. else @@ -11067,8 +11073,12 @@ subroutine ArrDescrSetNCPar(ArrDes, MPL, tile, offset, num_readers, num_writers, arrdes%NX0 = mpl%grid%NX0 arrdes%tile=.false. arrdes%grid=MPL%GRID%ESMFGRID - call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC) - call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC) + if (present(num_writers)) then + call ArrDescrCreateWriterComm(arrdes,mpl%grid%comm,mpl%grid%num_writers,_RC) + end if + if (present(num_readers)) then + call ArrDescrCreateReaderComm(arrdes,mpl%grid%comm,mpl%grid%num_readers,_RC) + end if call mpi_comm_rank(arrdes%ycomm,arrdes%myrow,status) _VERIFY(status) arrdes%split_restart = mpl%grid%split_restart