diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index a6a51b65f..e04f321e1 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 . ./mfc.sh load -c f -m g
-./mfc.sh build -j 8 --gpu
+./mfc.sh build -j 8 --gpu --sys-hdf5 --sys-fftw
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index fab53ef8e..9f26a2a03 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -1,3 +1,6 @@
 #!/bin/bash
-./mfc.sh test -j 4 -a -- -c frontier
+gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '`
+ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
+
+./mfc.sh test -j $ngpus --sys-hdf5 --sys-fftw -- -c frontier
diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp
index 7650e89dc..58fb51be7 100644
--- a/src/simulation/m_fftw.fpp
+++ b/src/simulation/m_fftw.fpp
@@ -57,9 +57,9 @@ module m_fftw
 @:CRAY_DECLARE_GLOBAL(complex(kind(0d0)), dimension(:), data_fltr_cmplx_gpu)
 !$acc declare link(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
 #else
-    real(kind(0d0)), allocatable :: data_real_gpu(:)
-    complex(kind(0d0)), allocatable :: data_cmplx_gpu(:)
-    complex(kind(0d0)), allocatable :: data_fltr_cmplx_gpu(:)
+    real(kind(0d0)), allocatable, target :: data_real_gpu(:)
+    complex(kind(0d0)), allocatable, target :: data_cmplx_gpu(:)
+    complex(kind(0d0)), allocatable, target :: data_fltr_cmplx_gpu(:)
 !$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
 #endif
@@ -141,7 +141,8 @@ contains
     subroutine s_apply_fourier_filter(q_cons_vf)
         type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
-
+        real(c_double), pointer :: p_real(:)
+        complex(c_double_complex), pointer :: p_cmplx(:), p_fltr_cmplx(:)
         integer :: i, j, k, l !< Generic loop iterators
         ! Restrict filter to processors that have cells adjacent to axis
@@ -166,11 +167,16 @@ contains
             end do
         end do
-!$acc host_data use_device(data_real_gpu, data_cmplx_gpu)
+        p_real => data_real_gpu
+        p_cmplx => data_cmplx_gpu
+        p_fltr_cmplx => data_fltr_cmplx_gpu
+
+!$acc data attach(p_real, p_cmplx, p_fltr_cmplx)
+!$acc host_data use_device(p_real, p_cmplx, p_fltr_cmplx)
 #if defined(__PGI)
         ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
 #else
-        ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(data_real_gpu), c_loc(data_cmplx_gpu))
+        ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
         call hipCheck(hipDeviceSynchronize())
 #endif
 !$acc end host_data
@@ -186,11 +192,11 @@ contains
             end do
         end do
-!$acc host_data use_device(data_real_gpu, data_fltr_cmplx_gpu)
+!$acc host_data use_device(p_real, p_fltr_cmplx)
 #if defined(__PGI)
         ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
 #else
-        ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(data_fltr_cmplx_gpu), c_loc(data_real_gpu))
+        ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
         call hipCheck(hipDeviceSynchronize())
 #endif
 !$acc end host_data
@@ -225,11 +231,11 @@ contains
             end do
         end do
-!$acc host_data use_device(data_real_gpu, data_cmplx_gpu)
+!$acc host_data use_device(p_real, p_cmplx)
 #if defined(__PGI)
         ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
 #else
-        ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(data_real_gpu), c_loc(data_cmplx_gpu))
+        ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
         call hipCheck(hipDeviceSynchronize())
 #endif
 !$acc end host_data
@@ -246,11 +252,11 @@ contains
             end do
         end do
-!$acc host_data use_device(data_real_gpu, data_fltr_cmplx_gpu)
+!$acc host_data use_device(p_real, p_fltr_cmplx)
 #if defined(__PGI)
         ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
 #else
-        ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(data_fltr_cmplx_gpu), c_loc(data_real_gpu))
+        ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
         call hipCheck(hipDeviceSynchronize())
 #endif
 !$acc end host_data
@@ -297,8 +303,8 @@ contains
             end do
         end do
 #endif
-
-    end subroutine s_apply_fourier_filter
+!$acc end data
+    end subroutine s_apply_fourier_filter ! --------------------------------
     !> The purpose of this subroutine is to destroy the fftw plan
     !! that will be used in the forward and backward DFTs when
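Every m_fftw.fpp hunk above applies one pattern: the device arrays gain the `target` attribute, local Fortran pointers (`p_real`, `p_cmplx`, `p_fltr_cmplx`) are associated with them, the pointers are attached with `!$acc data attach(...)`, and `c_loc` of the pointer, resolved inside `host_data use_device`, is what gets passed to the hipFFT calls. The sketch below is not MFC code; it is a minimal illustration of that pattern assuming an OpenACC Fortran compiler (e.g. CCE), with `c_fft_exec` as a hypothetical stand-in for the `hipfftExecD2Z`/`hipfftExecZ2D` bindings used above.

```fortran
! Sketch only: the target/pointer/attach pattern from m_fftw.fpp above.
program attach_sketch
    use, intrinsic :: iso_c_binding, only: c_double, c_int, c_ptr, c_loc
    implicit none

    interface
        ! Hypothetical C routine standing in for hipfftExecD2Z/Z2D; it expects
        ! a raw *device* pointer.
        integer(c_int) function c_fft_exec(p) bind(C, name="c_fft_exec")
            import :: c_ptr, c_int
            type(c_ptr), value :: p
        end function c_fft_exec
    end interface

    ! TARGET is required so a Fortran pointer can be associated with the array;
    ! this is why the module arrays above gained the attribute.
    real(c_double), allocatable, target :: data_real(:)
    real(c_double), pointer :: p_real(:)
    integer :: ierr

    allocate (data_real(1024))
    data_real = 0d0
    p_real => data_real

    !$acc data copyin(data_real)
    !$acc data attach(p_real)

    ! Inside host_data the pointer resolves to the device copy, so c_loc(p_real)
    ! hands the library a device address rather than a host address.
    !$acc host_data use_device(p_real)
    ierr = c_fft_exec(c_loc(p_real))
    !$acc end host_data

    !$acc end data
    !$acc end data
end program attach_sketch
```

Presumably the pointer indirection is needed because `c_loc` requires its argument to carry the POINTER or TARGET attribute, which the bare `declare create` allocatables previously lacked, while the `attach` clause records on the device that the pointer targets the device copy of the array.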
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
index 26700d163..4cf50e8a9 100644
--- a/src/simulation/m_mpi_proxy.fpp
+++ b/src/simulation/m_mpi_proxy.fpp
@@ -43,32 +43,32 @@ module m_mpi_proxy
 !$acc declare link(ib_buff_send, ib_buff_recv)
 !$acc declare link(c_divs_buff_send, c_divs_buff_recv)
 #else
-    real(kind(0d0)), private, allocatable, dimension(:) :: q_cons_buff_send !<
+    real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_send !<
     !! This variable is utilized to pack and send the buffer of the cell-average
     !! conservative variables, for a single computational domain boundary at the
     !! time, to the relevant neighboring processor.
-    real(kind(0d0)), private, allocatable, dimension(:) :: q_cons_buff_recv !<
+    real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_recv !<
     !! q_cons_buff_recv is utilized to receive and unpack the buffer of the cell-
     !! average conservative variables, for a single computational domain boundary
     !! at the time, from the relevant neighboring processor.
-    real(kind(0d0)), private, allocatable, dimension(:) :: c_divs_buff_send !<
+    real(kind(0d0)), private, allocatable, dimension(:), target :: c_divs_buff_send !<
     !! c_divs_buff_send is utilized to send and unpack the buffer of the cell-
     !! centered color function derivatives, for a single computational domain
     !! boundary at the time, to the the relevant neighboring processor
-    real(kind(0d0)), private, allocatable, dimension(:) :: c_divs_buff_recv
+    real(kind(0d0)), private, allocatable, dimension(:), target :: c_divs_buff_recv
     !! c_divs_buff_recv is utilized to receiver and unpack the buffer of the cell-
     !! centered color function derivatives, for a single computational domain
     !! boundary at the time, from the relevant neighboring processor
-    integer, private, allocatable, dimension(:) :: ib_buff_send !<
+    integer, private, allocatable, dimension(:), target :: ib_buff_send !<
     !! This variable is utilized to pack and send the buffer of the immersed
     !! boundary markers, for a single computational domain boundary at the
     !! time, to the relevant neighboring processor.
-    integer, private, allocatable, dimension(:) :: ib_buff_recv !<
+    integer, private, allocatable, dimension(:), target :: ib_buff_recv !<
     !! q_cons_buff_recv is utilized to receive and unpack the buffer of the
     !! immersed boundary markers, for a single computational domain boundary
     !! at the time, from the relevant neighboring processor.
@@ -860,6 +860,8 @@ contains
         integer :: pack_offsets(1:3), unpack_offsets(1:3)
         integer :: pack_offset, unpack_offset
+        real(kind(0d0)), pointer :: p_send, p_recv
+        integer, pointer, dimension(:) :: p_i_send, p_i_recv
 #ifdef MFC_MPI
@@ -1059,19 +1061,23 @@ contains
         ! Send/Recv
         #:for rdma_mpi in [False, True]
             if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
+                p_send => q_cons_buff_send(0)
+                p_recv => q_cons_buff_recv(0)
                 #:if rdma_mpi
-                    !$acc host_data use_device(q_cons_buff_recv, q_cons_buff_send, ib_buff_recv, ib_buff_send)
+                    !$acc data attach(p_send, p_recv)
+                    !$acc host_data use_device(p_send, p_recv)
                 #:else
                     !$acc update host(q_cons_buff_send, ib_buff_send)
                 #:endif
                 call MPI_SENDRECV( &
-                    q_cons_buff_send(0), buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, &
-                    q_cons_buff_recv(0), buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, &
+                    p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, &
+                    p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, &
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                 #:if rdma_mpi
                     !$acc end host_data
+                    !$acc end data
                     !$acc wait
                 #:else
                     !$acc update device(q_cons_buff_recv)
@@ -1262,6 +1268,7 @@ contains
         integer, intent(in) :: gp_layers
         integer :: i, j, k, l, r !< Generic loop iterators
+        integer, pointer, dimension(:) :: p_i_send, p_i_recv
 #ifdef MFC_MPI
@@ -1303,19 +1310,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send, ib_buff_recv, ib_buff_send)
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%end, 0, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%beg, 0, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1353,19 +1365,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%beg, 1, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%beg, 0, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1425,19 +1442,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%beg, 1, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%end, 1, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1473,19 +1495,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%end, 0, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(n + 1)*(p + 1), &
                 MPI_INTEGER, bc_x%end, 1, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1547,19 +1574,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%end, 0, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%beg, 0, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1598,19 +1630,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%beg, 1, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%beg, 0, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1673,19 +1710,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%beg, 1, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%end, 1, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1724,19 +1766,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%end, 0, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
                 MPI_INTEGER, bc_y%end, 1, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1802,19 +1849,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%end, 0, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%beg, 0, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1853,19 +1905,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%beg, 1, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%beg, 0, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1929,19 +1986,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%beg, 1, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%end, 1, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -1980,19 +2042,24 @@ contains
 #if defined(MFC_OpenACC)
         if (rdma_mpi) then
-            !$acc host_data use_device( ib_buff_recv, ib_buff_send )
+            p_i_send => ib_buff_send
+            p_i_recv => ib_buff_recv
+
+            !$acc data attach(p_i_send, p_i_recv)
+            !$acc host_data use_device(p_i_send, p_i_recv)
             ! Send/receive buffer to/from bc_x%end/bc_x%beg
             call MPI_SENDRECV( &
-                ib_buff_send(0), &
+                p_i_send(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%end, 0, &
-                ib_buff_recv(0), &
+                p_i_recv(0), &
                 gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
                 MPI_INTEGER, bc_z%end, 1, &
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
             !$acc end host_data
+            !$acc end data
             !$acc wait
         else
 #endif
@@ -2058,6 +2125,7 @@ contains
         integer :: pack_offsets(1:3), unpack_offsets(1:3)
         integer :: pack_offset, unpack_offset
+        real(kind(0d0)), pointer :: p_send, p_recv
 #ifdef MFC_MPI
@@ -2152,19 +2220,24 @@ contains
         ! Send/Recv
         #:for rdma_mpi in [False, True]
             if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
+                p_send => c_divs_buff_send(0)
+                p_recv => c_divs_buff_recv(0)
+
                 #:if rdma_mpi
-                    !$acc host_data use_device(c_divs_buff_recv, c_divs_buff_send)
+                    !$acc data attach(p_send, p_recv)
+                    !$acc host_data use_device(p_send, p_recv)
                 #:else
                     !$acc update host(c_divs_buff_send)
                 #:endif
                 call MPI_SENDRECV( &
-                    c_divs_buff_send(0), buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, &
-                    c_divs_buff_recv(0), buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, &
+                    p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, &
+                    p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, &
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                 #:if rdma_mpi
                     !$acc end host_data
+                    !$acc end data
                     !$acc wait
                 #:else
                     !$acc update device(c_divs_buff_recv)
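The long run of m_mpi_proxy.fpp hunks above repeats the same transformation for every communication direction: the buffers become `target`, a pointer (`p_send`/`p_recv`, or `p_i_send`/`p_i_recv` for the integer markers) is associated with the buffer, attached with `!$acc data attach(...)`, and then exposed to `MPI_SENDRECV` through `host_data use_device` so a GPU-aware MPI can move device memory directly. The sketch below is not MFC code: it assumes an OpenACC Fortran compiler plus a GPU-aware MPI (such as cray-mpich on Frontier), and it uses whole-array pointers where the diff points at element 0 of the packed buffers.

```fortran
! Sketch only: GPU-aware MPI_SENDRECV through attached pointers, as in the
! rdma_mpi branches above.
program rdma_sendrecv_sketch
    use mpi
    implicit none

    real(kind(0d0)), allocatable, target :: buff_send(:), buff_recv(:)
    real(kind(0d0)), pointer :: p_send(:), p_recv(:)
    integer :: rank, nranks, dst, src, ierr

    call MPI_INIT(ierr)
    call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, nranks, ierr)
    dst = mod(rank + 1, nranks)
    src = mod(rank - 1 + nranks, nranks)

    allocate (buff_send(1000), buff_recv(1000))
    buff_send = real(rank, kind(0d0))
    buff_recv = 0d0
    p_send => buff_send
    p_recv => buff_recv

    !$acc data copyin(buff_send) copyout(buff_recv)
    !$acc data attach(p_send, p_recv)

    ! host_data exposes device addresses, so a GPU-aware MPI reads the send
    ! buffer and writes the receive buffer directly in device memory.
    !$acc host_data use_device(p_send, p_recv)
    call MPI_SENDRECV( &
        p_send, size(p_send), MPI_DOUBLE_PRECISION, dst, 0, &
        p_recv, size(p_recv), MPI_DOUBLE_PRECISION, src, 0, &
        MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
    !$acc end host_data

    !$acc end data
    !$acc end data

    ! Without GPU-aware MPI, one would instead stage through the host:
    ! !$acc update host(buff_send) before the call and
    ! !$acc update device(buff_recv) after it -- the #:else branches above.

    call MPI_FINALIZE(ierr)
end program rdma_sendrecv_sketch
```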
diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index 0019779e5..073c7d278 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -1493,7 +1493,7 @@ contains
     end subroutine s_compute_fd_gradient
-    subroutine s_finalize_viscous_module
+    subroutine s_finalize_viscous_module()
        integer :: i
diff --git a/toolchain/mfc/run/run.py b/toolchain/mfc/run/run.py
index 96d39877e..58cbdf9d0 100644
--- a/toolchain/mfc/run/run.py
+++ b/toolchain/mfc/run/run.py
@@ -93,7 +93,11 @@ def __get_template() -> Template:
 def __generate_job_script(targets, case: input.MFCInputFile):
     env = {}
     if ARG('gpus') is not None:
-        env['CUDA_VISIBLE_DEVICES'] = ','.join([str(_) for _ in ARG('gpus')])
+        gpu_ids = ','.join([str(_) for _ in ARG('gpus')])
+        env.update({
+            'CUDA_VISIBLE_DEVICES': gpu_ids,
+            'HIP_VISIBLE_DEVICES': gpu_ids
+        })
     content = __get_template().render(
         **{**ARGS(), 'targets': targets},
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index d4fa948ef..164ded4b7 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -128,7 +128,7 @@ def run(self, targets: typing.List[typing.Union[str, MFCTarget]], gpus: typing.S
             *jobs, "-t", *target_names, *gpus_select, *ARG("--")
         ]
-        return common.system(command, print_cmd=False, text=True, capture_output=True)
+        return common.system(command, print_cmd=False, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     def get_uuid(self) -> str:
         return trace_to_uuid(self.trace)
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index e2417d4ea..0d300e58b 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -191,9 +191,9 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
         h5dump = f"{HDF5.get_install_dirpath(MFCInputFile(os.path.basename(case.get_filepath()), case.get_dirpath(), case.get_parameters()))}/bin/h5dump"
-        if ARG("sys_hdf5"):
+        if not os.path.exists(h5dump or ""):
             if not does_command_exist("h5dump"):
-                raise MFCException("--sys-hdf5 was specified and h5dump couldn't be found.")
+                raise MFCException("h5dump couldn't be found.")
             h5dump = shutil.which("h5dump")
diff --git a/toolchain/modules b/toolchain/modules
index 0dc9576c8..d73c60077 100644
--- a/toolchain/modules
+++ b/toolchain/modules
@@ -47,11 +47,9 @@ p-cpu gcc/12.3.0 openmpi/4.1.5
 p-gpu nvhpc/24.5 hpcx/2.19-cuda cuda/12.1.1
 f OLCF Frontier
-f-gpu rocm/5.5.1 craype-accel-amd-gfx90a
-f-all cpe/23.09
-f-all cray-fftw cray-hdf5 cray-mpich/8.1.26 cce/16.0.1
-f-all rocm/5.5.1 cray-python omniperf
-f-cpu
+f-all cce/18.0.0 cpe/24.07 rocm/6.1.3 cray-mpich/8.1.28
+f-all cray-fftw cray-hdf5 cray-python omniperf
+f-gpu craype-accel-amd-gfx90a
 d NCSA Delta
 d-all python/3.11.6