Compiles but really slow

anandrdbz · Mar 7, 2024 · 253f666 · 253f666
1 parent c890199
commit 253f666
Show file tree

Hide file tree

Showing 15 changed files with 158 additions and 167 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -124,7 +124,7 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
 elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
     add_compile_options(
         "SHELL:-h nomessage=296:878:1391:1069"
-        "SHELL:-h msgs" "SHELL:-h static" "SHELL:-h keepfiles"
+        "SHELL:-h static" "SHELL:-h keepfiles"
         "SHELL:-h acc_model=auto_async_none"
         "SHELL: -h acc_model=no_fast_addr"
         "SHELL: -h list=adm" "-DCRAY_ACC_SIMPLIFY" "-DCRAY_ACC_WAR"

diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp
@@ -184,10 +184,15 @@ contains
 
         rhol0 = rhoref
         pl0 = pref
-
+#ifdef MFC_SIMULATION
         @:ALLOCATE_GLOBAL(pb0(nb), mass_n0(nb), mass_v0(nb), Pe_T(nb))
         @:ALLOCATE_GLOBAL(k_n(nb), k_v(nb), omegaN(nb))
-        @:ALLOCATE_GLOBALRe_trans_T(nb), Re_trans_c(nb), Im_trans_T(nb), Im_trans_c(nb))
+        @:ALLOCATE_GLOBAL(Re_trans_T(nb), Re_trans_c(nb), Im_trans_T(nb), Im_trans_c(nb))
+#else
+        @:ALLOCATE(pb0(nb), mass_n0(nb), mass_v0(nb), Pe_T(nb))
+        @:ALLOCATE(k_n(nb), k_v(nb), omegaN(nb))
+        @:ALLOCATE(Re_trans_T(nb), Re_trans_c(nb), Im_trans_T(nb), Im_trans_c(nb))        
+#endif
 
         pb0(:) = dflt_real
         mass_n0(:) = dflt_real

diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp
@@ -11,7 +11,7 @@
 module m_phase_change
 
 #ifndef MFC_POST_PROCESS
-
+ 
     ! Dependencies =============================================================
 
     use m_derived_types        !< Definitions of the derived types
@@ -74,7 +74,7 @@ contains
         !!      selecting the phase change module that will be used
         !!      (pT- or pTg-equilibrium)
     subroutine s_initialize_phasechange_module()
-
+#ifndef _CRAYFTN
         ! variables used in the calculation of the saturation curves for fluids 1 and 2
         A = (gs_min(lp)*cvs(lp) - gs_min(vp)*cvs(vp) &
              + qvps(vp) - qvps(lp))/((gs_min(vp) - 1.0d0)*cvs(vp))
@@ -94,7 +94,7 @@ contains
         else
             call s_mpi_abort('relaxation solver was not set!')
         end if
-
+#endif
     end subroutine s_initialize_phasechange_module !-------------------------------
 
     !>  This subroutine is created to activate either the pT- (N fluids) or the
@@ -103,13 +103,14 @@ contains
         !!      state conditions.
         !!  @param q_cons_vf Cell-average conservative variables
     subroutine s_infinite_relaxation_k(q_cons_vf) ! ----------------
+
         type(scalar_field), dimension(sys_size), intent(INOUT) :: q_cons_vf
         real(kind(0.0d0)) :: pS, pSOV, pSSL !< equilibrium pressure for mixture, overheated vapor, and subcooled liquid
         real(kind(0.0d0)) :: TS, TSOV, TSSL, TSatOV, TSatSL !< equilibrium temperature for mixture, overheated vapor, and subcooled liquid. Saturation Temperatures at overheated vapor and subcooled liquid
         real(kind(0.0d0)) :: rhoe, dynE, rhos !< total internal energy, kinetic energy, and total entropy
         real(kind(0.0d0)) :: rho, rM, m1, m2, MCT !< total density, total reacting mass, individual reacting masses
         real(kind(0.0d0)) :: TvF !< total volume fraction
-
+#ifndef _CRAYFTN
         !$acc declare create(pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF)
 
         real(kind(0d0)), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok
@@ -290,7 +291,7 @@ contains
                 end do
             end do
         end do
-
+#endif
     end subroutine s_infinite_relaxation_k ! ----------------
 
     !>  This auxiliary subroutine is created to activate the pT-equilibrium for N fluids
@@ -305,6 +306,7 @@ contains
         !!  @param rhoe mixture energy
         !!  @param TS equilibrium temperature at the interface
     subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, rM, q_cons_vf, rhoe, TS)
+
         !$acc routine seq
 
         ! initializing variables
@@ -319,7 +321,7 @@ contains
         real(kind(0.0d0)) :: gp, gpp, hp, pO, mCP, mQ !< variables for the Newton Solver
 
         integer :: i, ns !< generic loop iterators
-
+#ifndef _CRAYFTN
         ! auxiliary variables for the pT-equilibrium solver
         mCP = 0.0d0; mQ = 0.0d0; p_infpT = ps_inf; pk(1:num_fluids) = 0.0d0
 
@@ -395,7 +397,7 @@ contains
 
         ! common temperature
         TS = (rhoe + pS - mQ)/mCP
-
+#endif
     end subroutine s_infinite_pt_relaxation_k ! -----------------------
 
     !>  This auxiliary subroutine is created to activate the pTg-equilibrium for N fluids under pT
@@ -409,6 +411,7 @@ contains
         !!  @param q_cons_vf Cell-average conservative variables
         !!  @param TS equilibrium temperature at the interface
     subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS)
+
         !$acc routine seq
 
         type(scalar_field), dimension(sys_size), intent(INOUT) :: q_cons_vf
@@ -424,7 +427,7 @@ contains
 
         !< Generic loop iterators
         integer :: i, ns
-
+#ifndef _CRAYFTN
         ! pTg-equilibrium solution procedure
         ! Newton Solver parameters
         ! counter
@@ -517,7 +520,7 @@ contains
 
         ! common temperature
         TS = (rhoe + pS - mQ)/mCP
-
+#endif
     end subroutine s_infinite_ptg_relaxation_k ! -----------------------
 
     !>  This auxiliary subroutine corrects the partial densities of the REACTING fluids in case one of them is negative
@@ -538,7 +541,7 @@ contains
         real(kind(0.0d0)), intent(OUT) :: MCT
         integer, intent(IN) :: j, k, l
         !> @}
-
+#ifndef _CRAYFTN
         if (rM < 0.0d0) then
 
             if ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) >= -1.0d0*mixM) .and. &
@@ -572,7 +575,7 @@ contains
             q_cons_vf(vp + contxb - 1)%sf(j, k, l) = MCT*rM
 
         end if
-
+#endif
     end subroutine s_correct_partial_densities
 
     !>  This auxiliary subroutine calculates the 2 x 2 Jacobian, its inverse, and its transpose
@@ -596,7 +599,7 @@ contains
         integer, intent(IN) :: j, k, l
         real(kind(0.0d0)), dimension(2, 2), intent(OUT) :: Jac, InvJac, TJac
         real(kind(0.0d0)) :: ml, mT, TS, dFdT, dTdm, dTdp ! mass of the reacting fluid, total reacting mass, and auxiliary variables
-
+#ifndef _CRAYFTN
         ! mass of the reacting liquid
         ml = q_cons_vf(lp + contxb - 1)%sf(j, k, l)
 
@@ -672,7 +675,7 @@ contains
 
         ! dividing by det(J)
         InvJac = InvJac/(Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1))
-
+#endif
     end subroutine s_compute_jacobian_matrix
 
     !>  This auxiliary subroutine computes the residue of the pTg-equilibrium procedure
@@ -694,7 +697,7 @@ contains
         integer, intent(IN) :: j, k, l
         real(kind(0.0d0)), dimension(2), intent(OUT) :: R2D
         real(kind(0.0d0)) :: ml, mT, TS !< mass of the reacting liquid, total reacting mass, equilibrium temperature
-
+#ifndef _CRAYFTN
         ! mass of the reacting liquid
         ml = q_cons_vf(lp + contxb - 1)%sf(j, k, l)
 
@@ -722,7 +725,7 @@ contains
                   /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) &
                         - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) &
                     + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP))/1
-
+#endif
     end subroutine s_compute_pTg_residue
 
     !>  This auxiliary subroutine finds the Saturation temperature for a given
@@ -739,7 +742,7 @@ contains
 
         ! Generic loop iterators
         integer :: ns
-
+#ifndef _CRAYFTN
         if ((pSat == 0.0d0) .and. (TSIn == 0.0d0)) then
 
             ! assigning Saturation temperature
@@ -780,14 +783,14 @@ contains
             end do
 
         end if
-
+#endif
     end subroutine s_TSat
 
     !>  This subroutine finalizes the phase change module
     subroutine s_finalize_relaxation_solver_module()
-
+#ifndef _CRAYFTN
         s_relaxation_solver => null()
-
+#endif
     end subroutine
 
 #endif

diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp
@@ -86,17 +86,10 @@ module m_variables_conversion
 
     !! In simulation, gammas, pi_infs, and qvs are already declared in m_global_variables
 #ifndef MFC_SIMULATION
-#ifdef CRAY_ACC_WAR
-    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
-    public :: gammas, pi_infs
-    !$acc declare link(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
-#else
     real(kind(0d0)), allocatable, public, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps
     !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
 #endif
 
-#endif
-
 #ifdef CRAY_ACC_WAR
     @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), Gs)
     @:CRAY_DECLARE_GLOBAL(integer,         dimension(:), bubrs)
@@ -468,7 +461,7 @@ contains
         !$acc routine seq
 #endif
 
-        real(kind(0d0)), intent(OUT) :: rho_K, gamma_K, pi_inf_K, qv_Ke
+        real(kind(0d0)), intent(OUT) :: rho_K, gamma_K, pi_inf_K, qv_K
 
         real(kind(0d0)), dimension(num_fluids), intent(INOUT) :: alpha_rho_K, alpha_K !<
         real(kind(0d0)), dimension(2), intent(OUT) :: Re_K
@@ -635,6 +628,7 @@ contains
 
         !$acc update device(ixb, ixe, iyb, iye, izb, ize)
 
+#ifdef MFC_SIMULATION
         @:ALLOCATE_GLOBAL(gammas (1:num_fluids))
         @:ALLOCATE_GLOBAL(gs_min (1:num_fluids))
         @:ALLOCATE_GLOBAL(pi_infs(1:num_fluids))
@@ -643,6 +637,17 @@ contains
         @:ALLOCATE_GLOBAL(qvs    (1:num_fluids))
         @:ALLOCATE_GLOBAL(qvps    (1:num_fluids))
         @:ALLOCATE_GLOBAL(Gs     (1:num_fluids))
+#else
+        @:ALLOCATE(gammas (1:num_fluids))
+        @:ALLOCATE(gs_min (1:num_fluids))
+        @:ALLOCATE(pi_infs(1:num_fluids))
+        @:ALLOCATE(ps_inf(1:num_fluids))
+        @:ALLOCATE(cvs    (1:num_fluids))
+        @:ALLOCATE(qvs    (1:num_fluids))
+        @:ALLOCATE(qvps    (1:num_fluids))
+        @:ALLOCATE(Gs     (1:num_fluids))
+#endif
+
 
         do i = 1, num_fluids
             gammas(i) = fluid_pp(i)%gamma
@@ -671,7 +676,11 @@ contains
 #endif
 
         if (bubbles) then
+#ifdef MFC_SIMULATION
             @:ALLOCATE_GLOBAL(bubrs(1:nb))
+#else
+            @:ALLOCATE(bubrs(1:nb))
+#endif
 
             do i = 1, nb
                 bubrs(i) = bub_idx%rs(i)
@@ -1318,10 +1327,17 @@ contains
         deallocate (rho_sf, gamma_sf, pi_inf_sf, qv_sf)
 #endif
 
+#ifdef MFC_SIMULATION
         @:DEALLOCATE_GLOBAL(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs)
         if (bubbles) then
             @:DEALLOCATE_GLOBAL(bubrs)
         end if
+#else
+        @:DEALLOCATE(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs)
+        if (bubbles) then
+            @:DEALLOCATE(bubrs)
+        end if
+#endif
 
         ! Nullifying the procedure pointer to the subroutine transferring/
         ! computing the mixture/species variables to the mixture variables

diff --git a/src/simulation/include/case.fpp b/src/simulation/include/case.fpp
@@ -1,12 +1,3 @@
 ! This file was generated by MFC. It is only used if the --case-optimization
 ! option is passed to ./mfc.sh run or test, enabling a GPU-oriented optimization
 ! that hard-codes certain case parameters from the input file.
-
-#:set MFC_CASE_OPTIMIZATION = True
-
-#:set weno_order = 5
-#:set weno_polyn = 2
-#:set nb         = 1
-#:set num_dims   = 3
-#:set nterms     = 1
-#:set num_fluids = 2
diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp
@@ -19,7 +19,7 @@ module m_fftw
 
 #if defined(MFC_OpenACC) && defined(__PGI)
     use cufft
-#else if defined(_OPENACC)
+#else if defined(MFC_OpenACC)
     use hipfort
     use hipfort_check
     use hipfort_hipfft
@@ -49,11 +49,11 @@ module m_fftw
     complex(c_double_complex), pointer :: data_fltr_cmplx(:) !<
     !! Filtered complex data in Fourier space
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC) 
     !$acc declare create(real_size, cmplx_size, x_size, batch_size, Nfq)
 
 #ifdef CRAY_ACC_WAR
-        @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), data_real_gpu)
+        @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:),  data_real_gpu)
         @:CRAY_DECLARE_GLOBAL(complex(kind(0d0)), dimension(:), data_cmplx_gpu)
         @:CRAY_DECLARE_GLOBAL(complex(kind(0d0)), dimension(:), data_fltr_cmplx_gpu)
         !$acc declare link(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
@@ -93,7 +93,7 @@ contains
 
         batch_size = x_size*sys_size
 
-#if defined(MFC_OPENACC) 
+#if defined(MFC_OpenACC) 
         rank = 1; istride = 1; ostride = 1
 
         allocate(gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank))
@@ -118,7 +118,7 @@ contains
         bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE)
 #endif
 
-#if defined(_OPENACC) 
+#if defined(MFC_OpenACC) 
         @:ALLOCATE_GLOBAL(data_real_gpu(1:real_size*x_size*sys_size))
         @:ALLOCATE_GLOBAL(data_cmplx_gpu(1:cmplx_size*x_size*sys_size))
         @:ALLOCATE_GLOBAL(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))
@@ -147,7 +147,7 @@ contains
 
         ! Restrict filter to processors that have cells adjacent to axis
         if (bc_y%beg >= 0) return
-#if defined(MFC_OPENACC) 
+#if defined(MFC_OpenACC) 
 
 !$acc parallel loop collapse(3) gang vector default(present)
         do k = 1, sys_size
@@ -308,7 +308,7 @@ contains
         !!      applying the Fourier filter in the azimuthal direction.
     subroutine s_finalize_fftw_module() ! ------------------------------------
 
-#if defined(MFC_OPENACC) 
+#if defined(MFC_OpenACC) 
         @:DEALLOCATE_GLOBAL(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)        
 #if defined(_PGI)