From 47b3007df127cea5de30e2ebbe85ee4955aeffd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 2 Nov 2023 16:10:53 +0100 Subject: [PATCH 1/6] Add explicit locks for AD::GetDerivative() call trees. --- SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp | 8 ++++++++ SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp index a297039ea3a..7abd17805fa 100644 --- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp @@ -232,12 +232,16 @@ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *co /*--- Extract and store the adjoint solution ---*/ + AD::BeginUseAdjoints(); + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { su2double Solution[MAXNVAR] = {0.0}; direct_solver->GetNodes()->GetAdjointSolution(iPoint,Solution); nodes->SetSolution(iPoint,Solution); } + AD::EndUseAdjoints(); + if (CrossTerm) return; /*--- Extract and store the adjoint solution at time n (including accel. and velocity) ---*/ @@ -358,6 +362,8 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo /*--- Extract the geometric sensitivities ---*/ + AD::BeginUseAdjoints(); + for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) { auto Coord = geometry->nodes->GetCoord(iPoint); @@ -375,6 +381,8 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo } } + AD::EndUseAdjoints(); + } void CDiscAdjFEASolver::ReadDV(const CConfig *config) { diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index c42e2b4925f..ac9c6043097 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -317,6 +317,8 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi if (!config->GetMultizone_Problem()) nodes->Set_OldSolution(); + AD::BeginUseAdjoints(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { @@ -339,6 +341,8 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi } END_SU2_OMP_FOR + AD::EndUseAdjoints(); + direct_solver->ExtractAdjoint_SolutionExtra(nodes->GetSolutionExtra(), config); /*--- Residuals and time_n terms are not needed when evaluating multizone cross terms. ---*/ @@ -477,6 +481,8 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) { + AD::BeginUseAdjoints(); + SU2_OMP_PARALLEL { const bool time_stepping = (config->GetTime_Marching() != TIME_MARCHING::STEADY); @@ -510,6 +516,8 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve } END_SU2_OMP_PARALLEL + + AD::EndUseAdjoints(); } void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) { From fb364ead5f6fc92712485248d788b17f831e28c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 3 Nov 2023 12:36:35 +0100 Subject: [PATCH 2/6] Use more lock-free accesses and explicit locking. --- SU2_CFD/include/variables/CVariable.hpp | 14 ++++++++++---- SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp | 4 ++++ SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 8 ++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/SU2_CFD/include/variables/CVariable.hpp b/SU2_CFD/include/variables/CVariable.hpp index 050be2b7787..453dca7226b 100644 --- a/SU2_CFD/include/variables/CVariable.hpp +++ b/SU2_CFD/include/variables/CVariable.hpp @@ -2170,13 +2170,19 @@ class CVariable { } inline void GetAdjointSolution_time_n(unsigned long iPoint, su2double *adj_sol) const { - for (unsigned long iVar = 0; iVar < Solution_time_n.cols(); iVar++) - adj_sol[iVar] = SU2_TYPE::GetDerivative(Solution_time_n(iPoint,iVar)); + int index = 0; + for (unsigned long iVar = 0; iVar < Solution_time_n.cols(); iVar++) { + AD::SetIndex(index, Solution_time_n(iPoint, iVar)); + adj_sol[iVar] = AD::GetDerivative(index); + } } inline void GetAdjointSolution_time_n1(unsigned long iPoint, su2double *adj_sol) const { - for (unsigned long iVar = 0; iVar < Solution_time_n1.cols(); iVar++) - adj_sol[iVar] = SU2_TYPE::GetDerivative(Solution_time_n1(iPoint,iVar)); + int index = 0; + for (unsigned long iVar = 0; iVar < Solution_time_n1.cols(); iVar++) { + AD::SetIndex(index, Solution_time_n1(iPoint, iVar)); + adj_sol[iVar] = AD::GetDerivative(index); + } } /*! diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp index 7abd17805fa..baf00a826a2 100644 --- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp @@ -247,11 +247,15 @@ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *co /*--- Extract and store the adjoint solution at time n (including accel. and velocity) ---*/ if (config->GetTime_Domain()) { + AD::BeginUseAdjoints(); + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { su2double Solution[MAXNVAR] = {0.0}; direct_solver->GetNodes()->GetAdjointSolution_time_n(iPoint,Solution); nodes->Set_Solution_time_n(iPoint,Solution); } + + AD::EndUseAdjoints(); } /*--- Set the residuals ---*/ diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index ac9c6043097..6b8fd2a199d 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -359,6 +359,8 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi /*--- Extract and store the adjoint of the primal solution at time n ---*/ if (time_n_needed) { + AD::BeginUseAdjoints(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { su2double Solution[MAXNVAR] = {0.0}; @@ -366,10 +368,14 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi nodes->Set_Solution_time_n(iPoint,Solution); } END_SU2_OMP_FOR + + AD::EndUseAdjoints(); } /*--- Extract and store the adjoint of the primal solution at time n-1 ---*/ if (time_n1_needed) { + AD::BeginUseAdjoints(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { su2double Solution[MAXNVAR] = {0.0}; @@ -377,6 +383,8 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi nodes->Set_Solution_time_n1(iPoint,Solution); } END_SU2_OMP_FOR + + AD::EndUseAdjoints(); } } From eb844d4c1d021bda3741b0250a2caef26df938ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 3 Nov 2023 13:03:15 +0100 Subject: [PATCH 3/6] Extend comments. --- Common/include/basic_types/ad_structure.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index dd7e3691377..bbed2a91a2c 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -396,6 +396,8 @@ FORCEINLINE void SetIndex(int& index, const su2double& data) { index = data.getI // WARNING: For performance reasons, this method does not perform bounds checking. // When using it, please ensure sufficient adjoint vector size by a call to AD::ResizeAdjoints(). +// This method does not perform locking either. +// It should be safeguarded by calls to AD::BeginUseAdjoints() and AD::EndUseAdjoints(). FORCEINLINE void SetDerivative(int index, const double val) { if (index == 0) // Allow multiple threads to "set the derivative" of passive variables without causing data races. return; @@ -406,6 +408,8 @@ FORCEINLINE void SetDerivative(int index, const double val) { // WARNING: For performance reasons, this method does not perform bounds checking. // If called after tape evaluations, the adjoints should exist. // Otherwise, please ensure sufficient adjoint vector size by a call to AD::ResizeAdjoints(). +// This method does not perform locking either. +// It should be safeguarded by calls to AD::BeginUseAdjoints() and AD::EndUseAdjoints(). FORCEINLINE double GetDerivative(int index) { return AD::getTape().getGradient(index, codi::AdjointsManagement::Manual); } From 1f02f9604888a5859c5221eabb7ada85ebfea9fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 7 Nov 2023 16:28:16 +0100 Subject: [PATCH 4/6] Fix uninitialized mixingplane. --- SU2_CFD/src/drivers/CDriver.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SU2_CFD/src/drivers/CDriver.cpp b/SU2_CFD/src/drivers/CDriver.cpp index c33977ad33f..d8ba985fafa 100644 --- a/SU2_CFD/src/drivers/CDriver.cpp +++ b/SU2_CFD/src/drivers/CDriver.cpp @@ -248,6 +248,8 @@ CDriverBase(confFile, val_nZone, MPICommunicator), StopCalc(false), fsi(false), cout << endl <<"---------------------- Turbomachinery Preprocessing ---------------------" << endl; PreprocessTurbomachinery(config_container, geometry_container, solver_container, interface_container); + } else { + mixingplane = false; } From 5fa584f560737f3c8ad6c0af7882425eb45a8945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 8 Nov 2023 16:44:06 +0100 Subject: [PATCH 5/6] Update serial regression test values. --- TestCases/serial_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TestCases/serial_regression.py b/TestCases/serial_regression.py index a8486bc11ea..2c4e041ce08 100644 --- a/TestCases/serial_regression.py +++ b/TestCases/serial_regression.py @@ -1066,7 +1066,7 @@ def main(): airfoilRBF.cfg_dir = "fea_fsi/Airfoil_RBF" airfoilRBF.cfg_file = "config.cfg" airfoilRBF.test_iter = 1 - airfoilRBF.test_vals = [1.000000, -2.786183, -4.977959] + airfoilRBF.test_vals = [1.000000, -2.786186, -4.977944] airfoilRBF.multizone = True test_list.append(airfoilRBF) From 85b4fb41a244196e925979bc8c7296299c23b438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 8 Nov 2023 16:49:32 +0100 Subject: [PATCH 6/6] Further relax tolerances of hybrid AD python wrapper tests. --- TestCases/hybrid_regression_AD.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py index a2243e14e24..cf419bde4e9 100644 --- a/TestCases/hybrid_regression_AD.py +++ b/TestCases/hybrid_regression_AD.py @@ -242,7 +242,7 @@ def main(): pywrapper_FEA_AD_FlowLoad.test_vals_aarch64 = [-0.131745, -0.553214, -0.000364, -0.003101] pywrapper_FEA_AD_FlowLoad.command = TestCase.Command(exec = "python", param = "run_adjoint.py --parallel -f") pywrapper_FEA_AD_FlowLoad.timeout = 1600 - pywrapper_FEA_AD_FlowLoad.tol = 1e-4 + pywrapper_FEA_AD_FlowLoad.tol = 1e-3 pywrapper_FEA_AD_FlowLoad.new_output = False pywrapper_FEA_AD_FlowLoad.enabled_with_tsan = False test_list.append(pywrapper_FEA_AD_FlowLoad) @@ -257,7 +257,7 @@ def main(): pywrapper_CFD_AD_MeshDisp.test_vals_aarch64 = [30.000000, -2.516536, 1.386443, 0.000000] pywrapper_CFD_AD_MeshDisp.command = TestCase.Command(exec = "python", param = "run_adjoint.py --parallel -f") pywrapper_CFD_AD_MeshDisp.timeout = 1600 - pywrapper_CFD_AD_MeshDisp.tol = 1e-4 + pywrapper_CFD_AD_MeshDisp.tol = 1e-3 pywrapper_CFD_AD_MeshDisp.new_output = False pywrapper_CFD_AD_MeshDisp.enabled_with_tsan = False test_list.append(pywrapper_CFD_AD_MeshDisp)