From 371cf7b2feef1e31ac36103470f052e0c04216be Mon Sep 17 00:00:00 2001 From: Alexey Rybalchenko Date: Thu, 7 Mar 2024 15:47:49 +0100 Subject: [PATCH] Allow collections to be fully expendable --- ReleaseNotes.md | 3 +- odc/Controller.cpp | 2 +- odc/DDSSubmit.h | 4 +-- odc/Topology.h | 2 +- tests/CMakeLists.txt | 7 +++-- tests/parameters-tests.cpp | 28 +++++++++--------- tests/requirements-tests.cpp | 56 ++++++++++++++++++------------------ 7 files changed, 53 insertions(+), 49 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 92ea6fe..b6b996e 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,7 +1,8 @@ # ODC Release Notes -## master (unreleased) +## 0.81.0 (2024-03-07) +- **Breaking Change**: Support expendable collections. When nMin is 0, the collection is considered expendable, and the failure of all of its members will not trigger a global error. Previously, an nMin of 0 was the default and meant that no nMin was defined. That behaviour now applies when nMin is -1, which is also the new default. 
- Fix a deprecation warning with C++20 ## 0.80.2 (2024-01-05) diff --git a/odc/Controller.cpp b/odc/Controller.cpp index 103aa10..d77e1f8 100644 --- a/odc/Controller.cpp +++ b/odc/Controller.cpp @@ -916,7 +916,7 @@ void Controller::extractRequirements(const CommonParams& common, Session& sessio string topoPath = parent->getPath(); int nCores = 0; int32_t n = c->getTotalCounter(); - int32_t nmin = 0; + int32_t nmin = -1; int32_t numTasks = c->getNofTasks(); int32_t numTasksTotal = numTasks * n; diff --git a/odc/DDSSubmit.h b/odc/DDSSubmit.h index e8732e1..eb9e302 100644 --- a/odc/DDSSubmit.h +++ b/odc/DDSSubmit.h @@ -51,7 +51,7 @@ struct DDSSubmitParams mConfigFile = pt.get("configFile", ""); mEnvFile = pt.get("envFile", ""); mNumAgents = pt.get("agents", 0); - mMinAgents = 0; + mMinAgents = -1; mNumSlots = pt.get("slots", 0); // number of cores is set dynamically from the topology (if provided), not from the initial resource definition mNumCores = 0; @@ -63,7 +63,7 @@ struct DDSSubmitParams std::string mConfigFile; ///< Path to the configuration file of the RMS plugin std::string mEnvFile; ///< Path to the environment file uint32_t mNumAgents = 0; ///< Number of DDS agents - uint32_t mMinAgents = 0; ///< Minimum number of DDS agents + int32_t mMinAgents = -1; ///< Minimum number of DDS agents uint32_t mNumSlots = 0; ///< Number of slots per DDS agent uint32_t mNumCores = 0; ///< Number of cores diff --git a/odc/Topology.h b/odc/Topology.h index d5ccdb5..98e0935 100644 --- a/odc/Topology.h +++ b/odc/Topology.h @@ -296,7 +296,7 @@ class BasicTopology : public AsioBase bool CheckNmin(int32_t nCurrent, int32_t nMin, const std::string& runtimeColPath, const std::string& colPath, DDSCollection::Id colId) { - if (nMin == 0) { + if (nMin == -1) { // no nMin defined, failure cannot be ignored OLOG(error, mPartitionID, mSession.mLastRunNr.load()) << "Collection '" << runtimeColPath << "' (id: " << colId << ")" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 
a4cd412..091fe87 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -87,8 +87,11 @@ macro(add_nmin_test TITLE PASS FAIL) set_tests_properties(${TITLE} PROPERTIES TIMEOUT 60 PASS_REGULAR_EXPRESSION "${PASS}" FAIL_REGULAR_EXPRESSION "${FAIL}" ENVIRONMENT "${TEST_ENV}") endmacro() -# nmin is 0 - is the same as when it is not defined. -add_nmin_test(nmin_0 "Status code: ERROR" "") +# nmin is -1 - is the same as when it is not defined. +add_nmin_test(nmin_negative "Status code: ERROR" "") + +# nmin is 0 - collection can be fully expendable if nMin is 0 +add_nmin_test(nmin_0 "" "Status code: ERROR") # nmin: a collection outside of group should fail add_nmin_test(nmin_col_outside_group "Status code: ERROR" "") diff --git a/tests/parameters-tests.cpp b/tests/parameters-tests.cpp index dd1d29b..7e9fba0 100644 --- a/tests/parameters-tests.cpp +++ b/tests/parameters-tests.cpp @@ -105,10 +105,10 @@ BOOST_AUTO_TEST_CASE(odc_rp_same_simple) printParams(ddsParams2); BOOST_TEST(ddsParams.size() == 1); - testParameterSet(ddsParams.at(0), "localhost", "", "", 1, 0, 36, 0, "", ""); - testParameterSet(ddsParams2.at(0), "localhost", "", "", 1, 0, 12, 0, "", ""); - testParameterSet(ddsParams2.at(1), "localhost", "", "", 1, 0, 12, 0, "", ""); - testParameterSet(ddsParams2.at(2), "localhost", "", "", 1, 0, 12, 0, "", ""); + testParameterSet(ddsParams.at(0), "localhost", "", "", 1, -1, 36, 0, "", ""); + testParameterSet(ddsParams2.at(0), "localhost", "", "", 1, -1, 12, 0, "", ""); + testParameterSet(ddsParams2.at(1), "localhost", "", "", 1, -1, 12, 0, "", ""); + testParameterSet(ddsParams2.at(2), "localhost", "", "", 1, -1, 12, 0, "", ""); } BOOST_AUTO_TEST_CASE(odc_rp_same_zones) @@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE(odc_rp_same_zones) printParams(ddsParams2); BOOST_TEST(ddsParams.size() == 5); - testParameterSet(ddsParams.at(0), "localhost", "calib", "calib", 1, 0, 2, 0, "", ""); + testParameterSet(ddsParams.at(0), "localhost", "calib", "calib", 1, -1, 2, 0, "", ""); 
testParameterSet(ddsParams.at(1), "localhost", "online", "online", 1, 2, 2, 0, "", ""); testParameterSet(ddsParams.at(2), "localhost", "online", "online", 1, 2, 2, 0, "", ""); testParameterSet(ddsParams.at(3), "localhost", "online", "online", 1, 2, 2, 0, "", ""); @@ -176,7 +176,7 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_zones) BOOST_TEST(ddsParams.size() == 2); BOOST_TEST(ddsParams2.size() == 2); - testParameterSet(ddsParams.at(1), "slurm", "calib", "calib", 1, 0, 2, 0, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams.at(1), "slurm", "calib", "calib", 1, -1, 2, 0, "/home/user/slurm-calib.cfg", ""); testParameterSet(ddsParams.at(0), "slurm", "online", "online", 4, 2, 2, 0, "/home/user/slurm-online.cfg", ""); compareParameterSets(ddsParams.at(0), ddsParams2.at(0)); compareParameterSets(ddsParams.at(1), ddsParams2.at(1)); @@ -213,7 +213,7 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_zones_group_without_n_with_tasks) BOOST_TEST(ddsParams.size() == 1); BOOST_TEST(ddsParams2.size() == 2); testParameterSet(ddsParams.at(0), "slurm", "online", "online", 4, 2, 2, 0, "/home/user/slurm-online.cfg", ""); - testParameterSet(ddsParams2.at(1), "slurm", "calib", "calib", 1, 0, 2, 0, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams2.at(1), "slurm", "calib", "calib", 1, -1, 2, 0, "/home/user/slurm-calib.cfg", ""); testParameterSet(ddsParams2.at(0), "slurm", "online", "online", 4, 2, 2, 0, "/home/user/slurm-online.cfg", ""); } @@ -279,9 +279,9 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_ncores_without_n) BOOST_TEST(ddsParams.size() == 3); BOOST_TEST(ddsParams2.size() == 3); - testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, 0, 1, 2, "/home/user/slurm-calib.cfg", ""); - testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, 0, 1, 0, "/home/user/slurm-online.cfg", ""); - testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, 0, 1, 1, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams.at(0), "slurm", "calib", 
"calib1", 1, -1, 1, 2, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, -1, 1, 0, "/home/user/slurm-online.cfg", ""); + testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, -1, 1, 1, "/home/user/slurm-calib.cfg", ""); compareParameterSets(ddsParams.at(0), ddsParams2.at(2)); compareParameterSets(ddsParams.at(1), ddsParams2.at(0)); compareParameterSets(ddsParams.at(2), ddsParams2.at(1)); @@ -316,9 +316,9 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_ncores) BOOST_TEST(ddsParams.size() == 3); BOOST_TEST(ddsParams2.size() == 3); - testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, 0, 1, 2, "/home/user/slurm-calib.cfg", ""); - testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, 0, 1, 0, "/home/user/slurm-online.cfg", ""); - testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, 0, 1, 1, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, -1, 1, 2, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, -1, 1, 0, "/home/user/slurm-online.cfg", ""); + testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, -1, 1, 1, "/home/user/slurm-calib.cfg", ""); compareParameterSets(ddsParams.at(0), ddsParams2.at(2)); compareParameterSets(ddsParams.at(1), ddsParams2.at(0)); compareParameterSets(ddsParams.at(2), ddsParams2.at(1)); @@ -347,7 +347,7 @@ BOOST_AUTO_TEST_CASE(odc_extract_epn) BOOST_TEST(ddsParams.size() == 2); testParameterSet(ddsParams.at(0), "slurm", "online", "online", 50, 50, 223, 0, "/home/user/slurm-online.cfg", ""); - testParameterSet(ddsParams.at(1), "slurm", "calib", "calib1", 1, 0, 17, 128, "/home/user/slurm-calib.cfg", ""); + testParameterSet(ddsParams.at(1), "slurm", "calib", "calib1", 1, -1, 17, 128, "/home/user/slurm-calib.cfg", ""); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/requirements-tests.cpp b/tests/requirements-tests.cpp index 08e66ff..1954373 
100644 --- a/tests/requirements-tests.cpp +++ b/tests/requirements-tests.cpp @@ -132,10 +132,10 @@ BOOST_AUTO_TEST_CASE(simple) BOOST_TEST(session.mZoneInfo.size() == 0); BOOST_TEST(session.mNinfo.size() == 0); BOOST_TEST(session.mCollections.size() == 1); - testCollection(session.mCollections.at("EPNCollection"), "EPNCollection", "", "", 1, 0, 0, 12, 12); + testCollection(session.mCollections.at("EPNCollection"), "EPNCollection", "", "", 1, -1, 0, 12, 12); BOOST_TEST(session.mAgentGroupInfo.size() == 1); - testAgentGroupInfo(session.mAgentGroupInfo.at(""), "", "", 1, 0, 12, 0); + testAgentGroupInfo(session.mAgentGroupInfo.at(""), "", "", 1, -1, 12, 0); } BOOST_AUTO_TEST_CASE(zones_from_agent_groupnames) @@ -157,11 +157,11 @@ BOOST_AUTO_TEST_CASE(zones_from_agent_groupnames) testZoneGroup(session.mZoneInfo.at("online").at(0), 4, 0, "online"); BOOST_TEST(session.mCollections.size() == 2); - testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, 0, 0, 2, 2); + testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, -1, 0, 2, 2); testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, 2, 0, 2, 8); BOOST_TEST(session.mAgentGroupInfo.size() == 2); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, 0, 2, 0); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, -1, 2, 0); testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, 2, 2, 0); } @@ -185,14 +185,14 @@ BOOST_AUTO_TEST_CASE(zones_with_ncores) testZoneGroup(session.mZoneInfo.at("online").at(0), 4, 0, "online"); BOOST_TEST(session.mCollections.size() == 3); - testCollection(session.mCollections.at("Samplers"), "Samplers", "calib", "calib1", 1, 0, 2, 1, 1); - testCollection(session.mCollections.at("Sinks"), "Sinks", "calib", "calib2", 1, 0, 1, 1, 1); - testCollection(session.mCollections.at("Processors"), "Processors", "online", 
"online", 4, 0, 0, 1, 4); + testCollection(session.mCollections.at("Samplers"), "Samplers", "calib", "calib1", 1, -1, 2, 1, 1); + testCollection(session.mCollections.at("Sinks"), "Sinks", "calib", "calib2", 1, -1, 1, 1, 1); + testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, -1, 0, 1, 4); BOOST_TEST(session.mAgentGroupInfo.size() == 3); - testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, 0, 1, 0); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, 0, 1, 2); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, 0, 1, 1); + testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, -1, 1, 0); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, -1, 1, 2); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, -1, 1, 1); } BOOST_AUTO_TEST_CASE(nmin) @@ -214,16 +214,16 @@ BOOST_AUTO_TEST_CASE(nmin) testZoneGroup(session.mZoneInfo.at("online").at(0), 4, 0, "online"); BOOST_TEST(session.mNinfo.size() == 2); - testNinfo(session.mNinfo.at("SamplersSinks"), 1, 0, "calib"); + testNinfo(session.mNinfo.at("SamplersSinks"), 1, -1, "calib"); testNinfo(session.mNinfo.at("Processors"), 4, 2, "online"); BOOST_TEST(session.mCollections.size() == 2); - testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, 0, 0, 2, 2); + testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, -1, 0, 2, 2); testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, 2, 0, 2, 8); BOOST_TEST(session.mAgentGroupInfo.size() == 2); testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, 2, 2, 0); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, 0, 2, 0); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, -1, 2, 0); } 
BOOST_AUTO_TEST_CASE(epn) @@ -245,16 +245,16 @@ BOOST_AUTO_TEST_CASE(epn) testZoneGroup(session.mZoneInfo.at("online").at(0), 50, 0, "online"); BOOST_TEST(session.mNinfo.size() == 2); - testNinfo(session.mNinfo.at("wf11.dds"), 1, 0, "calib1"); + testNinfo(session.mNinfo.at("wf11.dds"), 1, -1, "calib1"); testNinfo(session.mNinfo.at("RecoCollection"), 50, 50, "online"); BOOST_TEST(session.mCollections.size() == 2); - testCollection(session.mCollections.at("wf11.dds"), "wf11.dds", "calib", "calib1", 1, 0, 128, 17, 17); + testCollection(session.mCollections.at("wf11.dds"), "wf11.dds", "calib", "calib1", 1, -1, 128, 17, 17); testCollection(session.mCollections.at("RecoCollection"), "RecoCollection", "online", "online", 50, 50, 0, 223, 11150); BOOST_TEST(session.mAgentGroupInfo.size() == 2); testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 50, 50, 223, 0); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, 0, 17, 128); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, -1, 17, 128); } BOOST_AUTO_TEST_CASE(epn_2) @@ -280,24 +280,24 @@ BOOST_AUTO_TEST_CASE(epn_2) BOOST_TEST(session.mNinfo.size() == 5); testNinfo(session.mNinfo.at("RecoCollection"), 108, 93, "online"); - testNinfo(session.mNinfo.at("wf5.dds"), 1, 0, "calib1"); - testNinfo(session.mNinfo.at("wf4.dds"), 1, 0, "calib2"); - testNinfo(session.mNinfo.at("wf10.dds"), 1, 0, "calib3"); - testNinfo(session.mNinfo.at("wf6.dds"), 1, 0, "calib4"); + testNinfo(session.mNinfo.at("wf5.dds"), 1, -1, "calib1"); + testNinfo(session.mNinfo.at("wf4.dds"), 1, -1, "calib2"); + testNinfo(session.mNinfo.at("wf10.dds"), 1, -1, "calib3"); + testNinfo(session.mNinfo.at("wf6.dds"), 1, -1, "calib4"); BOOST_TEST(session.mCollections.size() == 5); testCollection(session.mCollections.at("RecoCollection"), "RecoCollection", "online", "online", 108, 93, 0, 351, 37908); - testCollection(session.mCollections.at("wf5.dds"), "wf5.dds", "calib", "calib1", 1, 
0, 32, 7, 7); - testCollection(session.mCollections.at("wf4.dds"), "wf4.dds", "calib", "calib2", 1, 0, 32, 13, 13); - testCollection(session.mCollections.at("wf10.dds"), "wf10.dds", "calib", "calib3", 1, 0, 16, 7, 7); - testCollection(session.mCollections.at("wf6.dds"), "wf6.dds", "calib", "calib4", 1, 0, 16, 13, 13); + testCollection(session.mCollections.at("wf5.dds"), "wf5.dds", "calib", "calib1", 1, -1, 32, 7, 7); + testCollection(session.mCollections.at("wf4.dds"), "wf4.dds", "calib", "calib2", 1, -1, 32, 13, 13); + testCollection(session.mCollections.at("wf10.dds"), "wf10.dds", "calib", "calib3", 1, -1, 16, 7, 7); + testCollection(session.mCollections.at("wf6.dds"), "wf6.dds", "calib", "calib4", 1, -1, 16, 13, 13); BOOST_TEST(session.mAgentGroupInfo.size() == 5); testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 108, 93, 351, 0); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, 0, 7, 32); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, 0, 13, 32); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib3"), "calib3", "calib", 1, 0, 7, 16); - testAgentGroupInfo(session.mAgentGroupInfo.at("calib4"), "calib4", "calib", 1, 0, 13, 16); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, -1, 7, 32); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, -1, 13, 32); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib3"), "calib3", "calib", 1, -1, 7, 16); + testAgentGroupInfo(session.mAgentGroupInfo.at("calib4"), "calib4", "calib", 1, -1, 13, 16); } BOOST_AUTO_TEST_SUITE_END()