Skip to content

Commit

Permalink
Allow collections to be fully expendable
Browse files Browse the repository at this point in the history
  • Loading branch information
rbx committed Mar 7, 2024
1 parent d5522b6 commit 371cf7b
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 49 deletions.
3 changes: 2 additions & 1 deletion ReleaseNotes.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# ODC Release Notes

## master (unreleased)
## 0.81.0 (2024-03-07)

- **Breaking Change**: Support expendable collections. When nMin is 0, the collection is considered expendable, and the failure of all of its members will not trigger a global error. Previously, an nMin of 0 was the default and meant that no nMin was defined. That behaviour now applies when nMin is -1, which is also the new default.
- Fix a deprecation warning with C++20

## 0.80.2 (2024-01-05)
Expand Down
2 changes: 1 addition & 1 deletion odc/Controller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ void Controller::extractRequirements(const CommonParams& common, Session& sessio
string topoPath = parent->getPath();
int nCores = 0;
int32_t n = c->getTotalCounter();
int32_t nmin = 0;
int32_t nmin = -1;
int32_t numTasks = c->getNofTasks();
int32_t numTasksTotal = numTasks * n;

Expand Down
4 changes: 2 additions & 2 deletions odc/DDSSubmit.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct DDSSubmitParams
mConfigFile = pt.get<std::string>("configFile", "");
mEnvFile = pt.get<std::string>("envFile", "");
mNumAgents = pt.get<uint32_t>("agents", 0);
mMinAgents = 0;
mMinAgents = -1;
mNumSlots = pt.get<uint32_t>("slots", 0);
// number of cores is set dynamically from the topology (if provided), not from the initial resource definition
mNumCores = 0;
Expand All @@ -63,7 +63,7 @@ struct DDSSubmitParams
std::string mConfigFile; ///< Path to the configuration file of the RMS plugin
std::string mEnvFile; ///< Path to the environment file
uint32_t mNumAgents = 0; ///< Number of DDS agents
uint32_t mMinAgents = 0; ///< Minimum number of DDS agents
int32_t mMinAgents = -1; ///< Minimum number of DDS agents
uint32_t mNumSlots = 0; ///< Number of slots per DDS agent
uint32_t mNumCores = 0; ///< Number of cores

Expand Down
2 changes: 1 addition & 1 deletion odc/Topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class BasicTopology : public AsioBase<Executor, Allocator>

bool CheckNmin(int32_t nCurrent, int32_t nMin, const std::string& runtimeColPath, const std::string& colPath, DDSCollection::Id colId)
{
if (nMin == 0) {
if (nMin == -1) {
// no nMin defined, failure cannot be ignored
OLOG(error, mPartitionID, mSession.mLastRunNr.load())
<< "Collection '" << runtimeColPath << "' (id: " << colId << ")"
Expand Down
7 changes: 5 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,11 @@ macro(add_nmin_test TITLE PASS FAIL)
set_tests_properties(${TITLE} PROPERTIES TIMEOUT 60 PASS_REGULAR_EXPRESSION "${PASS}" FAIL_REGULAR_EXPRESSION "${FAIL}" ENVIRONMENT "${TEST_ENV}")
endmacro()

# nmin is 0 - is the same as when it is not defined.
add_nmin_test(nmin_0 "Status code: ERROR" "")
# nmin is -1 - behaves the same as when nmin is not defined.
add_nmin_test(nmin_negative "Status code: ERROR" "")

# nmin is 0 - the collection is fully expendable, so its failure must not produce an error status
add_nmin_test(nmin_0 "" "Status code: ERROR")

# nmin: a collection outside of group should fail
add_nmin_test(nmin_col_outside_group "Status code: ERROR" "")
Expand Down
28 changes: 14 additions & 14 deletions tests/parameters-tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,10 @@ BOOST_AUTO_TEST_CASE(odc_rp_same_simple)
printParams(ddsParams2);

BOOST_TEST(ddsParams.size() == 1);
testParameterSet(ddsParams.at(0), "localhost", "", "", 1, 0, 36, 0, "", "");
testParameterSet(ddsParams2.at(0), "localhost", "", "", 1, 0, 12, 0, "", "");
testParameterSet(ddsParams2.at(1), "localhost", "", "", 1, 0, 12, 0, "", "");
testParameterSet(ddsParams2.at(2), "localhost", "", "", 1, 0, 12, 0, "", "");
testParameterSet(ddsParams.at(0), "localhost", "", "", 1, -1, 36, 0, "", "");
testParameterSet(ddsParams2.at(0), "localhost", "", "", 1, -1, 12, 0, "", "");
testParameterSet(ddsParams2.at(1), "localhost", "", "", 1, -1, 12, 0, "", "");
testParameterSet(ddsParams2.at(2), "localhost", "", "", 1, -1, 12, 0, "", "");
}

BOOST_AUTO_TEST_CASE(odc_rp_same_zones)
Expand Down Expand Up @@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE(odc_rp_same_zones)
printParams(ddsParams2);

BOOST_TEST(ddsParams.size() == 5);
testParameterSet(ddsParams.at(0), "localhost", "calib", "calib", 1, 0, 2, 0, "", "");
testParameterSet(ddsParams.at(0), "localhost", "calib", "calib", 1, -1, 2, 0, "", "");
testParameterSet(ddsParams.at(1), "localhost", "online", "online", 1, 2, 2, 0, "", "");
testParameterSet(ddsParams.at(2), "localhost", "online", "online", 1, 2, 2, 0, "", "");
testParameterSet(ddsParams.at(3), "localhost", "online", "online", 1, 2, 2, 0, "", "");
Expand Down Expand Up @@ -176,7 +176,7 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_zones)

BOOST_TEST(ddsParams.size() == 2);
BOOST_TEST(ddsParams2.size() == 2);
testParameterSet(ddsParams.at(1), "slurm", "calib", "calib", 1, 0, 2, 0, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "calib", "calib", 1, -1, 2, 0, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(0), "slurm", "online", "online", 4, 2, 2, 0, "/home/user/slurm-online.cfg", "");
compareParameterSets(ddsParams.at(0), ddsParams2.at(0));
compareParameterSets(ddsParams.at(1), ddsParams2.at(1));
Expand Down Expand Up @@ -213,7 +213,7 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_zones_group_without_n_with_tasks)
BOOST_TEST(ddsParams.size() == 1);
BOOST_TEST(ddsParams2.size() == 2);
testParameterSet(ddsParams.at(0), "slurm", "online", "online", 4, 2, 2, 0, "/home/user/slurm-online.cfg", "");
testParameterSet(ddsParams2.at(1), "slurm", "calib", "calib", 1, 0, 2, 0, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams2.at(1), "slurm", "calib", "calib", 1, -1, 2, 0, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams2.at(0), "slurm", "online", "online", 4, 2, 2, 0, "/home/user/slurm-online.cfg", "");
}

Expand Down Expand Up @@ -279,9 +279,9 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_ncores_without_n)

BOOST_TEST(ddsParams.size() == 3);
BOOST_TEST(ddsParams2.size() == 3);
testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, 0, 1, 2, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, 0, 1, 0, "/home/user/slurm-online.cfg", "");
testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, 0, 1, 1, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, -1, 1, 2, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, -1, 1, 0, "/home/user/slurm-online.cfg", "");
testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, -1, 1, 1, "/home/user/slurm-calib.cfg", "");
compareParameterSets(ddsParams.at(0), ddsParams2.at(2));
compareParameterSets(ddsParams.at(1), ddsParams2.at(0));
compareParameterSets(ddsParams.at(2), ddsParams2.at(1));
Expand Down Expand Up @@ -316,9 +316,9 @@ BOOST_AUTO_TEST_CASE(odc_rp_epn_slurm_ncores)

BOOST_TEST(ddsParams.size() == 3);
BOOST_TEST(ddsParams2.size() == 3);
testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, 0, 1, 2, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, 0, 1, 0, "/home/user/slurm-online.cfg", "");
testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, 0, 1, 1, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(0), "slurm", "calib", "calib1", 1, -1, 1, 2, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "online", "online", 4, -1, 1, 0, "/home/user/slurm-online.cfg", "");
testParameterSet(ddsParams.at(2), "slurm", "calib", "calib2", 1, -1, 1, 1, "/home/user/slurm-calib.cfg", "");
compareParameterSets(ddsParams.at(0), ddsParams2.at(2));
compareParameterSets(ddsParams.at(1), ddsParams2.at(0));
compareParameterSets(ddsParams.at(2), ddsParams2.at(1));
Expand Down Expand Up @@ -347,7 +347,7 @@ BOOST_AUTO_TEST_CASE(odc_extract_epn)

BOOST_TEST(ddsParams.size() == 2);
testParameterSet(ddsParams.at(0), "slurm", "online", "online", 50, 50, 223, 0, "/home/user/slurm-online.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "calib", "calib1", 1, 0, 17, 128, "/home/user/slurm-calib.cfg", "");
testParameterSet(ddsParams.at(1), "slurm", "calib", "calib1", 1, -1, 17, 128, "/home/user/slurm-calib.cfg", "");
}

BOOST_AUTO_TEST_SUITE_END()
Expand Down
56 changes: 28 additions & 28 deletions tests/requirements-tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ BOOST_AUTO_TEST_CASE(simple)
BOOST_TEST(session.mZoneInfo.size() == 0);
BOOST_TEST(session.mNinfo.size() == 0);
BOOST_TEST(session.mCollections.size() == 1);
testCollection(session.mCollections.at("EPNCollection"), "EPNCollection", "", "", 1, 0, 0, 12, 12);
testCollection(session.mCollections.at("EPNCollection"), "EPNCollection", "", "", 1, -1, 0, 12, 12);

BOOST_TEST(session.mAgentGroupInfo.size() == 1);
testAgentGroupInfo(session.mAgentGroupInfo.at(""), "", "", 1, 0, 12, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at(""), "", "", 1, -1, 12, 0);
}

BOOST_AUTO_TEST_CASE(zones_from_agent_groupnames)
Expand All @@ -157,11 +157,11 @@ BOOST_AUTO_TEST_CASE(zones_from_agent_groupnames)
testZoneGroup(session.mZoneInfo.at("online").at(0), 4, 0, "online");

BOOST_TEST(session.mCollections.size() == 2);
testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, 0, 0, 2, 2);
testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, -1, 0, 2, 2);
testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, 2, 0, 2, 8);

BOOST_TEST(session.mAgentGroupInfo.size() == 2);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, 0, 2, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, -1, 2, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, 2, 2, 0);
}

Expand All @@ -185,14 +185,14 @@ BOOST_AUTO_TEST_CASE(zones_with_ncores)
testZoneGroup(session.mZoneInfo.at("online").at(0), 4, 0, "online");

BOOST_TEST(session.mCollections.size() == 3);
testCollection(session.mCollections.at("Samplers"), "Samplers", "calib", "calib1", 1, 0, 2, 1, 1);
testCollection(session.mCollections.at("Sinks"), "Sinks", "calib", "calib2", 1, 0, 1, 1, 1);
testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, 0, 0, 1, 4);
testCollection(session.mCollections.at("Samplers"), "Samplers", "calib", "calib1", 1, -1, 2, 1, 1);
testCollection(session.mCollections.at("Sinks"), "Sinks", "calib", "calib2", 1, -1, 1, 1, 1);
testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, -1, 0, 1, 4);

BOOST_TEST(session.mAgentGroupInfo.size() == 3);
testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, 0, 1, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, 0, 1, 2);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, 0, 1, 1);
testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, -1, 1, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, -1, 1, 2);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, -1, 1, 1);
}

BOOST_AUTO_TEST_CASE(nmin)
Expand All @@ -214,16 +214,16 @@ BOOST_AUTO_TEST_CASE(nmin)
testZoneGroup(session.mZoneInfo.at("online").at(0), 4, 0, "online");

BOOST_TEST(session.mNinfo.size() == 2);
testNinfo(session.mNinfo.at("SamplersSinks"), 1, 0, "calib");
testNinfo(session.mNinfo.at("SamplersSinks"), 1, -1, "calib");
testNinfo(session.mNinfo.at("Processors"), 4, 2, "online");

BOOST_TEST(session.mCollections.size() == 2);
testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, 0, 0, 2, 2);
testCollection(session.mCollections.at("SamplersSinks"), "SamplersSinks", "calib", "calib", 1, -1, 0, 2, 2);
testCollection(session.mCollections.at("Processors"), "Processors", "online", "online", 4, 2, 0, 2, 8);

BOOST_TEST(session.mAgentGroupInfo.size() == 2);
testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 4, 2, 2, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, 0, 2, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib"), "calib", "calib", 1, -1, 2, 0);
}

BOOST_AUTO_TEST_CASE(epn)
Expand All @@ -245,16 +245,16 @@ BOOST_AUTO_TEST_CASE(epn)
testZoneGroup(session.mZoneInfo.at("online").at(0), 50, 0, "online");

BOOST_TEST(session.mNinfo.size() == 2);
testNinfo(session.mNinfo.at("wf11.dds"), 1, 0, "calib1");
testNinfo(session.mNinfo.at("wf11.dds"), 1, -1, "calib1");
testNinfo(session.mNinfo.at("RecoCollection"), 50, 50, "online");

BOOST_TEST(session.mCollections.size() == 2);
testCollection(session.mCollections.at("wf11.dds"), "wf11.dds", "calib", "calib1", 1, 0, 128, 17, 17);
testCollection(session.mCollections.at("wf11.dds"), "wf11.dds", "calib", "calib1", 1, -1, 128, 17, 17);
testCollection(session.mCollections.at("RecoCollection"), "RecoCollection", "online", "online", 50, 50, 0, 223, 11150);

BOOST_TEST(session.mAgentGroupInfo.size() == 2);
testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 50, 50, 223, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, 0, 17, 128);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, -1, 17, 128);
}

BOOST_AUTO_TEST_CASE(epn_2)
Expand All @@ -280,24 +280,24 @@ BOOST_AUTO_TEST_CASE(epn_2)

BOOST_TEST(session.mNinfo.size() == 5);
testNinfo(session.mNinfo.at("RecoCollection"), 108, 93, "online");
testNinfo(session.mNinfo.at("wf5.dds"), 1, 0, "calib1");
testNinfo(session.mNinfo.at("wf4.dds"), 1, 0, "calib2");
testNinfo(session.mNinfo.at("wf10.dds"), 1, 0, "calib3");
testNinfo(session.mNinfo.at("wf6.dds"), 1, 0, "calib4");
testNinfo(session.mNinfo.at("wf5.dds"), 1, -1, "calib1");
testNinfo(session.mNinfo.at("wf4.dds"), 1, -1, "calib2");
testNinfo(session.mNinfo.at("wf10.dds"), 1, -1, "calib3");
testNinfo(session.mNinfo.at("wf6.dds"), 1, -1, "calib4");

BOOST_TEST(session.mCollections.size() == 5);
testCollection(session.mCollections.at("RecoCollection"), "RecoCollection", "online", "online", 108, 93, 0, 351, 37908);
testCollection(session.mCollections.at("wf5.dds"), "wf5.dds", "calib", "calib1", 1, 0, 32, 7, 7);
testCollection(session.mCollections.at("wf4.dds"), "wf4.dds", "calib", "calib2", 1, 0, 32, 13, 13);
testCollection(session.mCollections.at("wf10.dds"), "wf10.dds", "calib", "calib3", 1, 0, 16, 7, 7);
testCollection(session.mCollections.at("wf6.dds"), "wf6.dds", "calib", "calib4", 1, 0, 16, 13, 13);
testCollection(session.mCollections.at("wf5.dds"), "wf5.dds", "calib", "calib1", 1, -1, 32, 7, 7);
testCollection(session.mCollections.at("wf4.dds"), "wf4.dds", "calib", "calib2", 1, -1, 32, 13, 13);
testCollection(session.mCollections.at("wf10.dds"), "wf10.dds", "calib", "calib3", 1, -1, 16, 7, 7);
testCollection(session.mCollections.at("wf6.dds"), "wf6.dds", "calib", "calib4", 1, -1, 16, 13, 13);

BOOST_TEST(session.mAgentGroupInfo.size() == 5);
testAgentGroupInfo(session.mAgentGroupInfo.at("online"), "online", "online", 108, 93, 351, 0);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, 0, 7, 32);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, 0, 13, 32);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib3"), "calib3", "calib", 1, 0, 7, 16);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib4"), "calib4", "calib", 1, 0, 13, 16);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib1"), "calib1", "calib", 1, -1, 7, 32);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib2"), "calib2", "calib", 1, -1, 13, 32);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib3"), "calib3", "calib", 1, -1, 7, 16);
testAgentGroupInfo(session.mAgentGroupInfo.at("calib4"), "calib4", "calib", 1, -1, 13, 16);
}

BOOST_AUTO_TEST_SUITE_END()
Expand Down

0 comments on commit 371cf7b

Please sign in to comment.