From 715819f1fd14a5d59ed32e851edec69625f934c2 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Mon, 3 Feb 2025 14:36:31 +0000 Subject: [PATCH] update exclude and reintegrate drpc handlers and fix ftest ranks params Signed-off-by: Tom Nabarro --- src/control/cmd/dmg/pool.go | 8 +- src/mgmt/srv_drpc.c | 106 +++++++++++++++++++----- src/mgmt/tests/srv_drpc_tests.c | 31 ++----- src/tests/ftest/util/dmg_utils.py | 35 ++++---- src/tests/ftest/util/test_utils_pool.py | 32 ++++--- 5 files changed, 135 insertions(+), 77 deletions(-) diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index 838cc045949..7d2f0320486 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -542,7 +542,7 @@ type poolRanksCmd struct { type poolExcludeCmd struct { poolRanksCmd Force bool `short:"f" long:"force" description:"Force the operation to continue, potentially leading to data loss"` - TargetIdx string `long:"target-idx" description:"Comma-separated list of target idx(s) to be excluded from each rank"` + TargetIdx string `long:"target-idx" description:"Comma-separated list of target index(es) to be excluded from each rank"` } // Execute is run when PoolExcludeCmd subcommand is activated @@ -582,7 +582,7 @@ func (cmd *poolExcludeCmd) Execute(args []string) error { // poolDrainCmd is the struct representing the command to Drain a DAOS target. type poolDrainCmd struct { poolRanksCmd - TargetIdx string `long:"target-idx" description:"Comma-separated list of target idx(s) to be drained on each rank"` + TargetIdx string `long:"target-idx" description:"Comma-separated list of target index(es) to be drained on each rank"` } // Execute is run when PoolDrainCmd subcommand is activated @@ -645,7 +645,7 @@ func (cmd *poolExtendCmd) Execute(args []string) error { // poolReintegrateCmd is the struct representing the command to Add a DAOS target. type poolReintegrateCmd struct { poolRanksCmd - TargetIdx string `long:"target-idx" description:"Comma-separated list of target idx(s) to be reintegrated into each rank"` + TargetIdx string `long:"target-idx" description:"Comma-separated list of target index(es) to be reintegrated into each rank"` } // Execute is run when poolReintegrateCmd subcommand is activated @@ -731,7 +731,7 @@ type poolQueryTargetsCmd struct { poolCmd Rank uint32 `long:"rank" required:"1" description:"Engine rank of the targets to be queried"` - Targets string `long:"target-idx" description:"Comma-separated list of target idx(s) to be queried"` + Targets string `long:"target-idx" description:"Comma-separated list of target index(es) to be queried"` } // Execute is run when PoolQueryTargetsCmd subcommand is activated diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index b80d9d149ef..eb10a18d207 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -754,15 +754,19 @@ pool_change_target_state(char *id, d_rank_list_t *svc_ranks, size_t n_target_idx void ds_mgmt_drpc_pool_exclude(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) { - struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); Mgmt__PoolExcludeReq *req = NULL; - Mgmt__PoolExcludeResp resp; + Mgmt__PoolRanksResp resp; + d_rank_list_t *tgt_ranks = NULL; d_rank_list_t *svc_ranks = NULL; + d_rank_list_t *success_ranks = NULL; uint8_t *body; - size_t len; - int rc; + size_t len; + int rc; + int i; + int j = 0; - mgmt__pool_exclude_resp__init(&resp); + mgmt__pool_ranks_resp__init(&resp); /* Unpack the inner request from the drpc call body */ req = mgmt__pool_exclude_req__unpack(&alloc.alloc, @@ -775,24 +779,50 @@ ds_mgmt_drpc_pool_exclude(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) return; } + tgt_ranks = uint32_array_to_rank_list(req->ranks, req->n_ranks); + if (tgt_ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); if (svc_ranks == NULL) - D_GOTO(out, rc = -DER_NOMEM); + D_GOTO(out_tgt, rc = -DER_NOMEM); + + success_ranks = d_rank_list_alloc(req->n_ranks); + if (success_ranks == NULL) + D_GOTO(out_svc, rc = -DER_NOMEM); - rc = pool_change_target_state(req->id, svc_ranks, req->n_target_idx, req->target_idx, - req->ranks[0], PO_COMP_ST_DOWN, 0 /* scm_size */, - 0 /* nvme_size */, 0 /* meta_size */, req->force); + for (i = 0; i < req->n_ranks; i++) { + rc = + pool_change_target_state(req->id, svc_ranks, req->n_target_idx, req->target_idx, + req->ranks[i], PO_COMP_ST_DOWN, 0 /* scm_size */, + 0 /* nvme_size */, 0 /* meta_size */, req->force); + if (rc != 0) { + resp.failed_rank = req->ranks[i]; + goto out_list; + } - d_rank_list_free(svc_ranks); + D_ASSERT(j < success_ranks->rl_nr); + success_ranks->rl_ranks[j++] = req->ranks[i]; + } +out_list: + rc = rank_list_to_uint32_array(success_ranks, &resp.success_ranks, &resp.n_success_ranks); + if (rc != 0) { + D_ERROR("Failed to convert enabled target rank list: rc=%d\n", rc); + } + d_rank_list_free(success_ranks); +out_svc: + d_rank_list_free(svc_ranks); +out_tgt: + d_rank_list_free(tgt_ranks); out: resp.status = rc; - len = mgmt__pool_exclude_resp__get_packed_size(&resp); + len = mgmt__pool_ranks_resp__get_packed_size(&resp); D_ALLOC(body, len); if (body == NULL) { drpc_resp->status = DRPC__STATUS__FAILED_MARSHAL; } else { - mgmt__pool_exclude_resp__pack(&resp, body); + mgmt__pool_ranks_resp__pack(&resp, body); drpc_resp->body.len = len; drpc_resp->body.data = body; } @@ -971,15 +1001,19 @@ ds_mgmt_drpc_pool_extend(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) void ds_mgmt_drpc_pool_reintegrate(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) { - struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); + struct drpc_alloc alloc = PROTO_ALLOCATOR_INIT(alloc); Mgmt__PoolReintReq *req = NULL; Mgmt__PoolRanksResp resp; - d_rank_list_t *svc_ranks = NULL; + d_rank_list_t *tgt_ranks = NULL; + d_rank_list_t *svc_ranks = NULL; + d_rank_list_t *success_ranks = NULL; uint8_t *body; - size_t len; - uint64_t scm_bytes; - uint64_t nvme_bytes = 0; - int rc; + size_t len; + uint64_t scm_bytes; + uint64_t nvme_bytes = 0; + int i; + int j = 0; + int rc; mgmt__pool_ranks_resp__init(&resp); @@ -1004,16 +1038,42 @@ ds_mgmt_drpc_pool_reintegrate(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) if (req->n_tier_bytes > DAOS_MEDIA_NVME) nvme_bytes = req->tier_bytes[DAOS_MEDIA_NVME]; + tgt_ranks = uint32_array_to_rank_list(req->ranks, req->n_ranks); + if (tgt_ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + svc_ranks = uint32_array_to_rank_list(req->svc_ranks, req->n_svc_ranks); if (svc_ranks == NULL) - D_GOTO(out, rc = -DER_NOMEM); + D_GOTO(out_tgt, rc = -DER_NOMEM); - rc = pool_change_target_state(req->id, svc_ranks, req->n_target_idx, req->target_idx, - req->ranks[0], PO_COMP_ST_UP, scm_bytes, nvme_bytes, - req->tier_bytes[DAOS_MEDIA_SCM] /* meta_size */, false); + success_ranks = d_rank_list_alloc(req->n_ranks); + if (success_ranks == NULL) + D_GOTO(out_svc, rc = -DER_NOMEM); - d_rank_list_free(svc_ranks); + for (i = 0; i < req->n_ranks; i++) { + rc = pool_change_target_state( + req->id, svc_ranks, req->n_target_idx, req->target_idx, req->ranks[i], + PO_COMP_ST_UP, scm_bytes, nvme_bytes, + req->tier_bytes[DAOS_MEDIA_SCM] /* meta_size */, false); + if (rc != 0) { + resp.failed_rank = req->ranks[i]; + goto out_list; + } + D_ASSERT(j < success_ranks->rl_nr); + success_ranks->rl_ranks[j++] = req->ranks[i]; + } + +out_list: + rc = rank_list_to_uint32_array(success_ranks, &resp.success_ranks, &resp.n_success_ranks); + if (rc != 0) { + D_ERROR("Failed to convert enabled target rank list: rc=%d\n", rc); + } + d_rank_list_free(success_ranks); +out_svc: + d_rank_list_free(svc_ranks); +out_tgt: + d_rank_list_free(tgt_ranks); out: resp.status = rc; len = mgmt__pool_ranks_resp__get_packed_size(&resp); diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index c0778328878..f7055490fc9 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1796,19 +1796,18 @@ setup_exclude_drpc_call(Drpc__Call *call, char *uuid, uint32_t rank) } static void -expect_drpc_exclude_resp_with_error(Drpc__Response *resp, int exp_error) +expect_drpc_ranks_resp_with_error(Drpc__Response *resp, int exp_error) { - Mgmt__PoolExcludeResp *pc_resp = NULL; + Mgmt__PoolRanksResp *pc_resp = NULL; assert_int_equal(resp->status, DRPC__STATUS__SUCCESS); assert_non_null(resp->body.data); - pc_resp = mgmt__pool_exclude_resp__unpack(NULL, resp->body.len, - resp->body.data); + pc_resp = mgmt__pool_ranks_resp__unpack(NULL, resp->body.len, resp->body.data); assert_non_null(pc_resp); assert_int_equal(pc_resp->status, exp_error); - mgmt__pool_exclude_resp__free_unpacked(pc_resp, NULL); + mgmt__pool_ranks_resp__free_unpacked(pc_resp, NULL); } static void @@ -1821,7 +1820,7 @@ test_drpc_exclude_bad_uuid(void **state) ds_mgmt_drpc_pool_exclude(&call, &resp); - expect_drpc_exclude_resp_with_error(&resp, -DER_INVAL); + expect_drpc_ranks_resp_with_error(&resp, -DER_INVAL); D_FREE(call.body.data); D_FREE(resp.body.data); @@ -1837,8 +1836,7 @@ test_drpc_exclude_mgmt_svc_fails(void **state) ds_mgmt_target_update_return = -DER_MISC; ds_mgmt_drpc_pool_exclude(&call, &resp); - expect_drpc_exclude_resp_with_error(&resp, - ds_mgmt_target_update_return); + expect_drpc_ranks_resp_with_error(&resp, ds_mgmt_target_update_return); D_FREE(call.body.data); D_FREE(resp.body.data); @@ -1853,7 +1851,7 @@ test_drpc_exclude_success(void **state) setup_exclude_drpc_call(&call, TEST_UUID, 0); ds_mgmt_drpc_pool_exclude(&call, &resp); - expect_drpc_exclude_resp_with_error(&resp, 0); + expect_drpc_ranks_resp_with_error(&resp, 0); D_FREE(call.body.data); D_FREE(resp.body.data); @@ -1901,21 +1899,6 @@ setup_drain_drpc_call(Drpc__Call *call, char *uuid, uint32_t rank) pack_pool_drain_req(call, &req); } -static void -expect_drpc_ranks_resp_with_error(Drpc__Response *resp, int exp_error) -{ - Mgmt__PoolRanksResp *pc_resp = NULL; - - assert_int_equal(resp->status, DRPC__STATUS__SUCCESS); - assert_non_null(resp->body.data); - - pc_resp = mgmt__pool_ranks_resp__unpack(NULL, resp->body.len, resp->body.data); - assert_non_null(pc_resp); - assert_int_equal(pc_resp->status, exp_error); - - mgmt__pool_ranks_resp__free_unpacked(pc_resp, NULL); -} - static void test_drpc_drain_bad_uuid(void **state) { diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 9780e89aad1..f98c93d1ec8 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -894,8 +894,10 @@ def pool_exclude(self, pool, ranks, tgt_idx=None, force=False): Args: pool (str): Pool uuid. - ranks (int): Ranks of the daos_server to exclude - tgt_idx (int): target to be excluded from each pool + ranks (str): Comma separated daos_server-rank ranges to exclude e.g. + "0,2-5". + tgt_idx (list, optional): targets to exclude on ranks e.g. "1,2". + Defaults to None. force (bool, optional): force exclusion regardless of data loss. Defaults to False Returns: @@ -914,7 +916,8 @@ def pool_extend(self, pool, ranks): Args: pool (str): Pool uuid. - ranks (int): Ranks of the daos_server to extend + ranks (str): Comma separated daos_server-rank ranges to extend e.g. + "0,2-5". Returns: CmdResult: Object that contains exit status, stdout, and other @@ -932,8 +935,10 @@ def pool_drain(self, pool, ranks, tgt_idx=None): Args: pool (str): Pool uuid. - ranks (int): Ranks of the daos_server to drain - tgt_idx (int): target to be drained from each pool + ranks (str): Comma separated daos_server-rank ranges to drain e.g. + "0,2-5". + tgt_idx (list, optional): targets to drain on ranks e.g. "1,2". + Defaults to None. Returns: CmdResult: Object that contains exit status, stdout, and other @@ -951,8 +956,10 @@ def pool_reintegrate(self, pool, ranks, tgt_idx=None): Args: pool (str): Pool uuid. - ranks (int): Ranks of the daos_server to reintegrate - tgt_idx (int): target to be reintegrated from each pool + ranks (str): Comma separated daos_server-rank ranges to reintegrate + e.g. "0,2-5". + tgt_idx (list, optional): targets to reintegrate on ranks e.g. "1,2". + Defaults to None. Returns: CmdResult: Object that contains exit status, stdout, and other @@ -1028,7 +1035,7 @@ def system_clear_exclude(self, ranks, rank_hosts): Either ranks or rank_hosts is necessary. Pass in None to one of them. Args: - ranks (str): comma separated ranks to exclude. + ranks (str): Comma separated rank-ranges to exclude e.g. "0,2-5". rank_hosts (str): hostlist representing hosts whose managed ranks are to be operated on. @@ -1047,7 +1054,7 @@ def system_query(self, ranks=None, verbose=True): Args: ranks (str): Specify specific ranks to obtain it's status. Use - comma separated list for multiple ranks. e.g., 0,1. + comma separated rank-ranges for multiple ranks e.g. "0,2-5". Defaults to None, which means report all available ranks. verbose (bool): To obtain detailed query report @@ -1131,7 +1138,7 @@ def system_exclude(self, ranks, rank_hosts): Either ranks or rank_hosts is necessary. Pass in None to one of them. Args: - ranks (str): comma separated ranks to exclude. + ranks (str): Comma separated rank-ranges to exclude e.g. "0,2-5". rank_hosts (str): hostlist representing hosts whose managed ranks are to be operated on. @@ -1149,8 +1156,8 @@ def system_start(self, ranks=None): """Start the system. Args: - ranks (str, optional): comma separated ranks to stop. Defaults to - None. + ranks (str, optional): Comma separated rank-ranges to start e.g. + "0,2-5". Defaults to None. Raises: CommandFailure: if the dmg system start command fails. @@ -1177,8 +1184,8 @@ def system_stop(self, force=False, ranks=None): Args: force (bool, optional): whether to force the stop. Defaults to False. - ranks (str, optional): comma separated ranks to stop. Defaults to - None. + ranks (str, optional): Comma separated rank-ranges to stop e.g. + "0,2-5". Defaults to None. Raises: CommandFailure: if the dmg system stop command fails. diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 2893f82b4ad..9cafcdbf885 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -555,20 +555,22 @@ def delete_acl(self, principal): return self.dmg.pool_delete_acl(self.identifier, principal=principal) @fail_on(CommandFailure) - def drain(self, rank, tgt_idx=None): + def drain(self, ranks, tgt_idx=None): """Use dmg to drain the rank and targets from this pool. Only supported with the dmg control method. Args: - rank (str): daos server rank to drain - tgt_idx (str, optional): targets to drain on ranks, ex: "1,2". Defaults to None. + ranks (str): Comma separated daos_server-rank ranges to drain e.g. + "0,2-5". + tgt_idx (list, optional): targets to drain on ranks e.g. "1,2". + Defaults to None. Returns: CmdResult: Object that contains exit status, stdout, and other information. """ - return self.dmg.pool_drain(self.identifier, rank, tgt_idx) + return self.dmg.pool_drain(self.identifier, ranks, tgt_idx) @fail_on(CommandFailure) def disable_aggregation(self): @@ -597,8 +599,10 @@ def exclude(self, ranks, tgt_idx=None, force=True): """Manually exclude a rank from this pool. Args: - ranks (list): a list daos server ranks (int) to exclude - tgt_idx (string, optional): targets to exclude on ranks, ex: "1,2". Defaults to None. + ranks (str): Comma separated daos_server-rank ranges to exclude e.g. + "0,2-5". + tgt_idx (list, optional): targets to exclude on ranks e.g. "1,2". + Defaults to None. force (bool, optional): force exclusion regardless of data loss. Defaults to true Returns: @@ -612,7 +616,8 @@ def extend(self, ranks): """Extend the pool to additional ranks. Args: - ranks (str): comma separate list of daos server ranks (int) to extend + ranks (str): Comma separated daos_server-rank ranges to extend e.g. + "0,2-5". Returns: CmdResult: Object that contains exit status, stdout, and other information. @@ -756,18 +761,20 @@ def query_targets(self, *args, **kwargs): return self.dmg.pool_query_targets(self.identifier, *args, **kwargs) @fail_on(CommandFailure) - def reintegrate(self, rank, tgt_idx=None): + def reintegrate(self, ranks, tgt_idx=None): """Use dmg to reintegrate the rank and targets into this pool. Args: - rank (str): daos server rank to reintegrate - tgt_idx (str, optional): targets to reintegrate on ranks, ex: "1,2". Defaults to None. + ranks (str): Comma separated daos_server-rank ranges to reintegrate + e.g. "0,2-5". + tgt_idx (list, optional): targets to reintegrate on ranks e.g. "1,2". + Defaults to None. Returns: CmdResult: Object that contains exit status, stdout, and other information. """ - return self.dmg.pool_reintegrate(self.identifier, rank, tgt_idx) + return self.dmg.pool_reintegrate(self.identifier, ranks, tgt_idx) @fail_on(CommandFailure) def set_property(self, prop_name, prop_value): @@ -1033,7 +1040,8 @@ def get_space_per_target(self, ranks, target_idx): """Get space usage per rank, per target using dmg pool query-targets. Args: - ranks (list): List of ranks to be queried + ranks (str): Comma separated daos_server-rank ranges to query e.g. + "0,2-5". target_idx (str): Comma-separated list of target idx(s) to be queried Returns: