Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16878 pool: Reduce unexpected DER_NO_SERVICEs #15665

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 90 additions & 38 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -213,6 +214,7 @@ struct pool_svc {
rdb_path_t ps_handles; /* pool handle KVS */
rdb_path_t ps_user; /* pool user attributes KVS */
rdb_path_t ps_ops; /* metadata ops KVS */
int ps_error; /* in DB data (see pool_svc_lookup_leader) */
struct pool_svc_events ps_events;
uint32_t ps_global_version;
int ps_svc_rf;
Expand Down Expand Up @@ -1950,8 +1952,11 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf_out,

rc = ds_pool_svc_load(&tx, svc->ps_uuid, &svc->ps_root, &svc->ps_global_version, &map_buf,
&map_version);
if (rc != 0)
if (rc != 0) {
if (rc == -DER_DF_INCOMPT)
svc->ps_error = rc;
goto out_lock;
}

rc = pool_prop_read(&tx, svc, DAOS_PO_QUERY_PROP_ALL, &prop);
if (rc != 0) {
Expand Down Expand Up @@ -2263,6 +2268,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
d_rank_t rank = dss_self_rank();
int rc;

D_ASSERTF(svc->ps_error == 0, "ps_error: " DF_RC "\n", DP_RC(svc->ps_error));

/*
* If this is the only voting replica, it may have become the leader
* without doing any RPC. The primary group may have yet to be
Expand Down Expand Up @@ -2328,9 +2335,9 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
}

rc = ds_pool_iv_prop_update(svc->ps_pool, prop);
if (rc) {
D_ERROR("ds_pool_iv_prop_update failed: " DF_RC "\n", DP_RC(rc));
D_GOTO(out, rc);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": ds_pool_iv_prop_update failed", DP_UUID(svc->ps_uuid));
goto out;
}

if (!uuid_is_null(svc->ps_pool->sp_srv_cont_hdl)) {
Expand All @@ -2346,11 +2353,10 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
uuid_copy(svc->ps_pool->sp_srv_pool_hdl, pool_hdl_uuid);
}

rc = ds_pool_iv_srv_hdl_update(svc->ps_pool, pool_hdl_uuid,
cont_hdl_uuid);
if (rc) {
D_ERROR("ds_pool_iv_srv_hdl_update failed: " DF_RC "\n", DP_RC(rc));
D_GOTO(out, rc);
rc = ds_pool_iv_srv_hdl_update(svc->ps_pool, pool_hdl_uuid, cont_hdl_uuid);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": ds_pool_iv_srv_hdl_update failed", DP_UUID(svc->ps_uuid));
goto out;
}

/* resume pool upgrade if needed */
Expand Down Expand Up @@ -2385,27 +2391,39 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
D_FREE(map_buf);
if (prop != NULL)
daos_prop_free(prop);
if (rc < 0)
ds_pool_failed_add(svc->ps_uuid, rc);
else if (rc == 0)
ds_pool_failed_remove(svc->ps_uuid);
if (svc->ps_error != 0) {
/*
* Step up with the error anyway, so that RPCs to the PS
* receive an error instead of timeouts.
*/
DS_POOL_NOTE_PRINT(DF_UUID": rank %u became pool service leader "DF_U64
" with error: "DF_RC"\n", DP_UUID(svc->ps_uuid), rank,
svc->ps_rsvc.s_term, DP_RC(svc->ps_error));
rc = 0;
}
return rc;
}

static void
pool_svc_step_down_cb(struct ds_rsvc *rsvc)
{
struct pool_svc *svc = pool_svc_obj(rsvc);
d_rank_t rank = dss_self_rank();

pool_svc_step_down_metrics(svc);
fini_events(svc);
sched_cancel_and_wait(&svc->ps_reconf_sched);
sched_cancel_and_wait(&svc->ps_rfcheck_sched);
ds_cont_svc_step_down(svc->ps_cont_svc);
struct pool_svc *svc = pool_svc_obj(rsvc);
d_rank_t rank = dss_self_rank();

DS_POOL_NOTE_PRINT(DF_UUID": rank %u no longer pool service leader "DF_U64"\n",
DP_UUID(svc->ps_uuid), rank, svc->ps_rsvc.s_term);
if (svc->ps_error == 0) {
pool_svc_step_down_metrics(svc);
fini_events(svc);
sched_cancel_and_wait(&svc->ps_reconf_sched);
sched_cancel_and_wait(&svc->ps_rfcheck_sched);
ds_cont_svc_step_down(svc->ps_cont_svc);
DS_POOL_NOTE_PRINT(DF_UUID": rank %u no longer pool service leader "DF_U64"\n",
DP_UUID(svc->ps_uuid), rank, svc->ps_rsvc.s_term);
} else {
DS_POOL_NOTE_PRINT(DF_UUID": rank %u no longer pool service leader "DF_U64
" with error: "DF_RC"\n", DP_UUID(svc->ps_uuid), rank,
svc->ps_rsvc.s_term, DP_RC(svc->ps_error));
svc->ps_error = 0;
knard38 marked this conversation as resolved.
Show resolved Hide resolved
}
}

static void
Expand Down Expand Up @@ -2476,12 +2494,13 @@ ds_pool_rsvc_class_unregister(void)
ds_rsvc_class_unregister(DS_RSVC_CLASS_POOL);
}

/* Use pool_svc_lookup_leader instead. */
static int
pool_svc_lookup(uuid_t uuid, struct pool_svc **svcp)
{
struct ds_rsvc *rsvc;
d_iov_t id;
int rc;
d_iov_t id;
int rc;

d_iov_set(&id, uuid, sizeof(uuid_t));
rc = ds_rsvc_lookup(DS_RSVC_CLASS_POOL, &id, &rsvc);
Expand All @@ -2492,25 +2511,48 @@ pool_svc_lookup(uuid_t uuid, struct pool_svc **svcp)
}

static int
pool_svc_lookup_leader(uuid_t uuid, struct pool_svc **svcp,
struct rsvc_hint *hint)
pool_svc_lookup_leader(uuid_t uuid, struct pool_svc **svcp, struct rsvc_hint *hint)
{
struct ds_rsvc *rsvc;
d_iov_t id;
int rc;
struct ds_rsvc *rsvc;
d_iov_t id;
struct pool_svc *svc;
int rc;

rc = ds_pool_failed_lookup(uuid);
if (rc) {
D_ERROR(DF_UUID": failed to start: "DF_RC"\n",
DP_UUID(uuid), DP_RC(rc));
if (rc != 0) {
D_DEBUG(DB_MD, DF_UUID ": failed: " DF_RC "\n", DP_UUID(uuid), DP_RC(rc));
return -DER_NO_SERVICE;
}

d_iov_set(&id, uuid, sizeof(uuid_t));
rc = ds_rsvc_lookup_leader(DS_RSVC_CLASS_POOL, &id, &rsvc, hint);
if (rc != 0)
return rc;
*svcp = pool_svc_obj(rsvc);

/*
* The svc->ps_error field stores a persistent error, usually in the DB
* data, if any. For instance, "the layout of the DB data is
* incompatible with the software version". This mustn't be a replica
* error, because there may be a majorty of replicas working. We let the
* PS step up with this error so that it can serve all requests by
* returning the error. PS clients therefore get a quick error response
* instead of a timeout.
*
* Checking svc->ps_error here without confirming our leadership via
* rdb_raft_verify_leadership may cause some requests to get
* unnecessary errors, if there is a newer leader whose svc->ps_error
* is zero and is able to serve those requests. Such a state won't last
* much longer than an election timeout though, because we will step
* down due to inability to maintain a majority lease.
*/
svc = pool_svc_obj(rsvc);
if (svc->ps_error != 0) {
rc = svc->ps_error;
ds_rsvc_put_leader(rsvc);
return rc;
}

*svcp = svc;
return 0;
}

Expand Down Expand Up @@ -2582,6 +2624,7 @@ int ds_pool_failed_add(uuid_t uuid, int rc)
uuid_copy(psf->psf_uuid, uuid);
psf->psf_error = rc;
d_list_add_tail(&psf->psf_link, &pool_svc_failed_list);
DL_ERROR(rc, DF_UUID ": added to list of failed pools", DP_UUID(uuid));
out:
D_RWLOCK_UNLOCK(&psfl_rwlock);
return ret;
Expand All @@ -2596,6 +2639,8 @@ void ds_pool_failed_remove(uuid_t uuid)
d_list_for_each_entry_safe(psf, tmp, &pool_svc_failed_list, psf_link) {
if (uuid_compare(psf->psf_uuid, uuid) == 0) {
d_list_del_init(&psf->psf_link);
DL_INFO(psf->psf_error, DF_UUID ": removed from list of failed pools",
DP_UUID(uuid));
D_FREE(psf);
break;
}
Expand Down Expand Up @@ -3431,7 +3476,7 @@ ds_pool_svc_ops_lookup(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid
int rc = 0;

if (!svc) {
rc = pool_svc_lookup(pool_uuid, &svc);
rc = pool_svc_lookup_leader(pool_uuid, &svc, NULL /* hint */);
if (rc != 0) {
DL_ERROR(rc, "pool_svc lookup failed");
goto out;
Expand Down Expand Up @@ -3479,7 +3524,7 @@ ds_pool_svc_ops_lookup(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid
D_FREE(op_key_enc.iov_buf);
out_svc:
if (need_put_svc)
pool_svc_put(svc);
pool_svc_put_leader(svc);
out:
if (rc == 0) {
*is_dup = duplicate;
Expand Down Expand Up @@ -3526,7 +3571,7 @@ ds_pool_svc_ops_save(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid_t
int rc = 0;

if (!svc) {
rc = pool_svc_lookup(pool_uuid, &svc);
rc = pool_svc_lookup_leader(pool_uuid, &svc, NULL /* hint */);
if (rc != 0) {
DL_ERROR(rc, "pool_svc lookup failed");
goto out;
Expand Down Expand Up @@ -3588,7 +3633,7 @@ ds_pool_svc_ops_save(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid_t
D_FREE(op_key_enc.iov_buf);
out_svc:
if (need_put_svc)
pool_svc_put(svc);
pool_svc_put_leader(svc);
out:
return rc;
}
Expand Down Expand Up @@ -3681,6 +3726,13 @@ ds_pool_create_handler(crt_rpc_t *rpc)
ABT_rwlock_wrlock(svc->ps_lock);
ds_cont_wrlock_metadata(svc->ps_cont_svc);

if (svc->ps_error != 0) {
DL_ERROR(svc->ps_error, DF_UUID ": encountered pool service leader with error",
DP_UUID(svc->ps_uuid));
rc = svc->ps_error;
goto out_tx;
knard38 marked this conversation as resolved.
Show resolved Hide resolved
}

/* See if the DB has already been initialized. */
d_iov_set(&value, NULL /* buf */, 0 /* size */);
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_map_buffer,
Expand Down
Loading