Skip to content

Commit

Permalink
DAOS-16878 pool: Reduce unexpected DER_NO_SERVICEs
Browse files Browse the repository at this point in the history
WIP...

Features: pool
Skip-nlt: true
Signed-off-by: Li Wei <[email protected]>
Required-githooks: true
  • Loading branch information
liw committed Jan 7, 2025
1 parent 14bddd2 commit 3c63d43
Showing 1 changed file with 79 additions and 38 deletions.
117 changes: 79 additions & 38 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ struct pool_svc {
rdb_path_t ps_handles; /* pool handle KVS */
rdb_path_t ps_user; /* pool user attributes KVS */
rdb_path_t ps_ops; /* metadata ops KVS */
int ps_error; /* in DB (instead of replica) state */
struct pool_svc_events ps_events;
uint32_t ps_global_version;
int ps_svc_rf;
Expand Down Expand Up @@ -1950,8 +1951,11 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf_out,

rc = ds_pool_svc_load(&tx, svc->ps_uuid, &svc->ps_root, &svc->ps_global_version, &map_buf,
&map_version);
if (rc != 0)
if (rc != 0) {
if (rc == -DER_DF_INCOMPT)
svc->ps_error = rc;
goto out_lock;
}

rc = pool_prop_read(&tx, svc, DAOS_PO_QUERY_PROP_ALL, &prop);
if (rc != 0) {
Expand Down Expand Up @@ -2263,6 +2267,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
d_rank_t rank = dss_self_rank();
int rc;

D_ASSERTF(svc->ps_error == 0, "ps_error: " DF_RC "\n", DP_RC(svc->ps_error));

/*
* If this is the only voting replica, it may have become the leader
* without doing any RPC. The primary group may have yet to be
Expand Down Expand Up @@ -2328,9 +2334,9 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
}

rc = ds_pool_iv_prop_update(svc->ps_pool, prop);
if (rc) {
D_ERROR("ds_pool_iv_prop_update failed: " DF_RC "\n", DP_RC(rc));
D_GOTO(out, rc);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": ds_pool_iv_prop_update failed", DP_UUID(svc->ps_uuid));
goto out;
}

if (!uuid_is_null(svc->ps_pool->sp_srv_cont_hdl)) {
Expand All @@ -2346,11 +2352,10 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
uuid_copy(svc->ps_pool->sp_srv_pool_hdl, pool_hdl_uuid);
}

rc = ds_pool_iv_srv_hdl_update(svc->ps_pool, pool_hdl_uuid,
cont_hdl_uuid);
if (rc) {
D_ERROR("ds_pool_iv_srv_hdl_update failed: " DF_RC "\n", DP_RC(rc));
D_GOTO(out, rc);
rc = ds_pool_iv_srv_hdl_update(svc->ps_pool, pool_hdl_uuid, cont_hdl_uuid);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": ds_pool_iv_srv_hdl_update failed", DP_UUID(svc->ps_uuid));
goto out;
}

/* resume pool upgrade if needed */
Expand Down Expand Up @@ -2385,27 +2390,39 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc)
D_FREE(map_buf);
if (prop != NULL)
daos_prop_free(prop);
if (rc < 0)
ds_pool_failed_add(svc->ps_uuid, rc);
else if (rc == 0)
ds_pool_failed_remove(svc->ps_uuid);
if (svc->ps_error != 0) {
/*
* Step up with the error anyway, so that RPCs to the PS
* receive an error instead of timeouts.
*/
DS_POOL_NOTE_PRINT(DF_UUID": rank %u became pool service leader "DF_U64
" with error: "DF_RC"\n", DP_UUID(svc->ps_uuid), rank,
svc->ps_rsvc.s_term, DP_RC(svc->ps_error));
rc = 0;
}
return rc;
}

static void
pool_svc_step_down_cb(struct ds_rsvc *rsvc)
{
struct pool_svc *svc = pool_svc_obj(rsvc);
d_rank_t rank = dss_self_rank();

pool_svc_step_down_metrics(svc);
fini_events(svc);
sched_cancel_and_wait(&svc->ps_reconf_sched);
sched_cancel_and_wait(&svc->ps_rfcheck_sched);
ds_cont_svc_step_down(svc->ps_cont_svc);
struct pool_svc *svc = pool_svc_obj(rsvc);
d_rank_t rank = dss_self_rank();

DS_POOL_NOTE_PRINT(DF_UUID": rank %u no longer pool service leader "DF_U64"\n",
DP_UUID(svc->ps_uuid), rank, svc->ps_rsvc.s_term);
if (svc->ps_error == 0) {
pool_svc_step_down_metrics(svc);
fini_events(svc);
sched_cancel_and_wait(&svc->ps_reconf_sched);
sched_cancel_and_wait(&svc->ps_rfcheck_sched);
ds_cont_svc_step_down(svc->ps_cont_svc);
DS_POOL_NOTE_PRINT(DF_UUID": rank %u no longer pool service leader "DF_U64"\n",
DP_UUID(svc->ps_uuid), rank, svc->ps_rsvc.s_term);
} else {
DS_POOL_NOTE_PRINT(DF_UUID": rank %u no longer pool service leader "DF_U64
" with error: "DF_RC"\n", DP_UUID(svc->ps_uuid), rank,
svc->ps_rsvc.s_term, DP_RC(svc->ps_error));
svc->ps_error = 0;
}
}

static void
Expand Down Expand Up @@ -2476,12 +2493,13 @@ ds_pool_rsvc_class_unregister(void)
ds_rsvc_class_unregister(DS_RSVC_CLASS_POOL);
}

/* Use pool_svc_lookup_leader instead. */
static int
pool_svc_lookup(uuid_t uuid, struct pool_svc **svcp)
{
struct ds_rsvc *rsvc;
d_iov_t id;
int rc;
d_iov_t id;
int rc;

d_iov_set(&id, uuid, sizeof(uuid_t));
rc = ds_rsvc_lookup(DS_RSVC_CLASS_POOL, &id, &rsvc);
Expand All @@ -2492,25 +2510,40 @@ pool_svc_lookup(uuid_t uuid, struct pool_svc **svcp)
}

static int
pool_svc_lookup_leader(uuid_t uuid, struct pool_svc **svcp,
struct rsvc_hint *hint)
pool_svc_lookup_leader(uuid_t uuid, struct pool_svc **svcp, struct rsvc_hint *hint)
{
struct ds_rsvc *rsvc;
d_iov_t id;
int rc;
struct ds_rsvc *rsvc;
d_iov_t id;
struct pool_svc *svc;
int rc;

rc = ds_pool_failed_lookup(uuid);
if (rc) {
D_ERROR(DF_UUID": failed to start: "DF_RC"\n",
DP_UUID(uuid), DP_RC(rc));
if (rc != 0) {
D_DEBUG(DB_MD, DF_UUID ": failed: " DF_RC "\n", DP_UUID(uuid), DP_RC(rc));
return -DER_NO_SERVICE;
}

d_iov_set(&id, uuid, sizeof(uuid_t));
rc = ds_rsvc_lookup_leader(DS_RSVC_CLASS_POOL, &id, &rsvc, hint);
if (rc != 0)
return rc;
*svcp = pool_svc_obj(rsvc);

/*
* Checking svc->ps_error without confirming our leadership via
* rdb_raft_verify_leadership may cause some requests to get
* unnecessary errors, if there is a newer leader whose svc->ps_error
* is zero and is able to serve those requests. Such a state won't last
* much longer than an election timeout though, because we will step
* down due to inability to maintain a majority lease.
*/
svc = pool_svc_obj(rsvc);
if (svc->ps_error != 0) {
rc = svc->ps_error;
ds_rsvc_put_leader(rsvc);
return rc;
}

*svcp = svc;
return 0;
}

Expand Down Expand Up @@ -2582,6 +2615,7 @@ int ds_pool_failed_add(uuid_t uuid, int rc)
uuid_copy(psf->psf_uuid, uuid);
psf->psf_error = rc;
d_list_add_tail(&psf->psf_link, &pool_svc_failed_list);
DL_ERROR(rc, DF_UUID ": added to list of failed pools", DP_UUID(uuid));
out:
D_RWLOCK_UNLOCK(&psfl_rwlock);
return ret;
Expand All @@ -2596,6 +2630,8 @@ void ds_pool_failed_remove(uuid_t uuid)
d_list_for_each_entry_safe(psf, tmp, &pool_svc_failed_list, psf_link) {
if (uuid_compare(psf->psf_uuid, uuid) == 0) {
d_list_del_init(&psf->psf_link);
DL_INFO(psf->psf_error, DF_UUID ": removed from list of failed pools",
DP_UUID(uuid));
D_FREE(psf);
break;
}
Expand Down Expand Up @@ -3431,7 +3467,7 @@ ds_pool_svc_ops_lookup(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid
int rc = 0;

if (!svc) {
rc = pool_svc_lookup(pool_uuid, &svc);
rc = pool_svc_lookup_leader(pool_uuid, &svc, NULL /* hint */);
if (rc != 0) {
DL_ERROR(rc, "pool_svc lookup failed");
goto out;
Expand Down Expand Up @@ -3479,7 +3515,7 @@ ds_pool_svc_ops_lookup(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid
D_FREE(op_key_enc.iov_buf);
out_svc:
if (need_put_svc)
pool_svc_put(svc);
pool_svc_put_leader(svc);
out:
if (rc == 0) {
*is_dup = duplicate;
Expand Down Expand Up @@ -3526,7 +3562,7 @@ ds_pool_svc_ops_save(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid_t
int rc = 0;

if (!svc) {
rc = pool_svc_lookup(pool_uuid, &svc);
rc = pool_svc_lookup_leader(pool_uuid, &svc, NULL /* hint */);
if (rc != 0) {
DL_ERROR(rc, "pool_svc lookup failed");
goto out;
Expand Down Expand Up @@ -3588,7 +3624,7 @@ ds_pool_svc_ops_save(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid_t
D_FREE(op_key_enc.iov_buf);
out_svc:
if (need_put_svc)
pool_svc_put(svc);
pool_svc_put_leader(svc);
out:
return rc;
}
Expand Down Expand Up @@ -3681,6 +3717,11 @@ ds_pool_create_handler(crt_rpc_t *rpc)
ABT_rwlock_wrlock(svc->ps_lock);
ds_cont_wrlock_metadata(svc->ps_cont_svc);

if (svc->ps_error != 0) {
rc = svc->ps_error;
goto out_tx;
}

/* See if the DB has already been initialized. */
d_iov_set(&value, NULL /* buf */, 0 /* size */);
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_map_buffer,
Expand Down

0 comments on commit 3c63d43

Please sign in to comment.