-
Notifications
You must be signed in to change notification settings - Fork 306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-16916 container: check inflight open #15682
Changes from all commits
5dafc6b
6f14810
676dc46
8eeafe7
b0dff0f
d581ddf
d1bda9f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/** | ||
* (C) Copyright 2016-2024 Intel Corporation. | ||
* (C) Copyright 2025 Google LLC | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -1607,11 +1608,23 @@ | |
*/ | ||
D_ASSERT(hdl->sch_cont != NULL); | ||
D_ASSERT(hdl->sch_cont->sc_pool != NULL); | ||
|
||
hdl->sch_cont->sc_open++; | ||
if (hdl->sch_cont->sc_open > 1) { | ||
/* If there is an inflight open being stuck, then | ||
* let's retry and wait until it finished. | ||
*/ | ||
if (hdl->sch_cont->sc_open_initializing) { | ||
hdl->sch_cont->sc_open--; | ||
D_GOTO(err_cont, rc = -DER_AGAIN); | ||
} | ||
|
||
if (hdl->sch_cont->sc_open > 1) | ||
goto opened; | ||
/* Only go through if the 1st open succeeds */ | ||
if (hdl->sch_cont->sc_props_fetched) | ||
goto opened; | ||
} | ||
|
||
hdl->sch_cont->sc_open_initializing = 1; | ||
if (ds_pool_restricted(hdl->sch_cont->sc_pool->spc_pool, false)) | ||
goto csum_init; | ||
|
||
|
@@ -1646,6 +1659,8 @@ | |
rc = ds_cont_csummer_init(hdl->sch_cont); | ||
if (rc != 0) | ||
D_GOTO(err_dtx, rc); | ||
|
||
hdl->sch_cont->sc_open_initializing = 0; | ||
} | ||
opened: | ||
if (cont_hdl != NULL) { | ||
|
@@ -1663,6 +1678,7 @@ | |
dtx_cont_close(hdl->sch_cont, true); | ||
|
||
err_cont: | ||
hdl->sch_cont->sc_open_initializing = 0; | ||
if (daos_handle_is_valid(poh)) { | ||
int rc_tmp; | ||
|
||
|
@@ -1750,9 +1766,15 @@ | |
D_DEBUG(DB_TRACE, "open pool/cont/hdl "DF_UUID"/"DF_UUID"/"DF_UUID"\n", | ||
DP_UUID(pool_uuid), DP_UUID(cont_uuid), DP_UUID(cont_hdl_uuid)); | ||
|
||
retry: | ||
rc = ds_pool_thread_collective(pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | | ||
PO_COMP_ST_DOWNOUT, cont_open_one, &arg, 0); | ||
if (rc != 0) | ||
if (rc != 0) { | ||
if (rc == -DER_AGAIN) { | ||
dss_sleep(50); | ||
goto retry; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Busy retry? It may be better to wait for former inflight opening to return and then check again. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a collective operation. which will yield, but I assume wait a few ms might be ok. |
||
} | ||
|
||
/* Once it exclude the target from the pool, since the target | ||
* might still in the cart group, so IV cont open might still | ||
* come to this target, especially if cont open/close will be | ||
|
@@ -1762,6 +1784,7 @@ | |
D_ERROR("open "DF_UUID"/"DF_UUID"/"DF_UUID":"DF_RC"\n", | ||
DP_UUID(pool_uuid), DP_UUID(cont_uuid), | ||
DP_UUID(cont_hdl_uuid), DP_RC(rc)); | ||
} | ||
|
||
return rc; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/* | ||
* (C) Copyright 2015-2024 Intel Corporation. | ||
* (C) Copyright 2025 Google LLC | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -66,20 +67,11 @@ | |
ABT_cond sc_scrub_cond; | ||
ABT_cond sc_rebuild_cond; | ||
ABT_cond sc_fini_cond; | ||
uint32_t sc_dtx_resyncing:1, | ||
sc_dtx_reindex:1, | ||
sc_dtx_reindex_abort:1, | ||
sc_dtx_delay_reset:1, | ||
sc_dtx_registered:1, | ||
sc_props_fetched:1, | ||
sc_stopping:1, | ||
sc_destroying:1, | ||
sc_vos_agg_active:1, | ||
sc_ec_agg_active:1, | ||
/* flag of CONT_CAPA_READ_DATA/_WRITE_DATA disabled */ | ||
sc_rw_disabled:1, | ||
sc_scrubbing:1, | ||
sc_rebuilding:1; | ||
uint32_t sc_dtx_resyncing : 1, sc_dtx_reindex : 1, sc_dtx_reindex_abort : 1, | ||
sc_dtx_delay_reset : 1, sc_dtx_registered : 1, sc_props_fetched : 1, sc_stopping : 1, | ||
sc_destroying : 1, sc_vos_agg_active : 1, sc_ec_agg_active : 1, | ||
/* flag of CONT_CAPA_READ_DATA/_WRITE_DATA disabled */ | ||
sc_rw_disabled : 1, sc_scrubbing : 1, sc_rebuilding : 1, sc_open_initializing : 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the recommended coding style? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is actually only suppose to change 1 line, not sure why it messed up like this, which might due to git-hook sth. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be messed up by clang-format hooks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, clang-format does this. I believe if you use ; instead of comma, you still use same space and don't have the formatting issue There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. verified (though we do this in other places in DAOS)
This prints 16 which means bar and fub are placed together. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in fact, you can make this change without any formatting and clang-format will fix it and make it look nice |
||
uint32_t sc_dtx_batched_gen; | ||
/* Tracks the schedule request for aggregation ULT */ | ||
struct sched_request *sc_agg_req; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This "sc_open_initializing" flag will be cleared when second open return -DER_AGAIN? Why don't we simply use a ABT_Mutex to serialize above open procedures?