Skip to content

Commit

Permalink
fix: zk reconnect in db sdk
Browse files Browse the repository at this point in the history
  • Loading branch information
vagetablechicken committed Dec 2, 2023
1 parent 17b166b commit 5eb5d6c
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 8 deletions.
19 changes: 13 additions & 6 deletions src/sdk/db_sdk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,19 @@ ClusterSDK::~ClusterSDK() {
}

void ClusterSDK::CheckZk() {
if (session_id_ == 0) {
WatchNotify();
} else if (session_id_ != zk_client_->GetSessionTerm()) {
LOG(WARNING) << "session changed, re-watch notify";
WatchNotify();
// ensure that zk client is alive
if (zk_client_->EnsureConnected()) {
if (session_id_ == 0) {
WatchNotify();
} else if (session_id_ != zk_client_->GetSessionTerm()) {
LOG(WARNING) << "session changed, re-watch notify";
WatchNotify();

Check warning on line 204 in src/sdk/db_sdk.cc

View check run for this annotation

Codecov / codecov/patch

src/sdk/db_sdk.cc#L203-L204

Added lines #L203 - L204 were not covered by tests
}
} else {
// log at most once every 5 minutes (every 150th check at the 2s check interval)
LOG_EVERY_N(WARNING, 150) << "zk client is not connected, reconnect later";

Check warning on line 208 in src/sdk/db_sdk.cc

View check run for this annotation

Codecov / codecov/patch

src/sdk/db_sdk.cc#L208

Added line #L208 was not covered by tests
}

pool_.DelayTask(2000, [this] { CheckZk(); });
}

Expand Down Expand Up @@ -383,7 +390,7 @@ bool ClusterSDK::InitTabletClient() {
std::vector<std::string> tablets;
bool ok = zk_client_->GetNodes(tablets);
if (!ok) {
LOG(WARNING) << "fail to get tablet";
LOG(WARNING) << "fail to get tablets from zk";

Check warning on line 393 in src/sdk/db_sdk.cc

View check run for this annotation

Codecov / codecov/patch

src/sdk/db_sdk.cc#L393

Added line #L393 was not covered by tests
return false;
}
std::map<std::string, std::string> real_ep_map;
Expand Down
3 changes: 2 additions & 1 deletion src/sdk/db_sdk.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ class ClusterSDK : public DBSDK {
std::string globalvar_changed_notify_path_;
std::string leader_path_;
std::string taskmanager_leader_path_;

// CheckZk will be called periodically, so we don't need to check zk_client_ before using it
// if failed, just retry
::openmldb::zk::ZkClient* zk_client_;
::baidu::common::ThreadPool pool_;
};
Expand Down
20 changes: 19 additions & 1 deletion src/zk/zk_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void NodeWatcher(zhandle_t* zh, int type, int state, const char* path, void* wat
}

void ItemWatcher(zhandle_t* zh, int type, int state, const char* path, void* watcher_ctx) {
PDLOG(INFO, "node watcher with event type %d, state %d", type, state);
PDLOG(INFO, "item watcher with event type %d, state %d", type, state);
if (zoo_get_context(zh)) {
ZkClient* client = const_cast<ZkClient*>(reinterpret_cast<const ZkClient*>(zoo_get_context(zh)));
std::string path_str(path);
Expand Down Expand Up @@ -583,8 +583,13 @@ void ZkClient::LogEvent(int type, int state, const char* path) {
if (type == ZOO_SESSION_EVENT) {
if (state == ZOO_CONNECTED_STATE) {
Connected();
} else if(state == ZOO_CONNECTING_STATE || state == ZOO_ASSOCIATING_STATE) {

Check warning on line 586 in src/zk/zk_client.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Missing space before ( in if( [whitespace/parens] [5] Raw Output: src/zk/zk_client.cc:586: Missing space before ( in if( [whitespace/parens] [5]

Check warning on line 586 in src/zk/zk_client.cc

View check run for this annotation

Codecov / codecov/patch

src/zk/zk_client.cc#L586

Added line #L586 was not covered by tests
// still connecting: just wait for the next session event

Check warning on line 587 in src/zk/zk_client.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: src/zk/zk_client.cc:587: Should have a space between // and comment [whitespace/comments] [4]
} else if (state == ZOO_EXPIRED_SESSION_STATE) {
connected_ = false;
} else {
// unknown state, should retry
connected_ = false;

Check warning on line 592 in src/zk/zk_client.cc

View check run for this annotation

Codecov / codecov/patch

src/zk/zk_client.cc#L592

Added line #L592 was not covered by tests
}
}
}
Expand Down Expand Up @@ -630,5 +635,18 @@ bool ZkClient::Mkdir(const std::string& path) {
return MkdirNoLock(path);
}

bool ZkClient::EnsureConnected() {

Check warning on line 638 in src/zk/zk_client.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: src/zk/zk_client.cc:638: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
if (!IsConnected()) {
LOG(WARNING) << "reconnect zk";
if (Reconnect()) {
LOG(INFO) << "reconnect zk ok";

Check warning on line 642 in src/zk/zk_client.cc

View check run for this annotation

Codecov / codecov/patch

src/zk/zk_client.cc#L640-L642

Added lines #L640 - L642 were not covered by tests
} else {
LOG(WARNING) << "reconnect zk failed";
return false;

Check warning on line 645 in src/zk/zk_client.cc

View check run for this annotation

Codecov / codecov/patch

src/zk/zk_client.cc#L644-L645

Added lines #L644 - L645 were not covered by tests
}
}
return true;
}

} // namespace zk
} // namespace openmldb
5 changes: 5 additions & 0 deletions src/zk/zk_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ class ZkClient {
// after reconnecting, Register and Watchnodes need to be done again
bool Reconnect();

// ensure that zk client is connected:

Check warning on line 141 in src/zk/zk_client.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3] Raw Output: src/zk/zk_client.h:141: Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3]
// if not, try to reconnect, return false if reconnect failed

Check warning on line 142 in src/zk/zk_client.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3] Raw Output: src/zk/zk_client.h:142: Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3]
// DON'T use zk client if this function returns false

Check warning on line 143 in src/zk/zk_client.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3] Raw Output: src/zk/zk_client.h:143: Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3]
bool EnsureConnected();

Check warning on line 144 in src/zk/zk_client.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] reported by reviewdog 🐶 Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3] Raw Output: src/zk/zk_client.h:144: Weird number of spaces at line-start. Are you using a 2-space indent? [whitespace/indent] [3]

private:
void Connected();

Expand Down

0 comments on commit 5eb5d6c

Please sign in to comment.