From 92e22850aa839954edece7aa25949351f2c7c535 Mon Sep 17 00:00:00 2001 From: huanghaibin Date: Sun, 26 Jan 2025 12:13:14 +0800 Subject: [PATCH] [improve](cloud-mow)Add delete bitmap metrics for MS (#47047) When reading or writing the delete bitmap fails, record the failure count in metrics. --- cloud/src/common/bvars.cpp | 5 +++++ cloud/src/common/bvars.h | 2 ++ cloud/src/meta-service/meta_service.cpp | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp index a0b0a2da9c213e..6385fc7c9e815f 100644 --- a/cloud/src/common/bvars.cpp +++ b/cloud/src/common/bvars.cpp @@ -85,6 +85,11 @@ BvarLatencyRecorderWithTag g_bvar_ms_finish_tablet_job("ms", "finish_tablet_job" BvarLatencyRecorderWithTag g_bvar_ms_get_cluster_status("ms", "get_cluster_status"); BvarLatencyRecorderWithTag g_bvar_ms_set_cluster_status("ms", "set_cluster_status"); BvarLatencyRecorderWithTag g_bvar_ms_check_kv("ms", "check_kv"); +bvar::Adder g_bvar_update_delete_bitmap_fail_counter; +bvar::Window > g_bvar_update_delete_bitmap_fail_counter_minute("ms", "update_delete_bitmap_fail", &g_bvar_update_delete_bitmap_fail_counter, 60); +bvar::Adder g_bvar_get_delete_bitmap_fail_counter; +bvar::Window > g_bvar_get_delete_bitmap_fail_counter_minute("ms", "get_delete_bitmap_fail", &g_bvar_get_delete_bitmap_fail_counter, 60); + // recycler's bvars // TODO: use mbvar for per instance, https://github.com/apache/brpc/blob/master/docs/cn/mbvar_c++.md diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h index 93340a6c0d291f..ff1d3520b30dd5 100644 --- a/cloud/src/common/bvars.h +++ b/cloud/src/common/bvars.h @@ -160,6 +160,8 @@ extern BvarLatencyRecorderWithTag g_bvar_ms_get_rl_task_commit_attach; extern BvarLatencyRecorderWithTag g_bvar_ms_reset_rl_progress; extern BvarLatencyRecorderWithTag g_bvar_ms_get_txn_id; extern BvarLatencyRecorderWithTag g_bvar_ms_check_kv; +extern bvar::Adder g_bvar_update_delete_bitmap_fail_counter; +extern bvar::Adder 
g_bvar_get_delete_bitmap_fail_counter; // recycler's bvars extern BvarStatusWithTag g_bvar_recycler_recycle_index_earlest_ts; diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 86fb0f4faed269..a02e13e28f33d7 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1921,6 +1921,7 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont << " put_size=" << txn->put_bytes() << " num_put_keys=" << txn->num_put_keys() << " txn_size=" << txn->approximate_bytes(); msg = ss.str(); + g_bvar_update_delete_bitmap_fail_counter << 1; return; } current_key_count = 0; @@ -1963,6 +1964,7 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont << " delete_bitmap_value=" << current_value_count << " put_size=" << txn->put_bytes() << " num_put_keys=" << txn->num_put_keys() << " txn_size=" << txn->approximate_bytes(); msg = ss.str(); + g_bvar_update_delete_bitmap_fail_counter << 1; return; } LOG(INFO) << "update_delete_bitmap tablet_id=" << tablet_id << " lock_id=" << request->lock_id() @@ -2075,6 +2077,7 @@ void MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* control ss << "internal error, failed to get delete bitmap, internal round=" << round << ", ret=" << err; msg = ss.str(); + g_bvar_get_delete_bitmap_fail_counter << 1; return; } @@ -2118,6 +2121,7 @@ void MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* control << ",exceed max byte"; msg = ss.str(); LOG(WARNING) << msg; + g_bvar_get_delete_bitmap_fail_counter << 1; return; } round++;