From 7674a3c689751965da83339187373ef8e97ce31b Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Thu, 9 May 2024 18:45:55 +0800 Subject: [PATCH 01/17] feat: iot table --- .../ddl/CREATE_INDEX_STATEMENT.md | 6 + .../ddl/CREATE_TABLE_STATEMENT.md | 23 +- hybridse/include/node/node_manager.h | 4 +- hybridse/include/node/sql_node.h | 13 +- hybridse/src/node/node_manager.cc | 9 +- hybridse/src/node/plan_node_test.cc | 2 +- hybridse/src/node/sql_node_test.cc | 2 +- hybridse/src/plan/planner.cc | 3 +- hybridse/src/planv2/ast_node_converter.cc | 37 +- hybridse/src/sdk/codec_sdk.cc | 2 +- src/base/status.h | 2 +- src/base/status_util.h | 7 + src/catalog/distribute_iterator.cc | 2 +- src/client/tablet_client.cc | 150 ++-- src/client/tablet_client.h | 16 +- src/cmd/display.h | 3 +- src/cmd/sql_cmd_test.cc | 16 +- src/codec/field_codec.h | 4 +- src/flags.cc | 4 + src/nameserver/name_server_impl.cc | 20 +- src/proto/common.proto | 7 + src/proto/tablet.proto | 1 + src/sdk/node_adapter.cc | 7 + src/sdk/option.h | 14 +- src/sdk/sql_cluster_router.cc | 363 ++++++++- src/storage/index_organized_table.cc | 717 ++++++++++++++++++ src/storage/index_organized_table.h | 77 ++ src/storage/iot_segment.cc | 411 ++++++++++ src/storage/iot_segment.h | 304 ++++++++ src/storage/iot_segment_test.cc | 517 +++++++++++++ src/storage/mem_table.cc | 72 +- src/storage/mem_table.h | 25 +- src/storage/mem_table_iterator.cc | 7 +- src/storage/mem_table_iterator.h | 24 +- src/storage/schema.cc | 25 +- src/storage/schema.h | 35 +- src/storage/segment.cc | 32 +- src/storage/segment.h | 29 +- src/storage/table.cc | 4 +- src/storage/table_iterator_test.cc | 6 +- src/tablet/tablet_impl.cc | 214 ++++-- tools/tool.py | 11 +- 42 files changed, 2933 insertions(+), 294 deletions(-) create mode 100644 src/storage/index_organized_table.cc create mode 100644 src/storage/index_organized_table.h create mode 100644 src/storage/iot_segment.cc create mode 100644 src/storage/iot_segment.h create mode 100644 src/storage/iot_segment_test.cc diff --git a/docs/zh/openmldb_sql/ddl/CREATE_INDEX_STATEMENT.md b/docs/zh/openmldb_sql/ddl/CREATE_INDEX_STATEMENT.md index abfa201ab29..0ed5661e993 100644 --- a/docs/zh/openmldb_sql/ddl/CREATE_INDEX_STATEMENT.md +++ b/docs/zh/openmldb_sql/ddl/CREATE_INDEX_STATEMENT.md @@ -55,6 +55,12 @@ CREATE INDEX index3 ON t5 (col3) OPTIONS (ts=ts1, ttl_type=absolute, ttl=30d); ``` 关于`TTL`和`TTL_TYPE`的更多信息参考[这里](./CREATE_TABLE_STATEMENT.md) +IOT表创建不同类型的索引,不指定type创建Covering索引,指定type为secondary,创建Secondary索引: +```SQL +CREATE INDEX index_s ON t5 (col3) OPTIONS (ts=ts1, ttl_type=absolute, ttl=30d, type=secondary); +``` +同keys和ts列的索引被视为同一个索引,不要尝试建立不同type的同一索引。 + ## 相关SQL [DROP INDEX](./DROP_INDEX_STATEMENT.md) diff --git a/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md b/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md index 750b198d897..895cd5f43f6 100644 --- a/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md +++ b/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md @@ -223,7 +223,7 @@ IndexOption ::= | 配置项 | 描述 | expr | 用法示例 | |------------|---------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------| -| `KEY` | 索引列(必选)。OpenMLDB支持单列索引,也支持联合索引。当`KEY`后只有一列时,仅在该列上建立索引。当`KEY`后有多列时,建立这几列的联合索引:将多列按顺序拼接成一个字符串作为索引。 | 支持单列索引:`ColumnName`
或联合索引:<br/>`(ColumnName (, ColumnName)* ) ` | 单列索引:`INDEX(KEY=col1)`<br/>联合索引:`INDEX(KEY=(col1, col2))` |
+| `KEY/CKEY/SKEY` | 索引列(必选)。OpenMLDB支持单列索引,也支持联合索引。当`KEY`后只有一列时,仅在该列上建立索引。当`KEY`后有多列时,建立这几列的联合索引:将多列按顺序拼接成一个字符串作为索引。多KEY使用见[Index-Organized Table(IOT)](#index-orgnized-tableiot)。 | 支持单列索引:`ColumnName`<br/>或联合索引:<br/>`(ColumnName (, ColumnName)* ) ` | 单列索引:`INDEX(KEY=col1)`<br/>联合索引:`INDEX(KEY=(col1, col2))` |
 | `TS` | 索引时间列(可选)。同一个索引上的数据将按照时间索引列排序。当不显式配置`TS`时,使用数据插入的时间戳作为索引时间。时间列的类型只能为BigInt或者Timestamp | `ColumnName` | `INDEX(KEY=col1, TS=std_time)`。索引列为col1,col1相同的数据行按std_time排序。 |
 | `TTL_TYPE` | 淘汰规则(可选)。包括四种类型,当不显式配置`TTL_TYPE`时,默认使用`ABSOLUTE`过期配置。 | 支持的expr如下:`ABSOLUTE`<br/>`LATEST`<br/>`ABSORLAT`<br/>`ABSANDLAT`。 | 具体用法可以参考下文“TTL和TTL_TYPE的配置细则” |
 | `TTL` | 最大存活时间/条数(可选)。依赖于`TTL_TYPE`,不同的`TTL_TYPE`有不同的`TTL` 配置方式。当不显式配置`TTL`时,`TTL=0`,表示不设置淘汰规则,OpenMLDB将不会淘汰记录。 | 支持数值:`int_literal`<br/>或数值带时间单位(`S,M,H,D`):`interval_literal`
或元组形式:`( interval_literal , int_literal )` |具体用法可以参考下文“TTL和TTL_TYPE的配置细则” | @@ -240,6 +240,27 @@ IndexOption ::= ```{note} 最大过期时间和最大存活条数的限制,是出于性能考虑。如果你一定要配置更大的TTL值,可先创建表时临时使用合规的TTL值,然后使用nameserver的UpdateTTL接口来调整到所需的值(可无视max限制),生效需要经过一个gc时间;或者,调整nameserver配置`absolute_ttl_max`和`latest_ttl_max`,重启生效后再创建表。 ``` +#### Index-Orgnized Table(IOT) + +索引使用KEY设置时创建Covering索引,在OpenMLDB中Covering索引存储完整的数据行,也因此占用内存较多。如果希望内存占用更低,同时允许性能损失,可以使用IOT表。IOT表中可以建三种类型的索引: +- `CKEY`:Clustered索引,存完整数据行。配置的CKEY+TS用于唯一标识一行数据,INSERT重复主键时将更新数据(会触发所有索引上的删除旧数据,再INSERT新数据,性能会有损失)。也可只使用CKEY,不配置TS,CKEY唯一标识一行数据。查询到此索引的性能无损失。 +- `SKEY`:Secondary索引,存主键。不配置TS时,同SKEY下按插入时间排序。查询时先在Secondary索引中找到对应主键值,再根据主键查数据,查询性能有损失。 +- `KEY`:Covering索引,存完整数据行。不配置TS时,同KEY下按插入时间排序。查询到此索引的性能无损失。 + +创建IOT表,第一个索引必须是唯一一个Clustered索引,其他索引可选。暂不支持调整Clustered索引的顺序。 + +```sql +CREATE TABLE iot (c1 int64, c2 int64, c3 int64, INDEX(ckey=c1, ts=c2)); -- 一个Clustered索引 +CREATE TABLE iot (c1 int64, c2 int64, c3 int64, INDEX(ckey=c1), INDEX(skey=c2)); -- 一个Clustered索引和一个Secondary索引 +CREATE TABLE iot (c1 int64, c2 int64, c3 int64, INDEX(ckey=c1), INDEX(skey=c2), INDEX(key=c3)); -- 一个Clustered索引、一个Secondary索引和一个Covering索引 +``` + +IOT各个索引的TTL与普通表的不同点是,IOT Clustered索引的ttl淘汰,将触发其他索引的删除操作,而Secondary索引和Covering索引的ttl淘汰,只会删除自身索引中的数据,不会触发其他索引的删除操作。通常来讲,除非有必要让Secondary和Covering索引更加节约内存,可以只设置Clustered索引的ttl,不设置Secondary和Covering索引的ttl。 + +##### 注意事项 + +- IOT表不可以并发写入相同主键的多条数据,可能出现冲突,至少一条数据会写入失败。IOT表中已存在的相同主键的数据不需要额外处理,将会被覆盖。为了不用修复导入,请在导入前做好数据清洗,对导入数据中相同主键的数据进行去重。(覆盖会出触发所有索引中的删除,单线程写入效率也非常低,所以并不推荐单线程导入。) +- #### Example **示例1:创建一张带单列索引的表** diff --git a/hybridse/include/node/node_manager.h b/hybridse/include/node/node_manager.h index 9fc217d6f82..bc29b484f16 100644 --- a/hybridse/include/node/node_manager.h +++ b/hybridse/include/node/node_manager.h @@ -173,8 +173,8 @@ class NodeManager { SqlNode *MakeColumnIndexNode(SqlNodeList *keys, SqlNode *ts, SqlNode *ttl, SqlNode *version); SqlNode *MakeColumnIndexNode(SqlNodeList *index_item_list); - SqlNode *MakeIndexKeyNode(const std::string &key); - SqlNode *MakeIndexKeyNode(const std::vector &keys); + SqlNode *MakeIndexKeyNode(const std::string &key, const std::string &type); + SqlNode *MakeIndexKeyNode(const std::vector &keys, const std::string &type); SqlNode *MakeIndexTsNode(const std::string &ts); SqlNode *MakeIndexTTLNode(ExprListNode *ttl_expr); SqlNode *MakeIndexTTLTypeNode(const std::string &ttl_type); diff --git a/hybridse/include/node/sql_node.h b/hybridse/include/node/sql_node.h index 96ea7a94163..2a2841bfa84 100644 --- a/hybridse/include/node/sql_node.h +++ b/hybridse/include/node/sql_node.h @@ -2084,14 +2084,19 @@ class CreateStmt : public SqlNode { class IndexKeyNode : public SqlNode { public: IndexKeyNode() : SqlNode(kIndexKey, 0, 0) {} - explicit IndexKeyNode(const std::string &key) : SqlNode(kIndexKey, 0, 0), key_({key}) {} - explicit IndexKeyNode(const std::vector &keys) : SqlNode(kIndexKey, 0, 0), key_(keys) {} + explicit IndexKeyNode(const std::string &key, const std::string &type) + : SqlNode(kIndexKey, 0, 0), key_({key}), index_type_(type) {} + explicit IndexKeyNode(const std::vector &keys, const std::string &type) + : SqlNode(kIndexKey, 0, 0), key_(keys), index_type_(type) {} ~IndexKeyNode() {} void AddKey(const std::string &key) { key_.push_back(key); } + void SetIndexType(const std::string &type) { index_type_ = type; } std::vector &GetKey() { return key_; } + std::string &GetIndexType() { return index_type_; } private: std::vector key_; + std::string index_type_ = "key"; }; class IndexVersionNode : public 
SqlNode { public: @@ -2145,6 +2150,7 @@ class ColumnIndexNode : public SqlNode { public: ColumnIndexNode() : SqlNode(kColumnIndex, 0, 0), + index_type_("key"), ts_(""), version_(""), version_count_(0), @@ -2155,6 +2161,8 @@ class ColumnIndexNode : public SqlNode { std::vector &GetKey() { return key_; } void SetKey(const std::vector &key) { key_ = key; } + void SetIndexType(const std::string &type) { index_type_ = type; } + std::string &GetIndexType() { return index_type_; } std::string GetTs() const { return ts_; } @@ -2183,6 +2191,7 @@ class ColumnIndexNode : public SqlNode { private: std::vector key_; + std::string index_type_; std::string ts_; std::string version_; int version_count_; diff --git a/hybridse/src/node/node_manager.cc b/hybridse/src/node/node_manager.cc index ffa1fe2092f..91936235000 100644 --- a/hybridse/src/node/node_manager.cc +++ b/hybridse/src/node/node_manager.cc @@ -451,6 +451,7 @@ SqlNode *NodeManager::MakeColumnIndexNode(SqlNodeList *index_item_list) { switch (node_ptr->GetType()) { case kIndexKey: index_ptr->SetKey(dynamic_cast(node_ptr)->GetKey()); + index_ptr->SetIndexType(dynamic_cast(node_ptr)->GetIndexType()); break; case kIndexTs: index_ptr->SetTs(dynamic_cast(node_ptr)->GetColumnName()); @@ -649,12 +650,12 @@ FnParaNode *NodeManager::MakeFnParaNode(const std::string &name, const TypeNode ::hybridse::node::FnParaNode *para_node = new ::hybridse::node::FnParaNode(expr_id); return RegisterNode(para_node); } -SqlNode *NodeManager::MakeIndexKeyNode(const std::string &key) { - SqlNode *node_ptr = new IndexKeyNode(key); +SqlNode *NodeManager::MakeIndexKeyNode(const std::string &key, const std::string &type) { + SqlNode *node_ptr = new IndexKeyNode(key, type); return RegisterNode(node_ptr); } -SqlNode *NodeManager::MakeIndexKeyNode(const std::vector &keys) { - SqlNode *node_ptr = new IndexKeyNode(keys); +SqlNode *NodeManager::MakeIndexKeyNode(const std::vector &keys, const std::string &type) { + SqlNode *node_ptr = new IndexKeyNode(keys, type); return RegisterNode(node_ptr); } SqlNode *NodeManager::MakeIndexTsNode(const std::string &ts) { diff --git a/hybridse/src/node/plan_node_test.cc b/hybridse/src/node/plan_node_test.cc index aac111f8bf3..68eb0349a71 100644 --- a/hybridse/src/node/plan_node_test.cc +++ b/hybridse/src/node/plan_node_test.cc @@ -228,7 +228,7 @@ TEST_F(PlanNodeTest, MultiPlanNodeTest) { TEST_F(PlanNodeTest, ExtractColumnsAndIndexsTest) { SqlNodeList *index_items = manager_->MakeNodeList(); - index_items->PushBack(manager_->MakeIndexKeyNode("col4")); + index_items->PushBack(manager_->MakeIndexKeyNode("col4", "key")); index_items->PushBack(manager_->MakeIndexTsNode("col5")); ColumnIndexNode *index_node = dynamic_cast(manager_->MakeColumnIndexNode(index_items)); index_node->SetName("index1"); diff --git a/hybridse/src/node/sql_node_test.cc b/hybridse/src/node/sql_node_test.cc index 67bb861a812..c67a21b31d7 100644 --- a/hybridse/src/node/sql_node_test.cc +++ b/hybridse/src/node/sql_node_test.cc @@ -666,7 +666,7 @@ TEST_F(SqlNodeTest, IndexVersionNodeTest) { TEST_F(SqlNodeTest, CreateIndexNodeTest) { SqlNodeList *index_items = node_manager_->MakeNodeList(); - index_items->PushBack(node_manager_->MakeIndexKeyNode("col4")); + index_items->PushBack(node_manager_->MakeIndexKeyNode("col4", "key")); index_items->PushBack(node_manager_->MakeIndexTsNode("col5")); ColumnIndexNode *index_node = dynamic_cast(node_manager_->MakeColumnIndexNode(index_items)); CreatePlanNode *node = node_manager_->MakeCreateTablePlanNode( diff --git a/hybridse/src/plan/planner.cc 
b/hybridse/src/plan/planner.cc index b2a57b4128c..f97cb944c4d 100644 --- a/hybridse/src/plan/planner.cc +++ b/hybridse/src/plan/planner.cc @@ -1123,7 +1123,7 @@ bool Planner::ExpandCurrentHistoryWindow(std::vector index_names; @@ -1183,7 +1183,6 @@ base::Status Planner::TransformTableDef(const std::string &table_name, const Nod case node::kColumnIndex: { node::ColumnIndexNode *column_index = static_cast(column_desc); - if (column_index->GetName().empty()) { column_index->SetName(PlanAPI::GenerateName("INDEX", table->indexes_size())); } diff --git a/hybridse/src/planv2/ast_node_converter.cc b/hybridse/src/planv2/ast_node_converter.cc index a8453e1221c..cd9332f908b 100644 --- a/hybridse/src/planv2/ast_node_converter.cc +++ b/hybridse/src/planv2/ast_node_converter.cc @@ -1583,7 +1583,7 @@ base::Status ConvertColumnIndexNode(const zetasql::ASTIndexDefinition* ast_def_n } // case entry->name() -// "key" -> IndexKeyNode +// "key"/"ckey"/"skey" -> IndexKeyNode // "ts" -> IndexTsNode // "ttl" -> IndexTTLNode // "ttl_type" -> IndexTTLTypeNode @@ -1592,14 +1592,13 @@ base::Status ConvertIndexOption(const zetasql::ASTOptionsEntry* entry, node::Nod node::SqlNode** output) { auto name = entry->name()->GetAsString(); absl::string_view name_v(name); - if (absl::EqualsIgnoreCase("key", name_v)) { + if (absl::EqualsIgnoreCase("key", name_v) || absl::EqualsIgnoreCase("ckey", name_v) || absl::EqualsIgnoreCase("skey", name_v)) { switch (entry->value()->node_kind()) { case zetasql::AST_PATH_EXPRESSION: { std::string column_name; CHECK_STATUS( AstPathExpressionToString(entry->value()->GetAsOrNull(), &column_name)); - *output = node_manager->MakeIndexKeyNode(column_name); - + *output = node_manager->MakeIndexKeyNode(column_name, absl::AsciiStrToLower(name_v)); return base::Status::OK(); } case zetasql::AST_STRUCT_CONSTRUCTOR_WITH_PARENS: { @@ -1617,7 +1616,7 @@ base::Status ConvertIndexOption(const zetasql::ASTOptionsEntry* entry, node::Nod ast_struct_expr->field_expression(0)->GetAsOrNull(), &key_str)); node::IndexKeyNode* index_keys = - dynamic_cast(node_manager->MakeIndexKeyNode(key_str)); + dynamic_cast(node_manager->MakeIndexKeyNode(key_str, absl::AsciiStrToLower(name_v))); for (int i = 1; i < field_expr_len; ++i) { std::string key; @@ -1628,7 +1627,6 @@ base::Status ConvertIndexOption(const zetasql::ASTOptionsEntry* entry, node::Nod index_keys->AddKey(key); } *output = index_keys; - return base::Status::OK(); } default: { @@ -2166,13 +2164,34 @@ base::Status ConvertCreateIndexStatement(const zetasql::ASTCreateIndexStatement* keys.push_back(path.back()); } node::SqlNodeList* index_node_list = node_manager->MakeNodeList(); - - node::SqlNode* index_key_node = node_manager->MakeIndexKeyNode(keys); + // extract index type from options + std::string index_type{"key"}; + if (root->options_list() != nullptr) { + for (const auto option : root->options_list()->options_entries()) { + if (auto name = option->name()->GetAsString(); absl::EqualsIgnoreCase(name, "type")) { + CHECK_TRUE(option->value()->node_kind() == zetasql::AST_PATH_EXPRESSION, common::kSqlAstError, + "Invalid index type, should be path expression"); + std::string type_name; + CHECK_STATUS( + AstPathExpressionToString(option->value()->GetAsOrNull(), &type_name)); + if (absl::EqualsIgnoreCase(type_name, "secondary")) { + index_type = "skey"; + } else if (!absl::EqualsIgnoreCase(type_name, "covering")) { + FAIL_STATUS(common::kSqlAstError, "Invalid index type: ", type_name); + } + } + } + } + node::SqlNode* index_key_node = 
node_manager->MakeIndexKeyNode(keys, index_type); index_node_list->PushBack(index_key_node); if (root->options_list() != nullptr) { for (const auto option : root->options_list()->options_entries()) { + // ignore type + if (auto name = option->name()->GetAsString(); absl::EqualsIgnoreCase(name, "type")) { + continue; + } node::SqlNode* node = nullptr; - CHECK_STATUS(ConvertIndexOption(option, node_manager, &node)); + CHECK_STATUS(ConvertIndexOption(option, node_manager, &node)); // option set secondary index type if (node != nullptr) { // NOTE: unhandled option will return OK, but node is not set index_node_list->PushBack(node); diff --git a/hybridse/src/sdk/codec_sdk.cc b/hybridse/src/sdk/codec_sdk.cc index 9b910dd28cd..c09216b2600 100644 --- a/hybridse/src/sdk/codec_sdk.cc +++ b/hybridse/src/sdk/codec_sdk.cc @@ -73,7 +73,7 @@ bool RowIOBufView::Reset(const butil::IOBuf& buf) { return false; } str_addr_length_ = codec::GetAddrLength(size_); - DLOG(INFO) << "size " << size_ << " addr length " << str_addr_length_; + DLOG(INFO) << "size " << size_ << " addr length " << (unsigned int)str_addr_length_; return true; } diff --git a/src/base/status.h b/src/base/status.h index c7e5ec75198..3bb29128cc4 100644 --- a/src/base/status.h +++ b/src/base/status.h @@ -190,7 +190,7 @@ enum ReturnCode { }; struct Status { - Status(int code_i, std::string msg_i) : code(code_i), msg(msg_i) {} + Status(int code_i, const std::string& msg_i) : code(code_i), msg(msg_i) {} Status() : code(ReturnCode::kOk), msg("ok") {} inline bool OK() const { return code == ReturnCode::kOk; } inline const std::string& GetMsg() const { return msg; } diff --git a/src/base/status_util.h b/src/base/status_util.h index 1d0db238d61..e0bd5758304 100644 --- a/src/base/status_util.h +++ b/src/base/status_util.h @@ -161,6 +161,13 @@ LOG(WARNING) << "Status: " << _s->ToString(); \ } while (0) +#define APPEND_AND_WARN(s, msg) \ + do { \ + ::hybridse::sdk::Status* _s = (s); \ + _s->Append((msg)); \ + LOG(WARNING) << "Status: " << _s->ToString(); \ + } while (0) + /// @brief s.msg += append_str, and warn it #define CODE_APPEND_AND_WARN(s, code, msg) \ do { \ diff --git a/src/catalog/distribute_iterator.cc b/src/catalog/distribute_iterator.cc index 032d3ec75f2..519dec5f2fa 100644 --- a/src/catalog/distribute_iterator.cc +++ b/src/catalog/distribute_iterator.cc @@ -155,7 +155,7 @@ bool FullTableIterator::NextFromRemote() { } } else { kv_it_ = iter->second->Traverse(tid_, cur_pid_, "", "", 0, FLAGS_traverse_cnt_limit, false, 0, count); - DLOG(INFO) << "count " << count; + DVLOG(1) << "count " << count; } if (kv_it_ && kv_it_->Valid()) { last_pk_ = kv_it_->GetLastPK(); diff --git a/src/client/tablet_client.cc b/src/client/tablet_client.cc index a1dd925fcde..3b058bfd92c 100644 --- a/src/client/tablet_client.cc +++ b/src/client/tablet_client.cc @@ -73,8 +73,7 @@ bool TabletClient::Query(const std::string& db, const std::string& sql, const st } bool TabletClient::Query(const std::string& db, const std::string& sql, - const std::vector& parameter_types, - const std::string& parameter_row, + const std::vector& parameter_types, const std::string& parameter_row, brpc::Controller* cntl, ::openmldb::api::QueryResponse* response, const bool is_debug) { if (cntl == NULL || response == NULL) return false; ::openmldb::api::QueryRequest request; @@ -163,7 +162,7 @@ base::Status TabletClient::TruncateTable(uint32_t tid, uint32_t pid) { request.set_tid(tid); request.set_pid(pid); if (!client_.SendRequest(&::openmldb::api::TabletServer_Stub::TruncateTable, &request, 
&response, - FLAGS_request_timeout_ms, 1)) { + FLAGS_request_timeout_ms, 1)) { return {base::ReturnCode::kRPCError, "send request failed!"}; } else if (response.code() == 0) { return {}; @@ -177,7 +176,7 @@ base::Status TabletClient::CreateTable(const ::openmldb::api::TableMeta& table_m table_meta_ptr->CopyFrom(table_meta); ::openmldb::api::CreateTableResponse response; if (!client_.SendRequest(&::openmldb::api::TabletServer_Stub::CreateTable, &request, &response, - FLAGS_request_timeout_ms * 2, 1)) { + FLAGS_request_timeout_ms * 2, 1)) { return {base::ReturnCode::kRPCError, "send request failed!"}; } else if (response.code() == 0) { return {}; @@ -206,9 +205,8 @@ bool TabletClient::UpdateTableMetaForAddField(uint32_t tid, const std::vector>& dimensions, - int memory_usage_limit, bool put_if_absent) { - + const std::vector>& dimensions, int memory_usage_limit, + bool put_if_absent, bool check_exists) { ::google::protobuf::RepeatedPtrField<::openmldb::api::Dimension> pb_dimensions; for (size_t i = 0; i < dimensions.size(); i++) { ::openmldb::api::Dimension* d = pb_dimensions.Add(); @@ -216,12 +214,12 @@ base::Status TabletClient::Put(uint32_t tid, uint32_t pid, uint64_t time, const d->set_idx(dimensions[i].second); } - return Put(tid, pid, time, base::Slice(value), &pb_dimensions, memory_usage_limit, put_if_absent); + return Put(tid, pid, time, base::Slice(value), &pb_dimensions, memory_usage_limit, put_if_absent, check_exists); } base::Status TabletClient::Put(uint32_t tid, uint32_t pid, uint64_t time, const base::Slice& value, - ::google::protobuf::RepeatedPtrField<::openmldb::api::Dimension>* dimensions, - int memory_usage_limit, bool put_if_absent) { + ::google::protobuf::RepeatedPtrField<::openmldb::api::Dimension>* dimensions, + int memory_usage_limit, bool put_if_absent, bool check_exists) { ::openmldb::api::PutRequest request; if (memory_usage_limit < 0 || memory_usage_limit > 100) { return {base::ReturnCode::kError, absl::StrCat("invalid memory_usage_limit ", memory_usage_limit)}; @@ -234,9 +232,10 @@ base::Status TabletClient::Put(uint32_t tid, uint32_t pid, uint64_t time, const request.set_pid(pid); request.mutable_dimensions()->Swap(dimensions); request.set_put_if_absent(put_if_absent); + request.set_check_exists(check_exists); ::openmldb::api::PutResponse response; - auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::Put, - &request, &response, FLAGS_request_timeout_ms, 1); + auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::Put, &request, &response, + FLAGS_request_timeout_ms, 1); if (!st.OK()) { return st; } @@ -244,7 +243,7 @@ base::Status TabletClient::Put(uint32_t tid, uint32_t pid, uint64_t time, const } base::Status TabletClient::Put(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t time, - const std::string& value) { + const std::string& value) { ::openmldb::api::PutRequest request; auto dim = request.add_dimensions(); dim->set_key(pk); @@ -254,8 +253,8 @@ base::Status TabletClient::Put(uint32_t tid, uint32_t pid, const std::string& pk request.set_tid(tid); request.set_pid(pid); ::openmldb::api::PutResponse response; - auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::Put, - &request, &response, FLAGS_request_timeout_ms, 1); + auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::Put, &request, &response, + FLAGS_request_timeout_ms, 1); if (!st.OK()) { return st; } @@ -368,7 +367,7 @@ base::Status TabletClient::LoadTable(const std::string& name, uint32_t tid, uint } base::Status 
TabletClient::LoadTableInternal(const ::openmldb::api::TableMeta& table_meta, - std::shared_ptr task_info) { + std::shared_ptr task_info) { ::openmldb::api::LoadTableRequest request; ::openmldb::api::TableMeta* cur_table_meta = request.mutable_table_meta(); cur_table_meta->CopyFrom(table_meta); @@ -523,7 +522,7 @@ bool TabletClient::GetManifest(uint32_t tid, uint32_t pid, ::openmldb::common::S base::Status TabletClient::GetTableStatus(::openmldb::api::GetTableStatusResponse& response) { ::openmldb::api::GetTableStatusRequest request; auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::GetTableStatus, &request, &response, - FLAGS_request_timeout_ms, 1); + FLAGS_request_timeout_ms, 1); if (st.OK()) { return {response.code(), response.msg()}; } @@ -535,14 +534,14 @@ base::Status TabletClient::GetTableStatus(uint32_t tid, uint32_t pid, ::openmldb } base::Status TabletClient::GetTableStatus(uint32_t tid, uint32_t pid, bool need_schema, - ::openmldb::api::TableStatus& table_status) { + ::openmldb::api::TableStatus& table_status) { ::openmldb::api::GetTableStatusRequest request; request.set_tid(tid); request.set_pid(pid); request.set_need_schema(need_schema); ::openmldb::api::GetTableStatusResponse response; auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::GetTableStatus, &request, &response, - FLAGS_request_timeout_ms, 1); + FLAGS_request_timeout_ms, 1); if (!st.OK()) { return st; } @@ -552,9 +551,10 @@ base::Status TabletClient::GetTableStatus(uint32_t tid, uint32_t pid, bool need_ return {response.code(), response.msg()}; } -std::shared_ptr TabletClient::Scan(uint32_t tid, uint32_t pid, - const std::string& pk, const std::string& idx_name, - uint64_t stime, uint64_t etime, uint32_t limit, uint32_t skip_record_num, std::string& msg) { +std::shared_ptr TabletClient::Scan(uint32_t tid, uint32_t pid, const std::string& pk, + const std::string& idx_name, uint64_t stime, + uint64_t etime, uint32_t limit, + uint32_t skip_record_num, std::string& msg) { ::openmldb::api::ScanRequest request; request.set_pk(pk); request.set_st(stime); @@ -568,7 +568,7 @@ std::shared_ptr TabletClient::Scan(uint32_t tid, request.set_skip_record_num(skip_record_num); auto response = std::make_shared(); bool ok = client_.SendRequest(&::openmldb::api::TabletServer_Stub::Scan, &request, response.get(), - FLAGS_request_timeout_ms, 1); + FLAGS_request_timeout_ms, 1); if (response->has_msg()) { msg = response->msg(); } @@ -578,9 +578,9 @@ std::shared_ptr TabletClient::Scan(uint32_t tid, return std::make_shared<::openmldb::base::ScanKvIterator>(pk, response); } -std::shared_ptr TabletClient::Scan(uint32_t tid, uint32_t pid, - const std::string& pk, const std::string& idx_name, - uint64_t stime, uint64_t etime, uint32_t limit, std::string& msg) { +std::shared_ptr TabletClient::Scan(uint32_t tid, uint32_t pid, const std::string& pk, + const std::string& idx_name, uint64_t stime, + uint64_t etime, uint32_t limit, std::string& msg) { return Scan(tid, pid, pk, idx_name, stime, etime, limit, 0, msg); } @@ -708,7 +708,7 @@ bool TabletClient::SetExpire(uint32_t tid, uint32_t pid, bool is_expire) { } base::Status TabletClient::GetTableFollower(uint32_t tid, uint32_t pid, uint64_t& offset, - std::map& info_map) { + std::map& info_map) { ::openmldb::api::GetTableFollowerRequest request; ::openmldb::api::GetTableFollowerResponse response; request.set_tid(tid); @@ -798,6 +798,57 @@ bool TabletClient::Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64 return true; } +base::Status 
TabletClient::Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t time, + const std::string& idx_name, std::string& value, uint64_t& ts) { + ::openmldb::api::GetRequest request; + ::openmldb::api::GetResponse response; + request.set_tid(tid); + request.set_pid(pid); + request.set_key(pk); + request.set_ts(time); + if (!idx_name.empty()) { + request.set_idx_name(idx_name); + } + auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::Get, &request, &response, + FLAGS_request_timeout_ms, 1); + if (!st.OK()) { + return st; + } + + if (response.code() == 0) { + value.swap(*response.mutable_value()); + ts = response.ts(); + } + return {response.code(), response.msg()}; +} + +base::Status TabletClient::Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t stime, api::GetType stype, + uint64_t etime, const std::string& idx_name, std::string& value, + uint64_t& ts) { + ::openmldb::api::GetRequest request; + ::openmldb::api::GetResponse response; + request.set_tid(tid); + request.set_pid(pid); + request.set_key(pk); + request.set_ts(stime); + request.set_type(stype); + request.set_et(etime); + if (!idx_name.empty()) { + request.set_idx_name(idx_name); + } + auto st = client_.SendRequestSt(&::openmldb::api::TabletServer_Stub::Get, &request, &response, + FLAGS_request_timeout_ms, 1); + if (!st.OK()) { + return st; + } + + if (response.code() == 0) { + value.swap(*response.mutable_value()); + ts = response.ts(); + } + return {response.code(), response.msg()}; +} + bool TabletClient::Delete(uint32_t tid, uint32_t pid, const std::string& pk, const std::string& idx_name, std::string& msg) { ::openmldb::api::DeleteRequest request; @@ -839,8 +890,7 @@ base::Status TabletClient::Delete(uint32_t tid, uint32_t pid, const sdk::DeleteO request.set_ts_name(option.ts_name); } request.set_enable_decode_value(option.enable_decode_value); - bool ok = client_.SendRequest(&::openmldb::api::TabletServer_Stub::Delete, &request, &response, - timeout_ms, 1); + bool ok = client_.SendRequest(&::openmldb::api::TabletServer_Stub::Delete, &request, &response, timeout_ms, 1); if (!ok || response.code() != 0) { return {base::ReturnCode::kError, response.msg()}; } @@ -884,8 +934,10 @@ bool TabletClient::DeleteBinlog(uint32_t tid, uint32_t pid, openmldb::common::St } std::shared_ptr TabletClient::Traverse(uint32_t tid, uint32_t pid, - const std::string& idx_name, const std::string& pk, uint64_t ts, uint32_t limit, bool skip_current_pk, - uint32_t ts_pos, uint32_t& count) { + const std::string& idx_name, + const std::string& pk, uint64_t ts, + uint32_t limit, bool skip_current_pk, + uint32_t ts_pos, uint32_t& count) { ::openmldb::api::TraverseRequest request; auto response = std::make_shared(); request.set_tid(tid); @@ -965,8 +1017,8 @@ bool TabletClient::AddIndex(uint32_t tid, uint32_t pid, const ::openmldb::common } bool TabletClient::AddMultiIndex(uint32_t tid, uint32_t pid, - const std::vector<::openmldb::common::ColumnKey>& column_keys, - std::shared_ptr task_info) { + const std::vector<::openmldb::common::ColumnKey>& column_keys, + std::shared_ptr task_info) { ::openmldb::api::AddIndexRequest request; ::openmldb::api::GeneralResponse response; request.set_tid(tid); @@ -1038,9 +1090,8 @@ bool TabletClient::LoadIndexData(uint32_t tid, uint32_t pid, uint32_t partition_ } bool TabletClient::ExtractIndexData(uint32_t tid, uint32_t pid, uint32_t partition_num, - const std::vector<::openmldb::common::ColumnKey>& column_key, - uint64_t offset, bool dump_data, - std::shared_ptr task_info) { + const 
std::vector<::openmldb::common::ColumnKey>& column_key, uint64_t offset, + bool dump_data, std::shared_ptr task_info) { if (column_key.empty()) { if (task_info) { task_info->set_status(::openmldb::api::TaskStatus::kFailed); @@ -1212,7 +1263,7 @@ bool TabletClient::CallSQLBatchRequestProcedure(const std::string& db, const std } bool static ParseBatchRequestMeta(const base::Slice& meta, const base::Slice& data, - ::openmldb::api::SQLBatchRequestQueryRequest* request) { + ::openmldb::api::SQLBatchRequestQueryRequest* request) { uint64_t total_len = 0; const int32_t* buf = reinterpret_cast(meta.data()); int32_t cnt = meta.size() / sizeof(int32_t); @@ -1237,9 +1288,9 @@ bool static ParseBatchRequestMeta(const base::Slice& meta, const base::Slice& da } base::Status TabletClient::CallSQLBatchRequestProcedure(const std::string& db, const std::string& sp_name, - const base::Slice& meta, const base::Slice& data, - bool is_debug, uint64_t timeout_ms, - brpc::Controller* cntl, openmldb::api::SQLBatchRequestQueryResponse* response) { + const base::Slice& meta, const base::Slice& data, bool is_debug, + uint64_t timeout_ms, brpc::Controller* cntl, + openmldb::api::SQLBatchRequestQueryResponse* response) { ::openmldb::api::SQLBatchRequestQueryRequest request; request.set_sp_name(sp_name); request.set_is_procedure(true); @@ -1263,10 +1314,9 @@ base::Status TabletClient::CallSQLBatchRequestProcedure(const std::string& db, c return {}; } -base::Status TabletClient::CallSQLBatchRequestProcedure(const std::string& db, const std::string& sp_name, - const base::Slice& meta, const base::Slice& data, - bool is_debug, uint64_t timeout_ms, - openmldb::RpcCallback* callback) { +base::Status TabletClient::CallSQLBatchRequestProcedure( + const std::string& db, const std::string& sp_name, const base::Slice& meta, const base::Slice& data, bool is_debug, + uint64_t timeout_ms, openmldb::RpcCallback* callback) { if (callback == nullptr) { return {base::ReturnCode::kError, "callback is null"}; } @@ -1285,8 +1335,8 @@ base::Status TabletClient::CallSQLBatchRequestProcedure(const std::string& db, c return {base::ReturnCode::kError, "append to iobuf error"}; } callback->GetController()->set_timeout_ms(timeout_ms); - if (!client_.SendRequest(&::openmldb::api::TabletServer_Stub::SQLBatchRequestQuery, - callback->GetController().get(), &request, callback->GetResponse().get(), callback)) { + if (!client_.SendRequest(&::openmldb::api::TabletServer_Stub::SQLBatchRequestQuery, callback->GetController().get(), + &request, callback->GetResponse().get(), callback)) { return {base::ReturnCode::kError, "stub is null"}; } return {}; @@ -1383,9 +1433,9 @@ bool TabletClient::DropFunction(const ::openmldb::common::ExternalFun& fun, std: return true; } -bool TabletClient::CreateAggregator(const ::openmldb::api::TableMeta& base_table_meta, - uint32_t aggr_tid, uint32_t aggr_pid, uint32_t index_pos, - const ::openmldb::base::LongWindowInfo& window_info) { +bool TabletClient::CreateAggregator(const ::openmldb::api::TableMeta& base_table_meta, uint32_t aggr_tid, + uint32_t aggr_pid, uint32_t index_pos, + const ::openmldb::base::LongWindowInfo& window_info) { ::openmldb::api::CreateAggregatorRequest request; ::openmldb::api::TableMeta* base_meta_ptr = request.mutable_base_table_meta(); base_meta_ptr->CopyFrom(base_table_meta); @@ -1411,7 +1461,7 @@ bool TabletClient::CreateAggregator(const ::openmldb::api::TableMeta& base_table bool TabletClient::GetAndFlushDeployStats(::openmldb::api::DeployStatsResponse* res) { 
::openmldb::api::GAFDeployStatsRequest req; bool ok = client_.SendRequest(&::openmldb::api::TabletServer_Stub::GetAndFlushDeployStats, &req, res, - FLAGS_request_timeout_ms, FLAGS_request_max_retry); + FLAGS_request_timeout_ms, FLAGS_request_max_retry); return ok && res->code() == 0; } diff --git a/src/client/tablet_client.h b/src/client/tablet_client.h index 177124208fc..dd2879b8306 100644 --- a/src/client/tablet_client.h +++ b/src/client/tablet_client.h @@ -78,20 +78,24 @@ class TabletClient : public Client { base::Status Put(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t time, const std::string& value); base::Status Put(uint32_t tid, uint32_t pid, uint64_t time, const std::string& value, - const std::vector>& dimensions, - int memory_usage_limit = 0, bool put_if_absent = false); + const std::vector>& dimensions, int memory_usage_limit = 0, + bool put_if_absent = false, bool check_exists = false); base::Status Put(uint32_t tid, uint32_t pid, uint64_t time, const base::Slice& value, - ::google::protobuf::RepeatedPtrField<::openmldb::api::Dimension>* dimensions, - int memory_usage_limit = 0, bool put_if_absent = false); + ::google::protobuf::RepeatedPtrField<::openmldb::api::Dimension>* dimensions, + int memory_usage_limit = 0, bool put_if_absent = false, bool check_exists = false); bool Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t time, std::string& value, // NOLINT uint64_t& ts, // NOLINT - std::string& msg); // NOLINT + std::string& msg); // NOLINT bool Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t time, const std::string& idx_name, std::string& value, uint64_t& ts, std::string& msg); // NOLINT - + base::Status Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t time, const std::string& idx_name, + std::string& value, uint64_t& ts); // NOLINT + base::Status Get(uint32_t tid, uint32_t pid, const std::string& pk, uint64_t stime, api::GetType stype, + uint64_t etime, const std::string& idx_name, std::string& value, + uint64_t& ts); // NOLINT bool Delete(uint32_t tid, uint32_t pid, const std::string& pk, const std::string& idx_name, std::string& msg); // NOLINT diff --git a/src/cmd/display.h b/src/cmd/display.h index 34e1f851e39..0d7d2819964 100644 --- a/src/cmd/display.h +++ b/src/cmd/display.h @@ -105,6 +105,7 @@ __attribute__((unused)) static void PrintColumnKey( t.add("ts"); t.add("ttl"); t.add("ttl_type"); + t.add("type"); t.end_of_row(); int index_pos = 1; for (int i = 0; i < column_key_field.size(); i++) { @@ -141,7 +142,7 @@ __attribute__((unused)) static void PrintColumnKey( t.add("-"); // ttl t.add("-"); // ttl_type } - + t.add(common::IndexType_Name(column_key.type())); t.end_of_row(); } stream << t; diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index fe8faa21504..f23ddfa3f4a 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -1442,18 +1442,18 @@ TEST_P(DBSDKTest, SQLDeletetRow) { res = sr->ExecuteSQL(absl::StrCat("select * from ", table_name, ";"), &status); ASSERT_EQ(res->Size(), 3); std::string delete_sql = "delete from " + table_name + " where c1 = ?;"; - auto insert_row = sr->GetDeleteRow(db_name, delete_sql, &status); + auto delete_row = sr->GetDeleteRow(db_name, delete_sql, &status); ASSERT_TRUE(status.IsOK()); - insert_row->SetString(1, "key3"); - ASSERT_TRUE(insert_row->Build()); - sr->ExecuteDelete(insert_row, &status); + delete_row->SetString(1, "key3"); + ASSERT_TRUE(delete_row->Build()); + sr->ExecuteDelete(delete_row, &status); ASSERT_TRUE(status.IsOK()); res = 
sr->ExecuteSQL(absl::StrCat("select * from ", table_name, ";"), &status); ASSERT_EQ(res->Size(), 2); - insert_row->Reset(); - insert_row->SetString(1, "key100"); - ASSERT_TRUE(insert_row->Build()); - sr->ExecuteDelete(insert_row, &status); + delete_row->Reset(); + delete_row->SetString(1, "key100"); + ASSERT_TRUE(delete_row->Build()); + sr->ExecuteDelete(delete_row, &status); ASSERT_TRUE(status.IsOK()); res = sr->ExecuteSQL(absl::StrCat("select * from ", table_name, ";"), &status); ASSERT_EQ(res->Size(), 2); diff --git a/src/codec/field_codec.h b/src/codec/field_codec.h index 452578ff9fc..14f5ec14a5a 100644 --- a/src/codec/field_codec.h +++ b/src/codec/field_codec.h @@ -35,8 +35,8 @@ namespace codec { template static bool AppendColumnValue(const std::string& v, hybridse::sdk::DataType type, bool is_not_null, const std::string& null_value, T row) { - // check if null - if (v == null_value) { + // check if null, empty string will cast fail and throw bad_lexical_cast + if (v.empty() || v == null_value) { if (is_not_null) { return false; } diff --git a/src/flags.cc b/src/flags.cc index 2a061dbd263..744da1dac64 100644 --- a/src/flags.cc +++ b/src/flags.cc @@ -187,3 +187,7 @@ DEFINE_int32(sync_job_timeout, 30 * 60 * 1000, "sync job timeout, unit is milliseconds, should <= server.channel_keep_alive_time in TaskManager"); DEFINE_int32(deploy_job_max_wait_time_ms, 30 * 60 * 1000, "the max wait time of waiting deploy job"); DEFINE_bool(skip_grant_tables, true, "skip the grant tables"); + +// iot +// not exactly size, may plus some TODO(hw): too small? +DEFINE_uint32(cidx_gc_max_size, 1000, "config the max size for one cidx segment gc"); diff --git a/src/nameserver/name_server_impl.cc b/src/nameserver/name_server_impl.cc index 9c565272fb3..52acb8137d7 100644 --- a/src/nameserver/name_server_impl.cc +++ b/src/nameserver/name_server_impl.cc @@ -1568,8 +1568,7 @@ bool NameServerImpl::Init(const std::string& zk_cluster, const std::string& zk_p task_thread_pool_.DelayTask(FLAGS_make_snapshot_check_interval, boost::bind(&NameServerImpl::SchedMakeSnapshot, this)); std::shared_ptr<::openmldb::nameserver::TableInfo> table_info; - while ( - !GetTableInfo(::openmldb::nameserver::USER_INFO_NAME, ::openmldb::nameserver::INTERNAL_DB, &table_info)) { + while (!GetTableInfo(::openmldb::nameserver::USER_INFO_NAME, ::openmldb::nameserver::INTERNAL_DB, &table_info)) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } return true; @@ -3822,6 +3821,8 @@ void NameServerImpl::CreateTable(RpcController* controller, const CreateTableReq table_info->set_partition_num(1); table_info->set_replica_num(1); } + // TODO(hw): valid index pattern 1. all covering 2. 
clustered + secondary/covering(only one clustered and it should + // be the first one) auto status = schema::SchemaAdapter::CheckTableMeta(*table_info); if (!status.OK()) { PDLOG(WARNING, status.msg.c_str()); @@ -8678,6 +8679,11 @@ void NameServerImpl::AddIndex(RpcController* controller, const AddIndexRequest* std::vector<::openmldb::common::ColumnKey> column_key_vec; if (request->column_keys_size() > 0) { for (const auto& column_key : request->column_keys()) { + if (column_key.type() == common::IndexType::kClustered) { + base::SetResponseStatus(ReturnCode::kWrongColumnKey, "add clustered index is not allowed", response); + LOG(WARNING) << "add clustered index is not allowed"; + return; + } column_key_vec.push_back(column_key); } } else { @@ -9533,8 +9539,8 @@ base::Status NameServerImpl::CreateProcedureOnTablet(const ::openmldb::api::Crea ", endpoint: ", tb_client->GetEndpoint(), ", msg: ", status.GetMsg())}; } DLOG(INFO) << "create procedure on tablet success. db_name: " << sp_info.db_name() << ", " - << "sp_name: " << sp_info.sp_name() << ", " << "sql: " << sp_info.sql() - << "endpoint: " << tb_client->GetEndpoint(); + << "sp_name: " << sp_info.sp_name() << ", " + << "sql: " << sp_info.sql() << "endpoint: " << tb_client->GetEndpoint(); } return {}; } @@ -10074,11 +10080,7 @@ void NameServerImpl::ShowFunction(RpcController* controller, const ShowFunctionR base::Status NameServerImpl::InitGlobalVarTable() { std::map default_value = { - {"execute_mode", "online"}, - {"enable_trace", "false"}, - {"sync_job", "false"}, - {"job_timeout", "20000"} - }; + {"execute_mode", "online"}, {"enable_trace", "false"}, {"sync_job", "false"}, {"job_timeout", "20000"}}; // get table_info std::string db = INFORMATION_SCHEMA_DB; std::string table = GLOBAL_VARIABLES; diff --git a/src/proto/common.proto b/src/proto/common.proto index 8241e646f34..ee4c23e1c68 100755 --- a/src/proto/common.proto +++ b/src/proto/common.proto @@ -64,12 +64,19 @@ message TTLSt { optional uint64 lat_ttl = 3 [default = 0]; } +enum IndexType { + kCovering = 0; + kClustered = 1; + kSecondary = 2; +} + message ColumnKey { optional string index_name = 1; repeated string col_name = 2; optional string ts_name = 3; optional uint32 flag = 4 [default = 0]; // 0 mean index exist, 1 mean index has been deleted optional TTLSt ttl = 5; + optional IndexType type = 6 [default = kCovering]; } message EndpointAndTid { diff --git a/src/proto/tablet.proto b/src/proto/tablet.proto index bc160a01f1e..253eb35b33e 100755 --- a/src/proto/tablet.proto +++ b/src/proto/tablet.proto @@ -196,6 +196,7 @@ message PutRequest { optional uint32 format_version = 8 [default = 0, deprecated = true]; optional uint32 memory_limit = 9; optional bool put_if_absent = 10 [default = false]; + optional bool check_exists = 11 [default = false]; } message PutResponse { diff --git a/src/sdk/node_adapter.cc b/src/sdk/node_adapter.cc index c7c0d191922..d6b3979cfe7 100644 --- a/src/sdk/node_adapter.cc +++ b/src/sdk/node_adapter.cc @@ -383,6 +383,7 @@ bool NodeAdapter::TransformToTableDef(::hybridse::node::CreatePlanNode* create_n if (!TransformToColumnKey(column_index, column_names, index, status)) { return false; } + DLOG(INFO) << "index column key [" << index->ShortDebugString() << "]"; break; } @@ -471,6 +472,12 @@ bool NodeAdapter::TransformToColumnKey(hybridse::node::ColumnIndexNode* column_i for (const auto& key : column_index->GetKey()) { index->add_col_name(key); } + auto& type = column_index->GetIndexType(); + if (type == "skey") { + 
index->set_type(common::IndexType::kSecondary); + } else if (type == "ckey") { + index->set_type(common::IndexType::kClustered); + } // else default type kCovering // if no column_names, skip check if (!column_names.empty()) { for (const auto& col : index->col_name()) { diff --git a/src/sdk/option.h b/src/sdk/option.h index 3acb4e30afa..ae6fef7dfac 100644 --- a/src/sdk/option.h +++ b/src/sdk/option.h @@ -17,16 +17,26 @@ #ifndef SRC_SDK_OPTION_H_ #define SRC_SDK_OPTION_H_ +#include #include +#include "absl/strings/str_cat.h" + namespace openmldb { namespace sdk { struct DeleteOption { DeleteOption(std::optional idx_i, const std::string& key_i, const std::string& ts_name_i, - std::optional start_ts_i, std::optional end_ts_i) : - idx(idx_i), key(key_i), ts_name(ts_name_i), start_ts(start_ts_i), end_ts(end_ts_i) {} + std::optional start_ts_i, std::optional end_ts_i) + : idx(idx_i), key(key_i), ts_name(ts_name_i), start_ts(start_ts_i), end_ts(end_ts_i) {} DeleteOption() = default; + std::string DebugString() { + return absl::StrCat("idx: ", idx.has_value() ? std::to_string(idx.value()) : "-1", ", key: ", key, + ", ts_name: ", ts_name, + ", start_ts: ", start_ts.has_value() ? std::to_string(start_ts.value()) : "-1", + ", end_ts: ", end_ts.has_value() ? std::to_string(end_ts.value()) : "-1", + ", enable_decode_value: ", enable_decode_value ? "true" : "false"); + } std::optional idx = std::nullopt; std::string key; std::string ts_name; diff --git a/src/sdk/sql_cluster_router.cc b/src/sdk/sql_cluster_router.cc index 068538ba5e0..51e9a9c2848 100644 --- a/src/sdk/sql_cluster_router.cc +++ b/src/sdk/sql_cluster_router.cc @@ -62,6 +62,7 @@ #include "sdk/result_set_sql.h" #include "sdk/sdk_util.h" #include "sdk/split.h" +#include "storage/index_organized_table.h" #include "udf/udf.h" #include "vm/catalog.h" #include "vm/engine.h" @@ -1283,12 +1284,11 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& s std::vector fails; if (!codegen_rows.empty()) { - for (size_t i = 0 ; i < codegen_rows.size(); ++i) { + for (size_t i = 0; i < codegen_rows.size(); ++i) { auto r = codegen_rows[i]; auto row = std::make_shared(table_info, schema, r, put_if_absent); if (!PutRow(table_info->tid(), row, tablets, status)) { - LOG(WARNING) << "fail to put row[" - << "] due to: " << status->msg; + LOG(WARNING) << "fail to put row[" << i << "] due to: " << status->msg; fails.push_back(i); continue; } @@ -1322,12 +1322,156 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& s return true; } +bool IsIOT(const nameserver::TableInfo& table_info) { + auto& cks = table_info.column_key(); + if (cks.empty()) { + LOG(WARNING) << "no index in meta"; + return false; + } + if (cks[0].has_type() && cks[0].type() == common::IndexType::kClustered) { + // check other indexes + for (int i = 1; i < cks.size(); i++) { + if (cks[i].has_type() && cks[i].type() == common::IndexType::kClustered) { + LOG(WARNING) << "should be only one clustered index"; + return false; + } + } + return true; + } + return false; +} + +// clustered index idx must be 0 +bool IsClusteredIndexIdx(const openmldb::api::Dimension& dim_index) { return dim_index.idx() == 0; } +bool IsClusteredIndexIdx(const std::pair& dim_index) { return dim_index.second == 0; } + +std::string ClusteredIndexTsName(const nameserver::TableInfo& table_info) { + auto& cks = table_info.column_key(); + if (cks.empty()) { + LOG(WARNING) << "no index in meta"; + return ""; + } + if (cks[0].has_ts_name() && cks[0].ts_name() != 
storage::DEFAULT_TS_COL_NAME) { + return cks[0].ts_name(); + } + // if default ts col, return empty string + return ""; +} + bool SQLClusterRouter::PutRow(uint32_t tid, const std::shared_ptr& row, const std::vector>& tablets, ::hybridse::sdk::Status* status) { RET_FALSE_IF_NULL_AND_WARN(status, "output status is nullptr"); const auto& dimensions = row->GetDimensions(); uint64_t cur_ts = ::baidu::common::timer::get_micros() / 1000; + // if iot, check if primary key exists in cidx + if (IsIOT(row->GetTableInfo())) { + if (row->IsPutIfAbsent()) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "put_if_absent is not supported for iot table"); + return false; + } + // dimensions map>, find the idxid == 0 + bool valid = false; + uint64_t ts = 0; + std::string exists_value; // if empty, no primary key exists + + auto cols = row->GetTableInfo().column_desc(); // copy + codec::RowView row_view(cols); + // get cidx pid tablet for existence check + for (const auto& kv : dimensions) { + uint32_t pid = kv.first; + for (auto& pair : kv.second) { + if (IsClusteredIndexIdx(pair)) { + // check if primary key exists on tablet + auto tablet = tablets[pid]; + if (!tablet) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + "tablet accessor is nullptr, can't check clustered index"); + return false; + } + auto client = tablet->GetClient(); + if (!client) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + "tablet client is nullptr, can't check clustered index"); + return false; + } + int64_t get_ts = 0; + auto ts_name = ClusteredIndexTsName(row->GetTableInfo()); + if (!ts_name.empty()) { + bool found = false; + for (int i = 0; i < cols.size(); i++) { + if (cols.Get(i).name() == ts_name) { + row_view.GetInteger(reinterpret_cast(row->GetRow().c_str()), i, + cols.Get(i).data_type(), &get_ts); + found = true; + break; + } + } + if (!found || get_ts < 0) { + SET_STATUS_AND_WARN( + status, StatusCode::kCmdError, + found ? "invalid ts " + std::to_string(get_ts) : "get ts column failed"); + return false; + } + } else { + DLOG(INFO) << "no ts column in cidx"; + } + // if get_ts == 0, cidx may be without ts column, you should check ts col in cidx info, not by + // get_ts + DLOG(INFO) << "get primary key on iot table, pid " << pid << ", key " << pair.first << ", ts " + << get_ts; + // get rpc can't read all data(expired data may still in data skiplist), so we use put to check + // exists only check in cidx, no real insertion. get_ts may not be the current time, it can be ts + // col value, it's a bit different. 
+ auto st = client->Put(tid, pid, get_ts, row->GetRow(), {pair}, + insert_memory_usage_limit_.load(std::memory_order_relaxed), false, true); + if (!st.OK() && st.GetCode() != base::ReturnCode::kKeyNotFound) { + APPEND_FROM_BASE_AND_WARN(status, st, "get primary key failed"); + return false; + } + valid = true; + DLOG(INFO) << "Get result: " << st.ToString(); + // check result, won't set exists_value if key not found + if (st.OK()) { + DLOG(INFO) << "primary key exists on iot table"; + exists_value = row->GetRow(); + ts = get_ts; + } + } + } + } + if (!valid) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + "can't check primary key on iot table, meta/connection error"); + return false; + } + DLOG_IF(INFO, exists_value.empty()) << "primary key not exists, safe to insert"; + if (!exists_value.empty()) { + // delete old data then insert new data, no concurrency control, be careful + // revertput or SQLDeleteRow is not easy to use here, so make a sql + DLOG(INFO) << "primary key exists, delete old data then insert new data"; + // just where primary key, not all columns(redundant condition) + auto hint = storage::IndexOrganizedTable::MakePkeysHint(row->GetTableInfo().column_desc(), + row->GetTableInfo().column_key(0)); + if (hint.empty()) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make pkeys hint failed"); + return false; + } + auto sql = storage::IndexOrganizedTable::MakeDeleteSQL(row->GetTableInfo().db(), row->GetTableInfo().name(), + row->GetTableInfo().column_key(0), + (int8_t*)exists_value.c_str(), ts, row_view, hint); + if (sql.empty()) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make delete sql failed"); + return false; + } + ExecuteSQL(sql, status); + if (status->code != 0) { + PREPEND_AND_WARN(status, "delete old data failed"); + return false; + } + DLOG(INFO) << "delete old data success"; + } + } for (const auto& kv : dimensions) { uint32_t pid = kv.first; if (pid < tablets.size()) { @@ -1341,16 +1485,17 @@ bool SQLClusterRouter::PutRow(uint32_t tid, const std::shared_ptr& client->Put(tid, pid, cur_ts, row->GetRow(), kv.second, insert_memory_usage_limit_.load(std::memory_order_relaxed), row->IsPutIfAbsent()); if (!ret.OK()) { - if (RevertPut(row->GetTableInfo(), pid, dimensions, cur_ts, base::Slice(row->GetRow()), tablets) - .IsOK()) { - SET_STATUS_AND_WARN(status, StatusCode::kCmdError, - absl::StrCat("INSERT failed, tid ", tid)); + APPEND_FROM_BASE(status, ret, "put failed"); + if (auto rp = RevertPut(row->GetTableInfo(), pid, dimensions, cur_ts, + base::Slice(row->GetRow()), tablets); + rp.IsOK()) { + APPEND_AND_WARN(status, "tid " + std::to_string(tid) + ". RevertPut success."); } else { - SET_STATUS_AND_WARN(status, StatusCode::kCmdError, - "INSERT failed, tid " + std::to_string(tid) + - ". Note that data might have been partially inserted. " - "You are encouraged to perform DELETE to remove any partially " - "inserted data before trying INSERT again."); + APPEND_AND_WARN(status, "tid " + std::to_string(tid) + + ". RevertPut failed: " + rp.ToString() + + "Note that data might have been partially inserted. 
" + "You are encouraged to perform DELETE to remove any " + "partially inserted data before trying INSERT again."); } return false; } @@ -1430,7 +1575,7 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& n std::vector> tablets; bool ret = cluster_sdk_->GetTablet(db, name, &tablets); if (!ret || tablets.empty()) { - status->msg = "fail to get table " + name + " tablet"; + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "fail to get table " + name + " tablet"); return false; } std::map> dimensions_map; @@ -1453,6 +1598,114 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& n } base::Slice row_value(value, len); uint64_t cur_ts = ::baidu::common::timer::get_micros() / 1000; + // TODO(hw): refactor with PutRow + // if iot, check if primary key exists in cidx + auto table_info = cluster_sdk_->GetTableInfo(db, name); + if (!table_info) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "fail to get table info"); + return false; + } + if (IsIOT(*table_info)) { + if (put_if_absent) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "put_if_absent is not supported for iot table"); + return false; + } + // dimensions map>, find the idxid == 0 + bool valid = false; + uint64_t ts = 0; + std::string exists_value; // if empty, no primary key exists + // TODO: ref putrow, fix later + auto cols = table_info->column_desc(); // copy + codec::RowView row_view(cols); + for (const auto& kv : dimensions_map) { + uint32_t pid = kv.first; + for (auto& pair : kv.second) { + if (IsClusteredIndexIdx(pair)) { + // check if primary key exists on tablet + auto tablet = tablets[pid]; + if (!tablet) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + "tablet accessor is nullptr, can't check clustered index"); + return false; + } + auto client = tablet->GetClient(); + if (!client) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + "tablet client is nullptr, can't check clustered index"); + return false; + } + int64_t get_ts = 0; + if (auto ts_name = ClusteredIndexTsName(*table_info); !ts_name.empty()) { + bool found = false; + for (int i = 0; i < cols.size(); i++) { + if (cols.Get(i).name() == ts_name) { + row_view.GetInteger(reinterpret_cast(value), i, cols.Get(i).data_type(), + &get_ts); + found = true; + break; + } + } + if (!found || get_ts < 0) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + found ? 
"invalid ts" + std::to_string(get_ts) : "get ts column failed"); + return false; + } + } else { + DLOG(INFO) << "no ts column in cidx"; + } + // if get_ts == 0, may be cidx without ts column + DLOG(INFO) << "get key " << pair.key() << ", ts " << get_ts; + ::google::protobuf::RepeatedPtrField<::openmldb::api::Dimension> dims; + dims.Add()->CopyFrom(pair); + auto st = client->Put(tid, pid, get_ts, row_value, &dims, + insert_memory_usage_limit_.load(std::memory_order_relaxed), false, true); + if (!st.OK() && st.GetCode() != base::ReturnCode::kKeyNotFound) { + APPEND_FROM_BASE_AND_WARN(status, st, "get primary key failed"); + return false; + } + valid = true; + DLOG(INFO) << "Get result: " << st.ToString(); + // check result, won't set exists_value if key not found + if (st.OK()) { + DLOG(INFO) << "primary key exist on iot table"; + exists_value = value; + ts = get_ts; + } + } + } + } + if (!valid) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, + "can't check primary key on iot table, meta/connection error"); + return false; + } + DLOG_IF(INFO, exists_value.empty()) << "primary key not exists, safe to insert"; + if (!exists_value.empty()) { + // delete old data then insert new data, no concurrency control, be careful + // revertput or SQLDeleteRow is not easy to use here, so make a sql? + DLOG(INFO) << "primary key exists, delete old data then insert new data"; + // just where primary key, not all columns(redundant condition) + auto hint = + storage::IndexOrganizedTable::MakePkeysHint(table_info->column_desc(), table_info->column_key(0)); + if (hint.empty()) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make pkeys hint failed"); + return false; + } + auto sql = storage::IndexOrganizedTable::MakeDeleteSQL(table_info->db(), table_info->name(), + table_info->column_key(0), + (int8_t*)exists_value.c_str(), ts, row_view, hint); + if (sql.empty()) { + SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make delete sql failed"); + return false; + } + ExecuteSQL(sql, status); + if (status->code != 0) { + PREPEND_AND_WARN(status, "delete old data failed"); + return false; + } + } + } + for (auto& kv : dimensions_map) { uint32_t pid = kv.first; if (pid < tablets.size()) { @@ -1460,17 +1713,13 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& n if (tablet) { auto client = tablet->GetClient(); if (client) { - DLOG(INFO) << "put data to endpoint " << client->GetEndpoint() << " with dimensions size " - << kv.second.size(); + DVLOG(3) << "put data to endpoint " << client->GetEndpoint() << " with dimensions size " + << kv.second.size(); auto ret = client->Put(tid, pid, cur_ts, row_value, &kv.second, insert_memory_usage_limit_.load(std::memory_order_relaxed), put_if_absent); if (!ret.OK()) { // TODO(hw): show put failed row(readable)? ::hybridse::codec::RowView::GetRowString? - SET_STATUS_AND_WARN(status, StatusCode::kCmdError, - "INSERT failed, tid " + std::to_string(tid) + - ". Note that data might have been partially inserted. 
" - "You are encouraged to perform DELETE to remove any partially " - "inserted data before trying INSERT again."); + APPEND_FROM_BASE(status, ret, "INSERT failed, tid " + std::to_string(tid)); std::map>> dimensions; for (const auto& val : dimensions_map) { std::vector> vec; @@ -1479,14 +1728,14 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& n } dimensions.emplace(val.first, std::move(vec)); } - auto table_info = cluster_sdk_->GetTableInfo(db, name); - if (!table_info) { - return false; - } + // TODO(hw): better to return absl::Status - if (RevertPut(*table_info, pid, dimensions, cur_ts, row_value, tablets).IsOK()) { - SET_STATUS_AND_WARN(status, StatusCode::kCmdError, - absl::StrCat("INSERT failed, tid ", tid)); + if (auto rp = RevertPut(*table_info, pid, dimensions, cur_ts, row_value, tablets); rp.IsOK()) { + APPEND_AND_WARN(status, "revert ok"); + } else { + APPEND_AND_WARN(status, + "revert failed. You are encouraged to perform DELETE to remove any " + "partially inserted data before trying INSERT again."); } return false; } @@ -1698,7 +1947,7 @@ std::shared_ptr SQLClusterRouter::HandleSQLCmd(const h } case hybridse::node::kCmdShowUser: { - std::vector value = { options_->user }; + std::vector value = {options_->user}; return ResultSetSQL::MakeResultSet({"User"}, {value}, status); } @@ -2119,6 +2368,32 @@ std::shared_ptr SQLClusterRouter::HandleSQLCmd(const h return {}; } +base::Status ValidateTableInfo(const nameserver::TableInfo& table_info) { + auto& indexs = table_info.column_key(); + if (indexs.empty()) { + LOG(INFO) << "no index specified, it'll add default index later"; + return {}; + } + if (indexs[0].type() == common::IndexType::kCovering) { + // MemTable, all other indexs should be covering + for (int i = 1; i < indexs.size(); i++) { + if (indexs[i].type() != common::IndexType::kCovering) { + return {base::ReturnCode::kInvalidArgs, "index " + std::to_string(i) + " should be covering"}; + } + } + } else if (indexs[0].type() == common::IndexType::kClustered) { + // IOT, no more clustered index, secondary and covering are valid + for (int i = 1; i < indexs.size(); i++) { + if (indexs[i].type() == common::IndexType::kClustered) { + return {base::ReturnCode::kInvalidArgs, "index " + std::to_string(i) + " should not be clustered"}; + } + } + } else { + return {base::ReturnCode::kInvalidArgs, "index 0 should be clustered or covering"}; + } + return {}; +} + base::Status SQLClusterRouter::HandleSQLCreateTable(hybridse::node::CreatePlanNode* create_node, const std::string& db, std::shared_ptr<::openmldb::client::NsClient> ns_ptr) { return HandleSQLCreateTable(create_node, db, ns_ptr, ""); @@ -2147,11 +2422,16 @@ base::Status SQLClusterRouter::HandleSQLCreateTable(hybridse::node::CreatePlanNo hybridse::base::Status sql_status; bool is_cluster_mode = cluster_sdk_->IsClusterMode(); + // TODO(hw): support MemTable and IOT, just force use IOT for test ::openmldb::sdk::NodeAdapter::TransformToTableDef(create_node, &table_info, default_replica_num, is_cluster_mode, &sql_status); if (sql_status.code != 0) { return base::Status(sql_status.code, sql_status.msg); } + // clustered should be the first index, user should set it, we don't adjust it + if (auto st = ValidateTableInfo(table_info); !st.OK()) { + return st; + } std::string msg; if (!ns_ptr->CreateTable(table_info, create_node->GetIfNotExist(), msg)) { return base::Status(base::ReturnCode::kSQLCmdRunError, msg); @@ -2763,7 +3043,8 @@ std::shared_ptr SQLClusterRouter::ExecuteSQL( } case 
hybridse::node::kPlanTypeCreateUser: { auto create_node = dynamic_cast(node); - UserInfo user_info;; + UserInfo user_info; + ; auto result = GetUser(create_node->Name(), &user_info); if (!result.ok()) { *status = {StatusCode::kCmdError, result.status().message()}; @@ -2982,7 +3263,7 @@ std::shared_ptr SQLClusterRouter::ExecuteSQL( if (is_online_mode) { // Handle in online mode config.emplace("spark.insert_memory_usage_limit", - std::to_string(insert_memory_usage_limit_.load(std::memory_order_relaxed))); + std::to_string(insert_memory_usage_limit_.load(std::memory_order_relaxed))); base_status = ImportOnlineData(sql, config, database, is_sync_job, offline_job_timeout, &job_info); } else { // Handle in offline mode @@ -3499,10 +3780,11 @@ hybridse::sdk::Status SQLClusterRouter::LoadDataMultipleFile(int id, int step, c const std::vector& file_list, const openmldb::sdk::LoadOptionsMapParser& options_parser, uint64_t* count) { + *count = 0; for (const auto& file : file_list) { uint64_t cur_count = 0; auto status = LoadDataSingleFile(id, step, database, table, file, options_parser, &cur_count); - DLOG(INFO) << "[thread " << id << "] Loaded " << count << " rows in " << file; + DLOG(INFO) << "[thread " << id << "] Loaded " << cur_count << " rows in " << file; if (!status.IsOK()) { return status; } @@ -3671,6 +3953,7 @@ hybridse::sdk::Status SQLClusterRouter::HandleDelete(const std::string& db, cons if (!status.IsOK()) { return status; } + DLOG(INFO) << "delete option: " << option.DebugString(); status = SendDeleteRequst(table_info, option); if (status.IsOK() && db != nameserver::INTERNAL_DB) { status = { @@ -4918,10 +5201,10 @@ std::shared_ptr SQLClusterRouter::GetNameServerJobResu } absl::StatusOr SQLClusterRouter::GetUser(const std::string& name, UserInfo* user_info) { - std::string sql = absl::StrCat("select * from ", nameserver::USER_INFO_NAME); + std::string sql = absl::StrCat("select * from ", nameserver::USER_INFO_NAME); hybridse::sdk::Status status; - auto rs = ExecuteSQLParameterized(nameserver::INTERNAL_DB, sql, - std::shared_ptr(), &status); + auto rs = + ExecuteSQLParameterized(nameserver::INTERNAL_DB, sql, std::shared_ptr(), &status); if (rs == nullptr) { return absl::InternalError(status.msg); } @@ -4973,6 +5256,8 @@ hybridse::sdk::Status SQLClusterRouter::UpdateUser(const UserInfo& user_info, co } hybridse::sdk::Status SQLClusterRouter::DeleteUser(const std::string& name) { + std::string sql = + absl::StrCat("delete from ", nameserver::USER_INFO_NAME, " where host = '%' and user = '", name, "';"); hybridse::sdk::Status status; auto ns_client = cluster_sdk_->GetNsClient(); @@ -4994,12 +5279,10 @@ void SQLClusterRouter::AddUserToConfig(std::map* confi } } -::hybridse::sdk::Status SQLClusterRouter::RevertPut(const nameserver::TableInfo& table_info, - uint32_t end_pid, - const std::map>>& dimensions, - uint64_t ts, - const base::Slice& value, - const std::vector>& tablets) { +::hybridse::sdk::Status SQLClusterRouter::RevertPut( + const nameserver::TableInfo& table_info, uint32_t end_pid, + const std::map>>& dimensions, uint64_t ts, + const base::Slice& value, const std::vector>& tablets) { codec::RowView row_view(table_info.column_desc()); std::map column_map; for (int32_t i = 0; i < table_info.column_desc_size(); i++) { diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc new file mode 100644 index 00000000000..97b932cc5c2 --- /dev/null +++ b/src/storage/index_organized_table.cc @@ -0,0 +1,717 @@ +/* + * Copyright 2021 4Paradigm + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "storage/index_organized_table.h" + +#include + +#include "absl/strings/str_join.h" // dlog +#include "absl/strings/str_split.h" +#include "index_organized_table.h" +#include "sdk/sql_router.h" +#include "storage/iot_segment.h" + +DECLARE_uint32(absolute_default_skiplist_height); + +namespace openmldb::storage { + +IOTIterator* NewNullIterator() { + // if TimeEntries::Iterator is null, nothing will be used + return new IOTIterator(nullptr, type::CompressType::kNoCompress, {}); +} + +// TODO(hw): temp func to create iot iterator +IOTIterator* NewIOTIterator(Segment* segment, const Slice& key, Ticket& ticket, type::CompressType compress_type, + std::unique_ptr cidx_iter) { + void* entry = nullptr; + auto entries = segment->GetKeyEntries(); + if (entries == nullptr || segment->GetTsCnt() > 1 || entries->Get(key, entry) < 0 || entry == nullptr) { + return NewNullIterator(); + } + ticket.Push(reinterpret_cast(entry)); + return new IOTIterator(reinterpret_cast(entry)->entries.NewIterator(), compress_type, + std::move(cidx_iter)); +} + +IOTIterator* NewIOTIterator(Segment* segment, const Slice& key, uint32_t idx, Ticket& ticket, + type::CompressType compress_type, + std::unique_ptr cidx_iter) { + auto ts_idx_map = segment->GetTsIdxMap(); + auto pos = ts_idx_map.find(idx); + if (pos == ts_idx_map.end()) { + LOG(WARNING) << "can't find idx in segment"; + return NewNullIterator(); + } + auto entries = segment->GetKeyEntries(); + if (segment->GetTsCnt() == 1) { + return NewIOTIterator(segment, key, ticket, compress_type, std::move(cidx_iter)); + } + void* entry_arr = nullptr; + if (entries->Get(key, entry_arr) < 0 || entry_arr == nullptr) { + return NewNullIterator(); + } + auto entry = reinterpret_cast(entry_arr)[pos->second]; + ticket.Push(entry); + return new IOTIterator(entry->entries.NewIterator(), compress_type, std::move(cidx_iter)); +} + +TableIterator* IndexOrganizedTable::NewIterator(uint32_t index, const std::string& pk, Ticket& ticket) { + std::shared_ptr index_def = table_index_.GetIndex(index); + if (!index_def || !index_def->IsReady()) { + LOG(WARNING) << "index is invalid"; + return nullptr; + } + DLOG(INFO) << "new iter for index and pk " << index << " name " << index_def->GetName(); + uint32_t seg_idx = SegIdx(pk); + Slice spk(pk); + uint32_t real_idx = index_def->GetInnerPos(); + Segment* segment = GetSegment(real_idx, seg_idx); + auto ts_col = index_def->GetTsColumn(); + if (ts_col) { + // if secondary, use iot iterator + if (index_def->IsSecondaryIndex()) { + // get clustered index iter for secondary index + auto handler = catalog_->GetTable(GetDB(), GetName()); + if (!handler) { + LOG(WARNING) << "no TableHandler for " << GetDB() << "." << GetName(); + return nullptr; + } + auto tablet_table_handler = std::dynamic_pointer_cast(handler); + if (!tablet_table_handler) { + LOG(WARNING) << "convert TabletTableHandler failed for " << GetDB() << "." 
<< GetName(); + return nullptr; + } + LOG(INFO) << "create iot iterator for pk"; + // TODO(hw): iter may be invalid if catalog updated + auto iter = + NewIOTIterator(segment, spk, ts_col->GetId(), ticket, GetCompressType(), + std::move(tablet_table_handler->GetWindowIterator(table_index_.GetIndex(0)->GetName()))); + return iter; + } + // clsutered and covering still use old iterator + return segment->NewIterator(spk, ts_col->GetId(), ticket, GetCompressType()); + } + // cidx without ts? or invalid case + DLOG(INFO) << "index ts col is null, reate no-ts iterator"; + // TODO(hw): sidx without ts? + return segment->NewIterator(spk, ticket, GetCompressType()); +} + +TraverseIterator* IndexOrganizedTable::NewTraverseIterator(uint32_t index) { + std::shared_ptr index_def = GetIndex(index); + if (!index_def || !index_def->IsReady()) { + PDLOG(WARNING, "index %u not found. tid %u pid %u", index, id_, pid_); + return nullptr; + } + DLOG(INFO) << "new traverse iter for index " << index << " name " << index_def->GetName(); + uint64_t expire_time = 0; + uint64_t expire_cnt = 0; + auto ttl = index_def->GetTTL(); + if (GetExpireStatus()) { // gc enabled + expire_time = GetExpireTime(*ttl); + expire_cnt = ttl->lat_ttl; + } + uint32_t real_idx = index_def->GetInnerPos(); + auto ts_col = index_def->GetTsColumn(); + if (ts_col) { + // if secondary, use iot iterator + if (index_def->IsSecondaryIndex()) { + // get clustered index iter for secondary index + auto handler = catalog_->GetTable(GetDB(), GetName()); + if (!handler) { + LOG(WARNING) << "no TableHandler for " << GetDB() << "." << GetName(); + return nullptr; + } + auto tablet_table_handler = std::dynamic_pointer_cast(handler); + if (!tablet_table_handler) { + LOG(WARNING) << "convert TabletTableHandler failed for " << GetDB() << "." << GetName(); + return nullptr; + } + LOG(INFO) << "create iot traverse iterator for traverse"; + // TODO(hw): iter may be invalid if catalog updated + auto iter = new IOTTraverseIterator( + GetSegments(real_idx), GetSegCnt(), ttl->ttl_type, expire_time, expire_cnt, ts_col->GetId(), + GetCompressType(), + std::move(tablet_table_handler->GetWindowIterator(table_index_.GetIndex(0)->GetName()))); + return iter; + } + DLOG(INFO) << "create memtable traverse iterator for traverse"; + return new MemTableTraverseIterator(GetSegments(real_idx), GetSegCnt(), ttl->ttl_type, expire_time, expire_cnt, + ts_col->GetId(), GetCompressType()); + } + DLOG(INFO) << "index ts col is null, reate no-ts iterator"; + return new MemTableTraverseIterator(GetSegments(real_idx), GetSegCnt(), ttl->ttl_type, expire_time, expire_cnt, 0, + GetCompressType()); +} + +::hybridse::vm::WindowIterator* IndexOrganizedTable::NewWindowIterator(uint32_t index) { + std::shared_ptr index_def = table_index_.GetIndex(index); + if (!index_def || !index_def->IsReady()) { + LOG(WARNING) << "index id " << index << " not found. tid " << id_ << " pid " << pid_; + return nullptr; + } + LOG(INFO) << "new window iter for index " << index << " name " << index_def->GetName(); + uint64_t expire_time = 0; + uint64_t expire_cnt = 0; + auto ttl = index_def->GetTTL(); + if (GetExpireStatus()) { + expire_time = GetExpireTime(*ttl); + expire_cnt = ttl->lat_ttl; + } + uint32_t real_idx = index_def->GetInnerPos(); + auto ts_col = index_def->GetTsColumn(); + uint32_t ts_idx = 0; + if (ts_col) { + ts_idx = ts_col->GetId(); + } + DLOG(INFO) << "ts is null? 
" << ts_col << ", ts_idx " << ts_idx; + // if secondary, use iot iterator + if (index_def->IsSecondaryIndex()) { + // get clustered index iter for secondary index + auto handler = catalog_->GetTable(GetDB(), GetName()); + if (!handler) { + LOG(WARNING) << "no TableHandler for " << GetDB() << "." << GetName(); + return nullptr; + } + auto tablet_table_handler = std::dynamic_pointer_cast(handler); + if (!tablet_table_handler) { + LOG(WARNING) << "convert TabletTableHandler failed for " << GetDB() << "." << GetName(); + return nullptr; + } + LOG(INFO) << "create iot key traverse iterator for window"; + // TODO(hw): iter may be invalid if catalog updated + auto iter = + new IOTKeyIterator(GetSegments(real_idx), GetSegCnt(), ttl->ttl_type, expire_time, expire_cnt, ts_idx, + GetCompressType(), tablet_table_handler, table_index_.GetIndex(0)->GetName()); + return iter; + } + return new MemTableKeyIterator(GetSegments(real_idx), GetSegCnt(), ttl->ttl_type, expire_time, expire_cnt, ts_idx, + GetCompressType()); +} + +bool IndexOrganizedTable::Init() { + if (!InitMeta()) { + LOG(WARNING) << "init meta failed. tid " << id_ << " pid " << pid_; + return false; + } + // IOTSegment should know which is the cidx, sidx and covering idx are both duplicate(even the values are different) + auto inner_indexs = table_index_.GetAllInnerIndex(); + for (uint32_t i = 0; i < inner_indexs->size(); i++) { + const std::vector& ts_vec = inner_indexs->at(i)->GetTsIdx(); + uint32_t cur_key_entry_max_height = KeyEntryMaxHeight(inner_indexs->at(i)); + + Segment** seg_arr = new Segment*[seg_cnt_]; + DLOG_ASSERT(!ts_vec.empty()) << "must have ts, include auto gen ts"; + if (!ts_vec.empty()) { + for (uint32_t j = 0; j < seg_cnt_; j++) { + // let segment know whether it is cidx + seg_arr[j] = new IOTSegment(cur_key_entry_max_height, ts_vec, inner_indexs->at(i)->GetTsIdxType()); + PDLOG(INFO, "init %u, %u segment. height %u, ts col num %u. tid %u pid %u", i, j, + cur_key_entry_max_height, ts_vec.size(), id_, pid_); + } + } else { + // unavaildable + for (uint32_t j = 0; j < seg_cnt_; j++) { + seg_arr[j] = new IOTSegment(cur_key_entry_max_height); + PDLOG(INFO, "init %u, %u segment. 
height %u tid %u pid %u", i, j, cur_key_entry_max_height, id_, pid_); + } + } + segments_[i] = seg_arr; + key_entry_max_height_ = cur_key_entry_max_height; + } + LOG(INFO) << "init iot table name " << name_ << ", id " << id_ << ", pid " << pid_ << ", seg_cnt " << seg_cnt_; + return true; +} + +bool IndexOrganizedTable::Put(const std::string& pk, uint64_t time, const char* data, uint32_t size) { + uint32_t seg_idx = SegIdx(pk); + Segment* segment = GetSegment(0, seg_idx); + if (segment == nullptr) { + return false; + } + Slice spk(pk); + segment->Put(spk, time, data, size); + record_byte_size_.fetch_add(GetRecordSize(size)); + return true; +} + +absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, const Dimensions& dimensions, + bool put_if_absent) { + if (dimensions.empty()) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": empty dimension")); + } + // inner index pos: -1 means invalid, so it's positive in inner_index_key_map + std::map inner_index_key_map; + std::pair cidx_inner_key_pair{-1, ""}; + std::vector secondary_inners; + for (auto iter = dimensions.begin(); iter != dimensions.end(); iter++) { + int32_t inner_pos = table_index_.GetInnerIndexPos(iter->idx()); + if (inner_pos < 0) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": invalid dimension idx ", iter->idx())); + } + if (iter->idx() == 0) { + cidx_inner_key_pair = {inner_pos, iter->key()}; + } + inner_index_key_map.emplace(inner_pos, iter->key()); + } + + const int8_t* data = reinterpret_cast(value.data()); + std::string uncompress_data; + uint32_t data_length = value.length(); + if (GetCompressType() == openmldb::type::kSnappy) { + snappy::Uncompress(value.data(), value.size(), &uncompress_data); + data = reinterpret_cast(uncompress_data.data()); + data_length = uncompress_data.length(); + } + if (data_length < codec::HEADER_LENGTH) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": invalid value")); + } + uint8_t version = codec::RowView::GetSchemaVersion(data); + auto decoder = GetVersionDecoder(version); + if (decoder == nullptr) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": invalid schema version ", version)); + } + std::optional clustered_tsv; + std::map> ts_value_map; + // we need two ref cnt + // 1. clustered and covering: put row ->DataBlock(i) + // 2. secondary: put pkeys+pts -> DataBlock(j) + uint32_t real_ref_cnt = 0, secondary_ref_cnt = 0; + // cidx_inner_key_pair can get the clustered index + for (const auto& kv : inner_index_key_map) { + auto inner_index = table_index_.GetInnerIndex(kv.first); + if (!inner_index) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": invalid inner index pos ", kv.first)); + } + std::map ts_map; + for (const auto& index_def : inner_index->GetIndex()) { + if (!index_def->IsReady()) { + continue; + } + auto ts_col = index_def->GetTsColumn(); + if (ts_col) { + int64_t ts = 0; + if (ts_col->IsAutoGenTs()) { + // clustered index still use current time to ttl and delete iter, we'll check time series size if ts is auto gen + ts = time; + } else if (decoder->GetInteger(data, ts_col->GetId(), ts_col->GetType(), &ts) != 0) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": get ts failed")); + } + if (ts < 0) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": ts is negative ", ts)); + } + // TODO(hw): why uint32_t to int32_t? 
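+                // Note (editor, descriptive comment only): ts_map records one timestamp per ts
+                // column id, and the two counters below decide how many index entries will share
+                // each DataBlock: clustered/covering entries reference the full row block (cblock),
+                // while secondary entries reference a packed pkeys+pts block (sblock), so their
+                // reference counts are tracked separately. clustered_tsv remembers the clustered
+                // ts so it can be packed into the secondary block later.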
+ ts_map.emplace(ts_col->GetId(), ts); + + if (index_def->IsSecondaryIndex()) { + secondary_ref_cnt++; + } else { + real_ref_cnt++; + } + if (index_def->IsClusteredIndex()) { + clustered_tsv = ts; + } + } + } + if (!ts_map.empty()) { + ts_value_map.emplace(kv.first, std::move(ts_map)); + } + } + if (ts_value_map.empty()) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": empty ts value map")); + } + // it's ok to have no clustered/covering put or no secondary put, put will be applyed on other pid + // but if no clustered/covering put and no secondary put, it's invalid, check it in put-loop + DataBlock* cblock = nullptr; + DataBlock* sblock = nullptr; + if (real_ref_cnt > 0) { + cblock = new DataBlock(real_ref_cnt, value.c_str(), value.length()); // hard copy + } + if (secondary_ref_cnt > 0) { + // dimensions may not contain cidx, but we need cidx pkeys+pts for secondary index + // if contains, just use the key; if not, extract from value + if (cidx_inner_key_pair.first == -1) { + DLOG(INFO) << "cidx not in dimensions, extract from value"; + auto cidx = table_index_.GetIndex(0); + auto hint = MakePkeysHint(table_meta_->column_desc(), table_meta_->column_key(0)); + if (hint.empty()) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": cidx pkeys hint empty")); + } + cidx_inner_key_pair.second = + ExtractPkeys(table_meta_->column_key(0), (int8_t*)value.c_str(), *decoder, hint); + if (cidx_inner_key_pair.second.empty()) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": cidx pkeys+pts extract failed")); + } + DLOG_ASSERT(!clustered_tsv) << "clustered ts should not be set too"; + auto ts_col = cidx->GetTsColumn(); + if (!ts_col) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ":no ts column in cidx")); + } + int64_t ts = 0; + if (ts_col->IsAutoGenTs()) { + // clustered index still use current time to ttl and delete iter, we'll check time series size if ts is auto gen + ts = time; + } else if (decoder->GetInteger(data, ts_col->GetId(), ts_col->GetType(), &ts) != 0) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": get ts failed")); + } + if (ts < 0) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": ts is negative ", ts)); + } + clustered_tsv = ts; + } + auto pkeys_pts = PackPkeysAndPts(cidx_inner_key_pair.second, clustered_tsv.value()); + if (GetCompressType() == type::kSnappy) { // sidx iterator will uncompress when getting pkeys+pts + std::string val; + ::snappy::Compress(pkeys_pts.c_str(), pkeys_pts.length(), &val); + sblock = new DataBlock(secondary_ref_cnt, val.c_str(), val.length()); + } else { + sblock = new DataBlock(secondary_ref_cnt, pkeys_pts.c_str(), pkeys_pts.length()); // hard copy + } + } + DLOG(INFO) << "put iot table " << id_ << "." << pid_ << " key+ts " << cidx_inner_key_pair.second << " - " + << (clustered_tsv ? 
std::to_string(clustered_tsv.value()) : "-1") << ", real ref cnt " << real_ref_cnt + << " secondary ref cnt " << secondary_ref_cnt; + + for (const auto& kv : inner_index_key_map) { + auto iter = ts_value_map.find(kv.first); + if (iter == ts_value_map.end()) { + continue; + } + uint32_t seg_idx = SegIdx(kv.second.ToString()); + auto iot_segment = dynamic_cast(GetSegment(kv.first, seg_idx)); + // TODO(hw): put if absent unsupportted + if (put_if_absent) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": iot put if absent is not supported")); + } + // clustered segment should be dedup and update will trigger all index update(impl in cli router) + if (!iot_segment->Put(kv.second, iter->second, cblock, sblock, false)) { + return absl::AlreadyExistsError("data exists"); // let caller know exists + } + } + record_byte_size_.fetch_add(GetRecordSize(value.length())); + return absl::OkStatus(); +} + +absl::Status IndexOrganizedTable::CheckDataExists(uint64_t tsv, const Dimensions& dimensions) { + // get cidx dim + if (dimensions.empty()) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": empty dimension")); + } + // inner index pos: -1 means invalid, so it's positive in inner_index_key_map + std::pair cidx_inner_key_pair{-1, ""}; + for (auto iter = dimensions.begin(); iter != dimensions.end(); iter++) { + int32_t inner_pos = table_index_.GetInnerIndexPos(iter->idx()); + if (inner_pos < 0) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": invalid dimension idx ", iter->idx())); + } + if (iter->idx() == 0) { + cidx_inner_key_pair = {inner_pos, iter->key()}; + } + } + if (cidx_inner_key_pair.first == -1) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": cidx not found")); + } + auto cidx = table_index_.GetIndex(0); + if (!cidx->IsReady()) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": cidx is not ready")); + } + auto ts_col = cidx->GetTsColumn(); + if (!ts_col) { + return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": no ts column")); + } + DLOG(INFO) << "check iot table " << id_ << "." 
<< pid_ << " key+ts " << cidx_inner_key_pair.second << " - " << tsv + << ", on index " << cidx->GetName() << " ts col " << ts_col->GetId(); + + uint32_t seg_idx = SegIdx(cidx_inner_key_pair.second); + auto iot_segment = dynamic_cast(GetSegment(cidx_inner_key_pair.first, seg_idx)); + // ts id -> ts value + return iot_segment->CheckKeyExists(cidx_inner_key_pair.second, {{ts_col->GetId(), tsv}}); +} + +// , error if empty +std::map> IndexOrganizedTable::MakePkeysHint( + const codec::Schema& schema, const common::ColumnKey& cidx_ck) { + if (cidx_ck.col_name().empty()) { + LOG(WARNING) << "empty cidx column key"; + return {}; + } + // pkey col idx in row + std::set pkey_set; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + pkey_set.insert(cidx_ck.col_name().Get(i)); + } + if (pkey_set.empty()) { + LOG(WARNING) << "empty pkey set"; + return {}; + } + if (pkey_set.size() != static_cast::size_type>(cidx_ck.col_name().size())) { + LOG(WARNING) << "pkey set size not equal to cidx pkeys size"; + return {}; + } + std::map> col_idx; + for (int i = 0; i < schema.size(); i++) { + if (pkey_set.find(schema.Get(i).name()) != pkey_set.end()) { + col_idx[schema.Get(i).name()] = {i, schema.Get(i).data_type()}; + } + } + if (col_idx.size() != pkey_set.size()) { + LOG(WARNING) << "col idx size not equal to cidx pkeys size"; + return {}; + } + return col_idx; +} + +// error if empty +std::string IndexOrganizedTable::MakeDeleteSQL( + const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, const int8_t* values, uint64_t ts, + const codec::RowView& row_view, const std::map>& col_idx) { + auto sql_prefix = absl::StrCat("delete from ", db, ".", name, " where "); + std::string cond; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + // append primary keys, pkeys in dimension are encoded, so we should get them from raw value + // split can't work if string has `|` + auto& col_name = cidx_ck.col_name().Get(i); + auto col = col_idx.find(col_name); + if (col == col_idx.end()) { + LOG(WARNING) << "col " << col_name << " not found in col idx"; + return ""; + } + std::string val; + row_view.GetStrValue(values, col->second.first, &val); + if (!cond.empty()) { + absl::StrAppend(&cond, " and "); + } + // TODO(hw): string should add quotes how about timestamp? + // check existence before, so here we skip + absl::StrAppend(&cond, col_name); + if (auto t = col->second.second; t == type::kVarchar || t == type::kString) { + absl::StrAppend(&cond, "=\"", val, "\""); + } else { + absl::StrAppend(&cond, "=", val); + } + } + // ts must be integer, won't be string + if (!cidx_ck.ts_name().empty() && cidx_ck.ts_name() != storage::DEFAULT_TS_COL_NAME) { + if (!cond.empty()) { + absl::StrAppend(&cond, " and "); + } + absl::StrAppend(&cond, cidx_ck.ts_name(), "=", std::to_string(ts)); + } + auto sql = absl::StrCat(sql_prefix, cond, ";"); + // TODO(hw): if delete failed, we can't revert. 
And if sidx skeys+sts doesn't change, no need to delete and + // then insert + DLOG(INFO) << "delete sql " << sql; + return sql; +} + +// error if empty +std::string IndexOrganizedTable::ExtractPkeys( + const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, + const std::map>& col_idx) { + // join with | + std::vector pkeys; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + auto& col_name = cidx_ck.col_name().Get(i); + auto col = col_idx.find(col_name); + if (col == col_idx.end()) { + LOG(WARNING) << "col " << col_name << " not found in col idx"; + return ""; + } + std::string val; + row_view.GetStrValue(values, col->second.first, &val); + pkeys.push_back(val); + } + return absl::StrJoin(pkeys, "|"); +} + +// index gc should try to do ExecuteGc for each waiting segment, but if some segments are gc before, we should release +// them so it will be a little complex +// should run under lock +absl::Status IndexOrganizedTable::ClusteredIndexGCByDelete(const std::shared_ptr& router) { + auto cur_index = table_index_.GetIndex(0); + if (!cur_index) { + return absl::FailedPreconditionError( + absl::StrCat("cidx def is null for ", id_, ".", pid_)); // why index is null? + } + if (!cur_index->IsClusteredIndex()) { + return absl::InternalError(absl::StrCat("cidx is not clustered for ", id_, ".", pid_)); // immpossible + } + if (!cur_index->IsReady()) { + return absl::FailedPreconditionError( + absl::StrCat("cidx is not ready for ", id_, ".", pid_, ", status ", cur_index->GetStatus())); + } + auto& ts_col = cur_index->GetTsColumn(); + // sometimes index def is valid, but ts_col is nullptr? protect it + if (!ts_col) { + return absl::FailedPreconditionError( + absl::StrCat("no ts col of cidx for ", id_, ".", pid_)); // current time ts can be get too + } + // clustered index grep all entries or less to delete(it's simpler to run delete sql) + // not the real gc, so don't change index status + auto i = cur_index->GetId(); + std::map ttl_st_map; + // only set cidx + ttl_st_map.emplace(ts_col->GetId(), *cur_index->GetTTL()); + GCEntryInfo info; // not thread safe + for (uint32_t j = 0; j < seg_cnt_; j++) { + uint64_t seg_gc_time = ::baidu::common::timer::get_micros() / 1000; + Segment* segment = segments_[i][j]; + auto iot_segment = dynamic_cast(segment); + iot_segment->GrepGCEntry(ttl_st_map, &info); + seg_gc_time = ::baidu::common::timer::get_micros() / 1000 - seg_gc_time; + PDLOG(INFO, "grep cidx segment[%u][%u] gc entries done consumed %lu for table %s tid %u pid %u", i, j, + seg_gc_time, name_.c_str(), id_, pid_); + } + // delete entries by sql + if (info.Size() > 0) { + LOG(INFO) << "delete cidx " << info.Size() << " entries by sql"; + + auto meta = GetTableMeta(); + auto cols = meta->column_desc(); // copy + codec::RowView row_view(cols); + auto hint = MakePkeysHint(cols, meta->column_key(0)); + if (hint.empty()) { + return absl::InternalError("make pkeys hint failed"); + } + for (size_t i = 0; i < info.Size(); i++) { + auto& keys_ts = info.GetEntries()[i]; + auto values = keys_ts.second; // get pkeys from values + auto ts = keys_ts.first; + auto sql = + MakeDeleteSQL(GetDB(), GetName(), meta->column_key(0), (int8_t*)values->data, ts, row_view, hint); + // TODO(hw): if delete failed, we can't revert. 
And if sidx skeys+sts doesn't change, no need to delete and + // then insert + DLOG(INFO) << "delete sql " << sql; + if (sql.empty()) { + return absl::InternalError("make delete sql failed"); + } + hybridse::sdk::Status status; + router->ExecuteSQL(sql, &status); + if (!status.IsOK()) { + return absl::InternalError("execute sql failed " + status.ToString()); + } + } + } + + return absl::OkStatus(); +} + +// TODO(hw): don't refactor with MemTable, make MemTable stable +void IndexOrganizedTable::SchedGCByDelete(const std::shared_ptr& router) { + std::lock_guard lock(gc_lock_); + uint64_t consumed = ::baidu::common::timer::get_micros(); + if (!enable_gc_.load(std::memory_order_relaxed)) { + LOG(INFO) << "iot table " << name_ << "[" << id_ << "." << pid_ << "] gc disabled"; + return; + } + LOG(INFO) << "iot table " << name_ << "[" << id_ << "." << pid_ << "] start making gc"; + // gc cidx first, it'll delete on all indexes + auto st = ClusteredIndexGCByDelete(router); + if (!st.ok()) { + LOG(WARNING) << "cidx gc by delete error: " << st.ToString(); + } + // TODO(hw): don't gc sidx or covering index? + // may core on GcFreeList + // but record cnt in segment and tablet status can't change if no gc or free + // for all index, only do free? don't do gc TODO how to check the record cnt? + uint64_t gc_idx_cnt = 0; + uint64_t gc_record_byte_size = 0; + auto inner_indexs = table_index_.GetAllInnerIndex(); + for (uint32_t i = 0; i < inner_indexs->size(); i++) { + const std::vector>& real_index = inner_indexs->at(i)->GetIndex(); + std::map ttl_st_map; + bool need_gc = true; + size_t deleted_num = 0; + std::vector deleting_pos; + for (size_t pos = 0; pos < real_index.size(); pos++) { + auto cur_index = real_index[pos]; + auto ts_col = cur_index->GetTsColumn(); + if (ts_col) { + ttl_st_map.emplace(ts_col->GetId(), *(cur_index->GetTTL())); + } + if (cur_index->GetStatus() == IndexStatus::kWaiting) { + cur_index->SetStatus(IndexStatus::kDeleting); + need_gc = false; + } else if (cur_index->GetStatus() == IndexStatus::kDeleting) { + deleting_pos.push_back(pos); + } else if (cur_index->GetStatus() == IndexStatus::kDeleted) { + deleted_num++; + } + } + if (!deleting_pos.empty()) { + if (segments_[i] != nullptr) { + for (uint32_t k = 0; k < seg_cnt_; k++) { + if (segments_[i][k] != nullptr) { + StatisticsInfo statistics_info(segments_[i][k]->GetTsCnt()); + if (real_index.size() == 1 || deleting_pos.size() + deleted_num == real_index.size()) { + segments_[i][k]->ReleaseAndCount(&statistics_info); + } else { + segments_[i][k]->ReleaseAndCount(deleting_pos, &statistics_info); + } + gc_idx_cnt += statistics_info.GetTotalCnt(); + gc_record_byte_size += statistics_info.record_byte_size; + } + } + } + for (auto pos : deleting_pos) { + real_index[pos]->SetStatus(IndexStatus::kDeleted); + } + deleted_num += deleting_pos.size(); + } + if (!need_gc) { + continue; + } + // skip cidx gc in segment, gcfreelist shouldn't be skiped, so we don't change the condition + if (deleted_num == real_index.size() || ttl_st_map.empty()) { + continue; + } + for (uint32_t j = 0; j < seg_cnt_; j++) { + uint64_t seg_gc_time = ::baidu::common::timer::get_micros() / 1000; + auto segment = dynamic_cast(segments_[i][j]); + StatisticsInfo statistics_info(segment->GetTsCnt()); + segment->IncrGcVersion(); + segment->GcFreeList(&statistics_info); + // don't gc in cidx, it's not a good way to impl, refactor later + segment->ExecuteGc(ttl_st_map, &statistics_info, segment->ClusteredTs()); + gc_idx_cnt += statistics_info.GetTotalCnt(); + 
gc_record_byte_size += statistics_info.record_byte_size; + seg_gc_time = ::baidu::common::timer::get_micros() / 1000 - seg_gc_time; + PDLOG(INFO, "gc segment[%u][%u] done consumed %lu for table %s tid %u pid %u", i, j, seg_gc_time, + name_.c_str(), id_, pid_); + } + } + consumed = ::baidu::common::timer::get_micros() - consumed; + record_byte_size_.fetch_sub(gc_record_byte_size, std::memory_order_relaxed); + UpdateTTL(); + LOG(INFO) << "iot table " << name_ << "[" << id_ << "." << pid_ << "] gc and update ttl done: " << consumed / 1000 + << " ms, total gc cnt " << gc_idx_cnt; +} + +bool IndexOrganizedTable::AddIndexToTable(const std::shared_ptr& index_def) { + std::vector ts_vec = {index_def->GetTsColumn()->GetId()}; + uint32_t inner_id = index_def->GetInnerPos(); + Segment** seg_arr = new Segment*[seg_cnt_]; + for (uint32_t j = 0; j < seg_cnt_; j++) { + seg_arr[j] = new IOTSegment(FLAGS_absolute_default_skiplist_height, ts_vec, {index_def->GetIndexType()}); + LOG(INFO) << "init iot segment inner_ts" << inner_id << "." << j << " for table " << name_ << "[" << id_ << "." + << pid_ << "], height " << FLAGS_absolute_default_skiplist_height << ", ts col num " << ts_vec.size() + << ", type " << IndexType_Name(index_def->GetIndexType()); + } + segments_[inner_id] = seg_arr; + return true; +} + +} // namespace openmldb::storage diff --git a/src/storage/index_organized_table.h b/src/storage/index_organized_table.h new file mode 100644 index 00000000000..a037c8cf05a --- /dev/null +++ b/src/storage/index_organized_table.h @@ -0,0 +1,77 @@ +/* + * Copyright 2021 4Paradigm + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SRC_STORAGE_INDEX_ORGANIZED_TABLE_H_ +#define SRC_STORAGE_INDEX_ORGANIZED_TABLE_H_ + +#include + +#include "catalog/tablet_catalog.h" +#include "storage/mem_table.h" + +namespace openmldb::storage { + +class IndexOrganizedTable : public MemTable { + public: + IndexOrganizedTable(const ::openmldb::api::TableMeta& table_meta, std::shared_ptr catalog) + : MemTable(table_meta), catalog_(catalog) {} + + TableIterator* NewIterator(uint32_t index, const std::string& pk, Ticket& ticket) override; + + TraverseIterator* NewTraverseIterator(uint32_t index) override; + + ::hybridse::vm::WindowIterator* NewWindowIterator(uint32_t index) override; + + bool Init() override; + + bool Put(const std::string& pk, uint64_t time, const char* data, uint32_t size) override; + + absl::Status Put(uint64_t time, const std::string& value, const Dimensions& dimensions, + bool put_if_absent) override; + + absl::Status CheckDataExists(uint64_t tsv, const Dimensions& dimensions); + + // TODO(hw): iot bulk load unsupported + bool GetBulkLoadInfo(::openmldb::api::BulkLoadInfoResponse* response) { return false; } + bool BulkLoad(const std::vector& data_blocks, + const ::google::protobuf::RepeatedPtrField<::openmldb::api::BulkLoadIndex>& indexes) { + return false; + } + bool AddIndexToTable(const std::shared_ptr& index_def) override; + + void SchedGCByDelete(const std::shared_ptr& router); + + static std::map> MakePkeysHint(const codec::Schema& schema, + const common::ColumnKey& cidx_ck); + static std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, + const int8_t* values, uint64_t ts, const codec::RowView& row_view, + const std::map>& col_idx); + static std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, + const codec::RowView& row_view, + const std::map>& col_idx); + + private: + absl::Status ClusteredIndexGCByDelete(const std::shared_ptr& router); + + private: + // to get current distribute iterator + std::shared_ptr catalog_; + + std::mutex gc_lock_; +}; +} // namespace openmldb::storage + +#endif diff --git a/src/storage/iot_segment.cc b/src/storage/iot_segment.cc new file mode 100644 index 00000000000..13cf1e55bef --- /dev/null +++ b/src/storage/iot_segment.cc @@ -0,0 +1,411 @@ +/* + * Copyright 2021 4Paradigm + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "storage/iot_segment.h" + +#include "iot_segment.h" + +namespace openmldb::storage { +base::Slice RowToSlice(const ::hybridse::codec::Row& row) { + butil::IOBuf buf; + size_t size; + if (codec::EncodeRpcRow(row, &buf, &size)) { + auto r = new char[buf.size()]; + buf.copy_to(r); // TODO(hw): don't copy, move it to slice + // slice own the new r + return {r, size, true}; + } + LOG(WARNING) << "convert row to slice failed"; + return {}; +} + +std::string PackPkeysAndPts(const std::string& pkeys, uint64_t pts) { + std::string buf; + uint32_t pkeys_size = pkeys.size(); + buf.append(reinterpret_cast(&pkeys_size), sizeof(uint32_t)); + buf.append(pkeys); + buf.append(reinterpret_cast(&pts), sizeof(uint64_t)); + return buf; +} + +bool UnpackPkeysAndPts(const std::string& block, std::string* pkeys, uint64_t* pts) { + DLOG_ASSERT(block.size() >= sizeof(uint32_t) + sizeof(uint64_t)) << "block size is " << block.size(); + uint32_t offset = 0; + uint32_t pkeys_size = *reinterpret_cast(block.data() + offset); + offset += sizeof(uint32_t); + pkeys->assign(block.data() + offset, pkeys_size); + offset += pkeys_size; + *pts = *reinterpret_cast(block.data() + offset); + DLOG_ASSERT(offset + sizeof(uint64_t) == block.size()) + << "offset is " << offset << " block size is " << block.size(); + return true; +} + +// put_if_absent unsupported, iot table will reject put, no need to check here, just ignore +bool IOTSegment::PutUnlock(const Slice& key, uint64_t time, DataBlock* row, bool put_if_absent, bool auto_gen_ts) { + void* entry = nullptr; + uint32_t byte_size = 0; + // one key just one entry + int ret = entries_->Get(key, entry); + if (ret < 0 || entry == nullptr) { + char* pk = new char[key.size()]; + memcpy(pk, key.data(), key.size()); + // need to delete memory when free node + Slice skey(pk, key.size()); + entry = reinterpret_cast(new KeyEntry(key_entry_max_height_)); + uint8_t height = entries_->Insert(skey, entry); + byte_size += GetRecordPkIdxSize(height, key.size(), key_entry_max_height_); + pk_cnt_.fetch_add(1, std::memory_order_relaxed); + // no need to check if absent when first put + } else if (IsClusteredTs(ts_idx_map_.begin()->first)) { + // if cidx and key match, check ts -> insert or update + if (auto_gen_ts) { + // cidx(keys) has just one entry for one keys, so if keys exists, needs delete + LOG_IF(ERROR, reinterpret_cast(entry)->entries.GetSize() > 1) + << "cidx keys has more than one entry, " << reinterpret_cast(entry)->entries.GetSize(); + // TODO(hw): client will delete old row, so if pkeys exists when auto ts, fail it + return false; + } else { + // cidx(keys+ts) check if ts match + if (ListContains(reinterpret_cast(entry), time, row, false)) { + LOG(WARNING) << "key " << key.ToString() << " ts " << time << " exists in cidx"; + return false; + } + } + } + + idx_cnt_vec_[0]->fetch_add(1, std::memory_order_relaxed); + uint8_t height = reinterpret_cast(entry)->entries.Insert(time, row); + reinterpret_cast(entry)->count_.fetch_add(1, std::memory_order_relaxed); + byte_size += GetRecordTsIdxSize(height); + idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); + return true; +} + +bool IOTSegment::Put(const Slice& key, const std::map& ts_map, DataBlock* cblock, DataBlock* sblock, + bool put_if_absent) { + if (ts_map.empty()) { + return false; + } + if (ts_cnt_ == 1) { + bool ret = false; + if (auto pos = ts_map.find(ts_idx_map_.begin()->first); pos != ts_map.end()) { + // TODO(hw): why ts_map key is int32_t, default ts is uint32_t? 
+ ret = Segment::Put(key, pos->second, + (index_types_[ts_idx_map_.begin()->second] == common::kSecondary ? sblock : cblock), + false, pos->first == DEFAULT_TS_COL_ID); + } + return ret; + } + void* entry_arr = nullptr; + std::lock_guard lock(mu_); + for (const auto& kv : ts_map) { + uint32_t byte_size = 0; + auto pos = ts_idx_map_.find(kv.first); + if (pos == ts_idx_map_.end()) { + continue; + } + if (entry_arr == nullptr) { + int ret = entries_->Get(key, entry_arr); + if (ret < 0 || entry_arr == nullptr) { + char* pk = new char[key.size()]; + memcpy(pk, key.data(), key.size()); + Slice skey(pk, key.size()); + KeyEntry** entry_arr_tmp = new KeyEntry*[ts_cnt_]; + for (uint32_t i = 0; i < ts_cnt_; i++) { + entry_arr_tmp[i] = new KeyEntry(key_entry_max_height_); + } + entry_arr = reinterpret_cast(entry_arr_tmp); + uint8_t height = entries_->Insert(skey, entry_arr); + byte_size += GetRecordPkMultiIdxSize(height, key.size(), key_entry_max_height_, ts_cnt_); + pk_cnt_.fetch_add(1, std::memory_order_relaxed); + } + } + auto entry = reinterpret_cast(entry_arr)[pos->second]; + auto auto_gen_ts = (pos->first == DEFAULT_TS_COL_ID); + auto pblock = (index_types_[pos->second] == common::kSecondary ? sblock : cblock); + if (IsClusteredTs(pos->first)) { + // if cidx and key match, check ts -> insert or update + if (auto_gen_ts) { + // cidx(keys) has just one entry for one keys, so if keys exists, needs delete + LOG_IF(ERROR, reinterpret_cast(entry)->entries.GetSize() > 1) + << "cidx keys has more than one entry, " << reinterpret_cast(entry)->entries.GetSize(); + // TODO(hw): client will delete old row, so if pkeys exists when auto ts, fail it + if (reinterpret_cast(entry)->entries.GetSize() > 0) { + LOG(WARNING) << "key " << key.ToString() << " exists in cidx"; + return false; + } + } else { + // cidx(keys+ts) check if ts match + if (ListContains(reinterpret_cast(entry), kv.second, pblock, false)) { + LOG(WARNING) << "key " << key.ToString() << " ts " << kv.second << " exists in cidx"; + return false; + } + } + } + uint8_t height = entry->entries.Insert(kv.second, pblock); + entry->count_.fetch_add(1, std::memory_order_relaxed); + byte_size += GetRecordTsIdxSize(height); + idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); + idx_cnt_vec_[pos->second]->fetch_add(1, std::memory_order_relaxed); + } + return true; +} + +absl::Status IOTSegment::CheckKeyExists(const Slice& key, const std::map& ts_map) { + // check lock + void* entry_arr = nullptr; + std::lock_guard lock(mu_); // need shrink? + int ret = entries_->Get(key, entry_arr); + if (ret < 0 || entry_arr == nullptr) { + return absl::NotFoundError("key not found"); + } + if (ts_map.size() != 1) { + return absl::InvalidArgumentError("ts map size is not 1"); + } + auto idx_ts = ts_map.begin(); + auto pos = ts_idx_map_.find(idx_ts->first); + if (pos == ts_idx_map_.end()) { + return absl::InvalidArgumentError("ts not found"); + } + // be careful, ts id in arg maybe negative cuz it's int32, but id in member is uint32 + if (!IsClusteredTs(idx_ts->first)) { + LOG(WARNING) << "idx_ts->first " << idx_ts->first << " is not clustered ts " + << (clustered_ts_id_.has_value() ? 
std::to_string(clustered_ts_id_.value()) : "no"); + return absl::InvalidArgumentError("ts is not clustered"); + } + KeyEntry* entry = nullptr; + if (ts_cnt_ == 1) { + LOG_IF(ERROR, pos->second != 0) << "when ts cnt == 1, pos second is " << pos->second; + entry = reinterpret_cast(entry_arr); + } else { + entry = reinterpret_cast(entry_arr)[pos->second]; + } + + if (entry == nullptr) { + return absl::NotFoundError("ts entry not found"); + } + auto auto_gen_ts = (idx_ts->first == DEFAULT_TS_COL_ID); + if (auto_gen_ts) { + // cidx(keys) has just one entry for one keys, so if keys exists, needs delete + DLOG_ASSERT(reinterpret_cast(entry)->entries.GetSize() == 1) << "cidx keys has more than one entry"; + if (reinterpret_cast(entry)->entries.GetSize() > 0) { + return absl::AlreadyExistsError("key exists: " + key.ToString()); + } + } else { + // don't use listcontains, we don't need to check value, just check if time exists + storage::DataBlock* v = nullptr; + if (entry->entries.Get(idx_ts->second, v) == 0) { + return absl::AlreadyExistsError(absl::StrCat("key+ts exists: ", key.ToString(), ", ts ", idx_ts->second)); + } + } + + return absl::NotFoundError("ts not found"); +} +// TODO(hw): when add lock? ref segment, don't lock iter +void IOTSegment::GrepGCEntry(const std::map& ttl_st_map, GCEntryInfo* gc_entry_info) { + if (ttl_st_map.empty()) { + DLOG(INFO) << "ttl map is empty, skip gc"; + return; + } + + bool need_gc = false; + for (const auto& kv : ttl_st_map) { + if (ts_idx_map_.find(kv.first) == ts_idx_map_.end()) { + LOG(WARNING) << "ts idx " << kv.first << " not found"; + return; + } + if (kv.second.NeedGc()) { + need_gc = true; + } + } + if (!need_gc) { + DLOG(INFO) << "no need gc, skip gc"; + return; + } + GrepGCAllType(ttl_st_map, gc_entry_info); +} + +void GrepGC4Abs(KeyEntry* entry, const Slice& key, const TTLSt& ttl, uint64_t cur_time, uint64_t ttl_offset, + GCEntryInfo* gc_entry_info) { + if (ttl.abs_ttl == 0) { + return; // never expire + } + uint64_t expire_time = cur_time - ttl_offset - ttl.abs_ttl; + std::unique_ptr iter(entry->entries.NewIterator()); + iter->Seek(expire_time); + // delete (expire, last] + while (iter->Valid()) { + if (iter->GetKey() > expire_time) { + break; + } + // expire_time has offset, so we don't need to check if equal + // if (iter->GetKey() == expire_time) { + // continue; // save ==, don't gc + // } + gc_entry_info->AddEntry(key, iter->GetKey(), iter->GetValue()); + if (gc_entry_info->Full()) { + LOG(INFO) << "gc entry info full, stop gc grep"; + return; + } + iter->Next(); + } +} + +void GrepGC4Lat(KeyEntry* entry, const Slice& key, const TTLSt& ttl, GCEntryInfo* gc_entry_info) { + auto keep_cnt = ttl.lat_ttl; + if (keep_cnt == 0) { + return; // never exipre + } + + std::unique_ptr iter(entry->entries.NewIterator()); + iter->SeekToFirst(); + while (iter->Valid()) { + if (keep_cnt > 0) { + keep_cnt--; + } else { + gc_entry_info->AddEntry(key, iter->GetKey(), iter->GetValue()); + } + if (gc_entry_info->Full()) { + LOG(INFO) << "gc entry info full, stop gc grep"; + return; + } + iter->Next(); + } +} + +void GrepGC4AbsAndLat(KeyEntry* entry, const Slice& key, const TTLSt& ttl, uint64_t cur_time, uint64_t ttl_offset, + GCEntryInfo* gc_entry_info) { + if (ttl.abs_ttl == 0 || ttl.lat_ttl == 0) { + return; // never exipre + } + // keep both + uint64_t expire_time = cur_time - ttl_offset - ttl.abs_ttl; + auto keep_cnt = ttl.lat_ttl; + std::unique_ptr iter(entry->entries.NewIterator()); + iter->SeekToFirst(); + // if > lat cnt and < expire, delete + while 
(iter->Valid()) { + if (keep_cnt > 0) { + keep_cnt--; + } else if (iter->GetKey() < expire_time) { + gc_entry_info->AddEntry(key, iter->GetKey(), iter->GetValue()); + } + if (gc_entry_info->Full()) { + LOG(INFO) << "gc entry info full, stop gc grep"; + return; + } + iter->Next(); + } +} +void GrepGC4AbsOrLat(KeyEntry* entry, const Slice& key, const TTLSt& ttl, uint64_t cur_time, uint64_t ttl_offset, + GCEntryInfo* gc_entry_info) { + if (ttl.abs_ttl == 0 && ttl.lat_ttl == 0) { + return; + } + if (ttl.abs_ttl == 0) { + // == lat ttl + GrepGC4Lat(entry, key, ttl, gc_entry_info); + return; + } + if (ttl.lat_ttl == 0) { + GrepGC4Abs(entry, key, ttl, cur_time, ttl_offset, gc_entry_info); + return; + } + uint64_t expire_time = cur_time - ttl_offset - ttl.abs_ttl; + auto keep_cnt = ttl.lat_ttl; + std::unique_ptr iter(entry->entries.NewIterator()); + iter->SeekToFirst(); + // if > keep cnt or < expire time, delete + while (iter->Valid()) { + if (keep_cnt > 0) { + keep_cnt--; // safe + } else { + gc_entry_info->AddEntry(key, iter->GetKey(), iter->GetValue()); + iter->Next(); + continue; + } + if (iter->GetKey() < expire_time) { + gc_entry_info->AddEntry(key, iter->GetKey(), iter->GetValue()); + } + if (gc_entry_info->Full()) { + LOG(INFO) << "gc entry info full, stop gc grep"; + return; + } + iter->Next(); + } +} + +// actually only one ttl for cidx, clean up later +void IOTSegment::GrepGCAllType(const std::map& ttl_st_map, GCEntryInfo* gc_entry_info) { + uint64_t consumed = ::baidu::common::timer::get_micros(); + uint64_t cur_time = consumed / 1000; + std::unique_ptr it(entries_->NewIterator()); + it->SeekToFirst(); + while (it->Valid()) { + KeyEntry** entry_arr = reinterpret_cast(it->GetValue()); + Slice key = it->GetKey(); + it->Next(); + for (const auto& kv : ttl_st_map) { + DLOG(INFO) << "key " << key.ToString() << ", ts idx " << kv.first << ", ttl " << kv.second.ToString() + << ", ts_cnt_ " << ts_cnt_; + if (!kv.second.NeedGc()) { + continue; + } + auto pos = ts_idx_map_.find(kv.first); + if (pos == ts_idx_map_.end() || pos->second >= ts_cnt_) { + LOG(WARNING) << "gc ts idx " << kv.first << " not found"; + continue; + } + KeyEntry* entry = nullptr; + // time series :[(ts, row), ...], so get key means get ts + if (ts_cnt_ == 1) { + LOG_IF(DFATAL, pos->second != 0) << "when ts cnt == 1, pos second is " << pos->second; + entry = reinterpret_cast(entry_arr); + } else { + entry = entry_arr[pos->second]; + } + if (entry == nullptr) { + DLOG(DFATAL) << "entry is null, impossible"; + continue; + } + entry->GetCount(); // for test + switch (kv.second.ttl_type) { + case ::openmldb::storage::TTLType::kAbsoluteTime: { + GrepGC4Abs(entry, key, kv.second, cur_time, ttl_offset_, gc_entry_info); + break; + } + case ::openmldb::storage::TTLType::kLatestTime: { + GrepGC4Lat(entry, key, kv.second, gc_entry_info); + break; + } + case ::openmldb::storage::TTLType::kAbsAndLat: { + GrepGC4AbsAndLat(entry, key, kv.second, cur_time, ttl_offset_, gc_entry_info); + break; + } + case ::openmldb::storage::TTLType::kAbsOrLat: { + GrepGC4AbsOrLat(entry, key, kv.second, cur_time, ttl_offset_, gc_entry_info); + break; + } + default: + return; + } + } + } + DLOG(INFO) << "[GC ts map] iot segment gc consumed " << (::baidu::common::timer::get_micros() - consumed) / 1000 + << "ms, gc entry size " << gc_entry_info->Size(); +} +} // namespace openmldb::storage diff --git a/src/storage/iot_segment.h b/src/storage/iot_segment.h new file mode 100644 index 00000000000..01b63ca7f84 --- /dev/null +++ b/src/storage/iot_segment.h @@ -0,0 
+1,304 @@ +/* + * Copyright 2021 4Paradigm + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_STORAGE_IOT_SEGMENT_H_ +#define SRC_STORAGE_IOT_SEGMENT_H_ + +#include "catalog/tablet_catalog.h" +#include "codec/row_codec.h" +#include "codec/row_iterator.h" +#include "codec/sql_rpc_row_codec.h" +#include "storage/mem_table_iterator.h" +#include "storage/segment.h" +#include "storage/table.h" // for storage::Schema + +DECLARE_uint32(cidx_gc_max_size); + +namespace openmldb::storage { + +base::Slice RowToSlice(const ::hybridse::codec::Row& row); + +// [pkeys_size, pkeys, pts_size, ts_id, tsv, ...] +std::string PackPkeysAndPts(const std::string& pkeys, uint64_t pts); +bool UnpackPkeysAndPts(const std::string& block, std::string* pkeys, uint64_t* pts); + +// secondary index iterator +// GetValue will lookup, and it may trigger rpc +class IOTIterator : public MemTableIterator { + public: + IOTIterator(TimeEntries::Iterator* it, type::CompressType compress_type, + std::unique_ptr<::hybridse::codec::WindowIterator> cidx_iter) + : MemTableIterator(it, compress_type), cidx_iter_(std::move(cidx_iter)) {} + virtual ~IOTIterator() {} + + openmldb::base::Slice GetValue() const override { + auto pkeys_pts = MemTableIterator::GetValue(); + std::string pkeys; + uint64_t ts; + if (!UnpackPkeysAndPts(pkeys_pts.ToString(), &pkeys, &ts)) { + LOG(WARNING) << "unpack pkeys and pts failed"; + return ""; + } + cidx_iter_->Seek(pkeys); + if (cidx_iter_->Valid()) { + // seek to ts + auto ts_iter = cidx_iter_->GetValue(); + ts_iter->Seek(ts); + if (ts_iter->Valid()) { + return RowToSlice(ts_iter->GetValue()); + } + } + // TODO(hw): Valid() to check row data? what if only one entry invalid? + return ""; + } + + private: + std::unique_ptr<::hybridse::codec::WindowIterator> cidx_iter_; +}; + +class IOTTraverseIterator : public MemTableTraverseIterator { + public: + IOTTraverseIterator(Segment** segments, uint32_t seg_cnt, ::openmldb::storage::TTLType ttl_type, + uint64_t expire_time, uint64_t expire_cnt, uint32_t ts_index, type::CompressType compress_type, + std::unique_ptr<::hybridse::codec::WindowIterator> cidx_iter) + : MemTableTraverseIterator(segments, seg_cnt, ttl_type, expire_time, expire_cnt, ts_index, compress_type), + cidx_iter_(std::move(cidx_iter)) {} + ~IOTTraverseIterator() override {} + + openmldb::base::Slice GetValue() const override { + auto pkeys_pts = MemTableTraverseIterator::GetValue(); + std::string pkeys; + uint64_t ts; + if (!UnpackPkeysAndPts(pkeys_pts.ToString(), &pkeys, &ts)) { + LOG(WARNING) << "unpack pkeys and pts failed"; + return ""; + } + // distribute cidx iter should seek to (key, ts) + DLOG(INFO) << "seek to " << pkeys << ", " << ts; + cidx_iter_->Seek(pkeys); + if (cidx_iter_->Valid()) { + // seek to ts + auto ts_iter_ = cidx_iter_->GetValue(); + ts_iter_->Seek(ts); + if (ts_iter_->Valid()) { + // TODO(hw): hard copy, or hold ts_iter to store value? IOTIterator should be the same. 
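+                // Note (editor, descriptive comment only): the secondary entry stores only the
+                // packed pkeys+pts, so the real row is fetched here by re-seeking the clustered
+                // index with (pkeys, ts). RowToSlice() copies the located row into a buffer owned
+                // by the returned slice; that copy is the "hard copy" the TODO above refers to.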
+ DLOG(INFO) << "valid, " << ts_iter_->GetValue().ToString(); + return RowToSlice(ts_iter_->GetValue()); + } + } + LOG(WARNING) << "no suitable iter"; + return ""; // won't core, just no row for select? + } + + private: + std::unique_ptr<::hybridse::codec::WindowIterator> cidx_iter_; + std::unique_ptr ts_iter_; +}; + +class IOTWindowIterator : public MemTableWindowIterator { + public: + IOTWindowIterator(TimeEntries::Iterator* it, ::openmldb::storage::TTLType ttl_type, uint64_t expire_time, + uint64_t expire_cnt, type::CompressType compress_type, + std::unique_ptr<::hybridse::codec::WindowIterator> cidx_iter) + : MemTableWindowIterator(it, ttl_type, expire_time, expire_cnt, compress_type), + cidx_iter_(std::move(cidx_iter)) { + DLOG(INFO) << "create IOTWindowIterator"; + } + // for debug + void SetSchema(const codec::Schema& schema, const std::vector& pkeys_idx) { + pkeys_idx_ = pkeys_idx; + row_view_.reset(new codec::RowView(schema)); + } + const ::hybridse::codec::Row& GetValue() override { + auto pkeys_pts = MemTableWindowIterator::GetValue(); + if (pkeys_pts.empty()) { + LOG(WARNING) << "empty pkeys_pts for key " << GetKey(); + return dummy; + } + + // unpack the row and get pkeys+pts + // Row -> cols + std::string pkeys; + uint64_t ts; + if (!UnpackPkeysAndPts(pkeys_pts.ToString(), &pkeys, &ts)) { + LOG(WARNING) << "unpack pkeys and pts failed"; + return dummy; + } + // TODO(hw): what if no ts? it'll be 0 for temp + DLOG(INFO) << "pkeys=" << pkeys << ", ts=" << ts; + cidx_iter_->Seek(pkeys); + if (cidx_iter_->Valid()) { + // seek to ts + DLOG(INFO) << "seek to ts " << ts; + // hold the row iterator to avoid invalidation + cidx_ts_iter_ = std::move(cidx_iter_->GetValue()); + cidx_ts_iter_->Seek(ts); + // must be the same keys+ts + if (cidx_ts_iter_->Valid()) { + // DLOG(INFO) << "valid, is the same value? " << GetKeys(cidx_ts_iter_->GetValue()); + return cidx_ts_iter_->GetValue(); + } + } + // Valid() to check row data? what if only one entry invalid? + return dummy; + } + + private: + std::string GetKeys(const hybridse::codec::Row& pkeys_pts) { + std::string pkeys, key; // RowView Get will assign output, no need to clear + for (auto pkey_idx : pkeys_idx_) { + if (!pkeys.empty()) { + pkeys += "|"; + } + // TODO(hw): if null, append to key? + auto ret = row_view_->GetStrValue(pkeys_pts.buf(), pkey_idx, &key); + if (ret == -1) { + LOG(WARNING) << "get pkey failed"; + return {}; + } + pkeys += key.empty() ? 
+class IOTKeyIterator : public MemTableKeyIterator {
+ public:
+    IOTKeyIterator(Segment** segments, uint32_t seg_cnt, ::openmldb::storage::TTLType ttl_type, uint64_t expire_time,
+                   uint64_t expire_cnt, uint32_t ts_index, type::CompressType compress_type,
+                   std::shared_ptr<catalog::TabletTableHandler> cidx_handler, const std::string& cidx_name)
+        : MemTableKeyIterator(segments, seg_cnt, ttl_type, expire_time, expire_cnt, ts_index, compress_type) {
+        // cidx_iter will be used by RowIterator but it's unique, so create it when get RowIterator
+        cidx_handler_ = cidx_handler;
+        cidx_name_ = cidx_name;
+    }
+
+    ~IOTKeyIterator() override {}
+    void SetSchema(const std::shared_ptr<Schema>& schema,
+                   const std::shared_ptr<IndexDef>& cidx) {
+        schema_ = *schema;  // copy
+        // pkeys idx
+        std::map<std::string, int> col_idx_map;
+        for (int i = 0; i < schema_.size(); i++) {
+            col_idx_map[schema_[i].name()] = i;
+        }
+        pkeys_idx_.clear();
+        for (auto pkey : cidx->GetColumns()) {
+            pkeys_idx_.emplace_back(col_idx_map[pkey.GetName()]);
+        }
+    }
+    ::hybridse::vm::RowIterator* GetRawValue() override {
+        DLOG(INFO) << "GetRawValue for key " << GetKey().ToString() << ", bind cidx " << cidx_name_;
+        TimeEntries::Iterator* it = GetTimeIter();
+        auto cidx_iter = cidx_handler_->GetWindowIterator(cidx_name_);
+        auto iter =
+            new IOTWindowIterator(it, ttl_type_, expire_time_, expire_cnt_, compress_type_, std::move(cidx_iter));
+        // iter->SetSchema(schema_, pkeys_idx_);
+        return iter;
+    }
+
+ private:
+    std::shared_ptr<catalog::TabletTableHandler> cidx_handler_;
+    std::string cidx_name_;
+    // test
+    codec::Schema schema_;
+    std::vector<uint32_t> pkeys_idx_;
+};
+
+class GCEntryInfo {
+ public:
+    typedef std::pair<uint64_t, storage::DataBlock*> Entry;
+    ~GCEntryInfo() {
+        for (auto& entry : entries_) {
+            entry.second->dim_cnt_down--;
+            // TODO delete?
+        }
+    }
+    void AddEntry(const Slice& keys, uint64_t ts, storage::DataBlock* ptr) {
+        // to avoid Block deleted before gc, add ref
+        ptr->dim_cnt_down++;  // TODO(hw): no concurrency? or make sure under lock
+        entries_.emplace_back(ts, ptr);
+    }
+    std::size_t Size() { return entries_.size(); }
+    std::vector<Entry>& GetEntries() { return entries_; }
+    bool Full() { return entries_.size() >= FLAGS_cidx_gc_max_size; }
+
+ private:
+    // std::vector<std::pair<uint64_t, storage::DataBlock*>> entries_;
+    std::vector<Entry> entries_;
+};
+
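+// A rough sketch of how GCEntryInfo is meant to be used for clustered-index GC (the real
+// flow lives in index_organized_table.cc; the loop below is illustrative only):
+//
+//   GCEntryInfo expired;
+//   iot_segment->GrepGCEntry(ttl_st_map, &expired);  // collect expired cidx entries, ref-counted via dim_cnt_down
+//   for (auto& [ts, block] : expired.GetEntries()) {
+//       // delete the expired rows through the normal delete path so that secondary and
+//       // covering indexes stay consistent with the clustered index
+//   }
+//   // Full() caps one GC round at FLAGS_cidx_gc_max_size entries
+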
+class IOTSegment : public Segment {
+ public:
+    explicit IOTSegment(uint8_t height) : Segment(height) {}
+    IOTSegment(uint8_t height, const std::vector<uint32_t>& ts_idx_vec,
+               const std::vector<common::IndexType>& index_types)
+        : Segment(height, ts_idx_vec), index_types_(index_types) {
+        // find clustered ts id
+        for (uint32_t i = 0; i < ts_idx_vec.size(); i++) {
+            if (index_types_[i] == common::kClustered) {
+                clustered_ts_id_ = ts_idx_vec[i];
+                break;
+            }
+        }
+    }
+    ~IOTSegment() override {}
+
+    bool PutUnlock(const Slice& key, uint64_t time, DataBlock* row, bool put_if_absent, bool check_all_time);
+    bool Put(const Slice& key, const std::map<uint32_t, uint64_t>& ts_map, DataBlock* cblock, DataBlock* sblock,
+             bool put_if_absent = false);
+    // use ts map to get idx in entry_arr
+    // no ok status, exists or not found
+    absl::Status CheckKeyExists(const Slice& key, const std::map<uint32_t, uint64_t>& ts_map);
+    // DEFAULT_TS_COL_ID is uint32_t max, so clustered_ts_id_ can't have an init value, use std::optional
+    bool IsClusteredTs(uint32_t ts_id) {
+        return clustered_ts_id_.has_value() ? (ts_id == clustered_ts_id_.value()) : false;
+    }
+
+    std::optional<uint32_t> ClusteredTs() const { return clustered_ts_id_; }
+
+    void GrepGCEntry(const std::map<uint32_t, TTLSt>& ttl_st_map, GCEntryInfo* gc_entry_info);
+
+    MemTableIterator* NewIterator(const Slice& key, Ticket& ticket, type::CompressType compress_type) {  // NOLINT
+        DLOG_ASSERT(false) << "unsupported, let iot table create it";
+        return nullptr;
+    }
+    MemTableIterator* NewIterator(const Slice& key, uint32_t idx, Ticket& ticket,  // NOLINT
+                                  type::CompressType compress_type) {
+        DLOG_ASSERT(false) << "unsupported, let iot table create it";
+        return nullptr;
+    }
+
+ private:
+    void GrepGCAllType(const std::map<uint32_t, TTLSt>& ttl_st_map, GCEntryInfo* gc_entry_info);
+
+ private:
+    std::vector<common::IndexType> index_types_;
+    std::optional<uint32_t> clustered_ts_id_;
+};
+
+}  // namespace openmldb::storage
+#endif  // SRC_STORAGE_IOT_SEGMENT_H_
diff --git a/src/storage/iot_segment_test.cc b/src/storage/iot_segment_test.cc
new file mode 100644
index 00000000000..312c92c5a87
--- /dev/null
+++ b/src/storage/iot_segment_test.cc
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2021 4Paradigm
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "storage/iot_segment.h"
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/str_cat.h"
+#include "base/glog_wrapper.h"
+#include "base/slice.h"
+#include "gtest/gtest.h"
+#include "storage/record.h"
+
+using ::openmldb::base::Slice;
+
+namespace openmldb {
+namespace storage {
+
+// iot segment is not the same as segment, so we need to test it separately
+class IOTSegmentTest : public ::testing::Test {
+ public:
+    IOTSegmentTest() {}
+    ~IOTSegmentTest() {}
+};
+
+TEST_F(IOTSegmentTest, PutAndScan) {
+    IOTSegment segment(8, {1, 3, 5},
+                       {common::IndexType::kClustered, common::IndexType::kSecondary, common::IndexType::kCovering});
+    Slice pk("test1");
+    std::string value = "test0";
+    auto cblk = new DataBlock(2, value.c_str(), value.size());  // 1 clustered + 1 covering, hard copy
+    auto sblk = new DataBlock(1, value.c_str(), value.size());  // 1 secondary, fake value, hard copy
+    // use the frequently used Put method
+    ASSERT_TRUE(segment.Put(pk, {{1, 100}, {3, 300}, {5, 500}}, cblk, sblk));
+    // if the first one is the clustered index, segment put will fail the first time, no need to revert
+    ASSERT_FALSE(segment.Put(pk, {{1, 100}}, cblk, sblk));
+    ASSERT_FALSE(segment.Put(pk, {{1, 100}, {3, 300}, {5, 500}}, cblk, sblk));
+    ASSERT_EQ(1, (int64_t)segment.GetPkCnt());
+    Ticket ticket;
+    // iter clustered(idx 1), not the secondary, don't create iot iter
+    std::unique_ptr<MemTableIterator> it(
+        segment.Segment::NewIterator("test1", 1, ticket, type::CompressType::kNoCompress));
+    it->Seek(500);  // find less than
+    ASSERT_TRUE(it->Valid());
+    ASSERT_EQ(100, (int64_t)it->GetKey());
+    ::openmldb::base::Slice val = it->GetValue();
+    std::string result(val.data(), val.size());
+    ASSERT_EQ("test0", result);
+    it->Next();
+    ASSERT_FALSE(it->Valid());  // just one row
+
+    // if the first one is not the clustered index, we can't know if it exists, be careful
+    ASSERT_TRUE(segment.Put(pk, {{3, 300}, {5, 500}}, nullptr, nullptr));
+}
+
+TEST_F(IOTSegmentTest, PutAndScanWhenDefaultTs) {
+    // in the same inner index, it won't have the same ts id
+    IOTSegment segment(8, {DEFAULT_TS_COL_ID, 3, 5},
+                       {common::IndexType::kClustered, common::IndexType::kSecondary, common::IndexType::kCovering});
+    Slice pk("test1");
+    std::string value = "test0";
+    auto cblk = new DataBlock(2, value.c_str(), value.size());  // 1 clustered + 1 covering, hard copy
+    auto sblk = new DataBlock(1, value.c_str(), value.size());  // 1 secondary, fake value, hard copy
+    // use the frequently used Put method
+    ASSERT_TRUE(segment.Put(pk, {{DEFAULT_TS_COL_ID, 100}, {3, 300}, {5, 500}}, cblk, sblk));
+    // if the first one is the clustered index, segment put will fail the first time, no need to revert
+    ASSERT_FALSE(segment.Put(pk, {{DEFAULT_TS_COL_ID, 100}}, cblk, sblk));
+    ASSERT_FALSE(segment.Put(pk, {{DEFAULT_TS_COL_ID, 100}, {3, 300}, {5, 500}}, cblk, sblk));
+    ASSERT_EQ(1, (int64_t)segment.GetPkCnt());
+    Ticket ticket;
+    // iter clustered(idx 1), not the secondary, don't create iot iter
+    std::unique_ptr<MemTableIterator> it(
+        segment.Segment::NewIterator("test1", DEFAULT_TS_COL_ID, ticket, type::CompressType::kNoCompress));
+    it->Seek(500);  // find less than
+    ASSERT_TRUE(it->Valid());
+    ASSERT_EQ(100, (int64_t)it->GetKey());
+    ::openmldb::base::Slice val = it->GetValue();
+    std::string result(val.data(), val.size());
+    ASSERT_EQ("test0", result);
+    it->Next();
+    ASSERT_FALSE(it->Valid());  // just one row
+
+    // if the first one is not the clustered index, we can't know if it exists, be careful
+    ASSERT_TRUE(segment.Put(pk, {{3, 300}, {5, 500}}, nullptr, nullptr));
+}
+
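+// Note on the two value blocks used above: clustered and covering entries share one DataBlock
+// holding the full row, while secondary entries hold only the packed pkeys+pts reference that
+// the IOT iterators later resolve. A rough sketch of how a real write would build the blocks
+// (names are illustrative; the production path lives in index_organized_table.cc):
+//
+//   std::string ref = PackPkeysAndPts(combined_pkeys, primary_ts);
+//   auto* cblk = new DataBlock(clustered_and_covering_cnt, row.data(), row.size());
+//   auto* sblk = new DataBlock(secondary_cnt, ref.data(), ref.size());
+//   segment.Put(pk, ts_map, cblk, sblk);
+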
+TEST_F(IOTSegmentTest, CheckKeyExists) { + IOTSegment segment(8, {1, 3, 5}, + {common::IndexType::kClustered, common::IndexType::kSecondary, common::IndexType::kCovering}); + Slice pk("test1"); + std::string value = "test0"; + auto cblk = new DataBlock(2, value.c_str(), value.size()); // 1 clustered + 1 covering, hard copy + auto sblk = new DataBlock(1, value.c_str(), value.size()); // 1 secondary, fake value, hard copy + // use the frenquently used Put method + segment.Put(pk, {{1, 100}, {3, 300}, {5, 500}}, cblk, sblk); + ASSERT_EQ(1, (int64_t)segment.GetPkCnt()); + // check if exists in cidx segment(including 'ttl expired but not gc') + auto st = segment.CheckKeyExists(pk, {{1, 100}}); + ASSERT_TRUE(absl::IsAlreadyExists(st)) << st.ToString(); + st = segment.CheckKeyExists(pk, {{1, 300}}); + ASSERT_TRUE(absl::IsNotFound(st)) << st.ToString(); + // check sidx/covering idx will fail + st = segment.CheckKeyExists(pk, {{3, 300}}); + ASSERT_TRUE(absl::IsInvalidArgument(st)) << st.ToString(); +} + +// report result, don't need to print args in here, just print the failure +::testing::AssertionResult CheckStatisticsInfo(const StatisticsInfo& expect, const StatisticsInfo& value) { + if (expect.idx_cnt_vec.size() != value.idx_cnt_vec.size()) { + return ::testing::AssertionFailure() + << "idx_cnt_vec size expect " << expect.idx_cnt_vec.size() << " but got " << value.idx_cnt_vec.size(); + } + for (size_t idx = 0; idx < expect.idx_cnt_vec.size(); idx++) { + if (expect.idx_cnt_vec[idx] != value.idx_cnt_vec[idx]) { + return ::testing::AssertionFailure() << "idx_cnt_vec[" << idx << "] expect " << expect.idx_cnt_vec[idx] + << " but got " << value.idx_cnt_vec[idx]; + } + } + if (expect.record_byte_size != value.record_byte_size) { + return ::testing::AssertionFailure() + << "record_byte_size expect " << expect.record_byte_size << " but got " << value.record_byte_size; + } + if (expect.idx_byte_size != value.idx_byte_size) { + return ::testing::AssertionFailure() + << "idx_byte_size expect " << expect.idx_byte_size << " but got " << value.idx_byte_size; + } + return ::testing::AssertionSuccess(); +} + +// helper +::testing::AssertionResult CheckStatisticsInfo(std::initializer_list vec, uint64_t idx_byte_size, + uint64_t record_byte_size, const StatisticsInfo& value) { + StatisticsInfo info(0); // overwrite by set idx_cnt_vec + info.idx_cnt_vec = vec; + info.idx_byte_size = idx_byte_size; + info.record_byte_size = record_byte_size; + return CheckStatisticsInfo(info, value); +} + +StatisticsInfo CreateStatisticsInfo(uint64_t idx_cnt, uint64_t idx_byte_size, uint64_t record_byte_size) { + StatisticsInfo info(1); + info.idx_cnt_vec[0] = idx_cnt; + info.idx_byte_size = idx_byte_size; + info.record_byte_size = record_byte_size; + return info; +} + +// TODO(hw): gc multi idx has bug, fix later +// TEST_F(IOTSegmentTest, TestGc4Head) { +// IOTSegment segment(8); +// Slice pk("PK"); +// segment.Put(pk, 9768, "test1", 5); +// segment.Put(pk, 9769, "test2", 5); +// StatisticsInfo gc_info(1); +// segment.Gc4Head(1, &gc_info); +// CheckStatisticsInfo(CreateStatisticsInfo(1, 0, GetRecordSize(5)), gc_info); +// Ticket ticket; +// std::unique_ptr it(segment.NewIterator(pk, ticket, type::CompressType::kNoCompress)); +// it->Seek(9769); +// ASSERT_TRUE(it->Valid()); +// ASSERT_EQ(9769, (int64_t)it->GetKey()); +// ::openmldb::base::Slice value = it->GetValue(); +// std::string result(value.data(), value.size()); +// ASSERT_EQ("test2", result); +// it->Next(); +// ASSERT_FALSE(it->Valid()); +// } + +TEST_F(IOTSegmentTest, 
TestGc4TTL) { + // cidx segment won't execute gc, gc will be done in iot gc + // and multi idx gc `GcAllType` has bug, skip test it + { + std::vector idx_vec = {1}; + std::vector idx_type = {common::IndexType::kClustered}; + auto segment = std::make_unique(8, idx_vec, idx_type); + Slice pk("test1"); + std::string value = "test0"; + auto cblk = new DataBlock(1, value.c_str(), value.size()); // 1 clustered + 1 covering, hard copy + auto sblk = new DataBlock(1, value.c_str(), value.size()); // 1 secondary, fake value, hard copy + ASSERT_TRUE(segment->Put(pk, {{1, 100}}, cblk, sblk)); + // ref iot gc SchedGCByDelete + StatisticsInfo statistics_info(segment->GetTsCnt()); + segment->IncrGcVersion(); + segment->GcFreeList(&statistics_info); + segment->ExecuteGc({{1, {1, 0, TTLType::kAbsoluteTime}}}, &statistics_info, segment->ClusteredTs()); + ASSERT_TRUE(CheckStatisticsInfo({0}, 0, 0, statistics_info)); + } + { + std::vector idx_vec = {1}; + std::vector idx_type = {common::IndexType::kSecondary}; + auto segment = std::make_unique(8, idx_vec, idx_type); + Slice pk("test1"); + std::string value = "test0"; + auto cblk = new DataBlock(1, value.c_str(), value.size()); // 1 clustered + 1 covering, hard copy + // execute gc will delete it + auto sblk = new DataBlock(1, value.c_str(), value.size()); // 1 secondary, fake value, hard copy + ASSERT_TRUE(segment->Put(pk, {{1, 100}}, cblk, sblk)); + // ref iot gc SchedGCByDelete + StatisticsInfo statistics_info(segment->GetTsCnt()); + segment->IncrGcVersion(); // 1 + segment->GcFreeList(&statistics_info); + segment->ExecuteGc({{1, {1, 0, TTLType::kAbsoluteTime}}}, &statistics_info, segment->ClusteredTs()); + // secondary will gc, but idx_byte_size is 0(GcFreeList change it) + ASSERT_TRUE(CheckStatisticsInfo({1}, 0, GetRecordSize(5), statistics_info)); + + segment->IncrGcVersion(); // 2 + segment->GcFreeList(&statistics_info); // empty + ASSERT_TRUE(CheckStatisticsInfo({1}, 0, GetRecordSize(5), statistics_info)); + segment->IncrGcVersion(); // delta default is 2, version should >=2, and node_cache free version should >= 3 + segment->GcFreeList(&statistics_info); + // don't know why 197 + ASSERT_TRUE(CheckStatisticsInfo({1}, 197, GetRecordSize(5), statistics_info)); + } +} + +// TEST_F(IOTSegmentTest, TestGc4TTLAndHead) { +// IOTSegment segment(8); +// segment.Put("PK1", 9766, "test1", 5); +// segment.Put("PK1", 9767, "test2", 5); +// segment.Put("PK1", 9768, "test3", 5); +// segment.Put("PK1", 9769, "test4", 5); +// segment.Put("PK2", 9765, "test1", 5); +// segment.Put("PK2", 9766, "test2", 5); +// segment.Put("PK2", 9767, "test3", 5); +// StatisticsInfo gc_info(1); +// // Gc4TTLAndHead only change gc_info.vec[0], check code +// // no expire +// segment.Gc4TTLAndHead(0, 0, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({0}, 0, 0, gc_info)); +// // no lat expire, so all records won't be deleted +// segment.Gc4TTLAndHead(9999, 0, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({0}, 0, 0, gc_info)); +// // no abs expire, so all records won't be deleted +// segment.Gc4TTLAndHead(0, 3, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({0}, 0, 0, gc_info)); +// // current_time > expire_time means not expired, so == is outdate and lat 2, so `9765` should be deleted +// segment.Gc4TTLAndHead(9765, 2, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({1}, 0, GetRecordSize(5), gc_info)); +// // gc again, no record expired, info won't update +// segment.Gc4TTLAndHead(9765, 2, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({1}, 0, GetRecordSize(5), gc_info)); +// // new info 
+// gc_info.Reset(); +// // time <= 9770 is abs expired, but lat 1, so just 1 record per key left, 4 deleted +// segment.Gc4TTLAndHead(9770, 1, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({4}, 0, 4 * GetRecordSize(5), gc_info)); +// uint64_t cnt = 0; +// ASSERT_EQ(0, segment.GetCount("PK1", cnt)); +// ASSERT_EQ(1, cnt); +// ASSERT_EQ(0, segment.GetCount("PK2", cnt)); +// ASSERT_EQ(1, cnt); +// } + +// TEST_F(IOTSegmentTest, TestGc4TTLOrHead) { +// IOTSegment segment(8); +// segment.Put("PK1", 9766, "test1", 5); +// segment.Put("PK1", 9767, "test2", 5); +// segment.Put("PK1", 9768, "test3", 5); +// segment.Put("PK1", 9769, "test4", 5); +// segment.Put("PK2", 9765, "test1", 5); +// segment.Put("PK2", 9766, "test2", 5); +// segment.Put("PK2", 9767, "test3", 5); +// StatisticsInfo gc_info(1); +// // no expire +// segment.Gc4TTLOrHead(0, 0, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({0}, 0, 0, gc_info)); +// // all record <= 9765 should be deleted, no matter the lat expire +// segment.Gc4TTLOrHead(9765, 0, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({1}, 0, GetRecordSize(5), gc_info)); +// gc_info.Reset(); +// // even abs no expire, only lat 3 per key +// segment.Gc4TTLOrHead(0, 3, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({1}, 0, GetRecordSize(5), gc_info)); +// gc_info.Reset(); +// segment.Gc4TTLOrHead(9765, 3, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({0}, 0, 0, gc_info)); +// segment.Gc4TTLOrHead(9766, 2, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({2}, 0, 2 * GetRecordSize(5), gc_info)); +// gc_info.Reset(); +// segment.Gc4TTLOrHead(9770, 1, &gc_info); +// ASSERT_TRUE(CheckStatisticsInfo({3}, 0, 3 * GetRecordSize(5), gc_info)); +// } + +// TEST_F(IOTSegmentTest, TestStat) { +// IOTSegment segment(8); +// segment.Put("PK", 9768, "test1", 5); +// segment.Put("PK", 9769, "test2", 5); +// ASSERT_EQ(2, (int64_t)segment.GetIdxCnt()); +// ASSERT_EQ(1, (int64_t)segment.GetPkCnt()); +// StatisticsInfo gc_info(1); +// segment.Gc4TTL(9765, &gc_info); +// ASSERT_EQ(0, gc_info.GetTotalCnt()); +// gc_info.Reset(); +// segment.Gc4TTL(9768, &gc_info); +// ASSERT_EQ(1, (int64_t)segment.GetIdxCnt()); +// ASSERT_EQ(1, gc_info.GetTotalCnt()); +// segment.Gc4TTL(9770, &gc_info); +// ASSERT_EQ(2, gc_info.GetTotalCnt()); +// ASSERT_EQ(0, (int64_t)segment.GetIdxCnt()); +// } + +// TEST_F(IOTSegmentTest, GetTsIdx) { +// std::vector ts_idx_vec = {1, 3, 5}; +// IOTSegment segment(8, ts_idx_vec); +// ASSERT_EQ(3, (int64_t)segment.GetTsCnt()); +// uint32_t real_idx = UINT32_MAX; +// ASSERT_EQ(-1, segment.GetTsIdx(0, real_idx)); +// ASSERT_EQ(0, segment.GetTsIdx(1, real_idx)); +// ASSERT_EQ(0, (int64_t)real_idx); +// ASSERT_EQ(-1, segment.GetTsIdx(2, real_idx)); +// ASSERT_EQ(0, segment.GetTsIdx(3, real_idx)); +// ASSERT_EQ(1, (int64_t)real_idx); +// ASSERT_EQ(-1, segment.GetTsIdx(4, real_idx)); +// ASSERT_EQ(0, segment.GetTsIdx(5, real_idx)); +// ASSERT_EQ(2, (int64_t)real_idx); +// } + +// int GetCount(IOTSegment* segment, int idx) { +// int count = 0; +// std::unique_ptr pk_it(segment->GetKeyEntries()->NewIterator()); +// if (!pk_it) { +// return 0; +// } +// uint32_t real_idx = idx; +// segment->GetTsIdx(idx, real_idx); +// pk_it->SeekToFirst(); +// while (pk_it->Valid()) { +// KeyEntry* entry = nullptr; +// if (segment->GetTsCnt() > 1) { +// entry = reinterpret_cast(pk_it->GetValue())[real_idx]; +// } else { +// entry = reinterpret_cast(pk_it->GetValue()); +// } +// std::unique_ptr ts_it(entry->entries.NewIterator()); +// ts_it->SeekToFirst(); +// while (ts_it->Valid()) { +// count++; +// 
ts_it->Next(); +// } +// pk_it->Next(); +// } +// return count; +// } + +// TEST_F(IOTSegmentTest, ReleaseAndCount) { +// std::vector ts_idx_vec = {1, 3}; +// IOTSegment segment(8, ts_idx_vec); +// ASSERT_EQ(2, (int64_t)segment.GetTsCnt()); +// for (int i = 0; i < 100; i++) { +// std::string key = "key" + std::to_string(i); +// uint64_t ts = 1669013677221000; +// for (int j = 0; j < 2; j++) { +// DataBlock* data = new DataBlock(2, key.c_str(), key.length()); +// std::map ts_map = {{1, ts + j}, {3, ts + j}}; +// segment.Put(Slice(key), ts_map, data); +// } +// } +// ASSERT_EQ(200, GetCount(&segment, 1)); +// ASSERT_EQ(200, GetCount(&segment, 3)); +// StatisticsInfo gc_info(1); +// segment.ReleaseAndCount({1}, &gc_info); +// ASSERT_EQ(0, GetCount(&segment, 1)); +// ASSERT_EQ(200, GetCount(&segment, 3)); +// segment.ReleaseAndCount(&gc_info); +// ASSERT_EQ(0, GetCount(&segment, 1)); +// ASSERT_EQ(0, GetCount(&segment, 3)); +// } + +// TEST_F(IOTSegmentTest, ReleaseAndCountOneTs) { +// IOTSegment segment(8); +// for (int i = 0; i < 100; i++) { +// std::string key = "key" + std::to_string(i); +// uint64_t ts = 1669013677221000; +// for (int j = 0; j < 2; j++) { +// segment.Put(Slice(key), ts + j, key.c_str(), key.size()); +// } +// } +// StatisticsInfo gc_info(1); +// ASSERT_EQ(200, GetCount(&segment, 0)); +// segment.ReleaseAndCount(&gc_info); +// ASSERT_EQ(0, GetCount(&segment, 0)); +// } + +// TEST_F(IOTSegmentTest, TestDeleteRange) { +// IOTSegment segment(8); +// for (int idx = 0; idx < 10; idx++) { +// std::string key = absl::StrCat("key", idx); +// std::string value = absl::StrCat("value", idx); +// uint64_t ts = 1000; +// for (int i = 0; i < 10; i++) { +// segment.Put(Slice(key), ts + i, value.data(), 6); +// } +// } +// ASSERT_EQ(100, GetCount(&segment, 0)); +// std::string pk = "key2"; +// Ticket ticket; +// std::unique_ptr it(segment.NewIterator(pk, ticket, type::CompressType::kNoCompress)); +// it->Seek(1005); +// ASSERT_TRUE(it->Valid() && it->GetKey() == 1005); +// ASSERT_TRUE(segment.Delete(std::nullopt, pk, 1005, 1004)); +// ASSERT_EQ(99, GetCount(&segment, 0)); +// it->Seek(1005); +// ASSERT_FALSE(it->Valid() && it->GetKey() == 1005); +// ASSERT_TRUE(segment.Delete(std::nullopt, pk, 1005, std::nullopt)); +// ASSERT_EQ(94, GetCount(&segment, 0)); +// it->Seek(1005); +// ASSERT_FALSE(it->Valid()); +// pk = "key3"; +// ASSERT_TRUE(segment.Delete(std::nullopt, pk)); +// pk = "key4"; +// ASSERT_TRUE(segment.Delete(std::nullopt, pk, 1005, 1001)); +// ASSERT_EQ(80, GetCount(&segment, 0)); +// segment.IncrGcVersion(); +// segment.IncrGcVersion(); +// StatisticsInfo gc_info(1); +// segment.GcFreeList(&gc_info); +// CheckStatisticsInfo(CreateStatisticsInfo(20, 1012, 20 * (6 + sizeof(DataBlock))), gc_info); +// } + +// TEST_F(IOTSegmentTest, PutIfAbsent) { +// { +// IOTSegment segment(8); // so ts_cnt_ == 1 +// // check all time == false +// segment.Put("PK", 1, "test1", 5, true); +// segment.Put("PK", 1, "test2", 5, true); // even key&time is the same, different value means different record +// ASSERT_EQ(2, (int64_t)segment.GetIdxCnt()); +// ASSERT_EQ(1, (int64_t)segment.GetPkCnt()); +// segment.Put("PK", 2, "test3", 5, true); +// segment.Put("PK", 2, "test4", 5, true); +// segment.Put("PK", 3, "test5", 5, true); +// segment.Put("PK", 3, "test6", 5, true); +// ASSERT_EQ(6, (int64_t)segment.GetIdxCnt()); +// // insert exists rows +// segment.Put("PK", 2, "test3", 5, true); +// segment.Put("PK", 1, "test1", 5, true); +// segment.Put("PK", 1, "test2", 5, true); +// segment.Put("PK", 3, 
"test6", 5, true); +// ASSERT_EQ(6, (int64_t)segment.GetIdxCnt()); +// // new rows +// segment.Put("PK", 2, "test7", 5, true); +// ASSERT_EQ(7, (int64_t)segment.GetIdxCnt()); +// segment.Put("PK", 0, "test8", 5, true); // seek to last, next is empty +// ASSERT_EQ(8, (int64_t)segment.GetIdxCnt()); +// } + +// { +// // support when ts_cnt_ != 1 too +// std::vector ts_idx_vec = {1, 3}; +// IOTSegment segment(8, ts_idx_vec); +// ASSERT_EQ(2, (int64_t)segment.GetTsCnt()); +// std::string key = "PK"; +// uint64_t ts = 1669013677221000; +// // the same ts +// for (int j = 0; j < 2; j++) { +// DataBlock* data = new DataBlock(2, key.c_str(), key.length()); +// std::map ts_map = {{1, ts}, {3, ts}}; +// segment.Put(Slice(key), ts_map, data, true); +// } +// ASSERT_EQ(1, GetCount(&segment, 1)); +// ASSERT_EQ(1, GetCount(&segment, 3)); +// } + +// { +// // put ts_map contains DEFAULT_TS_COL_ID +// std::vector ts_idx_vec = {DEFAULT_TS_COL_ID}; +// IOTSegment segment(8, ts_idx_vec); +// ASSERT_EQ(1, (int64_t)segment.GetTsCnt()); +// std::string key = "PK"; +// std::map ts_map = {{DEFAULT_TS_COL_ID, 100}}; // cur time == 100 +// auto* block = new DataBlock(1, "test1", 5); +// segment.Put(Slice(key), ts_map, block, true); +// ASSERT_EQ(1, GetCount(&segment, DEFAULT_TS_COL_ID)); +// ts_map = {{DEFAULT_TS_COL_ID, 200}}; +// block = new DataBlock(1, "test1", 5); +// segment.Put(Slice(key), ts_map, block, true); +// ASSERT_EQ(1, GetCount(&segment, DEFAULT_TS_COL_ID)); +// } + +// { +// // put ts_map contains DEFAULT_TS_COL_ID +// std::vector ts_idx_vec = {DEFAULT_TS_COL_ID, 1, 3}; +// IOTSegment segment(8, ts_idx_vec); +// ASSERT_EQ(3, (int64_t)segment.GetTsCnt()); +// std::string key = "PK"; +// std::map ts_map = {{DEFAULT_TS_COL_ID, 100}}; // cur time == 100 +// auto* block = new DataBlock(1, "test1", 5); +// segment.Put(Slice(key), ts_map, block, true); +// ASSERT_EQ(1, GetCount(&segment, DEFAULT_TS_COL_ID)); +// ts_map = {{DEFAULT_TS_COL_ID, 200}}; +// block = new DataBlock(1, "test1", 5); +// segment.Put(Slice(key), ts_map, block, true); +// ASSERT_EQ(1, GetCount(&segment, DEFAULT_TS_COL_ID)); +// } +// } + +} // namespace storage +} // namespace openmldb + +int main(int argc, char** argv) { + ::openmldb::base::SetLogLevel(INFO); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/storage/mem_table.cc b/src/storage/mem_table.cc index 023974e3c6b..910fdc06e4d 100644 --- a/src/storage/mem_table.cc +++ b/src/storage/mem_table.cc @@ -17,6 +17,7 @@ #include "storage/mem_table.h" #include + #include #include @@ -26,8 +27,8 @@ #include "common/timer.h" #include "gflags/gflags.h" #include "schema/index_util.h" -#include "storage/record.h" #include "storage/mem_table_iterator.h" +#include "storage/record.h" DECLARE_uint32(skiplist_max_height); DECLARE_uint32(skiplist_max_height); @@ -54,7 +55,7 @@ MemTable::MemTable(const ::openmldb::api::TableMeta& table_meta) : Table(table_meta.storage_mode(), table_meta.name(), table_meta.tid(), table_meta.pid(), 0, true, 60 * 1000, std::map(), ::openmldb::type::TTLType::kAbsoluteTime, ::openmldb::type::CompressType::kNoCompress), - segments_(MAX_INDEX_NUM, nullptr) { + segments_(MAX_INDEX_NUM, nullptr) { seg_cnt_ = 8; enable_gc_ = true; segment_released_ = false; @@ -80,7 +81,7 @@ MemTable::~MemTable() { PDLOG(INFO, "drop memtable. 
tid %u pid %u", id_, pid_); } -bool MemTable::Init() { +bool MemTable::InitMeta() { key_entry_max_height_ = FLAGS_key_entry_max_height; if (!InitFromMeta()) { return false; @@ -88,21 +89,33 @@ bool MemTable::Init() { if (table_meta_->seg_cnt() > 0) { seg_cnt_ = table_meta_->seg_cnt(); } + return true; +} + +uint32_t MemTable::KeyEntryMaxHeight(const std::shared_ptr& inner_idx) { uint32_t global_key_entry_max_height = 0; if (table_meta_->has_key_entry_max_height() && table_meta_->key_entry_max_height() <= FLAGS_skiplist_max_height && table_meta_->key_entry_max_height() > 0) { global_key_entry_max_height = table_meta_->key_entry_max_height(); } + if (global_key_entry_max_height > 0) { + return global_key_entry_max_height; + } else { + return inner_idx->GetKeyEntryMaxHeight(FLAGS_absolute_default_skiplist_height, + FLAGS_latest_default_skiplist_height); + } +} +bool MemTable::Init() { + if (!InitMeta()) { + LOG(WARNING) << "init meta failed. tid " << id_ << " pid " << pid_; + return false; + } + auto inner_indexs = table_index_.GetAllInnerIndex(); for (uint32_t i = 0; i < inner_indexs->size(); i++) { const std::vector& ts_vec = inner_indexs->at(i)->GetTsIdx(); - uint32_t cur_key_entry_max_height = 0; - if (global_key_entry_max_height > 0) { - cur_key_entry_max_height = global_key_entry_max_height; - } else { - cur_key_entry_max_height = inner_indexs->at(i)->GetKeyEntryMaxHeight(FLAGS_absolute_default_skiplist_height, - FLAGS_latest_default_skiplist_height); - } + uint32_t cur_key_entry_max_height = KeyEntryMaxHeight(inner_indexs->at(i)); + Segment** seg_arr = new Segment*[seg_cnt_]; if (!ts_vec.empty()) { for (uint32_t j = 0; j < seg_cnt_; j++) { @@ -226,10 +239,8 @@ absl::Status MemTable::Put(uint64_t time, const std::string& value, const Dimens } bool MemTable::Delete(const ::openmldb::api::LogEntry& entry) { - std::optional start_ts = entry.has_ts() ? std::optional{entry.ts()} - : std::nullopt; - std::optional end_ts = entry.has_end_ts() ? std::optional{entry.end_ts()} - : std::nullopt; + std::optional start_ts = entry.has_ts() ? std::optional{entry.ts()} : std::nullopt; + std::optional end_ts = entry.has_end_ts() ? 
std::optional{entry.end_ts()} : std::nullopt; if (entry.dimensions_size() > 0) { for (const auto& dimension : entry.dimensions()) { if (!Delete(dimension.idx(), dimension.key(), start_ts, end_ts)) { @@ -259,8 +270,8 @@ bool MemTable::Delete(const ::openmldb::api::LogEntry& entry) { return true; } -bool MemTable::Delete(uint32_t idx, const std::string& key, - const std::optional& start_ts, const std::optional& end_ts) { +bool MemTable::Delete(uint32_t idx, const std::string& key, const std::optional& start_ts, + const std::optional& end_ts) { auto index_def = GetIndex(idx); if (!index_def || !index_def->IsReady()) { return false; @@ -336,7 +347,7 @@ void MemTable::SchedGc() { for (uint32_t k = 0; k < seg_cnt_; k++) { if (segments_[i][k] != nullptr) { StatisticsInfo statistics_info(segments_[i][k]->GetTsCnt()); - if (real_index.size() == 1 || deleting_pos.size() + deleted_num == real_index.size()) { + if (real_index.size() == 1 || deleting_pos.size() + deleted_num == real_index.size()) { segments_[i][k]->ReleaseAndCount(&statistics_info); } else { segments_[i][k]->ReleaseAndCount(deleting_pos, &statistics_info); @@ -377,8 +388,8 @@ void MemTable::SchedGc() { } consumed = ::baidu::common::timer::get_micros() - consumed; record_byte_size_.fetch_sub(gc_record_byte_size, std::memory_order_relaxed); - PDLOG(INFO, "gc finished, gc_idx_cnt %lu, consumed %lu ms for table %s tid %u pid %u", - gc_idx_cnt, consumed / 1000, name_.c_str(), id_, pid_); + PDLOG(INFO, "gc finished, gc_idx_cnt %lu, consumed %lu ms for table %s tid %u pid %u", gc_idx_cnt, consumed / 1000, + name_.c_str(), id_, pid_); UpdateTTL(); } @@ -620,18 +631,25 @@ bool MemTable::GetRecordIdxCnt(uint32_t idx, uint64_t** stat, uint32_t* size) { } bool MemTable::AddIndexToTable(const std::shared_ptr& index_def) { - std::vector ts_vec = { index_def->GetTsColumn()->GetId() }; + std::vector ts_vec = {index_def->GetTsColumn()->GetId()}; uint32_t inner_id = index_def->GetInnerPos(); Segment** seg_arr = new Segment*[seg_cnt_]; for (uint32_t j = 0; j < seg_cnt_; j++) { seg_arr[j] = new Segment(FLAGS_absolute_default_skiplist_height, ts_vec); PDLOG(INFO, "init %u, %u segment. height %u, ts col num %u. 
tid %u pid %u", inner_id, j, - FLAGS_absolute_default_skiplist_height, ts_vec.size(), id_, pid_); + FLAGS_absolute_default_skiplist_height, ts_vec.size(), id_, pid_); } segments_[inner_id] = seg_arr; return true; } +uint32_t MemTable::SegIdx(const std::string& pk) { + if (seg_cnt_ > 1) { + return ::openmldb::base::hash(pk.c_str(), pk.length(), SEED) % seg_cnt_; + } + return 0; +} + ::hybridse::vm::WindowIterator* MemTable::NewWindowIterator(uint32_t index) { std::shared_ptr index_def = table_index_.GetIndex(index); if (!index_def || !index_def->IsReady()) { @@ -651,8 +669,8 @@ ::hybridse::vm::WindowIterator* MemTable::NewWindowIterator(uint32_t index) { if (ts_col) { ts_idx = ts_col->GetId(); } - return new MemTableKeyIterator(segments_[real_idx], seg_cnt_, ttl->ttl_type, - expire_time, expire_cnt, ts_idx, GetCompressType()); + return new MemTableKeyIterator(segments_[real_idx], seg_cnt_, ttl->ttl_type, expire_time, expire_cnt, ts_idx, + GetCompressType()); } TraverseIterator* MemTable::NewTraverseIterator(uint32_t index) { @@ -671,11 +689,11 @@ TraverseIterator* MemTable::NewTraverseIterator(uint32_t index) { uint32_t real_idx = index_def->GetInnerPos(); auto ts_col = index_def->GetTsColumn(); if (ts_col) { - return new MemTableTraverseIterator(segments_[real_idx], seg_cnt_, ttl->ttl_type, - expire_time, expire_cnt, ts_col->GetId(), GetCompressType()); + return new MemTableTraverseIterator(segments_[real_idx], seg_cnt_, ttl->ttl_type, expire_time, expire_cnt, + ts_col->GetId(), GetCompressType()); } - return new MemTableTraverseIterator(segments_[real_idx], seg_cnt_, ttl->ttl_type, - expire_time, expire_cnt, 0, GetCompressType()); + return new MemTableTraverseIterator(segments_[real_idx], seg_cnt_, ttl->ttl_type, expire_time, expire_cnt, 0, + GetCompressType()); } bool MemTable::GetBulkLoadInfo(::openmldb::api::BulkLoadInfoResponse* response) { diff --git a/src/storage/mem_table.h b/src/storage/mem_table.h index 694203c3e40..c85ffd12da4 100644 --- a/src/storage/mem_table.h +++ b/src/storage/mem_table.h @@ -54,10 +54,10 @@ class MemTable : public Table { absl::Status Put(uint64_t time, const std::string& value, const Dimensions& dimensions, bool put_if_absent) override; - bool GetBulkLoadInfo(::openmldb::api::BulkLoadInfoResponse* response); + virtual bool GetBulkLoadInfo(::openmldb::api::BulkLoadInfoResponse* response); - bool BulkLoad(const std::vector& data_blocks, - const ::google::protobuf::RepeatedPtrField<::openmldb::api::BulkLoadIndex>& indexes); + virtual bool BulkLoad(const std::vector& data_blocks, + const ::google::protobuf::RepeatedPtrField<::openmldb::api::BulkLoadIndex>& indexes); bool Delete(const ::openmldb::api::LogEntry& entry) override; @@ -68,7 +68,7 @@ class MemTable : public Table { TraverseIterator* NewTraverseIterator(uint32_t index) override; - ::hybridse::vm::WindowIterator* NewWindowIterator(uint32_t index); + ::hybridse::vm::WindowIterator* NewWindowIterator(uint32_t index) override; // release all memory allocated uint64_t Release(); @@ -104,15 +104,26 @@ class MemTable : public Table { protected: bool AddIndexToTable(const std::shared_ptr& index_def) override; + uint32_t SegIdx(const std::string& pk); + + Segment* GetSegment(uint32_t real_idx, uint32_t seg_idx) { + // TODO(hw): protect + return segments_[real_idx][seg_idx]; + } + Segment** GetSegments(uint32_t real_idx) { return segments_[real_idx]; } + + bool InitMeta(); + uint32_t KeyEntryMaxHeight(const std::shared_ptr& inner_idx); + private: bool CheckAbsolute(const TTLSt& ttl, uint64_t ts); bool 
CheckLatest(uint32_t index_id, const std::string& key, uint64_t ts); - bool Delete(uint32_t idx, const std::string& key, - const std::optional& start_ts, const std::optional& end_ts); + bool Delete(uint32_t idx, const std::string& key, const std::optional& start_ts, + const std::optional& end_ts); - private: + protected: uint32_t seg_cnt_; std::vector segments_; std::atomic enable_gc_; diff --git a/src/storage/mem_table_iterator.cc b/src/storage/mem_table_iterator.cc index 22cd7964640..f508d404af7 100644 --- a/src/storage/mem_table_iterator.cc +++ b/src/storage/mem_table_iterator.cc @@ -138,7 +138,7 @@ void MemTableKeyIterator::Next() { NextPK(); } -::hybridse::vm::RowIterator* MemTableKeyIterator::GetRawValue() { +TimeEntries::Iterator* MemTableKeyIterator::GetTimeIter() { TimeEntries::Iterator* it = nullptr; if (segments_[seg_idx_]->GetTsCnt() > 1) { KeyEntry* entry = ((KeyEntry**)pk_it_->GetValue())[ts_idx_]; // NOLINT @@ -150,6 +150,11 @@ ::hybridse::vm::RowIterator* MemTableKeyIterator::GetRawValue() { ticket_.Push((KeyEntry*)pk_it_->GetValue()); // NOLINT } it->SeekToFirst(); + return it; +} + +::hybridse::vm::RowIterator* MemTableKeyIterator::GetRawValue() { + TimeEntries::Iterator* it = GetTimeIter(); return new MemTableWindowIterator(it, ttl_type_, expire_time_, expire_cnt_, compress_type_); } diff --git a/src/storage/mem_table_iterator.h b/src/storage/mem_table_iterator.h index 4b3b2514824..427cdc09100 100644 --- a/src/storage/mem_table_iterator.h +++ b/src/storage/mem_table_iterator.h @@ -18,6 +18,7 @@ #include #include + #include "storage/segment.h" #include "vm/catalog.h" @@ -27,9 +28,12 @@ namespace storage { class MemTableWindowIterator : public ::hybridse::vm::RowIterator { public: MemTableWindowIterator(TimeEntries::Iterator* it, ::openmldb::storage::TTLType ttl_type, uint64_t expire_time, - uint64_t expire_cnt, type::CompressType compress_type) - : it_(it), record_idx_(1), expire_value_(expire_time, expire_cnt, ttl_type), - row_(), compress_type_(compress_type) {} + uint64_t expire_cnt, type::CompressType compress_type) + : it_(it), + record_idx_(1), + expire_value_(expire_time, expire_cnt, ttl_type), + row_(), + compress_type_(compress_type) {} ~MemTableWindowIterator(); @@ -59,8 +63,7 @@ class MemTableWindowIterator : public ::hybridse::vm::RowIterator { class MemTableKeyIterator : public ::hybridse::vm::WindowIterator { public: MemTableKeyIterator(Segment** segments, uint32_t seg_cnt, ::openmldb::storage::TTLType ttl_type, - uint64_t expire_time, uint64_t expire_cnt, uint32_t ts_index, - type::CompressType compress_type); + uint64_t expire_time, uint64_t expire_cnt, uint32_t ts_index, type::CompressType compress_type); ~MemTableKeyIterator() override; @@ -77,10 +80,13 @@ class MemTableKeyIterator : public ::hybridse::vm::WindowIterator { const hybridse::codec::Row GetKey() override; + protected: + TimeEntries::Iterator* GetTimeIter(); + private: void NextPK(); - private: + protected: Segment** segments_; uint32_t const seg_cnt_; uint32_t seg_idx_; @@ -97,10 +103,10 @@ class MemTableKeyIterator : public ::hybridse::vm::WindowIterator { class MemTableTraverseIterator : public TraverseIterator { public: MemTableTraverseIterator(Segment** segments, uint32_t seg_cnt, ::openmldb::storage::TTLType ttl_type, - uint64_t expire_time, uint64_t expire_cnt, uint32_t ts_index, - type::CompressType compress_type); + uint64_t expire_time, uint64_t expire_cnt, uint32_t ts_index, + type::CompressType compress_type); ~MemTableTraverseIterator() override; - inline bool Valid() override; + 
bool Valid() override; void Next() override; void NextPK() override; void Seek(const std::string& key, uint64_t time) override; diff --git a/src/storage/schema.cc b/src/storage/schema.cc index 3250a047a8b..f8a9d4fa4a6 100644 --- a/src/storage/schema.cc +++ b/src/storage/schema.cc @@ -129,6 +129,21 @@ uint32_t InnerIndexSt::GetKeyEntryMaxHeight(uint32_t abs_max_height, uint32_t la return max_height; } +int64_t InnerIndexSt::ClusteredTsId() { + int64_t id = -1; + for (const auto& cur_index : index_) { + if (cur_index->IsClusteredIndex()) { + auto ts_col = cur_index->GetTsColumn(); + DLOG_ASSERT(ts_col) << "clustered index should have ts column, even auto gen"; + if (ts_col) { + id = ts_col->GetId(); + } + } + } + return id; +} + + TableIndex::TableIndex() { indexs_ = std::make_shared>>(); inner_indexs_ = std::make_shared>>(); @@ -195,7 +210,8 @@ int TableIndex::ParseFromMeta(const ::openmldb::api::TableMeta& table_meta) { } } } - uint32_t key_idx = 0; + + // pos == idx for (int pos = 0; pos < table_meta.column_key_size(); pos++) { const auto& column_key = table_meta.column_key(pos); std::string name = column_key.index_name(); @@ -209,8 +225,10 @@ int TableIndex::ParseFromMeta(const ::openmldb::api::TableMeta& table_meta) { for (const auto& cur_col_name : column_key.col_name()) { col_vec.push_back(*(col_map[cur_col_name])); } - auto index = std::make_shared(column_key.index_name(), key_idx, status, - ::openmldb::type::IndexType::kTimeSerise, col_vec); + // index type is optional + common::IndexType index_type = column_key.has_type() ? column_key.type() : common::IndexType::kCovering; + auto index = std::make_shared(column_key.index_name(), pos, status, + ::openmldb::type::IndexType::kTimeSerise, col_vec, index_type); if (!column_key.ts_name().empty()) { const std::string& ts_name = column_key.ts_name(); index->SetTsColumn(col_map[ts_name]); @@ -226,7 +244,6 @@ int TableIndex::ParseFromMeta(const ::openmldb::api::TableMeta& table_meta) { DLOG(WARNING) << "add index failed"; return -1; } - key_idx++; } } // add default dimension diff --git a/src/storage/schema.h b/src/storage/schema.h index 9edc6e54b2a..39ee5891700 100644 --- a/src/storage/schema.h +++ b/src/storage/schema.h @@ -24,6 +24,7 @@ #include #include +#include "base/glog_wrapper.h" #include "common/timer.h" #include "proto/name_server.pb.h" #include "proto/tablet.pb.h" @@ -35,13 +36,7 @@ static constexpr uint32_t MAX_INDEX_NUM = 200; static constexpr uint32_t DEFAULT_TS_COL_ID = UINT32_MAX; static constexpr const char* DEFAULT_TS_COL_NAME = "___default_ts___"; -enum TTLType { - kAbsoluteTime = 1, - kRelativeTime = 2, - kLatestTime = 3, - kAbsAndLat = 4, - kAbsOrLat = 5 -}; +enum TTLType { kAbsoluteTime = 1, kRelativeTime = 2, kLatestTime = 3, kAbsAndLat = 4, kAbsOrLat = 5 }; // ttl unit: millisecond struct TTLSt { @@ -147,8 +142,7 @@ struct TTLSt { }; struct ExpiredChecker { - ExpiredChecker(uint64_t abs, uint64_t lat, TTLType type) : - abs_expired_ttl(abs), lat_ttl(lat), ttl_type(type) {} + ExpiredChecker(uint64_t abs, uint64_t lat, TTLType type) : abs_expired_ttl(abs), lat_ttl(lat), ttl_type(type) {} bool IsExpired(uint64_t abs, uint32_t record_idx) const { switch (ttl_type) { case TTLType::kAbsoluteTime: @@ -234,6 +228,11 @@ class IndexDef { IndexDef(const std::string& name, uint32_t id, IndexStatus status); IndexDef(const std::string& name, uint32_t id, const IndexStatus& status, ::openmldb::type::IndexType type, const std::vector& column_idx_map); + IndexDef(const std::string& name, uint32_t id, const IndexStatus& status, 
::openmldb::type::IndexType type, + const std::vector& column_idx_map, common::IndexType index_type) + : IndexDef(name, id, status, type, column_idx_map) { + index_type_ = index_type; + } const std::string& GetName() const { return name_; } inline const std::shared_ptr& GetTsColumn() const { return ts_column_; } void SetTsColumn(const std::shared_ptr& ts_column) { ts_column_ = ts_column; } @@ -250,15 +249,22 @@ class IndexDef { inline uint32_t GetInnerPos() const { return inner_pos_; } ::openmldb::common::ColumnKey GenColumnKey(); + common::IndexType GetIndexType() const { return index_type_; } + bool IsSecondaryIndex() { return index_type_ == common::IndexType::kSecondary; } + bool IsClusteredIndex() { return index_type_ == common::IndexType::kClustered; } + private: std::string name_; uint32_t index_id_; uint32_t inner_pos_; std::atomic status_; + // for compatible, type is only kTimeSerise ::openmldb::type::IndexType type_; std::vector columns_; std::shared_ptr ttl_st_; std::shared_ptr ts_column_; + // 0 covering, 1 clustered, 2 secondary, default 0 + common::IndexType index_type_ = common::IndexType::kCovering; }; class InnerIndexSt { @@ -270,11 +276,22 @@ class InnerIndexSt { ts_.push_back(ts_col->GetId()); } } + LOG_IF(DFATAL, ts_.size() != index_.size()) << "ts size not equal to index size"; } inline uint32_t GetId() const { return id_; } inline const std::vector& GetTsIdx() const { return ts_; } + // len(ts) == len(type) + inline std::vector GetTsIdxType() const { + std::vector ts_idx_type; + for (const auto& cur_index : index_) { + if (cur_index->GetTsColumn()) ts_idx_type.push_back(cur_index->GetIndexType()); + } + return ts_idx_type; + } inline const std::vector>& GetIndex() const { return index_; } uint32_t GetKeyEntryMaxHeight(uint32_t abs_max_height, uint32_t lat_max_height) const; + // -1 means no clustered idx in here, it's safe to cvt to uint32_t when id >= 0 + int64_t ClusteredTsId(); private: const uint32_t id_; diff --git a/src/storage/segment.cc b/src/storage/segment.cc index 6eb721d353c..87de216a1fb 100644 --- a/src/storage/segment.cc +++ b/src/storage/segment.cc @@ -313,12 +313,13 @@ bool Segment::GetTsIdx(const std::optional& idx, uint32_t* ts_idx) { return true; } -bool Segment::Delete(const std::optional& idx, const Slice& key, - uint64_t ts, const std::optional& end_ts) { +bool Segment::Delete(const std::optional& idx, const Slice& key, uint64_t ts, + const std::optional& end_ts) { uint32_t ts_idx = 0; if (!GetTsIdx(idx, &ts_idx)) { return false; } + void* entry = nullptr; if (entries_->Get(key, entry) < 0 || entry == nullptr) { return true; @@ -354,7 +355,7 @@ bool Segment::Delete(const std::optional& idx, const Slice& key, { std::lock_guard lock(mu_); data_node = key_entry->entries.Split(ts); - DLOG(INFO) << "entry " << key.ToString() << " split by " << ts; + DLOG(INFO) << "after delete, entry " << key.ToString() << " split by " << ts; } if (data_node != nullptr) { node_cache_.AddValueNodeList(ts_idx, gc_version_.load(std::memory_order_relaxed), data_node); @@ -434,11 +435,16 @@ void Segment::ExecuteGc(const TTLSt& ttl_st, StatisticsInfo* statistics_info) { } } -void Segment::ExecuteGc(const std::map& ttl_st_map, StatisticsInfo* statistics_info) { +void Segment::ExecuteGc(const std::map& ttl_st_map, StatisticsInfo* statistics_info, + std::optional clustered_ts_id) { if (ttl_st_map.empty()) { return; } if (ts_cnt_ <= 1) { + if (clustered_ts_id.has_value() && ts_idx_map_.begin()->first == clustered_ts_id.value()) { + LOG(INFO) << "skip normal gc in cidx"; + 
return; + } ExecuteGc(ttl_st_map.begin()->second, statistics_info); return; } @@ -454,7 +460,7 @@ void Segment::ExecuteGc(const std::map& ttl_st_map, StatisticsI if (!need_gc) { return; } - GcAllType(ttl_st_map, statistics_info); + GcAllType(ttl_st_map, statistics_info, clustered_ts_id); } void Segment::Gc4Head(uint64_t keep_cnt, StatisticsInfo* statistics_info) { @@ -485,11 +491,16 @@ void Segment::Gc4Head(uint64_t keep_cnt, StatisticsInfo* statistics_info) { idx_cnt_vec_[0]->fetch_sub(statistics_info->GetIdxCnt(0) - old, std::memory_order_relaxed); } -void Segment::GcAllType(const std::map& ttl_st_map, StatisticsInfo* statistics_info) { +void Segment::GcAllType(const std::map& ttl_st_map, StatisticsInfo* statistics_info, + std::optional clustered_ts_id) { uint64_t old = statistics_info->GetTotalCnt(); uint64_t consumed = ::baidu::common::timer::get_micros(); std::unique_ptr it(entries_->NewIterator()); it->SeekToFirst(); + for (auto [ts, ttl_st] : ttl_st_map) { + DLOG(INFO) << "ts " << ts << " ttl_st " << ttl_st.ToString() << " it will be current time - ttl?"; + } + while (it->Valid()) { KeyEntry** entry_arr = reinterpret_cast(it->GetValue()); Slice key = it->GetKey(); @@ -501,6 +512,11 @@ void Segment::GcAllType(const std::map& ttl_st_map, StatisticsI } auto pos = ts_idx_map_.find(kv.first); if (pos == ts_idx_map_.end() || pos->second >= ts_cnt_) { + LOG(WARNING) << ""; + continue; + } + if (clustered_ts_id.has_value() && kv.first == clustered_ts_id.value()) { + LOG(INFO) << "skip normal gc in cidx"; continue; } KeyEntry* entry = entry_arr[pos->second]; @@ -596,8 +612,8 @@ void Segment::GcAllType(const std::map& ttl_st_map, StatisticsI } } } - DEBUGLOG("[GcAll] segment gc consumed %lu, count %lu", (::baidu::common::timer::get_micros() - consumed) / 1000, - statistics_info->GetTotalCnt() - old); + DLOG(INFO) << "[GcAll] segment gc consumed " << (::baidu::common::timer::get_micros() - consumed) / 1000 + << "ms, count " << statistics_info->GetTotalCnt() - old; } void Segment::SplitList(KeyEntry* entry, uint64_t ts, ::openmldb::base::Node** node) { diff --git a/src/storage/segment.h b/src/storage/segment.h index 511df69e5c4..daaf25fe2f6 100644 --- a/src/storage/segment.h +++ b/src/storage/segment.h @@ -46,6 +46,7 @@ class MemTableIterator : public TableIterator { void Seek(const uint64_t time) override; bool Valid() override; void Next() override; + // GetXXX will core if it_==nullptr, don't use it without valid openmldb::base::Slice GetValue() const override; uint64_t GetKey() const override; void SeekToFirst() override; @@ -68,7 +69,7 @@ class Segment { public: explicit Segment(uint8_t height); Segment(uint8_t height, const std::vector& ts_idx_vec); - ~Segment(); + virtual ~Segment(); // legacy interface called by memtable and ut void Put(const Slice& key, uint64_t time, const char* data, uint32_t size, bool put_if_absent = false, @@ -78,25 +79,28 @@ class Segment { void BulkLoadPut(unsigned int key_entry_id, const Slice& key, uint64_t time, DataBlock* row); // main put method - bool Put(const Slice& key, const std::map& ts_map, DataBlock* row, bool put_if_absent = false); + virtual bool Put(const Slice& key, const std::map& ts_map, DataBlock* row, + bool put_if_absent = false); bool Delete(const std::optional& idx, const Slice& key); - bool Delete(const std::optional& idx, const Slice& key, - uint64_t ts, const std::optional& end_ts); + bool Delete(const std::optional& idx, const Slice& key, uint64_t ts, + const std::optional& end_ts); void Release(StatisticsInfo* statistics_info); void 
ExecuteGc(const TTLSt& ttl_st, StatisticsInfo* statistics_info); - void ExecuteGc(const std::map& ttl_st_map, StatisticsInfo* statistics_info); + void ExecuteGc(const std::map& ttl_st_map, StatisticsInfo* statistics_info, + std::optional clustered_ts_id = std::nullopt); void Gc4TTL(const uint64_t time, StatisticsInfo* statistics_info); void Gc4Head(uint64_t keep_cnt, StatisticsInfo* statistics_info); void Gc4TTLAndHead(const uint64_t time, const uint64_t keep_cnt, StatisticsInfo* statistics_info); void Gc4TTLOrHead(const uint64_t time, const uint64_t keep_cnt, StatisticsInfo* statistics_info); - void GcAllType(const std::map& ttl_st_map, StatisticsInfo* statistics_info); + void GcAllType(const std::map& ttl_st_map, StatisticsInfo* statistics_info, + std::optional clustered_ts_id = std::nullopt); - MemTableIterator* NewIterator(const Slice& key, Ticket& ticket, type::CompressType compress_type); // NOLINT - MemTableIterator* NewIterator(const Slice& key, uint32_t idx, Ticket& ticket, // NOLINT + virtual MemTableIterator* NewIterator(const Slice& key, Ticket& ticket, type::CompressType compress_type); // NOLINT + virtual MemTableIterator* NewIterator(const Slice& key, uint32_t idx, Ticket& ticket, // NOLINT type::CompressType compress_type); uint64_t GetIdxCnt() const { return idx_cnt_vec_[0]->load(std::memory_order_relaxed); } @@ -141,17 +145,17 @@ class Segment { void ReleaseAndCount(const std::vector& id_vec, StatisticsInfo* statistics_info); - private: + protected: void FreeList(uint32_t ts_idx, ::openmldb::base::Node* node, StatisticsInfo* statistics_info); void SplitList(KeyEntry* entry, uint64_t ts, ::openmldb::base::Node** node); bool GetTsIdx(const std::optional& idx, uint32_t* ts_idx); bool ListContains(KeyEntry* entry, uint64_t time, DataBlock* row, bool check_all_time); - bool PutUnlock(const Slice& key, uint64_t time, DataBlock* row, bool put_if_absent = false, - bool check_all_time = false); + virtual bool PutUnlock(const Slice& key, uint64_t time, DataBlock* row, bool put_if_absent = false, + bool check_all_time = false); - private: + protected: KeyEntries* entries_; std::mutex mu_; std::atomic idx_byte_size_; @@ -159,6 +163,7 @@ class Segment { uint8_t key_entry_max_height_; uint32_t ts_cnt_; std::atomic gc_version_; + // std::map ts_idx_map_; std::vector>> idx_cnt_vec_; uint64_t ttl_offset_; diff --git a/src/storage/table.cc b/src/storage/table.cc index 7126430a9d5..ebb27bf73ef 100644 --- a/src/storage/table.cc +++ b/src/storage/table.cc @@ -207,8 +207,10 @@ bool Table::AddIndex(const ::openmldb::common::ColumnKey& column_key) { } col_vec.push_back(it->second); } + + common::IndexType index_type = column_key.has_type() ? column_key.type() : common::IndexType::kCovering; index_def = std::make_shared(column_key.index_name(), table_index_.GetMaxIndexId() + 1, - IndexStatus::kReady, ::openmldb::type::IndexType::kTimeSerise, col_vec); + IndexStatus::kReady, ::openmldb::type::IndexType::kTimeSerise, col_vec, index_type); if (!column_key.ts_name().empty()) { if (auto ts_iter = schema.find(column_key.ts_name()); ts_iter == schema.end()) { PDLOG(WARNING, "not found ts_name[%s]. 
tid %u pid %u", column_key.ts_name().c_str(), id_, pid_); diff --git a/src/storage/table_iterator_test.cc b/src/storage/table_iterator_test.cc index 3af20940266..a880fc8151a 100644 --- a/src/storage/table_iterator_test.cc +++ b/src/storage/table_iterator_test.cc @@ -383,7 +383,8 @@ TEST_P(TableIteratorTest, releaseKeyIterator) { dim->set_key(key); std::string value; ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); + auto st = table->Put(0, value, request.dimensions()); + ASSERT_TRUE(st.ok()) << st.ToString(); } } @@ -429,7 +430,8 @@ TEST_P(TableIteratorTest, SeekNonExistent) { dim->set_key(key); std::string value; ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); + auto st = table->Put(0, value, request.dimensions()); + ASSERT_TRUE(st.ok()) << st.ToString(); } } diff --git a/src/tablet/tablet_impl.cc b/src/tablet/tablet_impl.cc index 2f7544f2847..b3b05e7b8e4 100644 --- a/src/tablet/tablet_impl.cc +++ b/src/tablet/tablet_impl.cc @@ -18,6 +18,7 @@ #include #include + #include #include #ifdef DISALLOW_COPY_AND_ASSIGN @@ -34,8 +35,6 @@ #include "absl/cleanup/cleanup.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "boost/bind.hpp" -#include "boost/container/deque.hpp" #include "base/file_util.h" #include "base/glog_wrapper.h" #include "base/hash.h" @@ -44,6 +43,8 @@ #include "base/status.h" #include "base/strings.h" #include "base/sys_info.h" +#include "boost/bind.hpp" +#include "boost/container/deque.hpp" #include "brpc/controller.h" #include "butil/iobuf.h" #include "codec/codec.h" @@ -62,6 +63,7 @@ #include "schema/schema_adapter.h" #include "storage/binlog.h" #include "storage/disk_table_snapshot.h" +#include "storage/index_organized_table.h" #include "storage/segment.h" #include "storage/table.h" #include "tablet/file_sender.h" @@ -201,7 +203,7 @@ bool TabletImpl::Init(const std::string& zk_cluster, const std::string& zk_path, if (!zk_cluster.empty()) { zk_client_ = new ZkClient(zk_cluster, real_endpoint, FLAGS_zk_session_timeout, endpoint, zk_path, - FLAGS_zk_auth_schema, FLAGS_zk_cert); + FLAGS_zk_auth_schema, FLAGS_zk_cert); bool ok = zk_client_->Init(); if (!ok) { PDLOG(ERROR, "fail to init zookeeper with cluster %s", zk_cluster.c_str()); @@ -374,8 +376,8 @@ void TabletImpl::UpdateTTL(RpcController* ctrl, const ::openmldb::api::UpdateTTL base::SetResponseStatus(base::ReturnCode::kWriteDataFailed, "write meta data failed", response); return; } - PDLOG(INFO, "update table tid %u pid %u ttl meta to abs_ttl %lu lat_ttl %lu index_name %s", tid, pid, abs_ttl, lat_ttl, - index_name.c_str()); + PDLOG(INFO, "update table tid %u pid %u ttl meta to abs_ttl %lu lat_ttl %lu index_name %s", tid, pid, abs_ttl, + lat_ttl, index_name.c_str()); response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); } @@ -464,7 +466,7 @@ int32_t TabletImpl::GetIndex(const ::openmldb::api::GetRequest* request, const : const std::map>& vers_schema, CombineIterator* it, std::string* value, uint64_t* ts) { if (it == nullptr || value == nullptr || ts == nullptr) { - PDLOG(WARNING, "invalid args"); + LOG(WARNING) << "invalid args"; return -1; } uint64_t st = request->ts(); @@ -472,10 +474,12 @@ int32_t TabletImpl::GetIndex(const ::openmldb::api::GetRequest* request, const : uint64_t et = request->et(); const openmldb::api::GetType& et_type = request->et_type(); if (st_type == ::openmldb::api::kSubKeyEq && et_type == ::openmldb::api::kSubKeyEq && st != et) { + LOG(WARNING) << "invalid args for st " << 
st << " not equal to et " << et; return -1; } ::openmldb::api::GetType real_et_type = et_type; ::openmldb::storage::TTLType ttl_type = it->GetTTLType(); + uint64_t expire_time = it->GetExpireTime(); if (ttl_type == ::openmldb::storage::TTLType::kAbsoluteTime || ttl_type == ::openmldb::storage::TTLType::kAbsOrLat) { @@ -484,22 +488,28 @@ int32_t TabletImpl::GetIndex(const ::openmldb::api::GetRequest* request, const : if (et < expire_time && et_type == ::openmldb::api::GetType::kSubKeyGt) { real_et_type = ::openmldb::api::GetType::kSubKeyGe; } + DLOG(INFO) << "expire time " << expire_time << ", after adjust: et " << et << " real_et_type " << real_et_type; + bool enable_project = false; openmldb::codec::RowProject row_project(vers_schema, request->projection()); if (request->projection().size() > 0) { bool ok = row_project.Init(); if (!ok) { - PDLOG(WARNING, "invalid project list"); + LOG(WARNING) << "invalid project list"; return -1; } enable_project = true; } + // it's ok when st < et(after adjust), we should return 0 rows cuz no valid data for this range + // but we have set the code -1, don't change the return code, accept it. if (st > 0 && st < et) { - DEBUGLOG("invalid args for st %lu less than et %lu or expire time %lu", st, et, expire_time); + DLOG(WARNING) << "invalid args for st " << st << " less than et " << et; return -1; } + DLOG(INFO) << "it valid " << it->Valid(); if (it->Valid()) { *ts = it->GetTs(); + DLOG(INFO) << "check " << *ts << " " << st << " " << et << " " << st_type << " " << real_et_type; if (st_type == ::openmldb::api::GetType::kSubKeyEq && st > 0 && *ts != st) { return 1; } @@ -513,7 +523,7 @@ int32_t TabletImpl::GetIndex(const ::openmldb::api::GetRequest* request, const : const int8_t* row_ptr = reinterpret_cast(data.data()); bool ok = row_project.Project(row_ptr, data.size(), &ptr, &size); if (!ok) { - PDLOG(WARNING, "fail to make a projection"); + LOG(WARNING) << "fail to make a projection"; return -4; } value->assign(reinterpret_cast(ptr), size); @@ -543,7 +553,7 @@ int32_t TabletImpl::GetIndex(const ::openmldb::api::GetRequest* request, const : break; default: - PDLOG(WARNING, "invalid et type %s", ::openmldb::api::GetType_Name(et_type).c_str()); + LOG(WARNING) << "invalid et type " << ::openmldb::api::GetType_Name(et_type).c_str(); return -2; } if (jump_out) { @@ -556,7 +566,7 @@ int32_t TabletImpl::GetIndex(const ::openmldb::api::GetRequest* request, const : const int8_t* row_ptr = reinterpret_cast(data.data()); bool ok = row_project.Project(row_ptr, data.size(), &ptr, &size); if (!ok) { - PDLOG(WARNING, "fail to make a projection"); + LOG(WARNING) << "fail to make a projection"; return -4; } value->assign(reinterpret_cast(ptr), size); @@ -670,6 +680,7 @@ void TabletImpl::Get(RpcController* controller, const ::openmldb::api::GetReques int32_t code = GetIndex(request, *table_meta, vers_schema, &combine_it, value, &ts); response->set_ts(ts); response->set_code(code); + DLOG(WARNING) << "get key " << request->key() << " ts " << ts << " code " << code; uint64_t end_time = ::baidu::common::timer::get_micros(); if (start_time + FLAGS_query_slow_log_threshold < end_time) { std::string index_name; @@ -749,10 +760,43 @@ void TabletImpl::Put(RpcController* controller, const ::openmldb::api::PutReques response->set_msg("invalid dimension parameter"); return; } - DLOG(INFO) << "put data to tid " << tid << " pid " << pid << " with key " << request->dimensions(0).key(); - // 1. normal put: ok, invalid data - // 2. 
put if absent: ok, exists but ignore, invalid data - st = table->Put(entry.ts(), entry.value(), entry.dimensions(), request->put_if_absent()); + if (request->check_exists()) { + // table should be iot + auto iot = std::dynamic_pointer_cast(table); + if (!iot) { + response->set_code(::openmldb::base::ReturnCode::kTableMetaIsIllegal); + response->set_msg("table type is not iot"); + return; + } + DLOG(INFO) << "check data exists in tid " << tid << " pid " << pid << " with key " + << entry.dimensions(0).key() << " ts " << entry.ts(); + // ts is ts value when check exists + st = iot->CheckDataExists(entry.ts(), entry.dimensions()); + } else { + DLOG(INFO) << "put data to tid " << tid << " pid " << pid << " with key " << request->dimensions(0).key(); + // 1. normal put: ok, invalid data + // 2. put if absent: ok, exists but ignore, invalid data + st = table->Put(entry.ts(), entry.value(), entry.dimensions(), request->put_if_absent()); + } + } + // when check exists, we won't do log + if (request->check_exists()) { + DLOG_ASSERT(request->check_exists()) << "check_exists should be true"; + DLOG_ASSERT(!request->put_if_absent()) << "put_if_absent should be false"; + DLOG(INFO) << "result " << st.ToString(); + // return ok if exists + if (absl::IsAlreadyExists(st)) { + response->set_code(base::ReturnCode::kOk); + response->set_msg("exists"); + } else if (absl::IsNotFound(st)) { + response->set_code(base::ReturnCode::kKeyNotFound); + response->set_msg(st.ToString()); + } else { + // other errors + response->set_code(base::ReturnCode::kError); + response->set_msg(st.ToString()); + } + return; } if (!st.ok()) { @@ -1332,7 +1376,7 @@ void TabletImpl::Traverse(RpcController* controller, const ::openmldb::api::Trav } base::Status TabletImpl::CheckTable(uint32_t tid, uint32_t pid, bool check_leader, - const std::shared_ptr& table) { + const std::shared_ptr
& table) { if (!table) { PDLOG(WARNING, "table does not exist. tid %u, pid %u", tid, pid); return {base::ReturnCode::kTableIsNotExist, "table does not exist"}; @@ -1349,15 +1393,16 @@ base::Status TabletImpl::CheckTable(uint32_t tid, uint32_t pid, bool check_leade } base::Status TabletImpl::DeleteAllIndex(const std::shared_ptr& table, - const std::shared_ptr& cur_index, - const std::string& key, - std::optional start_ts, - std::optional end_ts, + const std::shared_ptr& cur_index, const std::string& key, + std::optional start_ts, std::optional end_ts, bool skip_cur_ts_col, const std::shared_ptr& client_manager, uint32_t partition_num) { storage::Ticket ticket; std::unique_ptr iter(table->NewIterator(cur_index->GetId(), key, ticket)); + DLOG(INFO) << "delete all index in " << table->GetId() << "." << cur_index->GetId() << ", key " << key + << ", start_ts " << (start_ts.has_value() ? std::to_string(start_ts.value()) : "-1") << ", end_ts " + << (end_ts.has_value() ? std::to_string(end_ts.value()) : "-1"); if (start_ts.has_value()) { iter->Seek(start_ts.value()); } else { @@ -1365,7 +1410,7 @@ base::Status TabletImpl::DeleteAllIndex(const std::shared_ptr& t } auto indexs = table->GetAllIndex(); while (iter->Valid()) { - DEBUGLOG("cur ts %lu cur index pos %u", iter->GetKey(), cur_index->GetId()); + DLOG(INFO) << "cur ts " << iter->GetKey(); if (end_ts.has_value() && iter->GetKey() <= end_ts.value()) { break; } @@ -1449,14 +1494,13 @@ base::Status TabletImpl::DeleteAllIndex(const std::shared_ptr& t if (client == nullptr) { return {base::ReturnCode::kDeleteFailed, absl::StrCat("client is nullptr, pid ", cur_pid)}; } - DEBUGLOG("delete idx %u pid %u pk %s ts %lu end_ts %lu", - option.idx.value(), cur_pid, option.key.c_str(), option.start_ts.value(), option.end_ts.value()); std::string msg; // do not delete other index data option.enable_decode_value = false; + DLOG(INFO) << "pid " << cur_pid << " delete key " << option.DebugString(); if (auto status = client->Delete(table->GetId(), cur_pid, option, FLAGS_request_timeout_ms); !status.OK()) { return {base::ReturnCode::kDeleteFailed, - absl::StrCat("delete failed. key ", option.key, " pid ", cur_pid, " msg: ", status.GetMsg())}; + absl::StrCat("delete failed. key ", option.key, " pid ", cur_pid, " msg: ", status.GetMsg())}; } } @@ -1477,7 +1521,7 @@ void TabletImpl::Delete(RpcController* controller, const ::openmldb::api::Delete } auto table = GetTable(tid, pid); if (auto status = CheckTable(tid, pid, true, table); !status.OK()) { - SetResponseStatus(status, response); + SET_RESP_AND_WARN(response, status.GetCode(), status.GetMsg()); return; } auto replicator = GetReplicator(tid, pid); @@ -1546,13 +1590,14 @@ void TabletImpl::Delete(RpcController* controller, const ::openmldb::api::Delete } } } + DLOG(INFO) << tid << "." << pid << ": delete request " << request->ShortDebugString() << ", delete others " + << delete_others; auto aggrs = GetAggregators(tid, pid); if (!aggrs && !delete_others) { if (table->Delete(entry)) { - DEBUGLOG("delete ok. tid %u, pid %u, key %s", tid, pid, request->key().c_str()); + DLOG(INFO) << tid << "." 
<< pid << ": delete ok, key " << request->key(); } else { - response->set_code(::openmldb::base::ReturnCode::kDeleteFailed); - response->set_msg("delete failed"); + SET_RESP_AND_WARN(response, base::ReturnCode::kDeleteFailed, "delete failed"); return; } } else { @@ -1584,36 +1629,37 @@ void TabletImpl::Delete(RpcController* controller, const ::openmldb::api::Delete } uint32_t pid_num = tablet_table_handler->GetPartitionNum(); auto table_client_manager = tablet_table_handler->GetTableClientManager(); + DLOG(INFO) << "delete from table & aggr " << tid << "." << pid; if (entry.dimensions_size() > 0) { const auto& dimension = entry.dimensions(0); uint32_t idx = dimension.idx(); auto index_def = table->GetIndex(idx); const auto& key = dimension.key(); if (delete_others) { - auto status = DeleteAllIndex(table, index_def, key, start_ts, end_ts, false, - table_client_manager, pid_num); + auto status = + DeleteAllIndex(table, index_def, key, start_ts, end_ts, false, table_client_manager, pid_num); if (!status.OK()) { SET_RESP_AND_WARN(response, status.GetCode(), status.GetMsg()); return; } } if (!table->Delete(idx, key, start_ts, end_ts)) { - response->set_code(::openmldb::base::ReturnCode::kDeleteFailed); - response->set_msg("delete failed"); + SET_RESP_AND_WARN(response, base::ReturnCode::kDeleteFailed, "delete from partition failed"); return; } auto aggr = get_aggregator(aggrs, idx); if (aggr) { if (!aggr->Delete(key, start_ts, end_ts)) { - PDLOG(WARNING, "delete from aggr failed. base table: tid[%u] pid[%u] index[%u] key[%s]. " - "aggr table: tid[%u]", + PDLOG(WARNING, + "delete from aggr failed. base table: tid[%u] pid[%u] index[%u] key[%s]. " + "aggr table: tid[%u]", tid, pid, idx, key.c_str(), aggr->GetAggrTid()); response->set_code(::openmldb::base::ReturnCode::kDeleteFailed); response->set_msg("delete from associated pre-aggr table failed"); return; } } - DEBUGLOG("delete ok. tid %u, pid %u, key %s", tid, pid, key.c_str()); + DLOG(INFO) << tid << "." << pid << ": table & agg delete ok, key " << key; } else { bool is_first_hit_index = true; for (const auto& index_def : table->GetAllIndex()) { @@ -1629,8 +1675,8 @@ void TabletImpl::Delete(RpcController* controller, const ::openmldb::api::Delete while (iter->Valid()) { auto pk = iter->GetPK(); if (delete_others && is_first_hit_index) { - auto status = DeleteAllIndex(table, index_def, pk, start_ts, end_ts, true, - table_client_manager, pid_num); + auto status = + DeleteAllIndex(table, index_def, pk, start_ts, end_ts, true, table_client_manager, pid_num); if (!status.OK()) { SET_RESP_AND_WARN(response, status.GetCode(), status.GetMsg()); return; @@ -1638,15 +1684,16 @@ void TabletImpl::Delete(RpcController* controller, const ::openmldb::api::Delete } iter->NextPK(); if (!table->Delete(idx, pk, start_ts, end_ts)) { - response->set_code(::openmldb::base::ReturnCode::kDeleteFailed); - response->set_msg("delete failed"); + SET_RESP_AND_WARN(response, base::ReturnCode::kDeleteFailed, "delete failed"); return; } auto aggr = get_aggregator(aggrs, idx); if (aggr) { if (!aggr->Delete(pk, start_ts, end_ts)) { - PDLOG(WARNING, "delete from aggr failed. base table: tid[%u] pid[%u] index[%u] key[%s]. " - "aggr table: tid[%u]", tid, pid, idx, pk.c_str(), aggr->GetAggrTid()); + PDLOG(WARNING, + "delete from aggr failed. base table: tid[%u] pid[%u] index[%u] key[%s]. 
" + "aggr table: tid[%u]", + tid, pid, idx, pk.c_str(), aggr->GetAggrTid()); response->set_code(::openmldb::base::ReturnCode::kDeleteFailed); response->set_msg("delete from associated pre-aggr table failed"); return; @@ -1655,11 +1702,11 @@ void TabletImpl::Delete(RpcController* controller, const ::openmldb::api::Delete } is_first_hit_index = false; } + DLOG(INFO) << tid << "." << pid << ": table & agg delete ok when no entry dim."; } } response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); - replicator->AppendEntry(entry); if (FLAGS_binlog_notify_on_put) { replicator->Notify(); @@ -2499,7 +2546,7 @@ void TabletImpl::SetExpire(RpcController* controller, const ::openmldb::api::Set } void TabletImpl::MakeSnapshotInternal(uint32_t tid, uint32_t pid, uint64_t end_offset, - std::shared_ptr<::openmldb::api::TaskInfo> task, bool is_force) { + std::shared_ptr<::openmldb::api::TaskInfo> task, bool is_force) { PDLOG(INFO, "MakeSnapshotInternal begin, tid[%u] pid[%u]", tid, pid); std::shared_ptr
table; std::shared_ptr snapshot; @@ -3051,8 +3098,8 @@ void TabletImpl::LoadTable(RpcController* controller, const ::openmldb::api::Loa std::string db_path = GetDBPath(root_path, tid, pid); if (!::openmldb::base::IsExists(db_path)) { - PDLOG(WARNING, "table db path does not exist, but still load. tid %u, pid %u, path %s", - tid, pid, db_path.c_str()); + PDLOG(WARNING, "table db path does not exist, but still load. tid %u, pid %u, path %s", tid, pid, + db_path.c_str()); } std::shared_ptr
table = GetTable(tid, pid); @@ -3475,7 +3522,7 @@ void TabletImpl::CreateTable(RpcController* controller, const ::openmldb::api::C } void TabletImpl::TruncateTable(RpcController* controller, const ::openmldb::api::TruncateTableRequest* request, - ::openmldb::api::TruncateTableResponse* response, Closure* done) { + ::openmldb::api::TruncateTableResponse* response, Closure* done) { brpc::ClosureGuard done_guard(done); uint32_t tid = request->tid(); uint32_t pid = request->pid(); @@ -3488,8 +3535,8 @@ void TabletImpl::TruncateTable(RpcController* controller, const ::openmldb::api: for (const auto& aggr : *aggrs) { auto agg_table = aggr->GetAggTable(); if (!agg_table) { - PDLOG(WARNING, "aggrate table does not exist. tid[%u] pid[%u] index pos[%u]", - tid, pid, aggr->GetIndexPos()); + PDLOG(WARNING, "aggrate table does not exist. tid[%u] pid[%u] index pos[%u]", tid, pid, + aggr->GetIndexPos()); response->set_code(::openmldb::base::ReturnCode::kTableIsNotExist); response->set_msg("aggrate table does not exist"); return; @@ -3497,13 +3544,13 @@ void TabletImpl::TruncateTable(RpcController* controller, const ::openmldb::api: uint32_t agg_tid = agg_table->GetId(); uint32_t agg_pid = agg_table->GetPid(); if (auto status = TruncateTableInternal(agg_tid, agg_pid); !status.OK()) { - PDLOG(WARNING, "truncate aggrate table failed. tid[%u] pid[%u] index pos[%u]", - agg_tid, agg_pid, aggr->GetIndexPos()); + PDLOG(WARNING, "truncate aggrate table failed. tid[%u] pid[%u] index pos[%u]", agg_tid, agg_pid, + aggr->GetIndexPos()); base::SetResponseStatus(status, response); return; } - PDLOG(INFO, "truncate aggrate table success. tid[%u] pid[%u] index pos[%u]", - agg_tid, agg_pid, aggr->GetIndexPos()); + PDLOG(INFO, "truncate aggrate table success. tid[%u] pid[%u] index pos[%u]", agg_tid, agg_pid, + aggr->GetIndexPos()); } } response->set_code(::openmldb::base::ReturnCode::kOk); @@ -3556,8 +3603,8 @@ base::Status TabletImpl::TruncateTableInternal(uint32_t tid, uint32_t pid) { if (catalog_->AddTable(*table_meta, new_table)) { LOG(INFO) << "add table " << table_meta->name() << " to catalog with db " << table_meta->db(); } else { - LOG(WARNING) << "fail to add table " << table_meta->name() - << " to catalog with db " << table_meta->db(); + LOG(WARNING) << "fail to add table " << table_meta->name() << " to catalog with db " + << table_meta->db(); return {::openmldb::base::ReturnCode::kCatalogUpdateFailed, "fail to update catalog"}; } } @@ -3595,7 +3642,7 @@ void TabletImpl::ExecuteGc(RpcController* controller, const ::openmldb::api::Exe gc_pool_.AddTask(boost::bind(&TabletImpl::GcTable, this, tid, pid, true)); response->set_code(::openmldb::base::ReturnCode::kOk); response->set_msg("ok"); - PDLOG(INFO, "ExecuteGc. tid %u pid %u", tid, pid); + PDLOG(INFO, "ExecuteGc add task. 
tid %u pid %u", tid, pid); } void TabletImpl::GetTableFollower(RpcController* controller, const ::openmldb::api::GetTableFollowerRequest* request, @@ -3961,6 +4008,25 @@ int TabletImpl::UpdateTableMeta(const std::string& path, ::openmldb::api::TableM return UpdateTableMeta(path, table_meta, false); } +bool IsIOT(const ::openmldb::api::TableMeta* table_meta) { + auto cks = table_meta->column_key(); + if (cks.empty()) { + LOG(WARNING) << "no index in meta"; + return false; + } + if (cks[0].has_type() && cks[0].type() == common::IndexType::kClustered) { + // check other indexes + for (int i = 1; i < cks.size(); i++) { + if (cks[i].has_type() && cks[i].type() == common::IndexType::kClustered) { + LOG(WARNING) << "should be only one clustered index"; + return false; + } + } + return true; + } + return false; +} + int TabletImpl::CreateTableInternal(const ::openmldb::api::TableMeta* table_meta, std::string& msg) { uint32_t tid = table_meta->tid(); uint32_t pid = table_meta->pid(); @@ -3990,7 +4056,12 @@ int TabletImpl::CreateTableInternal(const ::openmldb::api::TableMeta* table_meta } std::string table_db_path = GetDBPath(db_root_path, tid, pid); if (table_meta->storage_mode() == openmldb::common::kMemory) { - table = std::make_shared(*table_meta); + if (IsIOT(table_meta)) { + LOG(INFO) << "create iot table " << tid << "." << pid; + table = std::make_shared(*table_meta, catalog_); + } else { + table = std::make_shared(*table_meta); + } } else { table = std::make_shared(*table_meta, table_db_path); } @@ -4228,7 +4299,16 @@ void TabletImpl::GcTable(uint32_t tid, uint32_t pid, bool execute_once) { std::shared_ptr
table = GetTable(tid, pid); if (table) { int32_t gc_interval = table->GetStorageMode() == common::kMemory ? FLAGS_gc_interval : FLAGS_disk_gc_interval; - table->SchedGc(); + if (auto iot = std::dynamic_pointer_cast(table); iot) { + sdk::SQLRouterOptions options; + options.zk_cluster = zk_cluster_; + options.zk_path = zk_path_; + auto router = sdk::NewClusterSQLRouter(options); + iot->SchedGCByDelete(router); // add a lock to avoid gc one table in the same time + } else { + table->SchedGc(); + } + if (!execute_once) { gc_pool_.DelayTask(gc_interval * 60 * 1000, boost::bind(&TabletImpl::GcTable, this, tid, pid, false)); } @@ -5164,12 +5244,12 @@ void TabletImpl::ExtractIndexData(RpcController* controller, const ::openmldb::a index_vec.push_back(cur_column_key); } if (IsClusterMode()) { - task_pool_.AddTask(boost::bind(&TabletImpl::ExtractIndexDataInternal, this, table, snapshot, - index_vec, request->partition_num(), request->offset(), request->dump_data(), + task_pool_.AddTask(boost::bind(&TabletImpl::ExtractIndexDataInternal, this, table, snapshot, index_vec, + request->partition_num(), request->offset(), request->dump_data(), task_ptr)); } else { - ExtractIndexDataInternal(table, snapshot, index_vec, request->partition_num(), request->offset(), - false, nullptr); + ExtractIndexDataInternal(table, snapshot, index_vec, request->partition_num(), request->offset(), false, + nullptr); } base::SetResponseOK(response); return; @@ -5781,9 +5861,10 @@ bool TabletImpl::CreateAggregatorInternal(const ::openmldb::api::CreateAggregato PDLOG(WARNING, "base table does not exist. tid %u, pid %u", base_meta.tid(), base_meta.pid()); return false; } - auto aggregator = ::openmldb::storage::CreateAggregator(base_meta, base_table, - *aggr_table->GetTableMeta(), aggr_table, aggr_replicator, request->index_pos(), request->aggr_col(), - request->aggr_func(), request->order_by_col(), request->bucket_size(), request->filter_col()); + auto aggregator = ::openmldb::storage::CreateAggregator( + base_meta, base_table, *aggr_table->GetTableMeta(), aggr_table, aggr_replicator, request->index_pos(), + request->aggr_col(), request->aggr_func(), request->order_by_col(), request->bucket_size(), + request->filter_col()); if (!aggregator) { msg.assign("create aggregator failed"); return false; @@ -5856,10 +5937,11 @@ TabletImpl::GetSystemTableIterator() { } auto schema = std::make_unique<::openmldb::codec::Schema>(); - + if (openmldb::schema::SchemaAdapter::ConvertSchema(*tablet_table_handler->GetSchema(), schema.get())) { std::map> tablet_clients = {{0, client}}; - return {{std::make_unique(tablet_table_handler->GetTid(), nullptr, tablet_clients), + return { + {std::make_unique(tablet_table_handler->GetTid(), nullptr, tablet_clients), std::move(schema)}}; } else { return std::nullopt; diff --git a/tools/tool.py b/tools/tool.py index b95a6246fc5..4f92f2a4098 100644 --- a/tools/tool.py +++ b/tools/tool.py @@ -219,7 +219,7 @@ def GetTableInfoHTTP(self, database, table_name = ''): ns = self.endpoint_map[self.ns_leader] conn = httplib.HTTPConnection(ns) param = {"db": database, "name": table_name} - headers = {"Content-type": "application/json"} + headers = {"Content-type": "application/json", "Authorization": "foo"} conn.request("POST", "/NameServer/ShowTable", json.dumps(param), headers) response = conn.getresponse() if response.status != 200: @@ -233,13 +233,15 @@ def GetTableInfoHTTP(self, database, table_name = ''): def ParseTableInfo(self, table_info): result = {} + if not table_info: + return Status(-1, "table info is 
empty"), None for record in table_info: is_leader = True if record[4] == "leader" else False is_alive = True if record[5] == "yes" else False partition = Partition(record[0], record[1], record[2], record[3], is_leader, is_alive, record[6]) result.setdefault(record[2], []) result[record[2]].append(partition) - return result + return Status(), result def ParseTableInfoJson(self, table_info): """parse one table's partition info from json""" @@ -260,8 +262,7 @@ def GetTablePartition(self, database, table_name): status, result = self.GetTableInfo(database, table_name) if not status.OK: return status, None - partition_dict = self.ParseTableInfo(result) - return Status(), partition_dict + return self.ParseTableInfo(result) def GetAllTable(self, database): status, result = self.GetTableInfo(database) @@ -323,7 +324,7 @@ def LoadTableHTTP(self, endpoint, name, tid, pid, storage): # ttl won't effect, set to 0, and seg cnt is always 8 # and no matter if leader param = {"table_meta": {"name": name, "tid": tid, "pid": pid, "ttl":0, "seg_cnt":8, "storage_mode": storage}} - headers = {"Content-type": "application/json"} + headers = {"Content-type": "application/json", "Authorization": "foo"} conn.request("POST", "/TabletServer/LoadTable", json.dumps(param), headers) response = conn.getresponse() if response.status != 200: From 92bacabf4be130a7703e07b1f57aaeb4e7b809b5 Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Thu, 30 May 2024 18:10:26 +0800 Subject: [PATCH 02/17] fix --- src/nameserver/name_server_impl.cc | 1 - src/storage/index_organized_table.cc | 38 ++++++++++++++++++---------- src/storage/iot_segment.cc | 3 ++- src/storage/iot_segment.h | 14 +++------- src/storage/node_cache.cc | 2 +- src/storage/segment.cc | 10 ++++++-- src/storage/segment.h | 4 +-- src/storage/table_iterator_test.cc | 6 +++-- 8 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/nameserver/name_server_impl.cc b/src/nameserver/name_server_impl.cc index 52acb8137d7..c9f5dc36cf0 100644 --- a/src/nameserver/name_server_impl.cc +++ b/src/nameserver/name_server_impl.cc @@ -1431,7 +1431,6 @@ base::Status NameServerImpl::DeleteUserRecord(const std::string& host, const std for (int meta_idx = 0; meta_idx < table_partition.partition_meta_size(); meta_idx++) { if (table_partition.partition_meta(meta_idx).is_leader() && table_partition.partition_meta(meta_idx).is_alive()) { - uint64_t cur_ts = ::baidu::common::timer::get_micros() / 1000; std::string endpoint = table_partition.partition_meta(meta_idx).endpoint(); auto table_ptr = GetTablet(endpoint); if (!table_ptr->client_->Delete(tid, 0, host + "|" + user, "index", msg)) { diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc index 97b932cc5c2..d198cdf1b56 100644 --- a/src/storage/index_organized_table.cc +++ b/src/storage/index_organized_table.cc @@ -305,7 +305,8 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c if (ts_col) { int64_t ts = 0; if (ts_col->IsAutoGenTs()) { - // clustered index still use current time to ttl and delete iter, we'll check time series size if ts is auto gen + // clustered index still use current time to ttl and delete iter, we'll check time series size if ts + // is auto gen ts = time; } else if (decoder->GetInteger(data, ts_col->GetId(), ts_col->GetType(), &ts) != 0) { return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": get ts failed")); @@ -362,7 +363,8 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c } int64_t ts = 0; if 
(ts_col->IsAutoGenTs()) { - // clustered index still use current time to ttl and delete iter, we'll check time series size if ts is auto gen + // clustered index still use current time to ttl and delete iter, we'll check time series size if ts is + // auto gen ts = time; } else if (decoder->GetInteger(data, ts_col->GetId(), ts_col->GetType(), &ts) != 0) { return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": get ts failed")); @@ -401,7 +403,15 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c return absl::AlreadyExistsError("data exists"); // let caller know exists } } - record_byte_size_.fetch_add(GetRecordSize(value.length())); + // record size only has 1 copy, but if we delete sblock + // TODO(hw): test for cal + if (real_ref_cnt > 0) { + record_byte_size_.fetch_add(GetRecordSize(cblock->size)); + } + if (secondary_ref_cnt > 0) { + record_byte_size_.fetch_add(GetRecordSize(sblock->size)); + } + return absl::OkStatus(); } @@ -578,7 +588,6 @@ absl::Status IndexOrganizedTable::ClusteredIndexGCByDelete(const std::shared_ptr // delete entries by sql if (info.Size() > 0) { LOG(INFO) << "delete cidx " << info.Size() << " entries by sql"; - auto meta = GetTableMeta(); auto cols = meta->column_desc(); // copy codec::RowView row_view(cols); @@ -594,10 +603,10 @@ absl::Status IndexOrganizedTable::ClusteredIndexGCByDelete(const std::shared_ptr MakeDeleteSQL(GetDB(), GetName(), meta->column_key(0), (int8_t*)values->data, ts, row_view, hint); // TODO(hw): if delete failed, we can't revert. And if sidx skeys+sts doesn't change, no need to delete and // then insert - DLOG(INFO) << "delete sql " << sql; if (sql.empty()) { return absl::InternalError("make delete sql failed"); } + // delete will move node to node cache, it's alive, so GCEntryInfo can unref it hybridse::sdk::Status status; router->ExecuteSQL(sql, &status); if (!status.IsOK()) { @@ -623,10 +632,7 @@ void IndexOrganizedTable::SchedGCByDelete(const std::shared_ptr& if (!st.ok()) { LOG(WARNING) << "cidx gc by delete error: " << st.ToString(); } - // TODO(hw): don't gc sidx or covering index? - // may core on GcFreeList - // but record cnt in segment and tablet status can't change if no gc or free - // for all index, only do free? don't do gc TODO how to check the record cnt? + // TODO how to check the record byte size? uint64_t gc_idx_cnt = 0; uint64_t gc_record_byte_size = 0; auto inner_indexs = table_index_.GetAllInnerIndex(); @@ -663,6 +669,9 @@ void IndexOrganizedTable::SchedGCByDelete(const std::shared_ptr& } gc_idx_cnt += statistics_info.GetTotalCnt(); gc_record_byte_size += statistics_info.record_byte_size; + LOG(INFO) << "release segment[" << i << "][" << k << "] done, gc record cnt " + << statistics_info.GetTotalCnt() << ", gc record byte size " + << statistics_info.record_byte_size; } } } @@ -689,15 +698,18 @@ void IndexOrganizedTable::SchedGCByDelete(const std::shared_ptr& gc_idx_cnt += statistics_info.GetTotalCnt(); gc_record_byte_size += statistics_info.record_byte_size; seg_gc_time = ::baidu::common::timer::get_micros() / 1000 - seg_gc_time; - PDLOG(INFO, "gc segment[%u][%u] done consumed %lu for table %s tid %u pid %u", i, j, seg_gc_time, - name_.c_str(), id_, pid_); + LOG(INFO) << "gc segment[" << i << "][" << j << "] done, consumed time " << seg_gc_time << "ms for table " + << name_ << "[" << id_ << "." 
<< pid_ << "], gc record cnt " << statistics_info.GetTotalCnt() + << ", gc record byte size " << statistics_info.record_byte_size; } } consumed = ::baidu::common::timer::get_micros() - consumed; + LOG(INFO) << "record byte size before gc: " << record_byte_size_.load() + << ", gc record byte size: " << gc_record_byte_size << ", gc idx cnt: " << gc_idx_cnt + << ", gc consumed: " << consumed / 1000 << " ms"; record_byte_size_.fetch_sub(gc_record_byte_size, std::memory_order_relaxed); UpdateTTL(); - LOG(INFO) << "iot table " << name_ << "[" << id_ << "." << pid_ << "] gc and update ttl done: " << consumed / 1000 - << " ms, total gc cnt " << gc_idx_cnt; + LOG(INFO) << "update ttl done"; } bool IndexOrganizedTable::AddIndexToTable(const std::shared_ptr& index_def) { diff --git a/src/storage/iot_segment.cc b/src/storage/iot_segment.cc index 13cf1e55bef..6e147da329b 100644 --- a/src/storage/iot_segment.cc +++ b/src/storage/iot_segment.cc @@ -92,6 +92,7 @@ bool IOTSegment::PutUnlock(const Slice& key, uint64_t time, DataBlock* row, bool reinterpret_cast(entry)->count_.fetch_add(1, std::memory_order_relaxed); byte_size += GetRecordTsIdxSize(height); idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after add " << byte_size; return true; } @@ -160,6 +161,7 @@ bool IOTSegment::Put(const Slice& key, const std::map& ts_map entry->count_.fetch_add(1, std::memory_order_relaxed); byte_size += GetRecordTsIdxSize(height); idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_; idx_cnt_vec_[pos->second]->fetch_add(1, std::memory_order_relaxed); } return true; @@ -382,7 +384,6 @@ void IOTSegment::GrepGCAllType(const std::map& ttl_st_map, GCEn DLOG(DFATAL) << "entry is null, impossible"; continue; } - entry->GetCount(); // for test switch (kv.second.ttl_type) { case ::openmldb::storage::TTLType::kAbsoluteTime: { GrepGC4Abs(entry, key, kv.second, cur_time, ttl_offset_, gc_entry_info); diff --git a/src/storage/iot_segment.h b/src/storage/iot_segment.h index 01b63ca7f84..b610241f240 100644 --- a/src/storage/iot_segment.h +++ b/src/storage/iot_segment.h @@ -234,7 +234,9 @@ class GCEntryInfo { ~GCEntryInfo() { for (auto& entry : entries_) { entry.second->dim_cnt_down--; - // TODO delete? 
+ // data block should be moved to node_cache then delete + // I don't want delete block here + LOG_IF(ERROR, entry.second->dim_cnt_down == 0) << "dim_cnt_down=0 but no delete"; } } void AddEntry(const Slice& keys, uint64_t ts, storage::DataBlock* ptr) { @@ -282,15 +284,7 @@ class IOTSegment : public Segment { void GrepGCEntry(const std::map& ttl_st_map, GCEntryInfo* gc_entry_info); - MemTableIterator* NewIterator(const Slice& key, Ticket& ticket, type::CompressType compress_type) { // NOLINT - DLOG_ASSERT(false) << "unsupported, let iot table create it"; - return nullptr; - } - MemTableIterator* NewIterator(const Slice& key, uint32_t idx, Ticket& ticket, // NOLINT - type::CompressType compress_type) { - DLOG_ASSERT(false) << "unsupported, let iot table create it"; - return nullptr; - } + // if segment is not secondary idx, use normal NewIterator in Segment private: void GrepGCAllType(const std::map& ttl_st_map, GCEntryInfo* gc_entry_info); diff --git a/src/storage/node_cache.cc b/src/storage/node_cache.cc index 0f1286494e8..53f4d2c7cd9 100644 --- a/src/storage/node_cache.cc +++ b/src/storage/node_cache.cc @@ -113,7 +113,7 @@ void NodeCache::FreeNode(uint32_t idx, base::Node* node, S } gc_info->IncrIdxCnt(idx); gc_info->idx_byte_size += GetRecordTsIdxSize(node->Height()); - DLOG(INFO) << "delete key " << node->GetKey() << " with height " << node->Height(); + DLOG(INFO) << "delete key " << node->GetKey() << " with height " << (unsigned int)node->Height(); if (node->GetValue()->dim_cnt_down > 1) { node->GetValue()->dim_cnt_down--; } else { diff --git a/src/storage/segment.cc b/src/storage/segment.cc index 87de216a1fb..cbe3122b059 100644 --- a/src/storage/segment.cc +++ b/src/storage/segment.cc @@ -175,6 +175,7 @@ bool Segment::PutUnlock(const Slice& key, uint64_t time, DataBlock* row, bool pu reinterpret_cast(entry)->count_.fetch_add(1, std::memory_order_relaxed); byte_size += GetRecordTsIdxSize(height); idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after add " << byte_size; return true; } @@ -250,6 +251,7 @@ bool Segment::Put(const Slice& key, const std::map& ts_map, D uint8_t height = entry->entries.Insert(kv.second, row); entry->count_.fetch_add(1, std::memory_order_relaxed); byte_size += GetRecordTsIdxSize(height); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after add " << byte_size; idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); idx_cnt_vec_[pos->second]->fetch_add(1, std::memory_order_relaxed); } @@ -369,6 +371,7 @@ void Segment::FreeList(uint32_t ts_idx, ::openmldb::base::NodeIncrIdxCnt(ts_idx); ::openmldb::base::Node* tmp = node; idx_byte_size_.fetch_sub(GetRecordTsIdxSize(tmp->Height())); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after sub " << GetRecordTsIdxSize(tmp->Height()); node = node->GetNextNoBarrier(0); DEBUGLOG("delete key %lu with height %u", tmp->GetKey(), tmp->Height()); if (tmp->GetValue()->dim_cnt_down > 1) { @@ -393,7 +396,10 @@ void Segment::GcFreeList(StatisticsInfo* statistics_info) { for (size_t idx = 0; idx < idx_cnt_vec_.size(); idx++) { idx_cnt_vec_[idx]->fetch_sub(statistics_info->GetIdxCnt(idx) - old.GetIdxCnt(idx), std::memory_order_relaxed); } + idx_byte_size_.fetch_sub(statistics_info->idx_byte_size - old.idx_byte_size); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after sub " + << statistics_info->idx_byte_size - old.idx_byte_size; } void Segment::ExecuteGc(const TTLSt& ttl_st, StatisticsInfo* statistics_info) { @@ -442,7 
+448,7 @@ void Segment::ExecuteGc(const std::map& ttl_st_map, StatisticsI } if (ts_cnt_ <= 1) { if (clustered_ts_id.has_value() && ts_idx_map_.begin()->first == clustered_ts_id.value()) { - LOG(INFO) << "skip normal gc in cidx"; + DLOG(INFO) << "skip normal gc in cidx"; return; } ExecuteGc(ttl_st_map.begin()->second, statistics_info); @@ -516,7 +522,7 @@ void Segment::GcAllType(const std::map& ttl_st_map, StatisticsI continue; } if (clustered_ts_id.has_value() && kv.first == clustered_ts_id.value()) { - LOG(INFO) << "skip normal gc in cidx"; + DLOG(INFO) << "skip normal gc in cidx"; continue; } KeyEntry* entry = entry_arr[pos->second]; diff --git a/src/storage/segment.h b/src/storage/segment.h index daaf25fe2f6..01a76374889 100644 --- a/src/storage/segment.h +++ b/src/storage/segment.h @@ -99,8 +99,8 @@ class Segment { void GcAllType(const std::map& ttl_st_map, StatisticsInfo* statistics_info, std::optional clustered_ts_id = std::nullopt); - virtual MemTableIterator* NewIterator(const Slice& key, Ticket& ticket, type::CompressType compress_type); // NOLINT - virtual MemTableIterator* NewIterator(const Slice& key, uint32_t idx, Ticket& ticket, // NOLINT + MemTableIterator* NewIterator(const Slice& key, Ticket& ticket, type::CompressType compress_type); // NOLINT + MemTableIterator* NewIterator(const Slice& key, uint32_t idx, Ticket& ticket, // NOLINT type::CompressType compress_type); uint64_t GetIdxCnt() const { return idx_cnt_vec_[0]->load(std::memory_order_relaxed); } diff --git a/src/storage/table_iterator_test.cc b/src/storage/table_iterator_test.cc index a880fc8151a..47847498e0d 100644 --- a/src/storage/table_iterator_test.cc +++ b/src/storage/table_iterator_test.cc @@ -152,7 +152,8 @@ TEST_P(TableIteratorTest, latest) { dim->set_key(key); std::string value; ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); + auto st = table->Put(0, value, request.dimensions()); + ASSERT_TRUE(st.ok()) << st.ToString(); } } ::hybridse::vm::WindowIterator* it = table->NewWindowIterator(0); @@ -216,7 +217,8 @@ TEST_P(TableIteratorTest, smoketest2) { dim->set_key(key); std::string value; ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); + auto st = table->Put(0, value, request.dimensions()); + ASSERT_TRUE(st.ok()) << st.ToString(); } } ::hybridse::vm::WindowIterator* it = table->NewWindowIterator(0); From 5153950dec158dbe9c0f3d9f2d3df6d3f86e7edf Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Thu, 30 May 2024 18:33:03 +0800 Subject: [PATCH 03/17] fix --- src/sdk/sql_cluster_router.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sdk/sql_cluster_router.cc b/src/sdk/sql_cluster_router.cc index 51e9a9c2848..1870decd626 100644 --- a/src/sdk/sql_cluster_router.cc +++ b/src/sdk/sql_cluster_router.cc @@ -2422,7 +2422,7 @@ base::Status SQLClusterRouter::HandleSQLCreateTable(hybridse::node::CreatePlanNo hybridse::base::Status sql_status; bool is_cluster_mode = cluster_sdk_->IsClusterMode(); - // TODO(hw): support MemTable and IOT, just force use IOT for test + ::openmldb::sdk::NodeAdapter::TransformToTableDef(create_node, &table_info, default_replica_num, is_cluster_mode, &sql_status); if (sql_status.code != 0) { @@ -3044,7 +3044,7 @@ std::shared_ptr SQLClusterRouter::ExecuteSQL( case hybridse::node::kPlanTypeCreateUser: { auto create_node = dynamic_cast(node); UserInfo user_info; - ; + auto result = GetUser(create_node->Name(), &user_info); if (!result.ok()) { *status = {StatusCode::kCmdError, 
result.status().message()}; From 14f6f29e2fcfe0d1dc660ab985e17e6cebe47f9d Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Fri, 31 May 2024 14:19:20 +0800 Subject: [PATCH 04/17] fix delete key entry --- src/storage/index_organized_table.cc | 8 ++--- src/storage/iot_segment.cc | 2 +- src/storage/key_entry.cc | 2 +- src/storage/node_cache.cc | 5 ++-- src/storage/record.h | 17 ++++++++--- src/storage/segment.cc | 45 ++++++++++++++++++++++++++-- 6 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc index d198cdf1b56..6e05c7c9ee0 100644 --- a/src/storage/index_organized_table.cc +++ b/src/storage/index_organized_table.cc @@ -403,7 +403,7 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c return absl::AlreadyExistsError("data exists"); // let caller know exists } } - // record size only has 1 copy, but if we delete sblock + // cblock and sblock both will sub record_byte_size_ when delete, so add them all // TODO(hw): test for cal if (real_ref_cnt > 0) { record_byte_size_.fetch_add(GetRecordSize(cblock->size)); @@ -698,9 +698,9 @@ void IndexOrganizedTable::SchedGCByDelete(const std::shared_ptr& gc_idx_cnt += statistics_info.GetTotalCnt(); gc_record_byte_size += statistics_info.record_byte_size; seg_gc_time = ::baidu::common::timer::get_micros() / 1000 - seg_gc_time; - LOG(INFO) << "gc segment[" << i << "][" << j << "] done, consumed time " << seg_gc_time << "ms for table " - << name_ << "[" << id_ << "." << pid_ << "], gc record cnt " << statistics_info.GetTotalCnt() - << ", gc record byte size " << statistics_info.record_byte_size; + VLOG(1) << "gc segment[" << i << "][" << j << "] done, consumed time " << seg_gc_time << "ms for table " + << name_ << "[" << id_ << "." 
<< pid_ << "], statistics_info: [" << statistics_info.DebugString() + << "]"; } } consumed = ::baidu::common::timer::get_micros() - consumed; diff --git a/src/storage/iot_segment.cc b/src/storage/iot_segment.cc index 6e147da329b..89a19e4838f 100644 --- a/src/storage/iot_segment.cc +++ b/src/storage/iot_segment.cc @@ -161,7 +161,7 @@ bool IOTSegment::Put(const Slice& key, const std::map& ts_map entry->count_.fetch_add(1, std::memory_order_relaxed); byte_size += GetRecordTsIdxSize(height); idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); - DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_; + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after add " << byte_size; idx_cnt_vec_[pos->second]->fetch_add(1, std::memory_order_relaxed); } return true; diff --git a/src/storage/key_entry.cc b/src/storage/key_entry.cc index 2713510f16c..8af33e8fc70 100644 --- a/src/storage/key_entry.cc +++ b/src/storage/key_entry.cc @@ -36,7 +36,7 @@ void KeyEntry::Release(uint32_t idx, StatisticsInfo* statistics_info) { if (node->GetValue()->dim_cnt_down > 1) { node->GetValue()->dim_cnt_down--; } else { - DEBUGLOG("delele data block for key %lu", node->GetKey()); + VLOG(1) << "delete data block for key " << node->GetKey(); statistics_info->record_byte_size += GetRecordSize(node->GetValue()->size); delete node->GetValue(); } diff --git a/src/storage/node_cache.cc b/src/storage/node_cache.cc index 53f4d2c7cd9..766dfe78be3 100644 --- a/src/storage/node_cache.cc +++ b/src/storage/node_cache.cc @@ -79,6 +79,7 @@ void NodeCache::Free(uint64_t version, StatisticsInfo* gc_info) { node1 = key_entry_node_list_.Split(version); node2 = value_node_list_.Split(version); } + DLOG(INFO) << "free version " << version << ", node1 " << node1 << ", node2 " << node2; while (node1) { auto entry_node_list = node1->GetValue(); for (auto& entry_node : *entry_node_list) { @@ -113,11 +114,11 @@ void NodeCache::FreeNode(uint32_t idx, base::Node* node, S } gc_info->IncrIdxCnt(idx); gc_info->idx_byte_size += GetRecordTsIdxSize(node->Height()); - DLOG(INFO) << "delete key " << node->GetKey() << " with height " << (unsigned int)node->Height(); + VLOG(1) << "delete key " << node->GetKey() << " with height " << (unsigned int)node->Height(); if (node->GetValue()->dim_cnt_down > 1) { node->GetValue()->dim_cnt_down--; } else { - DLOG(INFO) << "delele data block for key " << node->GetKey(); + VLOG(1) << "delete data block for key " << node->GetKey(); gc_info->record_byte_size += GetRecordSize(node->GetValue()->size); delete node->GetValue(); } diff --git a/src/storage/record.h b/src/storage/record.h index b3b06611f90..a5ab3f651ff 100644 --- a/src/storage/record.h +++ b/src/storage/record.h @@ -18,8 +18,10 @@ #define SRC_STORAGE_RECORD_H_ #include -#include "base/slice.h" + +#include "absl/strings/str_cat.h" #include "base/skiplist.h" +#include "base/slice.h" #include "storage/key_entry.h" namespace openmldb { @@ -67,9 +69,7 @@ struct StatisticsInfo { } } - uint64_t GetIdxCnt(uint32_t idx) const { - return idx >= idx_cnt_vec.size() ? 0 : idx_cnt_vec[idx]; - } + uint64_t GetIdxCnt(uint32_t idx) const { return idx >= idx_cnt_vec.size() ? 
0 : idx_cnt_vec[idx]; } uint64_t GetTotalCnt() const { uint64_t total_cnt = 0; @@ -79,6 +79,15 @@ struct StatisticsInfo { return total_cnt; } + std::string DebugString() { + std::string str; + absl::StrAppend(&str, "idx_byte_size: ", idx_byte_size, " record_byte_size: ", record_byte_size, " idx_cnt: "); + for (uint32_t i = 0; i < idx_cnt_vec.size(); i++) { + absl::StrAppend(&str, i, ":", idx_cnt_vec[i], " "); + } + return str; + } + std::vector idx_cnt_vec; uint64_t idx_byte_size = 0; uint64_t record_byte_size = 0; diff --git a/src/storage/segment.cc b/src/storage/segment.cc index cbe3122b059..18a47c961c4 100644 --- a/src/storage/segment.cc +++ b/src/storage/segment.cc @@ -251,8 +251,8 @@ bool Segment::Put(const Slice& key, const std::map& ts_map, D uint8_t height = entry->entries.Insert(kv.second, row); entry->count_.fetch_add(1, std::memory_order_relaxed); byte_size += GetRecordTsIdxSize(height); - DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after add " << byte_size; idx_byte_size_.fetch_add(byte_size, std::memory_order_relaxed); + DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after add " << byte_size; idx_cnt_vec_[pos->second]->fetch_add(1, std::memory_order_relaxed); } return true; @@ -270,11 +270,13 @@ bool Segment::Delete(const std::optional& idx, const Slice& key) { entry_node = entries_->Remove(key); } if (entry_node != nullptr) { + DLOG(INFO) << "add key " << key.ToString() << " to node cache. version " << gc_version_; node_cache_.AddKeyEntryNode(gc_version_.load(std::memory_order_relaxed), entry_node); return true; } } else { base::Node* data_node = nullptr; + ::openmldb::base::Node* entry_node = nullptr; { std::lock_guard lock(mu_); void* entry_arr = nullptr; @@ -288,10 +290,24 @@ bool Segment::Delete(const std::optional& idx, const Slice& key) { uint64_t ts = it->GetKey(); data_node = key_entry->entries.Split(ts); } + bool is_empty = true; + for (uint32_t i = 0; i < ts_cnt_; i++) { + if (!reinterpret_cast(entry_arr)[i]->entries.IsEmpty()) { + is_empty = false; + break; + } + } + if (is_empty) { + entry_node = entries_->Remove(key); + } } if (data_node != nullptr) { node_cache_.AddValueNodeList(ts_idx, gc_version_.load(std::memory_order_relaxed), data_node); } + if (entry_node != nullptr) { + DLOG(INFO) << "add key " << key.ToString() << " to node cache. version " << gc_version_; + node_cache_.AddKeyEntryNode(gc_version_.load(std::memory_order_relaxed), entry_node); + } } return true; } @@ -354,14 +370,33 @@ bool Segment::Delete(const std::optional& idx, const Slice& key, uint6 } } base::Node* data_node = nullptr; + base::Node* entry_node = nullptr; { std::lock_guard lock(mu_); data_node = key_entry->entries.Split(ts); DLOG(INFO) << "after delete, entry " << key.ToString() << " split by " << ts; + bool is_empty = true; + if (ts_cnt_ == 1) { + is_empty = key_entry->entries.IsEmpty(); + } else { + for (uint32_t i = 0; i < ts_cnt_; i++) { + if (!reinterpret_cast(entry)[i]->entries.IsEmpty()) { + is_empty = false; + break; + } + } + } + if (is_empty) { + entry_node = entries_->Remove(key); + } } if (data_node != nullptr) { node_cache_.AddValueNodeList(ts_idx, gc_version_.load(std::memory_order_relaxed), data_node); } + if (entry_node != nullptr) { + DLOG(INFO) << "add key " << key.ToString() << " to node cache. 
version " << gc_version_; + node_cache_.AddKeyEntryNode(gc_version_.load(std::memory_order_relaxed), entry_node); + } return true; } @@ -373,11 +408,11 @@ void Segment::FreeList(uint32_t ts_idx, ::openmldb::base::NodeHeight())); DLOG(INFO) << "idx_byte_size_ " << idx_byte_size_ << " after sub " << GetRecordTsIdxSize(tmp->Height()); node = node->GetNextNoBarrier(0); - DEBUGLOG("delete key %lu with height %u", tmp->GetKey(), tmp->Height()); + VLOG(1) << "delete key " << tmp->GetKey() << " with height " << (unsigned int)tmp->Height(); if (tmp->GetValue()->dim_cnt_down > 1) { tmp->GetValue()->dim_cnt_down--; } else { - DEBUGLOG("delele data block for key %lu", tmp->GetKey()); + VLOG(1) << "delete data block for key " << tmp->GetKey(); statistics_info->record_byte_size += GetRecordSize(tmp->GetValue()->size); delete tmp->GetValue(); } @@ -391,8 +426,10 @@ void Segment::GcFreeList(StatisticsInfo* statistics_info) { return; } StatisticsInfo old = *statistics_info; + DLOG(INFO) << "cur " << old.DebugString(); uint64_t free_list_version = cur_version - FLAGS_gc_deleted_pk_version_delta; node_cache_.Free(free_list_version, statistics_info); + DLOG(INFO) << "after node cache free " << statistics_info->DebugString(); for (size_t idx = 0; idx < idx_cnt_vec_.size(); idx++) { idx_cnt_vec_[idx]->fetch_sub(statistics_info->GetIdxCnt(idx) - old.GetIdxCnt(idx), std::memory_order_relaxed); } @@ -614,6 +651,7 @@ void Segment::GcAllType(const std::map& ttl_st_map, StatisticsI } } if (entry_node != nullptr) { + DLOG(INFO) << "add key " << key.ToString() << " to node cache. version " << gc_version_; node_cache_.AddKeyEntryNode(gc_version_.load(std::memory_order_relaxed), entry_node); } } @@ -767,6 +805,7 @@ void Segment::Gc4TTLOrHead(const uint64_t time, const uint64_t keep_cnt, Statist } } if (entry_node != nullptr) { + DLOG(INFO) << "add key " << key.ToString() << " to node cache. 
version " << gc_version_; node_cache_.AddKeyEntryNode(gc_version_.load(std::memory_order_relaxed), entry_node); } uint64_t cur_idx_cnt = statistics_info->GetIdxCnt(0); From 320115404fe120cfc1920948c1e8bf9a4b001eef Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Fri, 31 May 2024 16:35:22 +0800 Subject: [PATCH 05/17] fix comment --- src/storage/index_organized_table.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc index 6e05c7c9ee0..84354dc5d9e 100644 --- a/src/storage/index_organized_table.cc +++ b/src/storage/index_organized_table.cc @@ -400,7 +400,8 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c } // clustered segment should be dedup and update will trigger all index update(impl in cli router) if (!iot_segment->Put(kv.second, iter->second, cblock, sblock, false)) { - return absl::AlreadyExistsError("data exists"); // let caller know exists + // even no put_if_absent, return false if exists or wrong + return absl::AlreadyExistsError("data exists or wrong"); } } // cblock and sblock both will sub record_byte_size_ when delete, so add them all From e65fa28317effff793771e0aadaa862cfa853197 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 03:23:04 +0000 Subject: [PATCH 06/17] ut --- src/tablet/tablet_impl_test.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tablet/tablet_impl_test.cc b/src/tablet/tablet_impl_test.cc index 985c59e51d3..1fae477edc1 100644 --- a/src/tablet/tablet_impl_test.cc +++ b/src/tablet/tablet_impl_test.cc @@ -6249,7 +6249,8 @@ TEST_F(TabletImplTest, DeleteRange) { ::openmldb::common::ColumnDesc* column_desc2 = table_meta->add_column_desc(); column_desc2->set_name("mcc"); column_desc2->set_data_type(::openmldb::type::kString); - SchemaCodec::SetIndex(table_meta->add_column_key(), "card", "card", "", ::openmldb::type::kAbsoluteTime, 120, 0); + // insert time ttl and 120 min, so data won't be gc by ttl + SchemaCodec::SetIndex(table_meta->add_column_key(), "card_idx", "card", "", ::openmldb::type::kAbsoluteTime, 120, 0); ::openmldb::api::CreateTableResponse response; tablet.CreateTable(NULL, &request, &response, &closure); @@ -6293,16 +6294,19 @@ TEST_F(TabletImplTest, DeleteRange) { delete_request.set_pid(1); delete_request.set_end_ts(1); tablet.Delete(NULL, &delete_request, &gen_response, &closure); - ASSERT_EQ(0, gen_response.code()); + ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); ::openmldb::api::ExecuteGcRequest e_request; e_request.set_tid(id); e_request.set_pid(1); tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); + ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); sleep(2); tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); + ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); sleep(2); assert_status(0, 0, 1626); tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); + ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); sleep(2); assert_status(0, 0, 0); } From 3141b57275889f4e73a4522ba585ccb237d5089c Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 04:03:47 +0000 Subject: [PATCH 07/17] ut test --- src/tablet/tablet_impl_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tablet/tablet_impl_test.cc b/src/tablet/tablet_impl_test.cc index 1fae477edc1..8b9497c7746 100644 --- a/src/tablet/tablet_impl_test.cc +++ 
b/src/tablet/tablet_impl_test.cc @@ -6298,13 +6298,13 @@ TEST_F(TabletImplTest, DeleteRange) { ::openmldb::api::ExecuteGcRequest e_request; e_request.set_tid(id); e_request.set_pid(1); + // async task, need to wait + // segment: entries -> node cache tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); sleep(2); - tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); - ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); - sleep(2); - assert_status(0, 0, 1626); + assert_status(0, 0, 0); + // gc node cache tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); sleep(2); From ce93fd99d5aedd7acc4ab767b2504d2b406ba979 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 04:19:56 +0000 Subject: [PATCH 08/17] fix ut --- src/tablet/tablet_impl_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tablet/tablet_impl_test.cc b/src/tablet/tablet_impl_test.cc index 8b9497c7746..3df6a8e3553 100644 --- a/src/tablet/tablet_impl_test.cc +++ b/src/tablet/tablet_impl_test.cc @@ -6303,7 +6303,7 @@ TEST_F(TabletImplTest, DeleteRange) { tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); sleep(2); - assert_status(0, 0, 0); + assert_status(100, 3400, 5786); // before node cache gc, status will be the same // gc node cache tablet.ExecuteGc(NULL, &e_request, &gen_response, &closure); ASSERT_EQ(0, gen_response.code()) << gen_response.ShortDebugString(); From 774bc3ec1d800fba3e8a4ec83c830100876f3aab Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 05:47:41 +0000 Subject: [PATCH 09/17] sleep more for truncate --- src/cmd/sql_cmd_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index c20dae67bb2..43f31138fcf 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -1284,7 +1284,7 @@ TEST_P(DBSDKTest, Truncate) { sr->ExecuteSQL(absl::StrCat("insert into ", table_name, " values ('", key, "', 11, ", ts, ");"), &status); } } - absl::SleepFor(absl::Seconds(5)); + absl::SleepFor(absl::Seconds(8)); // sleep more to avoid truncate failed on partition offset mismatch res = sr->ExecuteSQL(absl::StrCat("select * from ", table_name, ";"), &status); ASSERT_EQ(res->Size(), 100); From 9cb4cfe47afeba1dc561d809286486d1d9722b57 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 07:25:32 +0000 Subject: [PATCH 10/17] sleep 16 --- src/cmd/sql_cmd_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/sql_cmd_test.cc b/src/cmd/sql_cmd_test.cc index 43f31138fcf..0b29ae449cd 100644 --- a/src/cmd/sql_cmd_test.cc +++ b/src/cmd/sql_cmd_test.cc @@ -1284,7 +1284,7 @@ TEST_P(DBSDKTest, Truncate) { sr->ExecuteSQL(absl::StrCat("insert into ", table_name, " values ('", key, "', 11, ", ts, ");"), &status); } } - absl::SleepFor(absl::Seconds(8)); // sleep more to avoid truncate failed on partition offset mismatch + absl::SleepFor(absl::Seconds(16)); // sleep more to avoid truncate failed on partition offset mismatch res = sr->ExecuteSQL(absl::StrCat("select * from ", table_name, ";"), &status); ASSERT_EQ(res->Size(), 100); From 04ab5af4d3aa934e39b95d5ee2a27e96e8020eaf Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 09:05:26 +0000 Subject: [PATCH 11/17] tool pytest fix and swig fix --- 
src/sdk/sql_cluster_router.cc | 8 ++++---- src/storage/index_organized_table.cc | 6 +++--- src/storage/index_organized_table.h | 20 ++++++++++---------- steps/test_python.sh | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/sdk/sql_cluster_router.cc b/src/sdk/sql_cluster_router.cc index cd438b90c3b..4bbf57ec13a 100644 --- a/src/sdk/sql_cluster_router.cc +++ b/src/sdk/sql_cluster_router.cc @@ -1452,13 +1452,13 @@ bool SQLClusterRouter::PutRow(uint32_t tid, const std::shared_ptr& // revertput or SQLDeleteRow is not easy to use here, so make a sql DLOG(INFO) << "primary key exists, delete old data then insert new data"; // just where primary key, not all columns(redundant condition) - auto hint = storage::IndexOrganizedTable::MakePkeysHint(row->GetTableInfo().column_desc(), + auto hint = storage::MakePkeysHint(row->GetTableInfo().column_desc(), row->GetTableInfo().column_key(0)); if (hint.empty()) { SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make pkeys hint failed"); return false; } - auto sql = storage::IndexOrganizedTable::MakeDeleteSQL(row->GetTableInfo().db(), row->GetTableInfo().name(), + auto sql = storage::MakeDeleteSQL(row->GetTableInfo().db(), row->GetTableInfo().name(), row->GetTableInfo().column_key(0), (int8_t*)exists_value.c_str(), ts, row_view, hint); if (sql.empty()) { @@ -1687,12 +1687,12 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& n DLOG(INFO) << "primary key exists, delete old data then insert new data"; // just where primary key, not all columns(redundant condition) auto hint = - storage::IndexOrganizedTable::MakePkeysHint(table_info->column_desc(), table_info->column_key(0)); + storage::MakePkeysHint(table_info->column_desc(), table_info->column_key(0)); if (hint.empty()) { SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make pkeys hint failed"); return false; } - auto sql = storage::IndexOrganizedTable::MakeDeleteSQL(table_info->db(), table_info->name(), + auto sql = storage::MakeDeleteSQL(table_info->db(), table_info->name(), table_info->column_key(0), (int8_t*)exists_value.c_str(), ts, row_view, hint); if (sql.empty()) { diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc index 84354dc5d9e..8b02cb74243 100644 --- a/src/storage/index_organized_table.cc +++ b/src/storage/index_organized_table.cc @@ -453,7 +453,7 @@ absl::Status IndexOrganizedTable::CheckDataExists(uint64_t tsv, const Dimensions } // , error if empty -std::map> IndexOrganizedTable::MakePkeysHint( +std::map> MakePkeysHint( const codec::Schema& schema, const common::ColumnKey& cidx_ck) { if (cidx_ck.col_name().empty()) { LOG(WARNING) << "empty cidx column key"; @@ -486,7 +486,7 @@ std::map> IndexOrganizedTable:: } // error if empty -std::string IndexOrganizedTable::MakeDeleteSQL( +std::string MakeDeleteSQL( const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, const int8_t* values, uint64_t ts, const codec::RowView& row_view, const std::map>& col_idx) { auto sql_prefix = absl::StrCat("delete from ", db, ".", name, " where "); @@ -529,7 +529,7 @@ std::string IndexOrganizedTable::MakeDeleteSQL( } // error if empty -std::string IndexOrganizedTable::ExtractPkeys( +std::string ExtractPkeys( const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, const std::map>& col_idx) { // join with | diff --git a/src/storage/index_organized_table.h b/src/storage/index_organized_table.h index a037c8cf05a..7e0eb5e351d 100644 --- 
a/src/storage/index_organized_table.h +++ b/src/storage/index_organized_table.h @@ -54,15 +54,6 @@ class IndexOrganizedTable : public MemTable { void SchedGCByDelete(const std::shared_ptr& router); - static std::map> MakePkeysHint(const codec::Schema& schema, - const common::ColumnKey& cidx_ck); - static std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, - const int8_t* values, uint64_t ts, const codec::RowView& row_view, - const std::map>& col_idx); - static std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, - const codec::RowView& row_view, - const std::map>& col_idx); - private: absl::Status ClusteredIndexGCByDelete(const std::shared_ptr& router); @@ -72,6 +63,15 @@ class IndexOrganizedTable : public MemTable { std::mutex gc_lock_; }; -} // namespace openmldb::storage +static std::map> MakePkeysHint(const codec::Schema& schema, + const common::ColumnKey& cidx_ck); +static std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, + const int8_t* values, uint64_t ts, const codec::RowView& row_view, + const std::map>& col_idx); +static std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, + const codec::RowView& row_view, + const std::map>& col_idx); + +} // namespace openmldb::storage #endif diff --git a/steps/test_python.sh b/steps/test_python.sh index 8c366f77b0c..3e3588b0db7 100644 --- a/steps/test_python.sh +++ b/steps/test_python.sh @@ -42,8 +42,8 @@ python3 -m pip install "${whl_name_sdk}[test]" cd "${ROOT_DIR}"/python/openmldb_tool/dist/ whl_name_tool=$(ls openmldb*.whl) echo "whl_name_tool:${whl_name_tool}" -# pip 23.1.2 just needs to install test(rpc is required by test) -python3 -m pip install "${whl_name_tool}[rpc,test]" +# pip 23.1.2 just needs to install test +python3 -m pip install "${whl_name_tool}[test]" python3 -m pip install pytest-cov From 782e6e188a82d183cb24e7960009db4d0e65e3f0 Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 09:17:09 +0000 Subject: [PATCH 12/17] fix --- src/storage/index_organized_table.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/storage/index_organized_table.h b/src/storage/index_organized_table.h index 7e0eb5e351d..c09a4b9da19 100644 --- a/src/storage/index_organized_table.h +++ b/src/storage/index_organized_table.h @@ -64,14 +64,14 @@ class IndexOrganizedTable : public MemTable { std::mutex gc_lock_; }; -static std::map> MakePkeysHint(const codec::Schema& schema, - const common::ColumnKey& cidx_ck); -static std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, - const int8_t* values, uint64_t ts, const codec::RowView& row_view, - const std::map>& col_idx); -static std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, - const codec::RowView& row_view, - const std::map>& col_idx); +// don't make func static cuz swig sdk +std::map> MakePkeysHint(const codec::Schema& schema, + const common::ColumnKey& cidx_ck); +std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, + const int8_t* values, uint64_t ts, const codec::RowView& row_view, + const std::map>& col_idx); +std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, + const std::map>& col_idx); } // namespace openmldb::storage #endif From 50bb729713ced35cc967c85514542288fad714a3 Mon Sep 17 
00:00:00 2001
From: HuangWei
Date: Thu, 27 Jun 2024 09:41:50 +0000
Subject: [PATCH 13/17] clean

---
 src/storage/index_organized_table.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc
index 8b02cb74243..5756ee79551 100644
--- a/src/storage/index_organized_table.cc
+++ b/src/storage/index_organized_table.cc
@@ -20,7 +20,6 @@
 #include "absl/strings/str_join.h" // dlog
 #include "absl/strings/str_split.h"
-#include "index_organized_table.h"
 #include "sdk/sql_router.h"
 #include "storage/iot_segment.h"

From 993ba48d6480485086251e0927719aaca7f9866a Mon Sep 17 00:00:00 2001
From: HuangWei
Date: Thu, 27 Jun 2024 10:02:20 +0000
Subject: [PATCH 14/17] move to base

---
 src/base/index_util.h | 131 +++++++++++++++++++++++++++
 src/sdk/sql_cluster_router.cc | 9 +-
 src/storage/index_organized_table.cc | 105 +--------------------
 src/storage/index_organized_table.h | 9 --
 4 files changed, 141 insertions(+), 113 deletions(-)
 create mode 100644 src/base/index_util.h

diff --git a/src/base/index_util.h b/src/base/index_util.h
new file mode 100644
index 00000000000..f9fe3b24a84
--- /dev/null
+++ b/src/base/index_util.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2021 4Paradigm
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef SRC_BASE_INDEX_UTIL_H_ +#define SRC_BASE_INDEX_UTIL_H_ + +#include + +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "base/glog_wrapper.h" +#include "codec/codec.h" +#include "storage/schema.h" + +namespace openmldb { +namespace base { + +// don't make func static cuz swig sdk + +// , error if empty +std::map> MakePkeysHint(const codec::Schema& schema, + const common::ColumnKey& cidx_ck) { + if (cidx_ck.col_name().empty()) { + LOG(WARNING) << "empty cidx column key"; + return {}; + } + // pkey col idx in row + std::set pkey_set; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + pkey_set.insert(cidx_ck.col_name().Get(i)); + } + if (pkey_set.empty()) { + LOG(WARNING) << "empty pkey set"; + return {}; + } + if (pkey_set.size() != static_cast::size_type>(cidx_ck.col_name().size())) { + LOG(WARNING) << "pkey set size not equal to cidx pkeys size"; + return {}; + } + std::map> col_idx; + for (int i = 0; i < schema.size(); i++) { + if (pkey_set.find(schema.Get(i).name()) != pkey_set.end()) { + col_idx[schema.Get(i).name()] = {i, schema.Get(i).data_type()}; + } + } + if (col_idx.size() != pkey_set.size()) { + LOG(WARNING) << "col idx size not equal to cidx pkeys size"; + return {}; + } + return col_idx; +} + +// error if empty +std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, + const int8_t* values, uint64_t ts, const codec::RowView& row_view, + const std::map>& col_idx) { + auto sql_prefix = absl::StrCat("delete from ", db, ".", name, " where "); + std::string cond; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + // append primary keys, pkeys in dimension are encoded, so we should get them from raw value + // split can't work if string has `|` + auto& col_name = cidx_ck.col_name().Get(i); + auto col = col_idx.find(col_name); + if (col == col_idx.end()) { + LOG(WARNING) << "col " << col_name << " not found in col idx"; + return ""; + } + std::string val; + row_view.GetStrValue(values, col->second.first, &val); + if (!cond.empty()) { + absl::StrAppend(&cond, " and "); + } + // TODO(hw): string should add quotes how about timestamp? + // check existence before, so here we skip + absl::StrAppend(&cond, col_name); + if (auto t = col->second.second; t == type::kVarchar || t == type::kString) { + absl::StrAppend(&cond, "=\"", val, "\""); + } else { + absl::StrAppend(&cond, "=", val); + } + } + // ts must be integer, won't be string + if (!cidx_ck.ts_name().empty() && cidx_ck.ts_name() != storage::DEFAULT_TS_COL_NAME) { + if (!cond.empty()) { + absl::StrAppend(&cond, " and "); + } + absl::StrAppend(&cond, cidx_ck.ts_name(), "=", std::to_string(ts)); + } + auto sql = absl::StrCat(sql_prefix, cond, ";"); + // TODO(hw): if delete failed, we can't revert. 
And if sidx skeys+sts doesn't change, no need to delete and + // then insert + DLOG(INFO) << "delete sql " << sql; + return sql; +} + +// error if empty +std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, + const std::map>& col_idx) { + // join with | + std::vector pkeys; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + auto& col_name = cidx_ck.col_name().Get(i); + auto col = col_idx.find(col_name); + if (col == col_idx.end()) { + LOG(WARNING) << "col " << col_name << " not found in col idx"; + return ""; + } + std::string val; + row_view.GetStrValue(values, col->second.first, &val); + pkeys.push_back(val); + } + return absl::StrJoin(pkeys, "|"); +} + +} // namespace base +} // namespace openmldb + +#endif // SRC_BASE_INDEX_UTIL_H_ diff --git a/src/sdk/sql_cluster_router.cc b/src/sdk/sql_cluster_router.cc index 4bbf57ec13a..8ef74f8ac2d 100644 --- a/src/sdk/sql_cluster_router.cc +++ b/src/sdk/sql_cluster_router.cc @@ -37,6 +37,7 @@ #include "base/file_util.h" #include "base/glog_wrapper.h" #include "base/status_util.h" +#include "base/index_util.h" #include "boost/none.hpp" #include "boost/property_tree/ini_parser.hpp" #include "boost/property_tree/ptree.hpp" @@ -1452,13 +1453,13 @@ bool SQLClusterRouter::PutRow(uint32_t tid, const std::shared_ptr& // revertput or SQLDeleteRow is not easy to use here, so make a sql DLOG(INFO) << "primary key exists, delete old data then insert new data"; // just where primary key, not all columns(redundant condition) - auto hint = storage::MakePkeysHint(row->GetTableInfo().column_desc(), + auto hint = base::MakePkeysHint(row->GetTableInfo().column_desc(), row->GetTableInfo().column_key(0)); if (hint.empty()) { SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make pkeys hint failed"); return false; } - auto sql = storage::MakeDeleteSQL(row->GetTableInfo().db(), row->GetTableInfo().name(), + auto sql = base::MakeDeleteSQL(row->GetTableInfo().db(), row->GetTableInfo().name(), row->GetTableInfo().column_key(0), (int8_t*)exists_value.c_str(), ts, row_view, hint); if (sql.empty()) { @@ -1687,12 +1688,12 @@ bool SQLClusterRouter::ExecuteInsert(const std::string& db, const std::string& n DLOG(INFO) << "primary key exists, delete old data then insert new data"; // just where primary key, not all columns(redundant condition) auto hint = - storage::MakePkeysHint(table_info->column_desc(), table_info->column_key(0)); + base::MakePkeysHint(table_info->column_desc(), table_info->column_key(0)); if (hint.empty()) { SET_STATUS_AND_WARN(status, StatusCode::kCmdError, "make pkeys hint failed"); return false; } - auto sql = storage::MakeDeleteSQL(table_info->db(), table_info->name(), + auto sql = base::MakeDeleteSQL(table_info->db(), table_info->name(), table_info->column_key(0), (int8_t*)exists_value.c_str(), ts, row_view, hint); if (sql.empty()) { diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc index 5756ee79551..8bb76373c95 100644 --- a/src/storage/index_organized_table.cc +++ b/src/storage/index_organized_table.cc @@ -22,6 +22,7 @@ #include "absl/strings/str_split.h" #include "sdk/sql_router.h" #include "storage/iot_segment.h" +#include "base/index_util.h" DECLARE_uint32(absolute_default_skiplist_height); @@ -346,12 +347,12 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c if (cidx_inner_key_pair.first == -1) { DLOG(INFO) << "cidx not in dimensions, extract from value"; auto cidx = table_index_.GetIndex(0); - auto hint = 
MakePkeysHint(table_meta_->column_desc(), table_meta_->column_key(0)); + auto hint = base::MakePkeysHint(table_meta_->column_desc(), table_meta_->column_key(0)); if (hint.empty()) { return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": cidx pkeys hint empty")); } cidx_inner_key_pair.second = - ExtractPkeys(table_meta_->column_key(0), (int8_t*)value.c_str(), *decoder, hint); + base::ExtractPkeys(table_meta_->column_key(0), (int8_t*)value.c_str(), *decoder, hint); if (cidx_inner_key_pair.second.empty()) { return absl::InvalidArgumentError(absl::StrCat(id_, ".", pid_, ": cidx pkeys+pts extract failed")); } @@ -451,102 +452,6 @@ absl::Status IndexOrganizedTable::CheckDataExists(uint64_t tsv, const Dimensions return iot_segment->CheckKeyExists(cidx_inner_key_pair.second, {{ts_col->GetId(), tsv}}); } -// , error if empty -std::map> MakePkeysHint( - const codec::Schema& schema, const common::ColumnKey& cidx_ck) { - if (cidx_ck.col_name().empty()) { - LOG(WARNING) << "empty cidx column key"; - return {}; - } - // pkey col idx in row - std::set pkey_set; - for (int i = 0; i < cidx_ck.col_name().size(); i++) { - pkey_set.insert(cidx_ck.col_name().Get(i)); - } - if (pkey_set.empty()) { - LOG(WARNING) << "empty pkey set"; - return {}; - } - if (pkey_set.size() != static_cast::size_type>(cidx_ck.col_name().size())) { - LOG(WARNING) << "pkey set size not equal to cidx pkeys size"; - return {}; - } - std::map> col_idx; - for (int i = 0; i < schema.size(); i++) { - if (pkey_set.find(schema.Get(i).name()) != pkey_set.end()) { - col_idx[schema.Get(i).name()] = {i, schema.Get(i).data_type()}; - } - } - if (col_idx.size() != pkey_set.size()) { - LOG(WARNING) << "col idx size not equal to cidx pkeys size"; - return {}; - } - return col_idx; -} - -// error if empty -std::string MakeDeleteSQL( - const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, const int8_t* values, uint64_t ts, - const codec::RowView& row_view, const std::map>& col_idx) { - auto sql_prefix = absl::StrCat("delete from ", db, ".", name, " where "); - std::string cond; - for (int i = 0; i < cidx_ck.col_name().size(); i++) { - // append primary keys, pkeys in dimension are encoded, so we should get them from raw value - // split can't work if string has `|` - auto& col_name = cidx_ck.col_name().Get(i); - auto col = col_idx.find(col_name); - if (col == col_idx.end()) { - LOG(WARNING) << "col " << col_name << " not found in col idx"; - return ""; - } - std::string val; - row_view.GetStrValue(values, col->second.first, &val); - if (!cond.empty()) { - absl::StrAppend(&cond, " and "); - } - // TODO(hw): string should add quotes how about timestamp? - // check existence before, so here we skip - absl::StrAppend(&cond, col_name); - if (auto t = col->second.second; t == type::kVarchar || t == type::kString) { - absl::StrAppend(&cond, "=\"", val, "\""); - } else { - absl::StrAppend(&cond, "=", val); - } - } - // ts must be integer, won't be string - if (!cidx_ck.ts_name().empty() && cidx_ck.ts_name() != storage::DEFAULT_TS_COL_NAME) { - if (!cond.empty()) { - absl::StrAppend(&cond, " and "); - } - absl::StrAppend(&cond, cidx_ck.ts_name(), "=", std::to_string(ts)); - } - auto sql = absl::StrCat(sql_prefix, cond, ";"); - // TODO(hw): if delete failed, we can't revert. 
And if sidx skeys+sts doesn't change, no need to delete and - // then insert - DLOG(INFO) << "delete sql " << sql; - return sql; -} - -// error if empty -std::string ExtractPkeys( - const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, - const std::map>& col_idx) { - // join with | - std::vector pkeys; - for (int i = 0; i < cidx_ck.col_name().size(); i++) { - auto& col_name = cidx_ck.col_name().Get(i); - auto col = col_idx.find(col_name); - if (col == col_idx.end()) { - LOG(WARNING) << "col " << col_name << " not found in col idx"; - return ""; - } - std::string val; - row_view.GetStrValue(values, col->second.first, &val); - pkeys.push_back(val); - } - return absl::StrJoin(pkeys, "|"); -} - // index gc should try to do ExecuteGc for each waiting segment, but if some segments are gc before, we should release // them so it will be a little complex // should run under lock @@ -591,7 +496,7 @@ absl::Status IndexOrganizedTable::ClusteredIndexGCByDelete(const std::shared_ptr auto meta = GetTableMeta(); auto cols = meta->column_desc(); // copy codec::RowView row_view(cols); - auto hint = MakePkeysHint(cols, meta->column_key(0)); + auto hint = base::MakePkeysHint(cols, meta->column_key(0)); if (hint.empty()) { return absl::InternalError("make pkeys hint failed"); } @@ -600,7 +505,7 @@ absl::Status IndexOrganizedTable::ClusteredIndexGCByDelete(const std::shared_ptr auto values = keys_ts.second; // get pkeys from values auto ts = keys_ts.first; auto sql = - MakeDeleteSQL(GetDB(), GetName(), meta->column_key(0), (int8_t*)values->data, ts, row_view, hint); + base::MakeDeleteSQL(GetDB(), GetName(), meta->column_key(0), (int8_t*)values->data, ts, row_view, hint); // TODO(hw): if delete failed, we can't revert. And if sidx skeys+sts doesn't change, no need to delete and // then insert if (sql.empty()) { diff --git a/src/storage/index_organized_table.h b/src/storage/index_organized_table.h index c09a4b9da19..014e1a56a0a 100644 --- a/src/storage/index_organized_table.h +++ b/src/storage/index_organized_table.h @@ -64,14 +64,5 @@ class IndexOrganizedTable : public MemTable { std::mutex gc_lock_; }; -// don't make func static cuz swig sdk -std::map> MakePkeysHint(const codec::Schema& schema, - const common::ColumnKey& cidx_ck); -std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, - const int8_t* values, uint64_t ts, const codec::RowView& row_view, - const std::map>& col_idx); -std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, - const std::map>& col_idx); - } // namespace openmldb::storage #endif From 71471806a6ade7fc42b3600e818da158afdcc44d Mon Sep 17 00:00:00 2001 From: HuangWei Date: Thu, 27 Jun 2024 10:15:46 +0000 Subject: [PATCH 15/17] fix --- src/base/index_util.cc | 121 +++++++++++++++++++++++++++++++++++++++++ src/base/index_util.h | 94 ++------------------------------ 2 files changed, 125 insertions(+), 90 deletions(-) create mode 100644 src/base/index_util.cc diff --git a/src/base/index_util.cc b/src/base/index_util.cc new file mode 100644 index 00000000000..679ce2deaa7 --- /dev/null +++ b/src/base/index_util.cc @@ -0,0 +1,121 @@ +/* + * Copyright 2021 4Paradigm + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "base/index_util.h" + +#include + +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "base/glog_wrapper.h" +#include "storage/schema.h" + +namespace openmldb::base { +// , error if empty +std::map> MakePkeysHint(const codec::Schema& schema, + const common::ColumnKey& cidx_ck) { + if (cidx_ck.col_name().empty()) { + LOG(WARNING) << "empty cidx column key"; + return {}; + } + // pkey col idx in row + std::set pkey_set; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + pkey_set.insert(cidx_ck.col_name().Get(i)); + } + if (pkey_set.empty()) { + LOG(WARNING) << "empty pkey set"; + return {}; + } + if (pkey_set.size() != static_cast::size_type>(cidx_ck.col_name().size())) { + LOG(WARNING) << "pkey set size not equal to cidx pkeys size"; + return {}; + } + std::map> col_idx; + for (int i = 0; i < schema.size(); i++) { + if (pkey_set.find(schema.Get(i).name()) != pkey_set.end()) { + col_idx[schema.Get(i).name()] = {i, schema.Get(i).data_type()}; + } + } + if (col_idx.size() != pkey_set.size()) { + LOG(WARNING) << "col idx size not equal to cidx pkeys size"; + return {}; + } + return col_idx; +} + +// error if empty +std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, + const int8_t* values, uint64_t ts, const codec::RowView& row_view, + const std::map>& col_idx) { + auto sql_prefix = absl::StrCat("delete from ", db, ".", name, " where "); + std::string cond; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + // append primary keys, pkeys in dimension are encoded, so we should get them from raw value + // split can't work if string has `|` + auto& col_name = cidx_ck.col_name().Get(i); + auto col = col_idx.find(col_name); + if (col == col_idx.end()) { + LOG(WARNING) << "col " << col_name << " not found in col idx"; + return ""; + } + std::string val; + row_view.GetStrValue(values, col->second.first, &val); + if (!cond.empty()) { + absl::StrAppend(&cond, " and "); + } + // TODO(hw): string should add quotes how about timestamp? + // check existence before, so here we skip + absl::StrAppend(&cond, col_name); + if (auto t = col->second.second; t == type::kVarchar || t == type::kString) { + absl::StrAppend(&cond, "=\"", val, "\""); + } else { + absl::StrAppend(&cond, "=", val); + } + } + // ts must be integer, won't be string + if (!cidx_ck.ts_name().empty() && cidx_ck.ts_name() != storage::DEFAULT_TS_COL_NAME) { + if (!cond.empty()) { + absl::StrAppend(&cond, " and "); + } + absl::StrAppend(&cond, cidx_ck.ts_name(), "=", std::to_string(ts)); + } + auto sql = absl::StrCat(sql_prefix, cond, ";"); + // TODO(hw): if delete failed, we can't revert. 
And if sidx skeys+sts doesn't change, no need to delete and + // then insert + DLOG(INFO) << "delete sql " << sql; + return sql; +} + +// error if empty +std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, + const std::map>& col_idx) { + // join with | + std::vector pkeys; + for (int i = 0; i < cidx_ck.col_name().size(); i++) { + auto& col_name = cidx_ck.col_name().Get(i); + auto col = col_idx.find(col_name); + if (col == col_idx.end()) { + LOG(WARNING) << "col " << col_name << " not found in col idx"; + return ""; + } + std::string val; + row_view.GetStrValue(values, col->second.first, &val); + pkeys.push_back(val); + } + return absl::StrJoin(pkeys, "|"); +} + +} // namespace openmldb::base diff --git a/src/base/index_util.h b/src/base/index_util.h index f9fe3b24a84..11392b37bf0 100644 --- a/src/base/index_util.h +++ b/src/base/index_util.h @@ -19,111 +19,25 @@ #include -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" -#include "base/glog_wrapper.h" #include "codec/codec.h" -#include "storage/schema.h" namespace openmldb { namespace base { -// don't make func static cuz swig sdk +// don't declare func in table header cuz swig sdk // , error if empty std::map> MakePkeysHint(const codec::Schema& schema, - const common::ColumnKey& cidx_ck) { - if (cidx_ck.col_name().empty()) { - LOG(WARNING) << "empty cidx column key"; - return {}; - } - // pkey col idx in row - std::set pkey_set; - for (int i = 0; i < cidx_ck.col_name().size(); i++) { - pkey_set.insert(cidx_ck.col_name().Get(i)); - } - if (pkey_set.empty()) { - LOG(WARNING) << "empty pkey set"; - return {}; - } - if (pkey_set.size() != static_cast::size_type>(cidx_ck.col_name().size())) { - LOG(WARNING) << "pkey set size not equal to cidx pkeys size"; - return {}; - } - std::map> col_idx; - for (int i = 0; i < schema.size(); i++) { - if (pkey_set.find(schema.Get(i).name()) != pkey_set.end()) { - col_idx[schema.Get(i).name()] = {i, schema.Get(i).data_type()}; - } - } - if (col_idx.size() != pkey_set.size()) { - LOG(WARNING) << "col idx size not equal to cidx pkeys size"; - return {}; - } - return col_idx; -} + const common::ColumnKey& cidx_ck); // error if empty std::string MakeDeleteSQL(const std::string& db, const std::string& name, const common::ColumnKey& cidx_ck, const int8_t* values, uint64_t ts, const codec::RowView& row_view, - const std::map>& col_idx) { - auto sql_prefix = absl::StrCat("delete from ", db, ".", name, " where "); - std::string cond; - for (int i = 0; i < cidx_ck.col_name().size(); i++) { - // append primary keys, pkeys in dimension are encoded, so we should get them from raw value - // split can't work if string has `|` - auto& col_name = cidx_ck.col_name().Get(i); - auto col = col_idx.find(col_name); - if (col == col_idx.end()) { - LOG(WARNING) << "col " << col_name << " not found in col idx"; - return ""; - } - std::string val; - row_view.GetStrValue(values, col->second.first, &val); - if (!cond.empty()) { - absl::StrAppend(&cond, " and "); - } - // TODO(hw): string should add quotes how about timestamp? 
- // check existence before, so here we skip - absl::StrAppend(&cond, col_name); - if (auto t = col->second.second; t == type::kVarchar || t == type::kString) { - absl::StrAppend(&cond, "=\"", val, "\""); - } else { - absl::StrAppend(&cond, "=", val); - } - } - // ts must be integer, won't be string - if (!cidx_ck.ts_name().empty() && cidx_ck.ts_name() != storage::DEFAULT_TS_COL_NAME) { - if (!cond.empty()) { - absl::StrAppend(&cond, " and "); - } - absl::StrAppend(&cond, cidx_ck.ts_name(), "=", std::to_string(ts)); - } - auto sql = absl::StrCat(sql_prefix, cond, ";"); - // TODO(hw): if delete failed, we can't revert. And if sidx skeys+sts doesn't change, no need to delete and - // then insert - DLOG(INFO) << "delete sql " << sql; - return sql; -} + const std::map>& col_idx); // error if empty std::string ExtractPkeys(const common::ColumnKey& cidx_ck, const int8_t* values, const codec::RowView& row_view, - const std::map>& col_idx) { - // join with | - std::vector pkeys; - for (int i = 0; i < cidx_ck.col_name().size(); i++) { - auto& col_name = cidx_ck.col_name().Get(i); - auto col = col_idx.find(col_name); - if (col == col_idx.end()) { - LOG(WARNING) << "col " << col_name << " not found in col idx"; - return ""; - } - std::string val; - row_view.GetStrValue(values, col->second.first, &val); - pkeys.push_back(val); - } - return absl::StrJoin(pkeys, "|"); -} + const std::map>& col_idx); } // namespace base } // namespace openmldb From 1d683dcb85d3bb587069e36dbc88365fbabbfa10 Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Fri, 28 Jun 2024 11:58:43 +0800 Subject: [PATCH 16/17] fix coverage ut --- hybridse/src/node/sql_node.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/hybridse/src/node/sql_node.cc b/hybridse/src/node/sql_node.cc index 5055b7dabb2..258b487a4d3 100644 --- a/hybridse/src/node/sql_node.cc +++ b/hybridse/src/node/sql_node.cc @@ -1188,6 +1188,7 @@ static absl::flat_hash_map CreateSqlNodeTypeToNa {kCreateFunctionStmt, "kCreateFunctionStmt"}, {kCreateUserStmt, "kCreateUserStmt"}, {kAlterUserStmt, "kAlterUserStmt"}, + {kGrantStmt, "kGrantStmt"}, {kDynamicUdfFnDef, "kDynamicUdfFnDef"}, {kDynamicUdafFnDef, "kDynamicUdafFnDef"}, {kWithClauseEntry, "kWithClauseEntry"}, From 966b2d562d4fe74f93c202cee2acd562f2d92c57 Mon Sep 17 00:00:00 2001 From: Huang Wei Date: Fri, 28 Jun 2024 15:57:12 +0800 Subject: [PATCH 17/17] fix --- hybridse/src/node/sql_node.cc | 1 + src/storage/index_organized_table.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hybridse/src/node/sql_node.cc b/hybridse/src/node/sql_node.cc index 258b487a4d3..05dc87e34d6 100644 --- a/hybridse/src/node/sql_node.cc +++ b/hybridse/src/node/sql_node.cc @@ -1188,6 +1188,7 @@ static absl::flat_hash_map CreateSqlNodeTypeToNa {kCreateFunctionStmt, "kCreateFunctionStmt"}, {kCreateUserStmt, "kCreateUserStmt"}, {kAlterUserStmt, "kAlterUserStmt"}, + {kRevokeStmt, "kRevokeStmt"}, {kGrantStmt, "kGrantStmt"}, {kDynamicUdfFnDef, "kDynamicUdfFnDef"}, {kDynamicUdafFnDef, "kDynamicUdafFnDef"}, diff --git a/src/storage/index_organized_table.cc b/src/storage/index_organized_table.cc index 8bb76373c95..aeb3302b22b 100644 --- a/src/storage/index_organized_table.cc +++ b/src/storage/index_organized_table.cc @@ -287,7 +287,7 @@ absl::Status IndexOrganizedTable::Put(uint64_t time, const std::string& value, c std::optional clustered_tsv; std::map> ts_value_map; // we need two ref cnt - // 1. clustered and covering: put row ->DataBlock(i) + // 1. clustered and covering: put row -> DataBlock(i) // 2. 
secondary: put pkeys+pts -> DataBlock(j)
     uint32_t real_ref_cnt = 0, secondary_ref_cnt = 0;
     // cidx_inner_key_pair can get the clustered index
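
Note on the helpers moved into src/base/index_util.{h,cc} by PATCH 14/17 and 15/17: the router chains
them whenever the clustered index reports that a primary key already exists, building the pkey-to-column
hint once from the schema and then reusing it to build the DELETE for the old row before inserting the
new one; ExtractPkeys covers the other direction, letting IndexOrganizedTable::Put rebuild the clustered
key (pkey values joined with '|') from a raw row when the dimensions do not carry it. A minimal sketch of
the router-side call sequence follows; it is not part of the diffs above, `table_info`, `exists_value`
and `ts` are assumed to come from the tablet's existence check, and `row_view` is built the same way
ClusteredIndexGCByDelete builds it:

    // sketch: delete-then-insert path on a clustered-index key conflict
    auto cols = table_info->column_desc();  // copy, as ClusteredIndexGCByDelete does
    codec::RowView row_view(cols);
    auto hint = openmldb::base::MakePkeysHint(cols, table_info->column_key(0));
    if (hint.empty()) {
        return false;  // "make pkeys hint failed"
    }
    // produces "delete from <db>.<name> where <pkey col>=<val> [and ...] [and <ts col>=<ts>];"
    auto sql = openmldb::base::MakeDeleteSQL(table_info->db(), table_info->name(), table_info->column_key(0),
                                             (const int8_t*)exists_value.c_str(), ts, row_view, hint);
    if (sql.empty()) {
        return false;  // could not build the DELETE, leave the old row in place
    }
    // execute `sql`, then continue with the normal insert of the new row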