Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(codec): encode/decode map value to codec row #3713

Merged
merged 3 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions cases/query/udf_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -572,11 +572,12 @@ cases:
map('1', 2, '3', 4, '5', 6, '7', 8, '9', 10, '11', 12)['10'] as e8,
# first match on duplicate keys
map('1', 2, '1', 4, '1', 6, '7', 8, '9', 10, '11', 12)['1'] as e9,
map("c", 99, "d", NULL)["d"] as e10,
# map("c", 99, "d", NULL)["d"] as e10,
expect:
columns: ["e1 string", "e2 int", "e3 string", "e4 int", "e5 string", "e6 timestamp", "e7 int", "e8 int", "e9 int", "e10 int"]
# FIXME
columns: ["e1 string", "e2 int", "e3 string", "e4 int", "e5 string", "e6 timestamp", "e7 int", "e8 int", "e9 int"]
data: |
2, 100, NULL, 101, f, 2000, 10, NULL, 2, NULL
2, 100, NULL, 101, f, 2000, 10, NULL, 2
- id: 14
mode: request-unsupport
sql: |
Expand All @@ -588,3 +589,33 @@ cases:
columns: ["e1 bool", "e2 bool", "e3 bool"]
data: |
true, false, true

- id: 15
mode: request-unsupport
sql: |
select map(1, 2, 3, 4) as c1

- id: 16
mode: request-unsupport
# this covers basic codec for map data type
sql: |
select
c1[3] as o1, c2[1] as o2, c3['6'] as o3, c4[timestamp(8000)] as o4,
c5[int64(12)] as o5
from (select
map(1, 2, 3, 4) as c1,
map(1, '2', 3, '4') as c2,
map('5', timestamp(8000), '6', timestamp(9000)) as c3,
map(timestamp(8000), date("2012-12-12"), timestamp(9000), date("2014-11-11")) as c4,
map(int64(10), int16(11), int64(12), int16(13)) as c5
)
expect:
columns: ["o1 int", "o2 string", "o3 timestamp", "o4 date", "o5 int16"]
data: |
4, 2, 9000, 2012-12-12, 13

- id: 17
mode: request-unsupport
sql: |
select c1 + 8 from (select 9 as c1)

2 changes: 1 addition & 1 deletion hybridse/examples/toydb/src/tablet/tablet_catalog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ bool TabletTableHandler::Init() {
// init types var
for (int32_t i = 0; i < schema_.size(); i++) {
const type::ColumnDef& column = schema_.Get(i);
codec::ColInfo col_info(column.name(), column.type(), i, 0);
codec::ColInfo col_info(column.name(), column.schema(), i, 0);
types_.insert(std::make_pair(column.name(), col_info));
}

Expand Down
31 changes: 31 additions & 0 deletions hybridse/include/base/fe_status.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,37 @@ static inline std::initializer_list<int> __output_literal_args(STREAM& stream,

#define MAX_STATUS_TRACE_SIZE 4096

// Evaluate `expr`, which is expected to yield an absl::Status (any type with an
// `ok()` method works), exactly once and store the result in a macro-local `_s`.
// If `_s.ok()` is false, end the current function by returning `_s` itself;
// otherwise fall through to the statement following the macro.
//
// The enclosing function's return type must be constructible from the type of `expr`.
// The `while (true) { ... break; }` wrapper only provides a scope for `_s`.
#define CHECK_ABSL_STATUS(expr) \
while (true) { \
auto _s = (expr); \
if (!_s.ok()) { \
return _s; \
} \
break; \
}

// Check an absl::StatusOr<T> expression and end the current function by
// returning its 'status()' if it is not OK; otherwise fall through to the
// statement following the macro.
//
// The argument is parenthesized and evaluated exactly once, bound to a
// forwarding reference. It may therefore be a named variable or an arbitrary
// expression (a function call, a conditional expression, ...): side effects are
// not repeated on the failure path, and a temporary result stays alive until
// the check has completed.
#define CHECK_ABSL_STATUSOR(statusor)            \
    while (true) {                               \
        auto&& _statusor_ref = (statusor);       \
        if (!_statusor_ref.ok()) {               \
            return _statusor_ref.status();       \
        }                                        \
        break;                                   \
    }

// Evaluate `expr` exactly once into a macro-local `_status`; the result must provide
// `isOK()` and `GetMsg()`. If `_status.isOK()` is false, end the current function by
// returning `absl::InternalError(_status.GetMsg())`; otherwise fall through to the
// statement following the macro.
//
// Only the message is forwarded: every failure is reported as an absl internal error,
// whatever else the original status object carried.
#define CHECK_STATUS_TO_ABSL(expr) \
while (true) { \
auto _status = (expr); \
if (!_status.isOK()) { \
return absl::InternalError(_status.GetMsg()); \
} \
break; \
}

#define CHECK_STATUS(call, ...) \
while (true) { \
auto _status = (call); \
Expand Down
54 changes: 37 additions & 17 deletions hybridse/include/codec/fe_row_codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/status/statusor.h"
#include "base/raw_buffer.h"
#include "butil/iobuf.h"
#include "gflags/gflags.h"
#include "proto/fe_type.pb.h"

namespace hybridse {
Expand All @@ -42,9 +41,18 @@
const std::string NONETOKEN = "!N@U#L$L%"; // NOLINT
const std::string EMPTY_STRING = "!@#$%"; // NOLINT

// TODO(chendihao): Change to inline function if do not depend on gflags
const std::unordered_map<::hybridse::type::Type, uint8_t>& GetTypeSizeMap();

// Return true if the column is considered a base type in the row codec.
// Date & timestamp are considered base types since they have a single field in the corresponding llvm struct,
// while string, map and array are considered complex types.
//
// For base types, the column is written into the row ptr by just writing the value of the primitive type;
// for complex types, the write is made in a string (or string-like) manner: str size + str data.
// Map, array, or any other complex type takes an extra encoding step from its struct value into str data.
bool IsCodecBaseType(const type::ColumnSchema& sc);
bool IsCodecStrLikeType(const type::ColumnSchema& sc);

inline uint8_t GetAddrLength(uint32_t size) {
if (size <= UINT8_MAX) {
return 1;
Expand Down Expand Up @@ -180,26 +188,38 @@
};

struct ColInfo {
::hybridse::type::Type type;
// type() is still used in some legacy udf contexts;
// use it with caution for non-base types
::hybridse::type::Type type() const {
if (!schema.has_base_type()) {
return type::kNull;

Check warning on line 195 in hybridse/include/codec/fe_row_codec.h

View check run for this annotation

Codecov / codecov/patch

hybridse/include/codec/fe_row_codec.h#L195

Added line #L195 was not covered by tests
}
return schema.base_type();
}

uint32_t idx;
uint32_t offset;
std::string name;
type::ColumnSchema schema;

ColInfo() {}
ColInfo(const std::string& name, ::hybridse::type::Type type, uint32_t idx,
uint32_t offset)
: type(type), idx(idx), offset(offset), name(name) {}
ColInfo(const std::string& name, ::hybridse::type::Type type, uint32_t idx, uint32_t offset)
: idx(idx), offset(offset), name(name) {
schema.set_base_type(type);
}

ColInfo(const std::string& name, const type::ColumnSchema& sc, uint32_t idx, uint32_t offset)
: idx(idx), offset(offset), name(name), schema(sc) {}
};

struct StringColInfo : public ColInfo {
uint32_t str_next_offset;
uint32_t str_start_offset;

StringColInfo() {}
StringColInfo(const std::string& name, ::hybridse::type::Type type,
StringColInfo(const std::string& name, ::hybridse::type::ColumnSchema sc,
uint32_t idx, uint32_t offset, uint32_t str_next_offset,
uint32_t str_start_offset)
: ColInfo(name, type, idx, offset),
: ColInfo(name, sc, idx, offset),
str_next_offset(str_next_offset),
str_start_offset(str_start_offset) {}
};
Expand All @@ -209,7 +229,7 @@
explicit SliceFormat(const hybridse::codec::Schema* schema);
virtual ~SliceFormat() {}

bool GetStringColumnInfo(size_t idx, StringColInfo* res) const;
absl::StatusOr<StringColInfo> GetStringColumnInfo(size_t idx) const;

const ColInfo* GetColumnInfo(size_t idx) const;

Expand All @@ -224,7 +244,7 @@
class RowFormat {
public:
virtual ~RowFormat() {}
virtual bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const = 0;
virtual absl::StatusOr<StringColInfo> GetStringColumnInfo(size_t schema_idx, size_t idx) const = 0;
virtual const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const = 0;
virtual size_t GetSliceId(size_t schema_idx) const = 0;
};
Expand All @@ -245,8 +265,8 @@
slice_formats_.clear();
}

bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const override {
return slice_formats_[schema_idx].GetStringColumnInfo(idx, res);
absl::StatusOr<StringColInfo> GetStringColumnInfo(size_t schema_idx, size_t idx) const override {
return slice_formats_[schema_idx].GetStringColumnInfo(idx);
}

const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const override {
Expand Down Expand Up @@ -287,8 +307,8 @@
}
}

bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const override {
return slice_format_->GetStringColumnInfo(offsets_[schema_idx] + idx, res);
absl::StatusOr<StringColInfo> GetStringColumnInfo(size_t schema_idx, size_t idx) const override {
return slice_format_->GetStringColumnInfo(offsets_[schema_idx] + idx);

Check warning on line 311 in hybridse/include/codec/fe_row_codec.h

View check run for this annotation

Codecov / codecov/patch

hybridse/include/codec/fe_row_codec.h#L310-L311

Added lines #L310 - L311 were not covered by tests
}

const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const override {
Expand Down
3 changes: 3 additions & 0 deletions hybridse/include/codec/type_codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ int32_t AppendString(int8_t* buf_ptr, uint32_t buf_size, uint32_t col_idx,
uint32_t str_start_offset, uint32_t str_field_offset,
uint32_t str_addr_space, uint32_t str_body_offset);

// Write `str_offset` to the address `str_offset_ptr`; the number of bytes actually written is determined by `str_addr_space`.
void EncodeStrOffset(int8_t* str_offset_ptr, int32_t str_offset, int32_t str_addr_space);

inline int8_t GetAddrSpace(uint32_t size) {
if (size <= UINT8_MAX) {
return 1;
Expand Down
8 changes: 5 additions & 3 deletions hybridse/src/benchmark/udf_bm_case.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,10 @@ void SumArrayListCol(benchmark::State* state, MODE mode, int64_t data_size,
schemas_context.GetRowFormat(schema_idx)->GetColumnInfo(col_idx);

codegen::MemoryWindowDecodeIRBuilder builder(&schemas_context, nullptr);
node::TypeNode type;
codegen::SchemaType2DataType(info->type, &type);
node::NodeManager nm;
auto rs = codegen::ColumnSchema2Type(info->schema, &nm);
ASSERT_TRUE(rs.ok());
auto* type = rs.value();

uint32_t col_size;
ASSERT_TRUE(codegen::GetLlvmColumnSize(&type, &col_size));
Expand All @@ -193,7 +195,7 @@ void SumArrayListCol(benchmark::State* state, MODE mode, int64_t data_size,

ASSERT_EQ(0, ::hybridse::codec::v1::GetCol(
reinterpret_cast<int8_t*>(&list_table_ref), 0, info->idx,
info->offset, info->type, buf));
info->offset, info->type(), buf));

{
switch (mode) {
Expand Down
1 change: 1 addition & 0 deletions hybridse/src/case/sql_case.cc
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ bool SqlCase::ExtractSchema(const std::vector<std::string>& columns,
}
column->set_type(type);
column->set_is_not_null(false);
column->mutable_schema()->set_base_type(column->type());
}
} catch (const std::exception& ex) {
LOG(WARNING) << "Fail to ExtractSchema: " << ex.what();
Expand Down
Loading
Loading