Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!(ffi): support field nullability in schema visitor #656

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 17 additions & 14 deletions ffi/examples/read-table/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ typedef struct
{
char* name;
char* type;
bool is_nullable;
uintptr_t children;
} SchemaItem;

Expand All @@ -51,11 +52,12 @@ typedef struct

// lists are preallocated to have exactly enough space, so we just fill in the next open slot and
// increment our length
//
// Stores `name`, `type` and `is_nullable` in the slot at index `list->len`, increments
// `list->len`, and returns a pointer to the slot that was just filled. The `name` and `type`
// pointers are stored as given; no copy is made here. This function performs no capacity
// check of its own -- it relies on the preallocation described above.
// NOTE(review): two signature lines appear below (without and with `is_nullable`); presumably
// these are the removed/added sides of a rendered diff -- confirm against the real header.
SchemaItem* add_to_list(SchemaItemList* list, char* name, char* type)
SchemaItem* add_to_list(SchemaItemList* list, char* name, char* type, bool is_nullable)
{
int idx = list->len;
list->list[idx].name = name;
list->list[idx].type = type;
list->list[idx].is_nullable = is_nullable;
list->len++;
return &list->list[idx];
}
Expand Down Expand Up @@ -106,49 +108,49 @@ void visit_struct(
void* data,
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
PRINT_CHILD_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id);
SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct");
SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct", is_nullable);
struct_item->children = child_list_id;
}
// Visitor callback for an array field: builds a display name of the form
// "<name> (contains null: true|false)", appends an "array" item to the sibling list
// `builder->lists[sibling_list_id]`, and records `child_list_id` as that item's children.
// NOTE(review): both `contains_null` and `is_nullable` appear in the parameter list below, and
// `add_to_list` is called both with and without `is_nullable`; presumably these are the
// removed/added sides of a rendered diff. The body still reads `contains_null` -- confirm
// that the final signature provides it.
void visit_array(
void* data,
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool contains_null,
bool is_nullable,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
// Room for the name plus the suffix: " (contains null: false)" is 23 characters, +1 for NUL.
// NOTE(review): the malloc result is used without a NULL check.
char* name_ptr = malloc(sizeof(char) * (name.len + 24));
// Writes at most name.len characters of the name followed by a NUL.
// NOTE(review): "%s" keeps scanning name.ptr until it finds a NUL; if the slice is not
// NUL-terminated this may read past name.len -- confirm, or consider "%.*s" with name.len.
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
// Appends the suffix starting at offset name.len, overwriting the NUL written above.
snprintf(name_ptr + name.len, 24, " (contains null: %s)", contains_null ? "true" : "false");
zachschuermann marked this conversation as resolved.
Show resolved Hide resolved
PRINT_CHILD_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id);
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array");
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array", is_nullable);
array_item->children = child_list_id;
}
// Visitor callback for a map field: builds a display name of the form
// "<name> (contains null: true|false)" from `value_contains_null`, appends a "map" item to the
// sibling list `builder->lists[sibling_list_id]`, and records `child_list_id` as that item's
// children.
// NOTE(review): both `value_contains_null` and `is_nullable` appear in the parameter list
// below, and `add_to_list` is called both with and without `is_nullable`; presumably these are
// the removed/added sides of a rendered diff. The body still reads `value_contains_null` --
// confirm that the final signature provides it.
void visit_map(
void* data,
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool value_contains_null,
bool is_nullable,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
// Room for the name plus the suffix: " (contains null: false)" is 23 characters, +1 for NUL.
// NOTE(review): the malloc result is used without a NULL check.
char* name_ptr = malloc(sizeof(char) * (name.len + 24));
// Writes at most name.len characters of the name followed by a NUL.
// NOTE(review): "%s" keeps scanning name.ptr until it finds a NUL; if the slice is not
// NUL-terminated this may read past name.len -- confirm, or consider "%.*s" with name.len.
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
// Appends the suffix starting at offset name.len, overwriting the NUL written above.
snprintf(name_ptr + name.len, 24, " (contains null: %s)", value_contains_null ? "true" : "false");
PRINT_CHILD_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id);
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map");
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map", is_nullable);
map_item->children = child_list_id;
}

void visit_decimal(
void* data,
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
uint8_t precision,
uint8_t scale)
{
Expand All @@ -157,25 +159,26 @@ void visit_decimal(
char* type = malloc(19 * sizeof(char));
snprintf(type, 19, "decimal(%u)(%d)", precision, scale);
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
}

// Shared implementation for fields without children: obtains a char* for the field name via
// `allocate_string`, prints the visit with PRINT_NO_CHILD_VISIT, and appends an item with the
// given `type` string to the sibling list `builder->lists[sibling_list_id]`.
// The `type` pointer is handed to `add_to_list` as-is, which stores it without copying.
// NOTE(review): `add_to_list` is called both with and without `is_nullable` below; presumably
// these are the removed/added sides of a rendered diff -- confirm against the real header.
void visit_simple_type(
void* data,
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
char* type)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
}

// Generates a `visit_<typename>` callback that forwards its arguments to `visit_simple_type`,
// passing the stringified macro argument (`#typename`) as the type name.
// NOTE(review): the macro appears twice below (a three-argument form and a form that also
// takes and forwards `is_nullable`); presumably these are the removed/added sides of a rendered
// diff -- only one definition can exist in the real header.
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name) \
{ \
visit_simple_type(data, sibling_list_id, name, #typename); \
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool is_nullable)\
{ \
visit_simple_type(data, sibling_list_id, name, is_nullable, #typename); \
}

DEFINE_VISIT_SIMPLE_TYPE(string)
Expand Down
153 changes: 115 additions & 38 deletions ffi/src/schema.rs
scovich marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructTy
/// that element's (already-visited) children.
/// 4. The [`visit_schema`] method returns the id of the list of top-level columns
// WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods
// TODO: struct nullability and field metadata
// TODO: struct field metadata
#[repr(C)]
pub struct EngineSchemaVisitor {
/// opaque state pointer
Expand All @@ -43,6 +43,7 @@ pub struct EngineSchemaVisitor {
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
child_list_id: usize,
),

Expand All @@ -52,7 +53,7 @@ pub struct EngineSchemaVisitor {
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
contains_null: bool, // if this array can contain null values
is_nullable: bool,
child_list_id: usize,
),

Expand All @@ -63,7 +64,7 @@ pub struct EngineSchemaVisitor {
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
value_contains_null: bool, // if this map can contain null values
is_nullable: bool,
child_list_id: usize,
),

Expand All @@ -72,57 +73,106 @@ pub struct EngineSchemaVisitor {
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
precision: u8,
scale: u8,
),

/// Visit a `string` belonging to the list identified by `sibling_list_id`.
pub visit_string:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_string: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `long` belonging to the list identified by `sibling_list_id`.
pub visit_long:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_long: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit an `integer` belonging to the list identified by `sibling_list_id`.
pub visit_integer:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_integer: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `short` belonging to the list identified by `sibling_list_id`.
pub visit_short:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_short: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `byte` belonging to the list identified by `sibling_list_id`.
pub visit_byte:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_byte: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `float` belonging to the list identified by `sibling_list_id`.
pub visit_float:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_float: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `double` belonging to the list identified by `sibling_list_id`.
pub visit_double:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_double: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `boolean` belonging to the list identified by `sibling_list_id`.
pub visit_boolean:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_boolean: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit `binary` belonging to the list identified by `sibling_list_id`.
pub visit_binary:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_binary: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `date` belonging to the list identified by `sibling_list_id`.
pub visit_date:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_date: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `timestamp` belonging to the list identified by `sibling_list_id`.
pub visit_timestamp:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_timestamp: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),

/// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`.
pub visit_timestamp_ntz:
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
pub visit_timestamp_ntz: extern "C" fn(
data: *mut c_void,
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
),
}

/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the
Expand All @@ -143,28 +193,55 @@ pub unsafe extern "C" fn visit_schema(
// Visit every field of the struct `s` and return the id of the list holding them.
// A new list is requested from the visitor's `make_field_list` callback, passing the number of
// fields in `s`; each field is then visited with that list as its sibling list.
// NOTE(review): two `visit_schema_item` calls appear in the loop (a four-argument form and a
// form that also passes `field.is_nullable()`); presumably these are the removed/added sides of
// a rendered diff -- confirm against the real source.
fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len());
for field in s.fields() {
visit_schema_item(field.data_type(), field.name(), visitor, child_list_id);
visit_schema_item(
field.name(),
field.data_type(),
field.is_nullable(),
visitor,
child_list_id,
);
}
child_list_id
}

// Visit the element type of the array `at` and return the id of the one-entry list holding it.
// The element is visited under the fixed name "array_element"; in the form that takes
// `contains_null`, that flag is passed as the element's nullability.
// NOTE(review): two signatures and two `visit_schema_item` calls appear below; presumably these
// are the removed/added sides of a rendered diff -- confirm against the real source.
fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize {
fn visit_array_item(
visitor: &EngineSchemaVisitor,
at: &ArrayType,
contains_null: bool,
) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, 1);
visit_schema_item(&at.element_type, "array_element", visitor, child_list_id);
visit_schema_item(
"array_element",
&at.element_type,
contains_null,
visitor,
child_list_id,
);
child_list_id
}

// Visit the key and value types of the map `mt` and return the id of the two-entry list
// holding them. The key is visited first under the name "map_key", then the value under
// "map_value". In the form that passes nullability, the key is always passed `false` and the
// value is passed `value_contains_null`.
// NOTE(review): two signatures and two sets of `visit_schema_item` calls appear below;
// presumably these are the removed/added sides of a rendered diff -- confirm against the real
// source.
fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize {
fn visit_map_types(
visitor: &EngineSchemaVisitor,
mt: &MapType,
value_contains_null: bool,
) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, 2);
visit_schema_item(&mt.key_type, "map_key", visitor, child_list_id);
visit_schema_item(&mt.value_type, "map_value", visitor, child_list_id);
visit_schema_item("map_key", &mt.key_type, false, visitor, child_list_id);
visit_schema_item(
"map_value",
&mt.value_type,
value_contains_null,
visitor,
child_list_id,
);
child_list_id
}

// Visit a struct field (recursively) and add the result to the list of siblings.
fn visit_schema_item(
data_type: &DataType,
name: &str,
data_type: &DataType,
is_nullable: bool,
visitor: &EngineSchemaVisitor,
sibling_list_id: usize,
) {
Expand All @@ -173,7 +250,8 @@ pub unsafe extern "C" fn visit_schema(
(visitor.$visitor_fn)(
visitor.data,
sibling_list_id,
kernel_string_slice!(name)
kernel_string_slice!(name),
is_nullable
$(, $extra_args) *
)
};
Expand All @@ -183,12 +261,11 @@ pub unsafe extern "C" fn visit_schema(
DataType::Map(mt) => {
call!(
visit_map,
mt.value_contains_null,
visit_map_types(visitor, mt)
visit_map_types(visitor, mt, mt.value_contains_null)
)
}
DataType::Array(at) => {
call!(visit_array, at.contains_null, visit_array_item(visitor, at))
call!(visit_array, visit_array_item(visitor, at, at.contains_null))
}
DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => {
call!(visit_decimal, *precision, *scale)
Expand Down
Loading