Skip to content

Commit

Permalink
Merge remote-tracking branch 'oss/main' into eval-sql-where
Browse files Browse the repository at this point in the history
  • Loading branch information
scovich committed Jan 16, 2025
2 parents 3b8a451 + 76c65c8 commit 29b9713
Show file tree
Hide file tree
Showing 18 changed files with 258 additions and 307 deletions.
7 changes: 3 additions & 4 deletions ffi/src/test_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,17 @@ pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle<SharedExpress
let array_data = ArrayData::new(array_type.clone(), vec![Scalar::Short(5), Scalar::Short(0)]);

let nested_fields = vec![
StructField::new("a", DataType::INTEGER, false),
StructField::new("b", array_type, false),
StructField::not_null("a", DataType::INTEGER),
StructField::not_null("b", array_type),
];
let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data.clone())];
let nested_struct = StructData::try_new(nested_fields.clone(), nested_values).unwrap();
let nested_struct_type = StructType::new(nested_fields);

let top_level_struct = StructData::try_new(
vec![StructField::new(
vec![StructField::nullable(
"top",
DataType::Struct(Box::new(nested_struct_type)),
true,
)],
vec![Scalar::Struct(nested_struct)],
)
Expand Down
4 changes: 2 additions & 2 deletions integration-tests/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ fn create_arrow_schema() -> arrow::datatypes::Schema {

fn create_kernel_schema() -> delta_kernel::schema::Schema {
use delta_kernel::schema::{DataType, Schema, StructField};
let field_a = StructField::new("a", DataType::LONG, false);
let field_b = StructField::new("b", DataType::BOOLEAN, false);
let field_a = StructField::not_null("a", DataType::LONG);
let field_b = StructField::not_null("b", DataType::BOOLEAN);
Schema::new(vec![field_a, field_b])
}

Expand Down
129 changes: 54 additions & 75 deletions kernel/src/actions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -524,38 +524,30 @@ mod tests {
.project(&[METADATA_NAME])
.expect("Couldn't get metaData field");

let expected = Arc::new(StructType::new([StructField::new(
let expected = Arc::new(StructType::new([StructField::nullable(
"metaData",
StructType::new([
StructField::new("id", DataType::STRING, false),
StructField::new("name", DataType::STRING, true),
StructField::new("description", DataType::STRING, true),
StructField::new(
StructField::not_null("id", DataType::STRING),
StructField::nullable("name", DataType::STRING),
StructField::nullable("description", DataType::STRING),
StructField::not_null(
"format",
StructType::new([
StructField::new("provider", DataType::STRING, false),
StructField::new(
StructField::not_null("provider", DataType::STRING),
StructField::not_null(
"options",
MapType::new(DataType::STRING, DataType::STRING, false),
false,
),
]),
false,
),
StructField::new("schemaString", DataType::STRING, false),
StructField::new(
"partitionColumns",
ArrayType::new(DataType::STRING, false),
false,
),
StructField::new("createdTime", DataType::LONG, true),
StructField::new(
StructField::not_null("schemaString", DataType::STRING),
StructField::not_null("partitionColumns", ArrayType::new(DataType::STRING, false)),
StructField::nullable("createdTime", DataType::LONG),
StructField::not_null(
"configuration",
MapType::new(DataType::STRING, DataType::STRING, false),
false,
),
]),
true,
)]));
assert_eq!(schema, expected);
}
Expand All @@ -566,61 +558,55 @@ mod tests {
.project(&[ADD_NAME])
.expect("Couldn't get add field");

let expected = Arc::new(StructType::new([StructField::new(
let expected = Arc::new(StructType::new([StructField::nullable(
"add",
StructType::new([
StructField::new("path", DataType::STRING, false),
StructField::new(
StructField::not_null("path", DataType::STRING),
StructField::not_null(
"partitionValues",
MapType::new(DataType::STRING, DataType::STRING, true),
false,
),
StructField::new("size", DataType::LONG, false),
StructField::new("modificationTime", DataType::LONG, false),
StructField::new("dataChange", DataType::BOOLEAN, false),
StructField::new("stats", DataType::STRING, true),
StructField::new(
StructField::not_null("size", DataType::LONG),
StructField::not_null("modificationTime", DataType::LONG),
StructField::not_null("dataChange", DataType::BOOLEAN),
StructField::nullable("stats", DataType::STRING),
StructField::nullable(
"tags",
MapType::new(DataType::STRING, DataType::STRING, false),
true,
),
deletion_vector_field(),
StructField::new("baseRowId", DataType::LONG, true),
StructField::new("defaultRowCommitVersion", DataType::LONG, true),
StructField::new("clusteringProvider", DataType::STRING, true),
StructField::nullable("baseRowId", DataType::LONG),
StructField::nullable("defaultRowCommitVersion", DataType::LONG),
StructField::nullable("clusteringProvider", DataType::STRING),
]),
true,
)]));
assert_eq!(schema, expected);
}

fn tags_field() -> StructField {
StructField::new(
StructField::nullable(
"tags",
MapType::new(DataType::STRING, DataType::STRING, false),
true,
)
}

fn partition_values_field() -> StructField {
StructField::new(
StructField::nullable(
"partitionValues",
MapType::new(DataType::STRING, DataType::STRING, false),
true,
)
}

fn deletion_vector_field() -> StructField {
StructField::new(
StructField::nullable(
"deletionVector",
DataType::struct_type([
StructField::new("storageType", DataType::STRING, false),
StructField::new("pathOrInlineDv", DataType::STRING, false),
StructField::new("offset", DataType::INTEGER, true),
StructField::new("sizeInBytes", DataType::INTEGER, false),
StructField::new("cardinality", DataType::LONG, false),
StructField::not_null("storageType", DataType::STRING),
StructField::not_null("pathOrInlineDv", DataType::STRING),
StructField::nullable("offset", DataType::INTEGER),
StructField::not_null("sizeInBytes", DataType::INTEGER),
StructField::not_null("cardinality", DataType::LONG),
]),
true,
)
}

Expand All @@ -629,21 +615,20 @@ mod tests {
let schema = get_log_schema()
.project(&[REMOVE_NAME])
.expect("Couldn't get remove field");
let expected = Arc::new(StructType::new([StructField::new(
let expected = Arc::new(StructType::new([StructField::nullable(
"remove",
StructType::new([
StructField::new("path", DataType::STRING, false),
StructField::new("deletionTimestamp", DataType::LONG, true),
StructField::new("dataChange", DataType::BOOLEAN, false),
StructField::new("extendedFileMetadata", DataType::BOOLEAN, true),
StructField::not_null("path", DataType::STRING),
StructField::nullable("deletionTimestamp", DataType::LONG),
StructField::not_null("dataChange", DataType::BOOLEAN),
StructField::nullable("extendedFileMetadata", DataType::BOOLEAN),
partition_values_field(),
StructField::new("size", DataType::LONG, true),
StructField::nullable("size", DataType::LONG),
tags_field(),
deletion_vector_field(),
StructField::new("baseRowId", DataType::LONG, true),
StructField::new("defaultRowCommitVersion", DataType::LONG, true),
StructField::nullable("baseRowId", DataType::LONG),
StructField::nullable("defaultRowCommitVersion", DataType::LONG),
]),
true,
)]));
assert_eq!(schema, expected);
}
Expand All @@ -653,20 +638,18 @@ mod tests {
let schema = get_log_schema()
.project(&[CDC_NAME])
.expect("Couldn't get remove field");
let expected = Arc::new(StructType::new([StructField::new(
let expected = Arc::new(StructType::new([StructField::nullable(
"cdc",
StructType::new([
StructField::new("path", DataType::STRING, false),
StructField::new(
StructField::not_null("path", DataType::STRING),
StructField::not_null(
"partitionValues",
MapType::new(DataType::STRING, DataType::STRING, true),
false,
),
StructField::new("size", DataType::LONG, false),
StructField::new("dataChange", DataType::BOOLEAN, false),
StructField::not_null("size", DataType::LONG),
StructField::not_null("dataChange", DataType::BOOLEAN),
tags_field(),
]),
true,
)]));
assert_eq!(schema, expected);
}
Expand All @@ -677,14 +660,13 @@ mod tests {
.project(&["txn"])
.expect("Couldn't get transaction field");

let expected = Arc::new(StructType::new([StructField::new(
let expected = Arc::new(StructType::new([StructField::nullable(
"txn",
StructType::new([
StructField::new("appId", DataType::STRING, false),
StructField::new("version", DataType::LONG, false),
StructField::new("lastUpdated", DataType::LONG, true),
StructField::not_null("appId", DataType::STRING),
StructField::not_null("version", DataType::LONG),
StructField::nullable("lastUpdated", DataType::LONG),
]),
true,
)]));
assert_eq!(schema, expected);
}
Expand All @@ -695,25 +677,22 @@ mod tests {
.project(&["commitInfo"])
.expect("Couldn't get commitInfo field");

let expected = Arc::new(StructType::new(vec![StructField::new(
let expected = Arc::new(StructType::new(vec![StructField::nullable(
"commitInfo",
StructType::new(vec![
StructField::new("timestamp", DataType::LONG, true),
StructField::new("inCommitTimestamp", DataType::LONG, true),
StructField::new("operation", DataType::STRING, true),
StructField::new(
StructField::nullable("timestamp", DataType::LONG),
StructField::nullable("inCommitTimestamp", DataType::LONG),
StructField::nullable("operation", DataType::STRING),
StructField::nullable(
"operationParameters",
MapType::new(DataType::STRING, DataType::STRING, false),
true,
),
StructField::new("kernelVersion", DataType::STRING, true),
StructField::new(
StructField::nullable("kernelVersion", DataType::STRING),
StructField::nullable(
"engineCommitInfo",
MapType::new(DataType::STRING, DataType::STRING, false),
true,
),
]),
true,
)]));
assert_eq!(schema, expected);
}
Expand Down
6 changes: 3 additions & 3 deletions kernel/src/actions/schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,20 @@ pub(crate) trait GetNullableContainerStructField {
// nullable values
impl<T: ToNullableContainerType> GetNullableContainerStructField for T {
fn get_nullable_container_struct_field(name: impl Into<String>) -> StructField {
StructField::new(name, T::to_nullable_container_type(), false)
StructField::not_null(name, T::to_nullable_container_type())
}
}

// Normal types produce non-nullable fields
impl<T: ToDataType> GetStructField for T {
fn get_struct_field(name: impl Into<String>) -> StructField {
StructField::new(name, T::to_data_type(), false)
StructField::not_null(name, T::to_data_type())
}
}

// Option types produce nullable fields
impl<T: ToDataType> GetStructField for Option<T> {
fn get_struct_field(name: impl Into<String>) -> StructField {
StructField::new(name, T::to_data_type(), true)
StructField::nullable(name, T::to_data_type())
}
}
3 changes: 1 addition & 2 deletions kernel/src/engine/arrow_conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,7 @@ mod tests {
fn test_metadata_string_conversion() -> DeltaResult<()> {
let mut metadata = HashMap::new();
metadata.insert("description", "hello world".to_owned());
let struct_field =
StructField::new("name", DataType::STRING, false).with_metadata(metadata);
let struct_field = StructField::not_null("name", DataType::STRING).with_metadata(metadata);

let arrow_field = ArrowField::try_from(&struct_field)?;
let new_metadata = arrow_field.metadata();
Expand Down
Loading

0 comments on commit 29b9713

Please sign in to comment.