Skip to content

Commit

Permalink
function: Allow more expressive array signatures (apache#14532)
Browse files Browse the repository at this point in the history
* function: Allow more expressive array signatures

This commit allows for more expressive array function signatures.
Previously, `ArrayFunctionSignature` was an enum of potential argument
combinations and orders. For many array functions, none of the
`ArrayFunctionSignature` variants worked, so they used
`TypeSignature::VariadicAny` instead. This commit will allow those
functions to use more descriptive signatures which will prevent them
from having to perform manual type checking in the function
implementation.

As an example, this commit also updates the signature of the
`array_replace` family of functions to use a new expressive signature,
which removes a panic that existed previously.

There are still a couple of limitations with this approach. First,
there's no way to describe a function that takes multiple arrays of
different types or dimensions. Additionally, functions that take map
arrays or recursive arrays are not supported when they have more than
one argument.

Works towards resolving apache#14451

* Add mutability

* Move mutability enum

* fmt

* Fix doctest

* Add validation to array args

* Remove mutability and update return types

* fmt

* Fix clippy

* Fix imports

* Add list coercion flag

* Some formatting fixes

* Some formatting fixes

* Remove ArrayFunctionArguments struct

* Simplify helper functions

* Update array_and_element behavior
  • Loading branch information
jkosh44 authored Feb 14, 2025
1 parent e11a8ca commit 469f18b
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 186 deletions.
44 changes: 36 additions & 8 deletions datafusion/common/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,13 @@ pub fn base_type(data_type: &DataType) -> DataType {
}
}

/// Information about how to coerce lists.
///
/// Passed as the optional `array_coercion` argument of
/// [`coerced_type_with_base_type_only`]; `None` there requests no additional
/// list coercion.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ListCoercion {
/// [`DataType::FixedSizeList`] should be coerced to [`DataType::List`].
///
/// With this variant, [`coerced_type_with_base_type_only`] rebuilds a
/// fixed-size list as a [`DataType::List`], dropping the fixed length;
/// without it, the fixed-size list and its length are kept.
///
/// Note the spelling: the variant is `FixedSized…` while the Arrow type is
/// `FixedSizeList`.
FixedSizedListToList,
}

/// A helper function to coerce base type in List.
///
/// Example
Expand All @@ -600,26 +607,47 @@ pub fn base_type(data_type: &DataType) -> DataType {
///
/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
/// let base_type = DataType::Float64;
/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type);
/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type, None);
/// assert_eq!(coerced_type, DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))));
pub fn coerced_type_with_base_type_only(
data_type: &DataType,
base_type: &DataType,
array_coercion: Option<&ListCoercion>,
) -> DataType {
match data_type {
DataType::List(field) | DataType::FixedSizeList(field, _) => {
let field_type =
coerced_type_with_base_type_only(field.data_type(), base_type);
match (data_type, array_coercion) {
(DataType::List(field), _)
| (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
{
let field_type = coerced_type_with_base_type_only(
field.data_type(),
base_type,
array_coercion,
);

DataType::List(Arc::new(Field::new(
field.name(),
field_type,
field.is_nullable(),
)))
}
DataType::LargeList(field) => {
let field_type =
coerced_type_with_base_type_only(field.data_type(), base_type);
(DataType::FixedSizeList(field, len), _) => {
let field_type = coerced_type_with_base_type_only(
field.data_type(),
base_type,
array_coercion,
);

DataType::FixedSizeList(
Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
*len,
)
}
(DataType::LargeList(field), _) => {
let field_type = coerced_type_with_base_type_only(
field.data_type(),
base_type,
array_coercion,
);

DataType::LargeList(Arc::new(Field::new(
field.name(),
Expand Down
135 changes: 80 additions & 55 deletions datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
//! and return types of functions in DataFusion.
use std::fmt::Display;
use std::num::NonZeroUsize;

use crate::type_coercion::aggregates::NUMERICS;
use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
use datafusion_common::types::{LogicalTypeRef, NativeType};
use datafusion_common::utils::ListCoercion;
use itertools::Itertools;

/// Constant that is used as a placeholder for any valid timezone.
Expand Down Expand Up @@ -227,25 +227,13 @@ impl Display for TypeSignatureClass {

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
/// Specialized Signature for ArrayAppend and similar functions
/// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list.
/// The second argument's list dimension should be one dimension less than the first argument's list dimension.
/// List dimension of the List/LargeList is equivalent to the number of List.
/// List dimension of the non-list is 0.
ArrayAndElement,
/// Specialized Signature for ArrayPrepend and similar functions
/// The first argument should be non-list or list, and the second argument should be List/LargeList.
/// The first argument's list dimension should be one dimension less than the second argument's list dimension.
ElementAndArray,
/// Specialized Signature for Array functions of the form (List/LargeList, Index+)
/// The first argument should be List/LargeList/FixedSizedList, and the next n arguments should be Int64.
ArrayAndIndexes(NonZeroUsize),
/// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
ArrayAndElementAndOptionalIndex,
/// Specialized Signature for ArrayEmpty and similar functions
/// The function takes a single argument that must be a List/LargeList/FixedSizeList
/// or something that can be coerced to one of those types.
Array,
/// A function that takes at least one List/LargeList/FixedSizeList argument.
Array {
/// A full list of the arguments accepted by this function.
arguments: Vec<ArrayFunctionArgument>,
/// Additional information about how array arguments should be coerced.
array_coercion: Option<ListCoercion>,
},
/// A function takes a single argument that must be a List/LargeList/FixedSizeList
/// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
RecursiveArray,
Expand All @@ -257,25 +245,15 @@ pub enum ArrayFunctionSignature {
impl Display for ArrayFunctionSignature {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ArrayFunctionSignature::ArrayAndElement => {
write!(f, "array, element")
}
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
write!(f, "array, element, [index]")
}
ArrayFunctionSignature::ElementAndArray => {
write!(f, "element, array")
}
ArrayFunctionSignature::ArrayAndIndexes(count) => {
write!(f, "array")?;
for _ in 0..count.get() {
write!(f, ", index")?;
ArrayFunctionSignature::Array { arguments, .. } => {
for (idx, argument) in arguments.iter().enumerate() {
write!(f, "{argument}")?;
if idx != arguments.len() - 1 {
write!(f, ", ")?;
}
}
Ok(())
}
ArrayFunctionSignature::Array => {
write!(f, "array")
}
ArrayFunctionSignature::RecursiveArray => {
write!(f, "recursive_array")
}
Expand All @@ -286,6 +264,34 @@ impl Display for ArrayFunctionSignature {
}
}

/// One positional argument slot in an array function signature.
///
/// The `Display` implementation renders each kind as a lowercase word:
/// `element`, `index` or `array`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// A list or non-list value whose list dimension should be one less than
    /// that of the Array argument.
    Element,
    /// An index argument of type Int64.
    Index,
    /// A List/LargeList/FixedSizeList argument. Every Array argument must be
    /// coercible to the same type.
    Array,
}

impl Display for ArrayFunctionArgument {
    /// Writes the lowercase name of the argument kind to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Choose the label first so every variant shares a single write.
        let label = match self {
            Self::Element => "element",
            Self::Index => "index",
            Self::Array => "array",
        };
        f.write_str(label)
    }
}

impl TypeSignature {
pub fn to_string_repr(&self) -> Vec<String> {
match self {
Expand Down Expand Up @@ -580,46 +586,65 @@ impl Signature {
pub fn array_and_element(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndElement,
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
},
),
volatility,
}
}
/// Specialized Signature for Array functions with an optional index
pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
),
volatility,
}
}
/// Specialized Signature for ArrayPrepend and similar functions
pub fn element_and_array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ElementAndArray,
),
type_signature: TypeSignature::OneOf(vec![
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
array_coercion: None,
}),
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
ArrayFunctionArgument::Index,
],
array_coercion: None,
}),
]),
volatility,
}
}

/// Specialized Signature for ArrayElement and similar functions
pub fn array_and_index(volatility: Volatility) -> Self {
Self::array_and_indexes(volatility, NonZeroUsize::new(1).expect("1 is non-zero"))
}
/// Specialized Signature for ArraySlice and similar functions
pub fn array_and_indexes(volatility: Volatility, count: NonZeroUsize) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndIndexes(count),
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Index,
],
array_coercion: None,
},
),
volatility,
}
}
/// Specialized Signature for ArrayEmpty and similar functions
pub fn array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![ArrayFunctionArgument::Array],
array_coercion: None,
},
),
volatility,
}
}
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ pub use datafusion_expr_common::columnar_value::ColumnarValue;
pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator};
pub use datafusion_expr_common::operator::Operator;
pub use datafusion_expr_common::signature::{
ArrayFunctionSignature, Signature, TypeSignature, TypeSignatureClass, Volatility,
TIMEZONE_WILDCARD,
ArrayFunctionArgument, ArrayFunctionSignature, Signature, TypeSignature,
TypeSignatureClass, Volatility, TIMEZONE_WILDCARD,
};
pub use datafusion_expr_common::type_coercion::binary;
pub use expr::{
Expand Down
Loading

0 comments on commit 469f18b

Please sign in to comment.