From 7fffd60392008ab6a763e82a324980a9fb455c40 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 18 Dec 2023 16:08:47 -0500 Subject: [PATCH] Improve documentation, add link to advanced_udf.rs in the user guide --- datafusion/expr/src/udf.rs | 11 +++++++---- docs/source/library-user-guide/adding-udfs.md | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index dc6560ac115e..f780dc9ea75a 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -201,8 +201,8 @@ where /// This trait exposes the full API for implementing user defined functions and /// can be used to implement any function. /// -/// See [`advanced_udf.rs`] for a full example with implementation. See -/// [`ScalarUDF`] for details on a simpler API. +/// See [`advanced_udf.rs`] for a full example with complete implementation and +/// [`ScalarUDF`] for other available options. /// /// /// [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs @@ -272,8 +272,11 @@ pub trait ScalarUDFImpl { /// count (so the function can know the resulting array size). /// /// # Performance - /// Many functions can be optimized for the case when one or more of their - /// arguments are constant values [`ColumnarValue::Scalar`]. + /// + /// For the best performance, the implementations of `invoke` should handle + /// the common case when one or more of their arguments are constant values + /// (aka [`ColumnarValue::Scalar`]). Calling [`ColumnarValue::into_array`] + /// and treating all arguments as arrays will work, but will be slower. fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue>; /// Returns any aliases (alternate names) for this function. 
This should not diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 32125e2550bd..c51e4de3236c 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -78,6 +78,7 @@ The challenge however is that DataFusion doesn't know about this function. We ne To register a Scalar UDF, you need to wrap the function implementation in a [`ScalarUDF`] struct and then register it with the `SessionContext`. DataFusion provides the [`create_udf`] and helper functions to make this easier. +There is also a lower-level API that offers more functionality but is more complex; it is documented in [`advanced_udf.rs`]. ```rust use datafusion::logical_expr::{Volatility, create_udf}; @@ -97,6 +98,7 @@ let udf = create_udf( [`scalarudf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.ScalarUDF.html [`create_udf`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/fn.create_udf.html [`make_scalar_function`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/functions/fn.make_scalar_function.html +[`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs A few things to note: