diff --git a/velox/docs/functions/spark/string.rst b/velox/docs/functions/spark/string.rst index 16c0e0c80e2c..82095514539b 100644 --- a/velox/docs/functions/spark/string.rst +++ b/velox/docs/functions/spark/string.rst @@ -22,11 +22,11 @@ Unless specified otherwise, all functions return NULL if at least one of the arg .. spark:function:: concat_ws(separator, [string]/[array], ...) -> varchar - Returns the concatenation for ``string`` and all elements in ``array``, separated + Returns the concatenation of ``string`` and all elements in ``array``, separated by ``separator``. ``separator`` can be empty string. It takes variable number of remaining arguments. And ``string`` & ``array`` can be used in combination. NULL element is skipped in the concatenation. If ``separator`` is NULL, returns NULL, regardless of the - following inputs. For non-NULL ``separator``, if only it is provided or all remaining inputs + following inputs. For non-NULL ``separator``, if no remaining input or all remaining inputs are NULL, returns an empty string. :: SELECT concat_ws('~', 'a', 'b', 'c'); -- 'a~b~c' diff --git a/velox/expression/tests/SparkExpressionFuzzerTest.cpp b/velox/expression/tests/SparkExpressionFuzzerTest.cpp index 9ac8381a66b8..18262da2df80 100644 --- a/velox/expression/tests/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/tests/SparkExpressionFuzzerTest.cpp @@ -55,9 +55,10 @@ int main(int argc, char** argv) { "replace", "might_contain", "unix_timestamp", - // Skip concat_ws as it triggers a test failure due to an incorrect - // expression generation from fuzzer: - // https://github.com/facebookincubator/velox/issues/6590 + // Skip concat_ws due to the below issue: + // We use "any" type in its signature to allow mixed + // using of VARCHAR & ARRAY. But the fuzzer + // couldn't generate correct expressions for it. "concat_ws"}; // Required by spark_partition_id function. diff --git a/velox/functions/sparksql/String.cpp b/velox/functions/sparksql/String.cpp index c8b487c3ad34..ad59b23f7fe5 100644 --- a/velox/functions/sparksql/String.cpp +++ b/velox/functions/sparksql/String.cpp @@ -112,9 +112,9 @@ void doApply( std::vector argMapping; std::vector constantStrings; auto numArgs = args.size(); - // Save constant values to constantStrings_. - // Identify and combine consecutive constant inputs. argMapping.reserve(numArgs - 1); + // Save intermediate result for consecutive constant string args. + // They are concatenated in advance. constantStrings.reserve(numArgs - 1); // For each array arg, save rawSizes, rawOffsets, indices, and elements // BaseBector. @@ -142,8 +142,8 @@ void doApply( } // Handles string arg. argMapping.push_back(i); - // Cannot concat string args in advance. if (!separator.has_value()) { + // Cannot concat consecutive constant string args in advance. constantStrings.push_back(""); continue; } @@ -169,7 +169,7 @@ void doApply( } } - // Number of string columns after combined constant ones. + // Number of string columns after combined consecutive constant ones. auto numStringCols = constantStrings.size(); // For column string arg decoding. std::vector decodedStringArgs; @@ -369,7 +369,9 @@ std::shared_ptr makeLength( std::vector> concatWsSignatures() { return {// The second and folowing arguments are varchar or array(varchar). - // The argument type will be checked in makeConcatWs. + // Use "any" to allow the mixed using of these two types. The + // argument type will be checked in makeConcatWs. + // // varchar, [varchar], [array(varchar)], ... -> varchar. exec::FunctionSignatureBuilder() .returnType("varchar")