From 94d34422ff937c9942f8016f873958b8661ac283 Mon Sep 17 00:00:00 2001
From: PHILO-HE
Date: Fri, 24 Jan 2025 14:40:34 +0800
Subject: [PATCH] Initial

---
Notes (text in this section is ignored by `git am`):
- re2Extract: in the branch that previously always produced NULL, write an
  empty string instead when `emptyNoMatch` is set; NULL is still produced
  when it is not set.
- Tests: register `re2_extract_empty_no_match` (makeRe2Extract with
  emptyNoMatch=true) and check that extracting group 2 of
  "ra(.)|blah(.)(.)" from "rat cat\nbat dog" yields "".
- The new test is named `regexExtractEmptyNoMatch`; reusing `regexExtract`
  would collide with the existing TEST_F of that name in the same file.
- NOTE(review): indentation and template arguments in this patch were
  reconstructed; confirm against the target files before applying.

 velox/functions/lib/Re2Functions.cpp          |  9 +++++--
 .../functions/lib/tests/Re2FunctionsTest.cpp  | 25 +++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/velox/functions/lib/Re2Functions.cpp b/velox/functions/lib/Re2Functions.cpp
index 78ce3e2f29b56..1ef1bb69a3539 100644
--- a/velox/functions/lib/Re2Functions.cpp
+++ b/velox/functions/lib/Re2Functions.cpp
@@ -149,8 +149,13 @@ bool re2Extract(
       result.setNoCopy(row, StringView(extracted.data(), extracted.size()));
       return !StringView::isInline(extracted.size());
     } else {
-      result.setNull(row, true);
-      return false;
+      if (emptyNoMatch) {
+        result.setNoCopy(row, StringView(nullptr, 0));
+        return true;
+      } else {
+        result.setNull(row, true);
+        return false;
+      }
     }
   }
 }
diff --git a/velox/functions/lib/tests/Re2FunctionsTest.cpp b/velox/functions/lib/tests/Re2FunctionsTest.cpp
index f27d9cb46453f..d7e79d5984fb4 100644
--- a/velox/functions/lib/tests/Re2FunctionsTest.cpp
+++ b/velox/functions/lib/tests/Re2FunctionsTest.cpp
@@ -44,6 +44,13 @@ std::shared_ptr<exec::VectorFunction> makeRegexExtract(
   return makeRe2Extract(name, inputArgs, config, /*emptyNoMatch=*/false);
 }
 
+std::shared_ptr<exec::VectorFunction> makeRegexExtractEmptyNoMatch(
+    const std::string& name,
+    const std::vector<exec::VectorFunctionArg>& inputArgs,
+    const core::QueryConfig& config) {
+  return makeRe2Extract(name, inputArgs, config, /*emptyNoMatch=*/true);
+}
+
 class Re2FunctionsTest : public test::FunctionBaseTest {
  public:
   static void SetUpTestCase() {
@@ -54,6 +61,8 @@ class Re2FunctionsTest : public test::FunctionBaseTest {
         "re2_search", re2SearchSignatures(), makeRe2Search);
     exec::registerStatefulVectorFunction(
         "re2_extract", re2ExtractSignatures(), makeRegexExtract);
+    exec::registerStatefulVectorFunction(
+        "re2_extract_empty_no_match", re2ExtractSignatures(), makeRegexExtractEmptyNoMatch);
     exec::registerStatefulVectorFunction(
         "re2_extract_all", re2ExtractAllSignatures(), makeRe2ExtractAll);
     exec::registerStatefulVectorFunction("like", likeSignatures(), makeLike);
@@ -383,6 +392,22 @@ TEST_F(Re2FunctionsTest, regexExtract) {
   });
 }
 
+template <typename F>
+void testRe2ExtractEmptyNoMatch(F&& regexExtract) {
+  // With emptyNoMatch set, extracting group 2 here must yield "".
+  EXPECT_EQ(
+      regexExtract("rat cat\nbat dog", "ra(.)|blah(.)(.)", 2), "");
+}
+
+TEST_F(Re2FunctionsTest, regexExtractEmptyNoMatch) {
+  testRe2ExtractEmptyNoMatch([&](std::optional<std::string> str,
+                                 std::optional<std::string> pattern,
+                                 std::optional<int> group) {
+    return evaluateOnce<std::string>(
+        "re2_extract_empty_no_match(c0, c1, c2)", str, pattern, group);
+  });
+}
+
 TEST_F(Re2FunctionsTest, regexExtractBigintGroupId) {
   testRe2Extract([&](std::optional<std::string> str,
                      std::optional<std::string> pattern,