From c1587506dc9bce270f81b47426332cb0a8eaec89 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Thu, 9 Jan 2025 11:40:36 +0000
Subject: [PATCH 1/3] Use repr formatting for enum

---
 python/cudf_polars/cudf_polars/dsl/expressions/string.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py
index 256840c1f3d..dd4ed08eaee 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -124,7 +124,7 @@ def _validate_input(self):
             StringFunction.Name.StripCharsEnd,
             StringFunction.Name.Uppercase,
         ):
-            raise NotImplementedError(f"String function {self.name}")
+            raise NotImplementedError(f"String function {self.name!r}")
         if self.name is StringFunction.Name.Contains:
             literal, strict = self.options
             if not literal:
From 88fdd58776313a6b842ba46ddb81ced76a10d5ae Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Thu, 16 Jan 2025 13:20:26 +0000
Subject: [PATCH 2/3] Implement str.join

---
 .../cudf_polars/dsl/expressions/string.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py
index dd4ed08eaee..46f30db2558 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py
@@ -111,6 +111,7 @@ def __init__(
 
     def _validate_input(self):
         if self.name not in (
+            StringFunction.Name.ConcatVertical,
             StringFunction.Name.Contains,
             StringFunction.Name.EndsWith,
             StringFunction.Name.Lowercase,
@@ -204,7 +205,20 @@ def do_evaluate(
         mapping: Mapping[Expr, Column] | None = None,
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        if self.name is StringFunction.Name.Contains:
+        if self.name is StringFunction.Name.ConcatVertical:
+            (child,) = self.children
+            column = child.evaluate(df, context=context, mapping=mapping)
+            delimiter, ignore_nulls = self.options
+            if column.obj.null_count() > 0 and not ignore_nulls:
+                return Column(plc.Column.all_null_like(column.obj, 1))
+            return Column(
+                plc.strings.combine.join_strings(
+                    column.obj,
+                    plc.interop.from_arrow(pa.scalar(delimiter, type=pa.string())),
+                    plc.interop.from_arrow(pa.scalar(None, type=pa.string())),
+                )
+            )
+        elif self.name is StringFunction.Name.Contains:
             child, arg = self.children
             column = child.evaluate(df, context=context, mapping=mapping)
 
From 1d8febc49141285540fcb6907b001e9612c01de3 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Thu, 23 Jan 2025 16:38:45 +0000
Subject: [PATCH 3/3] Add test of new functionality

---
 .../cudf_polars/tests/expressions/test_stringfunction.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index 8d7d970eb07..4d41a8c590b 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 
 from __future__ import annotations
@@ -454,3 +454,10 @@ def test_string_to_numeric_invalid(numeric_type):
         polars_except=pl.exceptions.InvalidOperationError,
         cudf_except=pl.exceptions.ComputeError,
     )
+
+
+@pytest.mark.parametrize("ignore_nulls", [False, True])
+@pytest.mark.parametrize("delimiter", ["", "/"])
+def test_string_join(ldf, ignore_nulls, delimiter):
+    q = ldf.select(pl.col("a").str.join(delimiter, ignore_nulls=ignore_nulls))
+    assert_gpu_result_equal(q)