diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py
index 256840c1f3d..46f30db2558 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -111,6 +111,7 @@ def __init__(
 
     def _validate_input(self):
         if self.name not in (
+            StringFunction.Name.ConcatVertical,
             StringFunction.Name.Contains,
             StringFunction.Name.EndsWith,
             StringFunction.Name.Lowercase,
@@ -124,7 +125,7 @@ def _validate_input(self):
             StringFunction.Name.StripCharsEnd,
             StringFunction.Name.Uppercase,
         ):
-            raise NotImplementedError(f"String function {self.name}")
+            raise NotImplementedError(f"String function {self.name!r}")
         if self.name is StringFunction.Name.Contains:
             literal, strict = self.options
             if not literal:
@@ -204,7 +205,20 @@ def do_evaluate(
         mapping: Mapping[Expr, Column] | None = None,
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        if self.name is StringFunction.Name.Contains:
+        if self.name is StringFunction.Name.ConcatVertical:
+            (child,) = self.children
+            column = child.evaluate(df, context=context, mapping=mapping)
+            delimiter, ignore_nulls = self.options
+            if column.obj.null_count() > 0 and not ignore_nulls:
+                return Column(plc.Column.all_null_like(column.obj, 1))
+            return Column(
+                plc.strings.combine.join_strings(
+                    column.obj,
+                    plc.interop.from_arrow(pa.scalar(delimiter, type=pa.string())),
+                    plc.interop.from_arrow(pa.scalar(None, type=pa.string())),
+                )
+            )
+        elif self.name is StringFunction.Name.Contains:
             child, arg = self.children
             column = child.evaluate(df, context=context, mapping=mapping)
 
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index 8d7d970eb07..4d41a8c590b 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
@@ -454,3 +454,10 @@ def test_string_to_numeric_invalid(numeric_type):
         polars_except=pl.exceptions.InvalidOperationError,
         cudf_except=pl.exceptions.ComputeError,
     )
+
+
+@pytest.mark.parametrize("ignore_nulls", [False, True])
+@pytest.mark.parametrize("delimiter", ["", "/"])
+def test_string_join(ldf, ignore_nulls, delimiter):
+    q = ldf.select(pl.col("a").str.join(delimiter, ignore_nulls=ignore_nulls))
+    assert_gpu_result_equal(q)