From 5111a55735a1669e652c4eb6c03235be1f0d7e7e Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 18:12:07 +0000 Subject: [PATCH 01/27] Maybe working --- .../compute/kernels/scalar_cast_nested.cc | 25 ++++++++++---- .../arrow/compute/kernels/scalar_cast_test.cc | 33 +++++++++++++++++-- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index ec5291ef608a3..3c440a1f610bb 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -363,11 +363,7 @@ struct CastStruct { } } - if (out_field_index < out_field_count) { - return Status::TypeError( - "struct fields don't match or are in the wrong order: Input fields: ", - in_type.ToString(), " output fields: ", out_type.ToString()); - } + const ArraySpan& in_array = batch[0].array; ArrayData* out_array = out->array_data().get(); @@ -380,8 +376,23 @@ struct CastStruct { out_field_index = 0; for (int field_index : fields_to_select) { - const auto& values = (in_array.child_data[field_index].ToArrayData()->Slice( - in_array.offset, in_array.length)); + std::shared_ptr values; + if (field_index == -1) { + const auto& out_field = out_type.field(out_field_index); + if (out_field->nullable()) { + std::shared_ptr nulls; + RETURN_NOT_OK(MakeArrayOfNull(out_field->type()->GetSharedPtr(), batch.length) + .Value(&nulls)); + values = nulls->data(); + } else { + return Status::TypeError( + "struct fields don't match or are in the wrong order: Input fields: ", + in_type.ToString(), " output fields: ", out_type.ToString()); + } + } else { + values = (in_array.child_data[field_index].ToArrayData()->Slice(in_array.offset, + in_array.length)); + } const auto& target_type = out->type()->field(out_field_index++)->type(); ARROW_ASSIGN_OR_RAISE(Datum cast_values, diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a8acf68f66c8b..3a93506454b93 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2760,9 +2760,9 @@ TEST(Cast, StructToBiggerStruct) { b = ArrayFromJSON(int8(), "[3, 4]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); - const auto dest = arrow::struct_({std::make_shared("a", int8()), - std::make_shared("b", int8()), - std::make_shared("c", int8())}); + const auto dest = arrow::struct_({std::make_shared("a", int8(), /*nullable=*/false), + std::make_shared("b", int8(), /*nullable=*/false), + std::make_shared("c", int8(), /*nullable=*/false)}); const auto options = CastOptions::Safe(dest); EXPECT_RAISES_WITH_MESSAGE_THAT( @@ -2771,6 +2771,33 @@ TEST(Cast, StructToBiggerStruct) { Cast(src, options)); } +TEST(Cast, StructToBiggerNullableStruct) { + std::vector field_names = {"a", "b"}; + std::shared_ptr a, b, c; + a = ArrayFromJSON(int8(), "[1, 2]"); + b = ArrayFromJSON(int8(), "[3, 4]"); + c = ArrayFromJSON(int8(), "[null, null]"); + ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); + + const auto fields_dest = arrow::struct_({std::make_shared("a", int8()), + std::make_shared("b", int8()), + std::make_shared("c", int8())}); + const auto options = CastOptions::Safe(fields_dest); + + // std::vector> fields_src_nullable = { + // std::make_shared("a", int8(), true), + // std::make_shared("b", int8(), true), + // std::make_shared("c", int8(), true)}; + + ASSERT_OK_AND_ASSIGN(auto out, Cast(src, options)); + std::cout << out.ToString() << std::endl; + + // ASSERT_OK_AND_ASSIGN( + // auto dest, + // StructArray::Make({a, b, c}, {"a", "b", "c"})); + // CheckCast(src, dest, options); +} + TEST(Cast, StructToDifferentNullabilityStruct) { { // OK to go from non-nullable to nullable... From 6f44e375e739c7cf3ddaec27f26d334627568026 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 18:41:45 +0000 Subject: [PATCH 02/27] Satisfactory tests --- .../arrow/compute/kernels/scalar_cast_test.cc | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 3a93506454b93..ce748e6bbc666 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2760,9 +2760,10 @@ TEST(Cast, StructToBiggerStruct) { b = ArrayFromJSON(int8(), "[3, 4]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); - const auto dest = arrow::struct_({std::make_shared("a", int8(), /*nullable=*/false), - std::make_shared("b", int8(), /*nullable=*/false), - std::make_shared("c", int8(), /*nullable=*/false)}); + const auto dest = + arrow::struct_({std::make_shared("a", int8()), + std::make_shared("b", int8()), + std::make_shared("c", int8(), /*nullable=*/false)}); const auto options = CastOptions::Safe(dest); EXPECT_RAISES_WITH_MESSAGE_THAT( @@ -2776,26 +2777,16 @@ TEST(Cast, StructToBiggerNullableStruct) { std::shared_ptr a, b, c; a = ArrayFromJSON(int8(), "[1, 2]"); b = ArrayFromJSON(int8(), "[3, 4]"); - c = ArrayFromJSON(int8(), "[null, null]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); - const auto fields_dest = arrow::struct_({std::make_shared("a", int8()), - std::make_shared("b", int8()), - std::make_shared("c", int8())}); - const auto options = CastOptions::Safe(fields_dest); - - // std::vector> fields_src_nullable = { - // std::make_shared("a", int8(), true), - // std::make_shared("b", int8(), true), - // std::make_shared("c", int8(), true)}; + const auto type_dest = arrow::struct_( + {std::make_shared("a", int8()), std::make_shared("b", int8()), + std::make_shared("c", int8(), /*nullable=*/true)}); + const auto options = CastOptions::Safe(type_dest); - ASSERT_OK_AND_ASSIGN(auto out, Cast(src, options)); - std::cout << out.ToString() << std::endl; - - // ASSERT_OK_AND_ASSIGN( - // auto dest, - // StructArray::Make({a, b, c}, {"a", "b", "c"})); - // CheckCast(src, dest, options); + c = ArrayFromJSON(int8(), "[null, null]"); + ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({a, b, c}, {"a", "b", "c"})); + CheckCast(src, dest); } TEST(Cast, StructToDifferentNullabilityStruct) { From 1f3a07d017fb93717115e3ff1a6dd55fca421a10 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 18:41:55 +0000 Subject: [PATCH 03/27] Tidy --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 3c440a1f610bb..0e90c5288b38d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -363,8 +363,6 @@ struct CastStruct { } } - - const ArraySpan& in_array = batch[0].array; ArrayData* out_array = out->array_data().get(); From cec4090d921bb182d3f32e326106b87c06b8914d Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 20:21:05 +0000 Subject: [PATCH 04/27] Improve implementation --- .../compute/kernels/scalar_cast_nested.cc | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 0e90c5288b38d..b60be1f9a91c7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -363,6 +363,17 @@ struct CastStruct { } } + for (; out_field_index < out_field_count; ++out_field_index) { + const auto& out_field = out_type.field(out_field_index); + if (out_field->nullable()) { + fields_to_select[out_field_index] = -2; + } else { + return Status::TypeError( + "struct fields don't match or are in the wrong order: Input fields: ", + in_type.ToString(), " output fields: ", out_type.ToString()); + } + } + const ArraySpan& in_array = batch[0].array; ArrayData* out_array = out->array_data().get(); @@ -375,24 +386,19 @@ struct CastStruct { out_field_index = 0; for (int field_index : fields_to_select) { std::shared_ptr values; - if (field_index == -1) { - const auto& out_field = out_type.field(out_field_index); - if (out_field->nullable()) { - std::shared_ptr nulls; - RETURN_NOT_OK(MakeArrayOfNull(out_field->type()->GetSharedPtr(), batch.length) - .Value(&nulls)); - values = nulls->data(); - } else { - return Status::TypeError( - "struct fields don't match or are in the wrong order: Input fields: ", - in_type.ToString(), " output fields: ", out_type.ToString()); - } + const auto& target_type = out->type()->field(out_field_index++)->type(); + + if (field_index == -2) { + std::shared_ptr nulls; + RETURN_NOT_OK(MakeArrayOfNull(target_type->GetSharedPtr(), batch.length) + .Value(&nulls)); + values = nulls->data(); } else { values = (in_array.child_data[field_index].ToArrayData()->Slice(in_array.offset, in_array.length)); } - const auto& target_type = out->type()->field(out_field_index++)->type(); + // TODO(tomnewton): Do we need to call this after creating array of nulls. ARROW_ASSIGN_OR_RAISE(Datum cast_values, Cast(values, target_type, options, ctx->exec_context())); From b5f97b4deb91a5a096b08f6a2ea78b73f59d9b62 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 20:51:52 +0000 Subject: [PATCH 05/27] Keep failures on out of order --- .../compute/kernels/scalar_cast_nested.cc | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index b60be1f9a91c7..197fe21e766ae 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -18,6 +18,7 @@ // Implementation of casting to (or between) list types #include +#include #include #include @@ -348,6 +349,12 @@ struct CastStruct { std::vector fields_to_select(out_field_count, -1); + // create a set of field names + std::set in_field_names; + for (int in_field_index = 0; in_field_index < in_field_count; ++in_field_index) { + in_field_names.insert(in_type.field(in_field_index)->name()); + } + int out_field_index = 0; for (int in_field_index = 0; in_field_index < in_field_count && out_field_index < out_field_count; @@ -360,18 +367,21 @@ struct CastStruct { in_type.ToString(), " ", out_type.ToString()); } fields_to_select[out_field_index++] = in_field_index; + } else if (in_field_names.count(out_field->name()) == 0) { + if (out_field->nullable()) { + fields_to_select[out_field_index++] = -2; + } else { + return Status::TypeError( + "struct fields don't match or are in the wrong order: Input fields: ", + in_type.ToString(), " output fields: ", out_type.ToString()); + } } } - for (; out_field_index < out_field_count; ++out_field_index) { - const auto& out_field = out_type.field(out_field_index); - if (out_field->nullable()) { - fields_to_select[out_field_index] = -2; - } else { - return Status::TypeError( - "struct fields don't match or are in the wrong order: Input fields: ", - in_type.ToString(), " output fields: ", out_type.ToString()); - } + if (out_field_index < out_field_count) { + return Status::TypeError( + "struct fields don't match or are in the wrong order: Input fields: ", + in_type.ToString(), " output fields: ", out_type.ToString()); } const ArraySpan& in_array = batch[0].array; @@ -390,15 +400,15 @@ struct CastStruct { if (field_index == -2) { std::shared_ptr nulls; - RETURN_NOT_OK(MakeArrayOfNull(target_type->GetSharedPtr(), batch.length) - .Value(&nulls)); + RETURN_NOT_OK( + MakeArrayOfNull(target_type->GetSharedPtr(), batch.length).Value(&nulls)); values = nulls->data(); } else { values = (in_array.child_data[field_index].ToArrayData()->Slice(in_array.offset, in_array.length)); } - // TODO(tomnewton): Do we need to call this after creating array of nulls. + // TODO(tomnewton): Do we need to call this after creating array of nulls. ARROW_ASSIGN_OR_RAISE(Datum cast_values, Cast(values, target_type, options, ctx->exec_context())); From febd7319becbe0c85a014771b29c11216b3303e7 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 22:48:29 +0000 Subject: [PATCH 06/27] Fix some tests --- .../arrow/compute/kernels/scalar_cast_test.cc | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index ce748e6bbc666..c3df3831047fd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2540,6 +2540,8 @@ static void CheckStructToStructSubset( d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]"); e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]"); + auto nulls = ArrayFromJSON(dest_value_type, "[null, null, null]"); + ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a1, b1, c1, d1, e1}, field_names)); ASSERT_OK_AND_ASSIGN(auto dest1, @@ -2565,14 +2567,9 @@ static void CheckStructToStructSubset( CheckCast(src, dest5); // field does not exist - const auto dest6 = arrow::struct_({std::make_shared("a", int8()), - std::make_shared("d", int16()), - std::make_shared("f", int64())}); - const auto options6 = CastOptions::Safe(dest6); - EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src, options6)); + ASSERT_OK_AND_ASSIGN(auto dest6, + StructArray::Make({a1, d1, nulls}, {"a", "d", "f"})); + CheckCast(src, dest6); // fields in wrong order const auto dest7 = arrow::struct_({std::make_shared("a", int8()), @@ -2639,6 +2636,8 @@ static void CheckStructToStructSubsetWithNulls( d2 = ArrayFromJSON(dest_value_type, "[6, 51, null]"); e2 = ArrayFromJSON(dest_value_type, "[null, 17, 74]"); + auto nulls = ArrayFromJSON(dest_value_type, "[null, null, null]"); + std::shared_ptr null_bitmap; BitmapFromVector({0, 1, 0}, &null_bitmap); @@ -2674,14 +2673,9 @@ static void CheckStructToStructSubsetWithNulls( CheckCast(src_null, dest5_null); // field does not exist - const auto dest6_null = arrow::struct_({std::make_shared("a", int8()), - std::make_shared("d", int16()), - std::make_shared("f", int64())}); - const auto options6_null = CastOptions::Safe(dest6_null); - EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src_null, options6_null)); + ASSERT_OK_AND_ASSIGN(auto dest6_null, + StructArray::Make({a1, d1, nulls}, {"a", "d", "f"}, null_bitmap)); + CheckCast(src_null, dest6_null); // fields in wrong order const auto dest7_null = arrow::struct_({std::make_shared("a", int8()), @@ -2733,7 +2727,7 @@ TEST(Cast, StructToSameSizedAndNamedStruct) { CheckStructToStruct(NumericTypes() TEST(Cast, StructToStructSubset) { CheckStructToStructSubset(NumericTypes()); } TEST(Cast, StructToStructSubsetWithNulls) { - CheckStructToStructSubsetWithNulls(NumericTypes()); + CheckStructToStructSubsetWithNulls({int16()}); } TEST(Cast, StructToSameSizedButDifferentNamedStruct) { @@ -2760,10 +2754,9 @@ TEST(Cast, StructToBiggerStruct) { b = ArrayFromJSON(int8(), "[3, 4]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); - const auto dest = - arrow::struct_({std::make_shared("a", int8()), - std::make_shared("b", int8()), - std::make_shared("c", int8(), /*nullable=*/false)}); + const auto dest = arrow::struct_( + {std::make_shared("a", int8()), std::make_shared("b", int8()), + std::make_shared("c", int8(), /*nullable=*/false)}); const auto options = CastOptions::Safe(dest); EXPECT_RAISES_WITH_MESSAGE_THAT( From 70cc216e572713b0d3b37195ceb44c71701c6794 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 23:22:50 +0000 Subject: [PATCH 07/27] All tests pass --- .../compute/kernels/scalar_cast_nested.cc | 15 ++++++++--- .../arrow/compute/kernels/scalar_cast_test.cc | 26 +++++++------------ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 197fe21e766ae..c4c173a50408e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -378,10 +378,17 @@ struct CastStruct { } } - if (out_field_index < out_field_count) { - return Status::TypeError( - "struct fields don't match or are in the wrong order: Input fields: ", - in_type.ToString(), " output fields: ", out_type.ToString()); + for (; out_field_index < out_field_count; ++out_field_index) { + const auto& out_field = out_type.field(out_field_index); + if (in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { + // If the field is nullable and not in the input fields we can still fill it with + // nulls. + fields_to_select[out_field_index] = -2; + } else { + return Status::TypeError( + "struct fields don't match or are in the wrong order: Input fields: ", + in_type.ToString(), " output fields: ", out_type.ToString()); + } } const ArraySpan& in_array = batch[0].array; diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index c3df3831047fd..6c5e091bf77dc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2673,8 +2673,9 @@ static void CheckStructToStructSubsetWithNulls( CheckCast(src_null, dest5_null); // field does not exist - ASSERT_OK_AND_ASSIGN(auto dest6_null, - StructArray::Make({a1, d1, nulls}, {"a", "d", "f"}, null_bitmap)); + ASSERT_OK_AND_ASSIGN( + auto dest6_null, + StructArray::Make({a1, d1, nulls}, {"a", "d", "f"}, null_bitmap)); CheckCast(src_null, dest6_null); // fields in wrong order @@ -2731,20 +2732,16 @@ TEST(Cast, StructToStructSubsetWithNulls) { } TEST(Cast, StructToSameSizedButDifferentNamedStruct) { - std::vector field_names = {"a", "b"}; + std::vector src_field_names = {"a", "b"}; std::shared_ptr a, b; a = ArrayFromJSON(int8(), "[1, 2]"); b = ArrayFromJSON(int8(), "[3, 4]"); - ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); + auto nulls = ArrayFromJSON(int8(), "[null, null]"); + ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, src_field_names)); - const auto dest = arrow::struct_( - {std::make_shared("c", int8()), std::make_shared("d", int8())}); - const auto options = CastOptions::Safe(dest); - - EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src, options)); + std::vector dest_field_names = {"c", "d"}; + ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({nulls, nulls}, dest_field_names)); + CheckCast(src, dest); } TEST(Cast, StructToBiggerStruct) { @@ -2772,11 +2769,6 @@ TEST(Cast, StructToBiggerNullableStruct) { b = ArrayFromJSON(int8(), "[3, 4]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); - const auto type_dest = arrow::struct_( - {std::make_shared("a", int8()), std::make_shared("b", int8()), - std::make_shared("c", int8(), /*nullable=*/true)}); - const auto options = CastOptions::Safe(type_dest); - c = ArrayFromJSON(int8(), "[null, null]"); ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({a, b, c}, {"a", "b", "c"})); CheckCast(src, dest); From f5d08c10d2a25029198e72a2f1704c092e6c12ec Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 23:30:38 +0000 Subject: [PATCH 08/27] More test cases --- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 6c5e091bf77dc..22a5aff3515e1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2760,6 +2760,17 @@ TEST(Cast, StructToBiggerStruct) { TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), Cast(src, options)); + + const auto dest2 = + arrow::struct_({std::make_shared("a", int8()), + std::make_shared("c", int8(), /*nullable=*/false), + std::make_shared("b", int8())}); + const auto options2 = CastOptions::Safe(dest2); + + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr("struct fields don't match or are in the wrong order"), + Cast(src, options2)); } TEST(Cast, StructToBiggerNullableStruct) { @@ -2772,6 +2783,9 @@ TEST(Cast, StructToBiggerNullableStruct) { c = ArrayFromJSON(int8(), "[null, null]"); ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({a, b, c}, {"a", "b", "c"})); CheckCast(src, dest); + + ASSERT_OK_AND_ASSIGN(auto dest2, StructArray::Make({a, c, b}, {"a", "c", "b"})); + CheckCast(src, dest2); } TEST(Cast, StructToDifferentNullabilityStruct) { From d8867560024498e215c6bdf9c0843aae53b9eb7f Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 23:37:12 +0000 Subject: [PATCH 09/27] New tests pass --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index c4c173a50408e..6686c1384b515 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -349,7 +349,6 @@ struct CastStruct { std::vector fields_to_select(out_field_count, -1); - // create a set of field names std::set in_field_names; for (int in_field_index = 0; in_field_index < in_field_count; ++in_field_index) { in_field_names.insert(in_type.field(in_field_index)->name()); @@ -357,8 +356,7 @@ struct CastStruct { int out_field_index = 0; for (int in_field_index = 0; - in_field_index < in_field_count && out_field_index < out_field_count; - ++in_field_index) { + in_field_index < in_field_count && out_field_index < out_field_count;) { const auto& in_field = in_type.field(in_field_index); const auto& out_field = out_type.field(out_field_index); if (in_field->name() == out_field->name()) { @@ -367,6 +365,7 @@ struct CastStruct { in_type.ToString(), " ", out_type.ToString()); } fields_to_select[out_field_index++] = in_field_index; + in_field_index++; } else if (in_field_names.count(out_field->name()) == 0) { if (out_field->nullable()) { fields_to_select[out_field_index++] = -2; @@ -375,6 +374,8 @@ struct CastStruct { "struct fields don't match or are in the wrong order: Input fields: ", in_type.ToString(), " output fields: ", out_type.ToString()); } + } else { + in_field_index++; } } From c89f1ecce9c2750b70c2c5262f900625527dc529 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 28 Oct 2024 23:57:46 +0000 Subject: [PATCH 10/27] REduce duplication --- .../compute/kernels/scalar_cast_nested.cc | 50 ++++++++----------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 6686c1384b515..d3f176076ea31 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -354,38 +354,32 @@ struct CastStruct { in_field_names.insert(in_type.field(in_field_index)->name()); } - int out_field_index = 0; - for (int in_field_index = 0; - in_field_index < in_field_count && out_field_index < out_field_count;) { - const auto& in_field = in_type.field(in_field_index); + for (int in_field_index = 0, out_field_index = 0; + out_field_index < out_field_count;) { const auto& out_field = out_type.field(out_field_index); - if (in_field->name() == out_field->name()) { - if (in_field->nullable() && !out_field->nullable()) { - return Status::TypeError("cannot cast nullable field to non-nullable field: ", - in_type.ToString(), " ", out_type.ToString()); - } - fields_to_select[out_field_index++] = in_field_index; - in_field_index++; - } else if (in_field_names.count(out_field->name()) == 0) { - if (out_field->nullable()) { - fields_to_select[out_field_index++] = -2; - } else { - return Status::TypeError( - "struct fields don't match or are in the wrong order: Input fields: ", - in_type.ToString(), " output fields: ", out_type.ToString()); + const auto& in_field = in_type.field(in_field_index); + if (in_field_index < in_field_count) { + // If there are more in_fields check if they match the out_field. + if (in_field->name() == out_field->name()) { + if (in_field->nullable() && !out_field->nullable()) { + return Status::TypeError("cannot cast nullable field to non-nullable field: ", + in_type.ToString(), " ", out_type.ToString()); + } + fields_to_select[out_field_index++] = in_field_index; + in_field_index++; + continue; } - } else { - in_field_index++; } - } - - for (; out_field_index < out_field_count; ++out_field_index) { - const auto& out_field = out_type.field(out_field_index); if (in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { - // If the field is nullable and not in the input fields we can still fill it with - // nulls. - fields_to_select[out_field_index] = -2; + // There will not be any matching in_field but we can fill with null. + fields_to_select[out_field_index++] = -2; + } else if (in_field_index < in_field_count) { + // Didn't match current in_field, and the we cannot fill with null, so + // try next in_field. + in_field_index++; } else { + // Didn't match current in_field, we cannot fill with null, and there + // are no more in_fields to try, so fail. return Status::TypeError( "struct fields don't match or are in the wrong order: Input fields: ", in_type.ToString(), " output fields: ", out_type.ToString()); @@ -401,7 +395,7 @@ struct CastStruct { in_array.offset, in_array.length)); } - out_field_index = 0; + int out_field_index = 0; for (int field_index : fields_to_select) { std::shared_ptr values; const auto& target_type = out->type()->field(out_field_index++)->type(); From 9d5d4361bb63d8fa4f6109beed5f7808afcc5a81 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Tue, 29 Oct 2024 00:05:57 +0000 Subject: [PATCH 11/27] Tidy --- .../compute/kernels/scalar_cast_nested.cc | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index d3f176076ea31..db41e0bd484d9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -397,25 +397,19 @@ struct CastStruct { int out_field_index = 0; for (int field_index : fields_to_select) { - std::shared_ptr values; const auto& target_type = out->type()->field(out_field_index++)->type(); - if (field_index == -2) { - std::shared_ptr nulls; - RETURN_NOT_OK( - MakeArrayOfNull(target_type->GetSharedPtr(), batch.length).Value(&nulls)); - values = nulls->data(); + ARROW_ASSIGN_OR_RAISE(auto nulls, + MakeArrayOfNull(target_type->GetSharedPtr(), batch.length)); + out_array->child_data.push_back(nulls->data()); } else { - values = (in_array.child_data[field_index].ToArrayData()->Slice(in_array.offset, - in_array.length)); + const auto& values = (in_array.child_data[field_index].ToArrayData()->Slice( + in_array.offset, in_array.length)); + ARROW_ASSIGN_OR_RAISE(Datum cast_values, + Cast(values, target_type, options, ctx->exec_context())); + DCHECK(cast_values.is_array()); + out_array->child_data.push_back(cast_values.array()); } - - // TODO(tomnewton): Do we need to call this after creating array of nulls. - ARROW_ASSIGN_OR_RAISE(Datum cast_values, - Cast(values, target_type, options, ctx->exec_context())); - - DCHECK(cast_values.is_array()); - out_array->child_data.push_back(cast_values.array()); } return Status::OK(); From 3f4098048ed6c19545025e705f976a6dc40366bf Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Tue, 29 Oct 2024 00:07:24 +0000 Subject: [PATCH 12/27] Update comment --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index db41e0bd484d9..8150cf07bf5a1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -371,7 +371,7 @@ struct CastStruct { } } if (in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { - // There will not be any matching in_field but we can fill with null. + // Didn't match current in_field, but we can will with null. fields_to_select[out_field_index++] = -2; } else if (in_field_index < in_field_count) { // Didn't match current in_field, and the we cannot fill with null, so From 0cacea9288051571235f1148dddb69a7c40acb6b Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Tue, 29 Oct 2024 00:08:48 +0000 Subject: [PATCH 13/27] Revert debug change --- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 22a5aff3515e1..d206252dc7fd9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2728,7 +2728,7 @@ TEST(Cast, StructToSameSizedAndNamedStruct) { CheckStructToStruct(NumericTypes() TEST(Cast, StructToStructSubset) { CheckStructToStructSubset(NumericTypes()); } TEST(Cast, StructToStructSubsetWithNulls) { - CheckStructToStructSubsetWithNulls({int16()}); + CheckStructToStructSubsetWithNulls(NumericTypes()); } TEST(Cast, StructToSameSizedButDifferentNamedStruct) { From 3377c70ab9ea93fa005934a20ed2296107943b9f Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 30 Oct 2024 18:41:23 +0000 Subject: [PATCH 14/27] Tidy --- .../arrow/compute/kernels/scalar_cast_nested.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 8150cf07bf5a1..d90748ad7a4ea 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -349,30 +349,34 @@ struct CastStruct { std::vector fields_to_select(out_field_count, -1); - std::set in_field_names; + std::set all_in_field_names; for (int in_field_index = 0; in_field_index < in_field_count; ++in_field_index) { - in_field_names.insert(in_type.field(in_field_index)->name()); + all_in_field_names.insert(in_type.field(in_field_index)->name()); } for (int in_field_index = 0, out_field_index = 0; out_field_index < out_field_count;) { const auto& out_field = out_type.field(out_field_index); - const auto& in_field = in_type.field(in_field_index); if (in_field_index < in_field_count) { + const auto& in_field = in_type.field(in_field_index); // If there are more in_fields check if they match the out_field. if (in_field->name() == out_field->name()) { if (in_field->nullable() && !out_field->nullable()) { return Status::TypeError("cannot cast nullable field to non-nullable field: ", in_type.ToString(), " ", out_type.ToString()); } + // Found matching in_field and out_field. fields_to_select[out_field_index++] = in_field_index; + // Using the same in_field for multiple out_fields is not allowed. in_field_index++; continue; } } - if (in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { - // Didn't match current in_field, but we can will with null. - fields_to_select[out_field_index++] = -2; + if (all_in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { + // Didn't match current in_field, but we can fill with null. + // Filling with null is only acceptable on nuallable fields when there + // is definitely no in_field with matching name. + fields_to_select[out_field_index++] = -2; // -2 is a sentinel value for fill with null. } else if (in_field_index < in_field_count) { // Didn't match current in_field, and the we cannot fill with null, so // try next in_field. From be084cd09d6e993e241d88ba996a57353682ce44 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 30 Oct 2024 18:41:46 +0000 Subject: [PATCH 15/27] Add some extra assertions with non nullable fields --- .../arrow/compute/kernels/scalar_cast_test.cc | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index d206252dc7fd9..16b3ec5bfffa1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2571,6 +2571,15 @@ static void CheckStructToStructSubset( StructArray::Make({a1, d1, nulls}, {"a", "d", "f"})); CheckCast(src, dest6); + const auto dest6_5 = arrow::struct_( + {std::make_shared("a", int8()), std::make_shared("d", int16()), + std::make_shared("f", int64(), /*nullable=*/false)}); + const auto options6_5 = CastOptions::Safe(dest6_5); + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr("struct fields don't match or are in the wrong order"), + Cast(src, options6_5)); + // fields in wrong order const auto dest7 = arrow::struct_({std::make_shared("a", int8()), std::make_shared("c", int16()), @@ -2678,6 +2687,15 @@ static void CheckStructToStructSubsetWithNulls( StructArray::Make({a1, d1, nulls}, {"a", "d", "f"}, null_bitmap)); CheckCast(src_null, dest6_null); + const auto dest6_5_null = arrow::struct_( + {std::make_shared("a", int8()), std::make_shared("d", int16()), + std::make_shared("f", int64(), /*nullable=*/false)}); + const auto options6_5_null = CastOptions::Safe(dest6_5_null); + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr("struct fields don't match or are in the wrong order"), + Cast(src_null, options6_5_null)); + // fields in wrong order const auto dest7_null = arrow::struct_({std::make_shared("a", int8()), std::make_shared("c", int16()), @@ -2728,7 +2746,7 @@ TEST(Cast, StructToSameSizedAndNamedStruct) { CheckStructToStruct(NumericTypes() TEST(Cast, StructToStructSubset) { CheckStructToStructSubset(NumericTypes()); } TEST(Cast, StructToStructSubsetWithNulls) { - CheckStructToStructSubsetWithNulls(NumericTypes()); + CheckStructToStructSubsetWithNulls({int8()}); } TEST(Cast, StructToSameSizedButDifferentNamedStruct) { @@ -2742,6 +2760,15 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) { std::vector dest_field_names = {"c", "d"}; ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({nulls, nulls}, dest_field_names)); CheckCast(src, dest); + + const auto dest2 = arrow::struct_( + {std::make_shared("c", int8(), /*nullable=*/false), std::make_shared("d", int8())}); + const auto options2 = CastOptions::Safe(dest2); + + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr("struct fields don't match or are in the wrong order"), + Cast(src, options2)); } TEST(Cast, StructToBiggerStruct) { From 7feb26e989c460a10362167a31f273a059da772b Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 30 Oct 2024 18:47:15 +0000 Subject: [PATCH 16/27] Tidy test numbering --- .../arrow/compute/kernels/scalar_cast_test.cc | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 16b3ec5bfffa1..a25f64240570d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2571,34 +2571,34 @@ static void CheckStructToStructSubset( StructArray::Make({a1, d1, nulls}, {"a", "d", "f"})); CheckCast(src, dest6); - const auto dest6_5 = arrow::struct_( + const auto dest7 = arrow::struct_( {std::make_shared("a", int8()), std::make_shared("d", int16()), std::make_shared("f", int64(), /*nullable=*/false)}); - const auto options6_5 = CastOptions::Safe(dest6_5); + const auto options7 = CastOptions::Safe(dest7); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src, options6_5)); + Cast(src, options7)); // fields in wrong order - const auto dest7 = arrow::struct_({std::make_shared("a", int8()), + const auto dest8 = arrow::struct_({std::make_shared("a", int8()), std::make_shared("c", int16()), std::make_shared("b", int64())}); - const auto options7 = CastOptions::Safe(dest7); + const auto options8 = CastOptions::Safe(dest8); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src, options7)); + Cast(src, options8)); // duplicate missing field names - const auto dest8 = arrow::struct_( + const auto dest9 = arrow::struct_( {std::make_shared("a", int8()), std::make_shared("c", int16()), std::make_shared("d", int32()), std::make_shared("a", int64())}); - const auto options8 = CastOptions::Safe(dest8); + const auto options9 = CastOptions::Safe(dest9); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src, options8)); + Cast(src, options9)); // duplicate present field names ASSERT_OK_AND_ASSIGN( @@ -2687,34 +2687,34 @@ static void CheckStructToStructSubsetWithNulls( StructArray::Make({a1, d1, nulls}, {"a", "d", "f"}, null_bitmap)); CheckCast(src_null, dest6_null); - const auto dest6_5_null = arrow::struct_( + const auto dest7_null = arrow::struct_( {std::make_shared("a", int8()), std::make_shared("d", int16()), std::make_shared("f", int64(), /*nullable=*/false)}); - const auto options6_5_null = CastOptions::Safe(dest6_5_null); + const auto options7_null = CastOptions::Safe(dest7_null); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src_null, options6_5_null)); + Cast(src_null, options7_null)); // fields in wrong order - const auto dest7_null = arrow::struct_({std::make_shared("a", int8()), + const auto dest8_null = arrow::struct_({std::make_shared("a", int8()), std::make_shared("c", int16()), std::make_shared("b", int64())}); - const auto options7_null = CastOptions::Safe(dest7_null); + const auto options8_null = CastOptions::Safe(dest8_null); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src_null, options7_null)); + Cast(src_null, options8_null)); // duplicate missing field names - const auto dest8_null = arrow::struct_( + const auto dest9_null = arrow::struct_( {std::make_shared("a", int8()), std::make_shared("c", int16()), std::make_shared("d", int32()), std::make_shared("a", int64())}); - const auto options8_null = CastOptions::Safe(dest8_null); + const auto options9_null = CastOptions::Safe(dest9_null); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src_null, options8_null)); + Cast(src_null, options9_null)); // duplicate present field values ASSERT_OK_AND_ASSIGN( @@ -2746,7 +2746,7 @@ TEST(Cast, StructToSameSizedAndNamedStruct) { CheckStructToStruct(NumericTypes() TEST(Cast, StructToStructSubset) { CheckStructToStructSubset(NumericTypes()); } TEST(Cast, StructToStructSubsetWithNulls) { - CheckStructToStructSubsetWithNulls({int8()}); + CheckStructToStructSubsetWithNulls(NumericTypes()); } TEST(Cast, StructToSameSizedButDifferentNamedStruct) { @@ -2757,9 +2757,9 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) { auto nulls = ArrayFromJSON(int8(), "[null, null]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, src_field_names)); - std::vector dest_field_names = {"c", "d"}; - ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({nulls, nulls}, dest_field_names)); - CheckCast(src, dest); + std::vector dest1_field_names = {"c", "d"}; + ASSERT_OK_AND_ASSIGN(auto dest1, StructArray::Make({nulls, nulls}, dest1_field_names)); + CheckCast(src, dest1); const auto dest2 = arrow::struct_( {std::make_shared("c", int8(), /*nullable=*/false), std::make_shared("d", int8())}); @@ -2778,15 +2778,15 @@ TEST(Cast, StructToBiggerStruct) { b = ArrayFromJSON(int8(), "[3, 4]"); ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({a, b}, field_names)); - const auto dest = arrow::struct_( + const auto dest1 = arrow::struct_( {std::make_shared("a", int8()), std::make_shared("b", int8()), std::make_shared("c", int8(), /*nullable=*/false)}); - const auto options = CastOptions::Safe(dest); + const auto options1 = CastOptions::Safe(dest1); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - Cast(src, options)); + Cast(src, options1)); const auto dest2 = arrow::struct_({std::make_shared("a", int8()), From 680f224e32b4e01317e3a1cc9e18a3e9cdbeab74 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 30 Oct 2024 18:48:32 +0000 Subject: [PATCH 17/27] autoformat --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 10 ++++++---- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index d90748ad7a4ea..dc57ec26d36c7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -367,16 +367,18 @@ struct CastStruct { } // Found matching in_field and out_field. fields_to_select[out_field_index++] = in_field_index; - // Using the same in_field for multiple out_fields is not allowed. + // Using the same in_field for multiple out_fields is not allowed. in_field_index++; continue; } } if (all_in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { - // Didn't match current in_field, but we can fill with null. - // Filling with null is only acceptable on nuallable fields when there + // Didn't match current in_field, but we can fill with null. + // Filling with null is only acceptable on nuallable fields when there // is definitely no in_field with matching name. - fields_to_select[out_field_index++] = -2; // -2 is a sentinel value for fill with null. + + // -2 is a sentinel value indicating fill with null. + fields_to_select[out_field_index++] = -2; } else if (in_field_index < in_field_count) { // Didn't match current in_field, and the we cannot fill with null, so // try next in_field. diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a25f64240570d..c398806ee6308 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2761,8 +2761,9 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) { ASSERT_OK_AND_ASSIGN(auto dest1, StructArray::Make({nulls, nulls}, dest1_field_names)); CheckCast(src, dest1); - const auto dest2 = arrow::struct_( - {std::make_shared("c", int8(), /*nullable=*/false), std::make_shared("d", int8())}); + const auto dest2 = + arrow::struct_({std::make_shared("c", int8(), /*nullable=*/false), + std::make_shared("d", int8())}); const auto options2 = CastOptions::Safe(dest2); EXPECT_RAISES_WITH_MESSAGE_THAT( From 358822f62192e386c9369030f8eaa3e81fafa440 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 31 Oct 2024 09:58:42 +0000 Subject: [PATCH 18/27] Some progress on MaterializationOfNestedVirtualColumn test --- cpp/src/arrow/dataset/scanner_test.cc | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index fccfc80032d31..7afd0a9ffe77f 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2328,7 +2328,7 @@ DatasetAndBatches MakeNestedDataset() { field("b", boolean()), field("c", struct_({ field("d", int64()), - field("e", float64()), + field("e", int64()), })), }); const auto physical_schema = ::arrow::schema({ @@ -2531,14 +2531,14 @@ TEST(ScanNode, MaterializationOfVirtualColumn) { TEST(ScanNode, MaterializationOfNestedVirtualColumn) { TestPlan plan; - auto basic = MakeNestedDataset(); + auto nested = MakeNestedDataset(); auto options = std::make_shared(); options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true); ASSERT_OK(acero::Declaration::Sequence( { - {"scan", ScanNodeOptions{basic.dataset, options}}, + {"scan", ScanNodeOptions{nested.dataset, options}}, {"augmented_project", acero::ProjectNodeOptions{ {field_ref("a"), field_ref("b"), field_ref("c")}}}, @@ -2546,11 +2546,23 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { }) .AddToPlan(plan.get())); - // TODO(ARROW-1888): allow scanner to "patch up" structs with casts - EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("struct fields don't match or are in the wrong order"), - plan.Run()); + // std::cout << nested << std::endl; + // std::cout << nested.batches << std::endl; + auto expected = nested.batches; + + for (auto& batch : expected) { + // // TODO(tomnewton): finish this test + // // ProjectNode overwrites "c" placeholder with non-null drawn from guarantee + // const auto& value = *batch.guarantee.call()->arguments[1].literal(); + // batch.values[2] = value; + // ProjectNode fills in c.d with nulls + // const auto value = MakeNullScalar(int64()); + ASSERT_OK_AND_ASSIGN(auto nulls, + MakeArrayOfNull(int64()->GetSharedPtr(), batch.length)); + batch.values[2].array()->child_data.insert(batch.values[2].array()->child_data.begin(), nulls->data()); + } + + ASSERT_THAT(plan.Run(), Finishes(ResultWith(UnorderedElementsAreArray(expected)))); } TEST(ScanNode, MinimalEndToEnd) { From 378cef14e33247182fd2546bdae5e630c21ab320 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 31 Oct 2024 15:25:31 +0000 Subject: [PATCH 19/27] First test pass --- cpp/src/arrow/dataset/scanner_test.cc | 75 +++++++++++++-------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 7afd0a9ffe77f..9d8684f036c7f 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2339,24 +2339,25 @@ DatasetAndBatches MakeNestedDataset() { })), }); - return DatasetAndBatchesFromJSON(dataset_schema, physical_schema, - { - { - R"([{"a": 1, "b": null, "c": {"e": 0}}, - {"a": 2, "b": true, "c": {"e": 1}}])", - R"([{"a": null, "b": true, "c": {"e": 2}}, - {"a": 3, "b": false, "c": {"e": null}}])", - R"([{"a": null, "b": null, "c": null}])", - }, - { - R"([{"a": null, "b": true, "c": {"e": 4}}, - {"a": 4, "b": false, "c": null}])", - R"([{"a": 5, "b": null, "c": {"e": 6}}, - {"a": 6, "b": false, "c": {"e": 7}}, - {"a": 7, "b": false, "c": {"e": null}}])", - }, - }, - /*guarantees=*/{}); + return DatasetAndBatchesFromJSON( + dataset_schema, physical_schema, + { + { + R"([{"a": 1, "b": null, "c": {"e": 0}}, + {"a": 2, "b": true, "c": {"e": 1}}])", + }, + // R"([{"a": null, "b": true, "c": {"e": 2}}, + // {"a": 3, "b": false, "c": {"e": null}}])", + // R"([{"a": null, "b": null, "c": null}])", + // { + // R"([{"a": null, "b": true, "c": {"e": 4}}, + // {"a": 4, "b": false, "c": null}])", + // R"([{"a": 5, "b": null, "c": {"e": 6}}, + // {"a": 6, "b": false, "c": {"e": 7}}, + // {"a": 7, "b": false, "c": {"e": null}}])", + // }, + }, + /*guarantees=*/{}); } compute::Expression Materialize(std::vector names, @@ -2536,30 +2537,28 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { auto options = std::make_shared(); options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true); - ASSERT_OK(acero::Declaration::Sequence( - { - {"scan", ScanNodeOptions{nested.dataset, options}}, - {"augmented_project", - acero::ProjectNodeOptions{ - {field_ref("a"), field_ref("b"), field_ref("c")}}}, - {"sink", acero::SinkNodeOptions{&plan.sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + acero::Declaration::Sequence({ + {"scan", ScanNodeOptions{nested.dataset, options}}, + {"augmented_project", acero::ProjectNodeOptions{{ + field_ref("a"), + field_ref("b"), + field_ref("c"), + + }}}, + {"sink", acero::SinkNodeOptions{&plan.sink_gen}}, + }) + .AddToPlan(plan.get())); - // std::cout << nested << std::endl; - // std::cout << nested.batches << std::endl; auto expected = nested.batches; for (auto& batch : expected) { - // // TODO(tomnewton): finish this test - // // ProjectNode overwrites "c" placeholder with non-null drawn from guarantee - // const auto& value = *batch.guarantee.call()->arguments[1].literal(); - // batch.values[2] = value; - // ProjectNode fills in c.d with nulls - // const auto value = MakeNullScalar(int64()); - ASSERT_OK_AND_ASSIGN(auto nulls, - MakeArrayOfNull(int64()->GetSharedPtr(), batch.length)); - batch.values[2].array()->child_data.insert(batch.values[2].array()->child_data.begin(), nulls->data()); + auto d = ArrayFromJSON(int64(), "[null, null]"); + auto e = ArrayFromJSON(int64(), "[0, 1]"); + std::vector field_names = {"d", "e"}; + ASSERT_OK_AND_ASSIGN(auto struct_array, + StructArray::Make({d, e}, field_names)); + batch.values[2] = struct_array; // Datum(); } ASSERT_THAT(plan.Run(), Finishes(ResultWith(UnorderedElementsAreArray(expected)))); From 10c45a173e710dfd0c0904d0f44e16a23f853a8c Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 31 Oct 2024 18:23:12 +0000 Subject: [PATCH 20/27] I think its very close to working --- cpp/src/arrow/dataset/scanner_test.cc | 64 ++++++++++++++++----------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 9d8684f036c7f..cd19c68635175 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2339,25 +2339,24 @@ DatasetAndBatches MakeNestedDataset() { })), }); - return DatasetAndBatchesFromJSON( - dataset_schema, physical_schema, - { - { - R"([{"a": 1, "b": null, "c": {"e": 0}}, + return DatasetAndBatchesFromJSON(dataset_schema, physical_schema, + { + { + R"([{"a": 1, "b": null, "c": {"e": 0}}, {"a": 2, "b": true, "c": {"e": 1}}])", - }, - // R"([{"a": null, "b": true, "c": {"e": 2}}, - // {"a": 3, "b": false, "c": {"e": null}}])", - // R"([{"a": null, "b": null, "c": null}])", - // { - // R"([{"a": null, "b": true, "c": {"e": 4}}, - // {"a": 4, "b": false, "c": null}])", - // R"([{"a": 5, "b": null, "c": {"e": 6}}, - // {"a": 6, "b": false, "c": {"e": 7}}, - // {"a": 7, "b": false, "c": {"e": null}}])", - // }, - }, - /*guarantees=*/{}); + R"([{"a": null, "b": true, "c": {"e": 2}}, + {"a": 3, "b": false, "c": {"e": null}}])", + R"([{"a": null, "b": null, "c": null}])", + }, + { + R"([{"a": null, "b": true, "c": {"e": 4}}, + {"a": 4, "b": false, "c": null}])", + R"([{"a": 5, "b": null, "c": {"e": 6}}, + {"a": 6, "b": false, "c": {"e": 7}}, + {"a": 7, "b": false, "c": {"e": null}}])", + }, + }, + /*guarantees=*/{}); } compute::Expression Materialize(std::vector names, @@ -2544,7 +2543,6 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { field_ref("a"), field_ref("b"), field_ref("c"), - }}}, {"sink", acero::SinkNodeOptions{&plan.sink_gen}}, }) @@ -2553,12 +2551,28 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { auto expected = nested.batches; for (auto& batch : expected) { - auto d = ArrayFromJSON(int64(), "[null, null]"); - auto e = ArrayFromJSON(int64(), "[0, 1]"); - std::vector field_names = {"d", "e"}; - ASSERT_OK_AND_ASSIGN(auto struct_array, - StructArray::Make({d, e}, field_names)); - batch.values[2] = struct_array; // Datum(); + // auto d = ArrayFromJSON(int64(), "[null, null]"); + // auto e = ArrayFromJSON(int64(), "[0, 1]"); + // auto value = batch.values[2]; + + ASSERT_OK_AND_ASSIGN(auto nulls, + MakeArrayOfNull(int64()->GetSharedPtr(), batch.length)); + // auto e_copy = std::make_shared(batch.values[2].array()->child_data[0]); + // auto e = batch.values[2].make_array(); + auto e = batch.values[2].array()->child_data[0]; + // Array e_array; + auto e_array = std::make_shared(e); + // e->SetData() + // .push_back(batch.values[2].array()->child_data[0]); + + std::vector names = {"d", "e"}; + + std::vector> arrays = {nulls, e_array}; + + ASSERT_OK_AND_ASSIGN( + auto struct_array, + StructArray::Make(arrays, names)); + batch.values[2] = struct_array; } ASSERT_THAT(plan.Run(), Finishes(ResultWith(UnorderedElementsAreArray(expected)))); From 71f925899edd1068dc67f43e94b13979e3c8cd7f Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 1 Nov 2024 10:16:47 +0000 Subject: [PATCH 21/27] Even closer --- cpp/src/arrow/dataset/scanner_test.cc | 26 ++++++-------------------- cpp/src/arrow/datum.cc | 6 ++++-- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index cd19c68635175..82b6b9ddb4254 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2551,28 +2551,14 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { auto expected = nested.batches; for (auto& batch : expected) { - // auto d = ArrayFromJSON(int64(), "[null, null]"); - // auto e = ArrayFromJSON(int64(), "[0, 1]"); - // auto value = batch.values[2]; - + // Scan will fill in "c.d" with nulls. ASSERT_OK_AND_ASSIGN(auto nulls, MakeArrayOfNull(int64()->GetSharedPtr(), batch.length)); - // auto e_copy = std::make_shared(batch.values[2].array()->child_data[0]); - // auto e = batch.values[2].make_array(); - auto e = batch.values[2].array()->child_data[0]; - // Array e_array; - auto e_array = std::make_shared(e); - // e->SetData() - // .push_back(batch.values[2].array()->child_data[0]); - - std::vector names = {"d", "e"}; - - std::vector> arrays = {nulls, e_array}; - - ASSERT_OK_AND_ASSIGN( - auto struct_array, - StructArray::Make(arrays, names)); - batch.values[2] = struct_array; + auto c_data = batch.values[2].array()->Copy(); + c_data->child_data.insert(c_data->child_data.begin(), nulls->data()); + c_data->type = nested.dataset->schema()->field(2)->type(); + auto c_array = std::make_shared(c_data); + batch.values[2] = c_array; } ASSERT_THAT(plan.Run(), Finishes(ResultWith(UnorderedElementsAreArray(expected)))); diff --git a/cpp/src/arrow/datum.cc b/cpp/src/arrow/datum.cc index 2ac230232e1b7..00b870de9be2b 100644 --- a/cpp/src/arrow/datum.cc +++ b/cpp/src/arrow/datum.cc @@ -164,8 +164,10 @@ bool Datum::Equals(const Datum& other) const { return true; case Datum::SCALAR: return internal::SharedPtrEquals(this->scalar(), other.scalar()); - case Datum::ARRAY: - return internal::SharedPtrEquals(this->make_array(), other.make_array()); + case Datum::ARRAY: { + bool equal = internal::SharedPtrEquals(this->make_array(), other.make_array()); + return equal; + } case Datum::CHUNKED_ARRAY: return internal::SharedPtrEquals(this->chunked_array(), other.chunked_array()); case Datum::RECORD_BATCH: From 69d962eb2ced20f9b80709bfa08ec96662da59df Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 1 Nov 2024 10:40:05 +0000 Subject: [PATCH 22/27] Success --- cpp/src/arrow/dataset/scanner_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 82b6b9ddb4254..ad6933e0ad358 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2557,7 +2557,7 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { auto c_data = batch.values[2].array()->Copy(); c_data->child_data.insert(c_data->child_data.begin(), nulls->data()); c_data->type = nested.dataset->schema()->field(2)->type(); - auto c_array = std::make_shared(c_data); + auto c_array = std::make_shared(c_data); batch.values[2] = c_array; } From ca0285cebdc090c705294299ef0da1d6e2753ecf Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 1 Nov 2024 10:44:13 +0000 Subject: [PATCH 23/27] REvert debug changes --- cpp/src/arrow/dataset/scanner_test.cc | 30 +++++++++++++-------------- cpp/src/arrow/datum.cc | 6 ++---- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index ad6933e0ad358..7c78ac5669d55 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2343,17 +2343,17 @@ DatasetAndBatches MakeNestedDataset() { { { R"([{"a": 1, "b": null, "c": {"e": 0}}, - {"a": 2, "b": true, "c": {"e": 1}}])", + {"a": 2, "b": true, "c": {"e": 1}}])", R"([{"a": null, "b": true, "c": {"e": 2}}, - {"a": 3, "b": false, "c": {"e": null}}])", + {"a": 3, "b": false, "c": {"e": null}}])", R"([{"a": null, "b": null, "c": null}])", }, { R"([{"a": null, "b": true, "c": {"e": 4}}, - {"a": 4, "b": false, "c": null}])", + {"a": 4, "b": false, "c": null}])", R"([{"a": 5, "b": null, "c": {"e": 6}}, - {"a": 6, "b": false, "c": {"e": 7}}, - {"a": 7, "b": false, "c": {"e": null}}])", + {"a": 6, "b": false, "c": {"e": 7}}, + {"a": 7, "b": false, "c": {"e": null}}])", }, }, /*guarantees=*/{}); @@ -2536,17 +2536,15 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { auto options = std::make_shared(); options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true); - ASSERT_OK( - acero::Declaration::Sequence({ - {"scan", ScanNodeOptions{nested.dataset, options}}, - {"augmented_project", acero::ProjectNodeOptions{{ - field_ref("a"), - field_ref("b"), - field_ref("c"), - }}}, - {"sink", acero::SinkNodeOptions{&plan.sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK(acero::Declaration::Sequence( + { + {"scan", ScanNodeOptions{nested.dataset, options}}, + {"augmented_project", + acero::ProjectNodeOptions{ + {field_ref("a"), field_ref("b"), field_ref("c")}}}, + {"sink", acero::SinkNodeOptions{&plan.sink_gen}}, + }) + .AddToPlan(plan.get())); auto expected = nested.batches; diff --git a/cpp/src/arrow/datum.cc b/cpp/src/arrow/datum.cc index 00b870de9be2b..2ac230232e1b7 100644 --- a/cpp/src/arrow/datum.cc +++ b/cpp/src/arrow/datum.cc @@ -164,10 +164,8 @@ bool Datum::Equals(const Datum& other) const { return true; case Datum::SCALAR: return internal::SharedPtrEquals(this->scalar(), other.scalar()); - case Datum::ARRAY: { - bool equal = internal::SharedPtrEquals(this->make_array(), other.make_array()); - return equal; - } + case Datum::ARRAY: + return internal::SharedPtrEquals(this->make_array(), other.make_array()); case Datum::CHUNKED_ARRAY: return internal::SharedPtrEquals(this->chunked_array(), other.chunked_array()); case Datum::RECORD_BATCH: From 583701d59695d0cfb918f8e522e72e0d0b491682 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 1 Nov 2024 10:56:19 +0000 Subject: [PATCH 24/27] Autoformat --- cpp/src/arrow/dataset/scanner_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 7c78ac5669d55..b44d1d6e799ac 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2549,7 +2549,7 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) { auto expected = nested.batches; for (auto& batch : expected) { - // Scan will fill in "c.d" with nulls. + // Scan will fill in "c.d" with nulls. ASSERT_OK_AND_ASSIGN(auto nulls, MakeArrayOfNull(int64()->GetSharedPtr(), batch.length)); auto c_data = batch.values[2].array()->Copy(); From f41a9415188a23f6ef3988af8300af3124e52822 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 14 Nov 2024 08:34:40 +0000 Subject: [PATCH 25/27] Update cpp/src/arrow/compute/kernels/scalar_cast_nested.cc Co-authored-by: David Li --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index dc57ec26d36c7..2273740930867 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -374,7 +374,7 @@ struct CastStruct { } if (all_in_field_names.count(out_field->name()) == 0 && out_field->nullable()) { // Didn't match current in_field, but we can fill with null. - // Filling with null is only acceptable on nuallable fields when there + // Filling with null is only acceptable on nullable fields when there // is definitely no in_field with matching name. // -2 is a sentinel value indicating fill with null. From 13dc2d1243f0deddf7343d63ea63cdecf9e0a299 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 14 Nov 2024 08:46:50 +0000 Subject: [PATCH 26/27] Add kFillNullSentinel --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 2273740930867..0a7b51a3d3f12 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -340,6 +340,8 @@ struct CastFixedList { }; struct CastStruct { + static constexpr int kFillNullSentinel = -2; + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const CastOptions& options = CastState::Get(ctx); const auto& in_type = checked_cast(*batch[0].type()); @@ -377,8 +379,7 @@ struct CastStruct { // Filling with null is only acceptable on nullable fields when there // is definitely no in_field with matching name. - // -2 is a sentinel value indicating fill with null. - fields_to_select[out_field_index++] = -2; + fields_to_select[out_field_index++] = kFillNullSentinel; } else if (in_field_index < in_field_count) { // Didn't match current in_field, and the we cannot fill with null, so // try next in_field. @@ -404,7 +405,7 @@ struct CastStruct { int out_field_index = 0; for (int field_index : fields_to_select) { const auto& target_type = out->type()->field(out_field_index++)->type(); - if (field_index == -2) { + if (field_index == kFillNullSentinel) { ARROW_ASSIGN_OR_RAISE(auto nulls, MakeArrayOfNull(target_type->GetSharedPtr(), batch.length)); out_array->child_data.push_back(nulls->data()); From dc653976ad595d75b869ca6137ae1bccf271b125 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 14 Nov 2024 14:20:38 +0000 Subject: [PATCH 27/27] Move kFillNullSentinel --- cpp/src/arrow/compute/kernels/scalar_cast_nested.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 0a7b51a3d3f12..0033d89fc40fc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -340,9 +340,9 @@ struct CastFixedList { }; struct CastStruct { - static constexpr int kFillNullSentinel = -2; - static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + static constexpr int kFillNullSentinel = -2; + const CastOptions& options = CastState::Get(ctx); const auto& in_type = checked_cast(*batch[0].type()); const auto& out_type = checked_cast(*out->type());