Skip to content

Commit

Permalink
Neater implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom-Newton committed Jan 14, 2025
1 parent 44ac772 commit 7a3a569
Showing 1 changed file with 4 additions and 26 deletions.
30 changes: 4 additions & 26 deletions cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

// Implementation of casting to (or between) list types

#include <iostream>
#include <limits>
#include <map>
#include <utility>
Expand Down Expand Up @@ -342,12 +341,6 @@ struct CastFixedList {

struct CastStruct {
static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
// The equivalent logic for top level columns is
// https://github.com/apache/arrow/blob/6252e9ceeb0f8544c14f79d895a37ac198131f88/cpp/src/arrow/compute/expression.cc#L669
// - For top level columns duplicate names are considered an error when reading
// - When casting tables, reordering or filling top level columns with null is not
// supported. These behaviours are implemented in MakeExecBatch which seems to be
// used for scanning datasets and constructing data from expressions.
static constexpr int kFillNullSentinel = -2;

const CastOptions& options = CastState::Get(ctx);
Expand All @@ -365,29 +358,14 @@ struct CastStruct {

for (int out_field_index = 0; out_field_index < out_field_count; ++out_field_index) {
const auto& out_field = out_type.field(out_field_index);
auto maybe_in_field_indexes = in_fields.equal_range(out_field->name());
// for (const auto& field : in_fields) {
// std::cout << "Field name: " << field.first << ", Field index: " << field.second
// << std::endl;
// }
// std::cout << std::endl;
if (maybe_in_field_indexes.first != maybe_in_field_indexes.second) {
// There is at least one in_field with matching name.
const auto& in_field_index = maybe_in_field_indexes.first->second;
const auto& in_field = in_type.field(maybe_in_field_indexes.first->second);
auto maybe_in_field_index = in_fields.extract(out_field->name());
if (!maybe_in_field_index.empty()) {
const auto& in_field = in_type.field(maybe_in_field_index.mapped());
if (in_field->nullable() && !out_field->nullable()) {
return Status::TypeError("cannot cast nullable field to non-nullable field: ",
in_type.ToString(), " ", out_type.ToString());
}

in_fields.erase(maybe_in_field_indexes.first);
// if (maybe_in_field_indexes.first != --maybe_in_field_indexes.second) {
// // There is more than one `in_field` with matching name. Remove the one we
// are
// // using to maintain column order in the case that the output struct also has
// // duplicate field names.
// }
fields_to_select[out_field_index] = in_field_index;
fields_to_select[out_field_index] = maybe_in_field_index.mapped();
} else if (out_field->nullable()) {
fields_to_select[out_field_index] = kFillNullSentinel;
} else {
Expand Down

0 comments on commit 7a3a569

Please sign in to comment.