From 7b71156d99557168d46292c010f82b812947ffb8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 22 Dec 2023 17:02:31 -0400 Subject: [PATCH] GH-39138: [R] Fix implicit conversion warnings (#39250) ### Rationale for this change We have failing CRAN checks because this warning occurs on one check machine. ### What changes are included in this PR? Implicit integer casts are made explicit and/or variable declarations were fixed so that fewer implicit integer casts were performed. Fully solving the warnings also requires https://github.com/r-lib/cpp11/pull/349 since some errors occur in those headers. ### Are these changes tested? This particular test we can't do on CI because the MacOS runner we have doesn't have a new enough `clang` to support the requisite `-W` flags. I tested this locally by adding `PKG_CXXFLAGS=-Wconversion -Wno-sign-conversion -Wsign-compare -Werror` to `Makevars.in`. ### Are there any user-facing changes? No * Closes: #39138 Authored-by: Dewey Dunnington Signed-off-by: Dewey Dunnington --- r/src/altrep.cpp | 56 +++++++++++++++++++---------- r/src/array.cpp | 18 ++++++---- r/src/array_to_vector.cpp | 14 ++++---- r/src/arraydata.cpp | 12 +++---- r/src/arrowExports.cpp | 76 +++++++++++++++++++-------------------- r/src/arrow_cpp11.h | 14 +++++++- r/src/arrow_types.h | 4 +-- r/src/chunkedarray.cpp | 5 ++- r/src/compression.cpp | 2 +- r/src/compute.cpp | 15 ++++---- r/src/dataset.cpp | 4 +-- r/src/datatype.cpp | 2 +- r/src/io.cpp | 11 ++++-- r/src/message.cpp | 4 +-- r/src/r_to_arrow.cpp | 18 +++++----- r/src/recordbatch.cpp | 14 ++++---- r/src/schema.cpp | 4 +-- r/src/table.cpp | 16 ++++----- 18 files changed, 165 insertions(+), 124 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 9745393d01bbc..bdaac0a9ce5d2 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -275,7 +275,8 @@ struct AltrepVectorPrimitive : public AltrepVectorBase(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; return array->IsNull(j) ? cpp11::na() @@ -466,10 +467,10 @@ struct AltrepFactor : public AltrepVectorBase { std::unique_ptr unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); - size_t n_arrays = chunked_array->num_chunks(); + int n_arrays = chunked_array->num_chunks(); BufferVector arrays_transpose(n_arrays); - for (size_t i = 0; i < n_arrays; i++) { + for (int i = 0; i < n_arrays; i++) { const auto& dict_i = *internal::checked_cast(*chunked_array->chunk(i)) .dictionary(); @@ -559,17 +560,14 @@ struct AltrepFactor : public AltrepVectorBase { return dup; } - // The value at position i - static int Elt(SEXP alt, R_xlen_t i) { - if (Base::IsMaterialized(alt)) { - return INTEGER_ELT(Representation(alt), i); - } - + // The value at position i as an int64_t (to make bounds checking less verbose) + static int64_t Elt64(SEXP alt, R_xlen_t i) { auto altrep_data = reinterpret_cast(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; if (!array->IsNull(j)) { @@ -578,7 +576,7 @@ struct AltrepFactor : public AltrepVectorBase { if (WasUnified(alt)) { const auto* transpose_data = reinterpret_cast( - GetArrayTransposed(alt, resolve.chunk_index)->data()); + GetArrayTransposed(alt, static_cast(resolve.chunk_index))->data()); switch (indices->type_id()) { case Type::UINT8: @@ -617,7 +615,7 @@ struct AltrepFactor : public AltrepVectorBase { case Type::INT64: return indices->data()->GetValues(1)[j] + 1; case Type::UINT64: - return indices->data()->GetValues(1)[j] + 1; + return static_cast(indices->data()->GetValues(1)[j] + 1); default: break; } @@ -628,6 +626,18 @@ struct AltrepFactor : public AltrepVectorBase { return NA_INTEGER; } + // The value at position i as an int (which R needs because this is a factor) + static int Elt(SEXP alt, R_xlen_t i) { + if (Base::IsMaterialized(alt)) { + return INTEGER_ELT(Representation(alt), i); + } + + int64_t elt64 = Elt64(alt, i); + ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1); + ARROW_R_DCHECK(elt64 <= std::numeric_limits::max()); + return static_cast(elt64); + } + static R_xlen_t Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) { // If we have data2, we can just copy the region into buf // using the standard Get_region for this R type @@ -667,7 +677,7 @@ struct AltrepFactor : public AltrepVectorBase { // using the transpose data for this chunk const auto* transpose_data = reinterpret_cast(GetArrayTransposed(alt, j)->data()); - auto transpose = [transpose_data](int x) { return transpose_data[x]; }; + auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; }; GetRegionDispatch(array, indices, transpose, out); @@ -677,7 +687,7 @@ struct AltrepFactor : public AltrepVectorBase { } else { // simpler case, identity transpose - auto transpose = [](int x) { return x; }; + auto transpose = [](int64_t x) { return static_cast(x); }; int* out = buf; for (const auto& array : slice->chunks()) { @@ -718,7 +728,13 @@ struct AltrepFactor : public AltrepVectorBase { VisitArraySpanInline( *array->data(), - /*valid_func=*/[&](index_type index) { *out++ = transpose(index) + 1; }, + /*valid_func=*/ + [&](index_type index) { + int64_t transposed = transpose(index) + 1; + ARROW_R_DCHECK(transposed >= 1); + ARROW_R_DCHECK(transposed <= std::numeric_limits::max()); + *out++ = static_cast(transposed); + }, /*null_func=*/[&]() { *out++ = cpp11::na(); }); } @@ -765,7 +781,8 @@ struct AltrepVectorString : public AltrepVectorBase> { bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end(); if (no_nul) { - return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8); + ARROW_R_DCHECK(view_.size() <= std::numeric_limits::max()); + return Rf_mkCharLenCE(view_.data(), static_cast(view_.size()), CE_UTF8); } else if (strip_out_nuls_) { return ConvertStripNul(); } else { @@ -802,7 +819,9 @@ struct AltrepVectorString : public AltrepVectorBase> { } nul_was_stripped_ = true; - return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8); + ARROW_R_DCHECK(stripped_len <= std::numeric_limits::max()); + return Rf_mkCharLenCE(stripped_string_.data(), static_cast(stripped_len), + CE_UTF8); } bool nul_was_stripped() const { return nul_was_stripped_; } @@ -847,7 +866,8 @@ struct AltrepVectorString : public AltrepVectorBase> { auto altrep_data = reinterpret_cast(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; SEXP s = NA_STRING; diff --git a/r/src/array.cpp b/r/src/array.cpp index ae76c01a94910..38406e494d67b 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -92,7 +92,7 @@ std::shared_ptr Array__Slice2(const std::shared_ptr& return array->Slice(offset, length); } -void arrow::r::validate_index(int i, int len) { +void arrow::r::validate_index(int64_t i, int64_t len) { if (i == NA_INTEGER) { cpp11::stop("'i' cannot be NA"); } @@ -119,10 +119,14 @@ r_vec_size Array__length(const std::shared_ptr& x) { } // [[arrow::export]] -int Array__offset(const std::shared_ptr& x) { return x->offset(); } +r_vec_size Array__offset(const std::shared_ptr& x) { + return r_vec_size(x->offset()); +} // [[arrow::export]] -int Array__null_count(const std::shared_ptr& x) { return x->null_count(); } +r_vec_size Array__null_count(const std::shared_ptr& x) { + return r_vec_size(x->null_count()); +} // [[arrow::export]] std::shared_ptr Array__type(const std::shared_ptr& x) { @@ -263,9 +267,9 @@ r_vec_size LargeListArray__value_length( } // [[arrow::export]] -r_vec_size FixedSizeListArray__value_length( +int FixedSizeListArray__value_length( const std::shared_ptr& array, int64_t i) { - return r_vec_size(array->value_length(i)); + return array->value_length(i); } // [[arrow::export]] @@ -294,10 +298,10 @@ cpp11::writable::integers ListArray__raw_value_offsets( } // [[arrow::export]] -cpp11::writable::integers LargeListArray__raw_value_offsets( +cpp11::writable::doubles LargeListArray__raw_value_offsets( const std::shared_ptr& array) { auto offsets = array->raw_value_offsets(); - return cpp11::writable::integers(offsets, offsets + array->length()); + return cpp11::writable::doubles(offsets, offsets + array->length()); } // [[arrow::export]] diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index bf026d2723a1a..2f0508eb7a47a 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -375,7 +375,7 @@ struct Converter_String : public Converter { private: static SEXP r_string_from_view(std::string_view view) { - return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8); + return Rf_mkCharLenCE(view.data(), static_cast(view.size()), CE_UTF8); } static SEXP r_string_from_view_strip_nul(std::string_view view, @@ -576,10 +576,10 @@ class Converter_Dictionary : public Converter { const auto& arr_type = checked_cast(*chunked_array->type()); unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); - size_t n_arrays = chunked_array->num_chunks(); + int n_arrays = chunked_array->num_chunks(); arrays_transpose_.resize(n_arrays); - for (size_t i = 0; i < n_arrays; i++) { + for (int i = 0; i < n_arrays; i++) { const auto& dict_i = *checked_cast(*chunked_array->chunk(i)).dictionary(); StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i])); @@ -748,7 +748,7 @@ class Converter_Struct : public Converter { auto colnames = arrow::r::to_r_strings( type->fields(), [](const std::shared_ptr& field) { return field->name(); }); - out.attr(symbols::row_names) = arrow::r::short_row_names(n); + out.attr(symbols::row_names) = arrow::r::short_row_names(static_cast(n)); out.attr(R_NamesSymbol) = colnames; out.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df; @@ -756,7 +756,7 @@ class Converter_Struct : public Converter { } Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { - int nf = converters.size(); + int nf = static_cast(converters.size()); for (int i = 0; i < nf; i++) { SEXP data_i = VECTOR_ELT(data, i); @@ -771,7 +771,7 @@ class Converter_Struct : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { auto struct_array = checked_cast(array.get()); - int nf = converters.size(); + int nf = static_cast(converters.size()); // Flatten() deals with merging of nulls auto arrays = ValueOrStop(struct_array->Flatten(gc_memory_pool())); for (int i = 0; i < nf; i++) { @@ -1384,7 +1384,7 @@ cpp11::writable::list to_data_frame(const std::shared_ptr& data, tbl.attr(R_NamesSymbol) = names; tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df; - tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr); + tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(static_cast(nr)); return tbl; } diff --git a/r/src/arraydata.cpp b/r/src/arraydata.cpp index cdab38f1147aa..d879e807323af 100644 --- a/r/src/arraydata.cpp +++ b/r/src/arraydata.cpp @@ -26,18 +26,18 @@ std::shared_ptr ArrayData__get_type( } // [[arrow::export]] -int ArrayData__get_length(const std::shared_ptr& x) { - return x->length; +r_vec_size ArrayData__get_length(const std::shared_ptr& x) { + return r_vec_size(x->length); } // [[arrow::export]] -int ArrayData__get_null_count(const std::shared_ptr& x) { - return x->null_count; +r_vec_size ArrayData__get_null_count(const std::shared_ptr& x) { + return r_vec_size(x->null_count); } // [[arrow::export]] -int ArrayData__get_offset(const std::shared_ptr& x) { - return x->offset; +r_vec_size ArrayData__get_offset(const std::shared_ptr& x) { + return r_vec_size(x->offset); } // [[arrow::export]] diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 790207efce1d2..75e0f27b4002e 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -110,7 +110,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -int Array__offset(const std::shared_ptr& x); +r_vec_size Array__offset(const std::shared_ptr& x); extern "C" SEXP _arrow_Array__offset(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -118,7 +118,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -int Array__null_count(const std::shared_ptr& x); +r_vec_size Array__null_count(const std::shared_ptr& x); extern "C" SEXP _arrow_Array__null_count(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -315,7 +315,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -r_vec_size FixedSizeListArray__value_length(const std::shared_ptr& array, int64_t i); +int FixedSizeListArray__value_length(const std::shared_ptr& array, int64_t i); extern "C" SEXP _arrow_FixedSizeListArray__value_length(SEXP array_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -359,7 +359,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -cpp11::writable::integers LargeListArray__raw_value_offsets(const std::shared_ptr& array); +cpp11::writable::doubles LargeListArray__raw_value_offsets(const std::shared_ptr& array); extern "C" SEXP _arrow_LargeListArray__raw_value_offsets(SEXP array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -467,7 +467,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_length(const std::shared_ptr& x); +r_vec_size ArrayData__get_length(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_length(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -475,7 +475,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_null_count(const std::shared_ptr& x); +r_vec_size ArrayData__get_null_count(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_null_count(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -483,7 +483,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_offset(const std::shared_ptr& x); +r_vec_size ArrayData__get_offset(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_offset(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -765,7 +765,7 @@ BEGIN_CPP11 END_CPP11 } // chunkedarray.cpp -r_vec_size ChunkedArray__num_chunks(const std::shared_ptr& chunked_array); +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array); extern "C" SEXP _arrow_ChunkedArray__num_chunks(SEXP chunked_array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type chunked_array(chunked_array_sexp); @@ -869,11 +869,11 @@ BEGIN_CPP11 END_CPP11 } // compression.cpp -std::shared_ptr util___Codec__Create(arrow::Compression::type codec, R_xlen_t compression_level); +std::shared_ptr util___Codec__Create(arrow::Compression::type codec, int compression_level); extern "C" SEXP _arrow_util___Codec__Create(SEXP codec_sexp, SEXP compression_level_sexp){ BEGIN_CPP11 arrow::r::Input::type codec(codec_sexp); - arrow::r::Input::type compression_level(compression_level_sexp); + arrow::r::Input::type compression_level(compression_level_sexp); return cpp11::as_sexp(util___Codec__Create(codec, compression_level)); END_CPP11 } @@ -2024,14 +2024,14 @@ extern "C" SEXP _arrow_dataset___JsonFragmentScanOptions__Make(SEXP parse_option // dataset.cpp #if defined(ARROW_R_WITH_DATASET) -std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, int64_t thrift_string_size_limit, int64_t thrift_container_size_limit); +std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, int32_t thrift_string_size_limit, int32_t thrift_container_size_limit); extern "C" SEXP _arrow_dataset___ParquetFragmentScanOptions__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP pre_buffer_sexp, SEXP thrift_string_size_limit_sexp, SEXP thrift_container_size_limit_sexp){ BEGIN_CPP11 arrow::r::Input::type use_buffered_stream(use_buffered_stream_sexp); arrow::r::Input::type buffer_size(buffer_size_sexp); arrow::r::Input::type pre_buffer(pre_buffer_sexp); - arrow::r::Input::type thrift_string_size_limit(thrift_string_size_limit_sexp); - arrow::r::Input::type thrift_container_size_limit(thrift_container_size_limit_sexp); + arrow::r::Input::type thrift_string_size_limit(thrift_string_size_limit_sexp); + arrow::r::Input::type thrift_container_size_limit(thrift_container_size_limit_sexp); return cpp11::as_sexp(dataset___ParquetFragmentScanOptions__Make(use_buffered_stream, buffer_size, pre_buffer, thrift_string_size_limit, thrift_container_size_limit)); END_CPP11 } @@ -2567,10 +2567,10 @@ BEGIN_CPP11 END_CPP11 } // datatype.cpp -std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width); +std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width); extern "C" SEXP _arrow_FixedSizeBinary__initialize(SEXP byte_width_sexp){ BEGIN_CPP11 - arrow::r::Input::type byte_width(byte_width_sexp); + arrow::r::Input::type byte_width(byte_width_sexp); return cpp11::as_sexp(FixedSizeBinary__initialize(byte_width)); END_CPP11 } @@ -3976,7 +3976,7 @@ BEGIN_CPP11 END_CPP11 } // message.cpp -r_vec_size ipc___Message__Verify(const std::unique_ptr& message); +bool ipc___Message__Verify(const std::unique_ptr& message); extern "C" SEXP _arrow_ipc___Message__Verify(SEXP message_sexp){ BEGIN_CPP11 arrow::r::Input&>::type message(message_sexp); @@ -4684,7 +4684,7 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -r_vec_size RecordBatch__num_columns(const std::shared_ptr& x); +int RecordBatch__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_RecordBatch__num_columns(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -4734,11 +4734,11 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, R_xlen_t i); +std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__column(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__column(batch, i)); END_CPP11 } @@ -4771,42 +4771,42 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__AddColumn(const std::shared_ptr& batch, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr RecordBatch__AddColumn(const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_RecordBatch__AddColumn(SEXP batch_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(RecordBatch__AddColumn(batch, i, field, column)); END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__SetColumn(const std::shared_ptr& batch, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr RecordBatch__SetColumn(const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_RecordBatch__SetColumn(SEXP batch_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(RecordBatch__SetColumn(batch, i, field, column)); END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, R_xlen_t i); +std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__RemoveColumn(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__RemoveColumn(batch, i)); END_CPP11 } // recordbatch.cpp -std::string RecordBatch__column_name(const std::shared_ptr& batch, R_xlen_t i); +std::string RecordBatch__column_name(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__column_name(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__column_name(batch, i)); END_CPP11 } @@ -5346,7 +5346,7 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -r_vec_size Table__num_columns(const std::shared_ptr& x); +int Table__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_Table__num_columns(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -5379,20 +5379,20 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -std::shared_ptr Table__column(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__column(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__column(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__column(table, i)); END_CPP11 } // table.cpp -std::shared_ptr Table__field(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__field(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__field(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__field(table, i)); END_CPP11 } @@ -5476,31 +5476,31 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -std::shared_ptr Table__RemoveColumn(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__RemoveColumn(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__RemoveColumn(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__RemoveColumn(table, i)); END_CPP11 } // table.cpp -std::shared_ptr Table__AddColumn(const std::shared_ptr& table, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr Table__AddColumn(const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_Table__AddColumn(SEXP table_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(Table__AddColumn(table, i, field, column)); END_CPP11 } // table.cpp -std::shared_ptr Table__SetColumn(const std::shared_ptr& table, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr Table__SetColumn(const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_Table__SetColumn(SEXP table_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(Table__SetColumn(table, i, field, column)); diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index d8c4b719d1d3e..ab60586628164 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -27,6 +27,18 @@ #include "./nameof.h" +// Simple dcheck that doesn't use assert (i.e., won't crash the R session) +// Condition this on our own debug flag to avoid this ending up in any CRAN +// checks. +#if defined(ARROW_R_DEBUG) +#define ARROW_R_DCHECK(EXPR) \ + do { \ + if (!(EXPR)) Rf_error("Failed DCHECK: %s evaluated to false", #EXPR); \ + } while (false) +#else +#define ARROW_R_DCHECK(EXPR) +#endif + // borrowed from enc package // because R does not make these macros available (i.e. from Defn.h) #define UTF8_MASK (1 << 3) @@ -465,7 +477,7 @@ inline SEXP as_sexp(r_vec_size size) { if (x > std::numeric_limits::max()) { return Rf_ScalarReal(x); } else { - return Rf_ScalarInteger(x); + return Rf_ScalarInteger(static_cast(x)); } } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index fadc39c75fc06..05c8f6062dabb 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -189,13 +189,13 @@ void validate_slice_offset(R_xlen_t offset, int64_t len); void validate_slice_length(R_xlen_t length, int64_t available); -void validate_index(int i, int len); +void validate_index(int64_t i, int64_t len); template void TraverseDots(cpp11::list dots, int num_fields, Lambda lambda) { cpp11::strings names(dots.attr(R_NamesSymbol)); - for (R_xlen_t i = 0, j = 0; j < num_fields; i++) { + for (int i = 0, j = 0; j < num_fields; i++) { auto name_i = names[i]; if (name_i.size() == 0) { diff --git a/r/src/chunkedarray.cpp b/r/src/chunkedarray.cpp index 36884bb531b62..258013fc4da57 100644 --- a/r/src/chunkedarray.cpp +++ b/r/src/chunkedarray.cpp @@ -34,9 +34,8 @@ r_vec_size ChunkedArray__null_count( } // [[arrow::export]] -r_vec_size ChunkedArray__num_chunks( - const std::shared_ptr& chunked_array) { - return r_vec_size(chunked_array->num_chunks()); +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array) { + return chunked_array->num_chunks(); } // [[arrow::export]] diff --git a/r/src/compression.cpp b/r/src/compression.cpp index 148c6e14002f5..bc893afd8d28b 100644 --- a/r/src/compression.cpp +++ b/r/src/compression.cpp @@ -22,7 +22,7 @@ // [[arrow::export]] std::shared_ptr util___Codec__Create(arrow::Compression::type codec, - R_xlen_t compression_level) { + int compression_level) { return ValueOrStop(arrow::util::Codec::Create(codec, compression_level)); } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 87d1326ed3419..bd97e30005ca3 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -241,10 +241,10 @@ std::shared_ptr make_compute_options( interpolation); } if (!Rf_isNull(options["min_count"])) { - out->min_count = cpp11::as_cpp(options["min_count"]); + out->min_count = cpp11::as_cpp(options["min_count"]); } if (!Rf_isNull(options["skip_nulls"])) { - out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); + out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } return out; } @@ -479,9 +479,9 @@ std::shared_ptr make_compute_options( func_name == "hash_stddev") { using Options = arrow::compute::VarianceOptions; auto out = std::make_shared(); - out->ddof = cpp11::as_cpp(options["ddof"]); + out->ddof = cpp11::as_cpp(options["ddof"]); if (!Rf_isNull(options["min_count"])) { - out->min_count = cpp11::as_cpp(options["min_count"]); + out->min_count = cpp11::as_cpp(options["min_count"]); } if (!Rf_isNull(options["skip_nulls"])) { out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); @@ -683,7 +683,7 @@ arrow::Status CallRScalarUDF(arrow::compute::KernelContext* context, } } - cpp11::sexp batch_length_sexp = cpp11::as_sexp(span.length); + cpp11::sexp batch_length_sexp = cpp11::as_sexp(static_cast(span.length)); std::shared_ptr output_type = result->type()->GetSharedPtr(); cpp11::sexp output_type_sexp = cpp11::to_r6(output_type); @@ -738,8 +738,7 @@ void RegisterScalarUDF(std::string name, cpp11::list func_sexp) { // Compute the Arity from the list of input kernels. We don't currently handle // variable numbers of arguments in a user-defined function. - int64_t n_args = - cpp11::as_cpp>(in_type_r[0])->num_fields(); + int n_args = cpp11::as_cpp>(in_type_r[0])->num_fields(); for (R_xlen_t i = 1; i < n_kernels; i++) { auto in_types = cpp11::as_cpp>(in_type_r[i]); if (in_types->num_fields() != n_args) { @@ -767,7 +766,7 @@ void RegisterScalarUDF(std::string name, cpp11::list func_sexp) { cpp11::sexp out_type_func = out_type_r[i]; std::vector compute_in_types(in_types->num_fields()); - for (int64_t j = 0; j < in_types->num_fields(); j++) { + for (int j = 0; j < in_types->num_fields(); j++) { compute_in_types[j] = arrow::compute::InputType(in_types->field(j)->type()); } diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index 83c430fb634d3..e53fc03bdb413 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -343,8 +343,8 @@ std::shared_ptr dataset___JsonFragmentScanOptions__ std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, - int64_t thrift_string_size_limit, - int64_t thrift_container_size_limit) { + int32_t thrift_string_size_limit, + int32_t thrift_container_size_limit) { auto options = std::make_shared(); if (use_buffered_stream) { options->reader_properties->enable_buffered_stream(); diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index f19ba92527157..2f2b89d658d91 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -201,7 +201,7 @@ std::shared_ptr DayTimeInterval__initialize() { } // [[arrow::export]] -std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width) { +std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width) { if (byte_width == NA_INTEGER) { cpp11::stop("'byte_width' cannot be NA"); } diff --git a/r/src/io.cpp b/r/src/io.cpp index 321b1b17febc3..4d5ee31794ae8 100644 --- a/r/src/io.cpp +++ b/r/src/io.cpp @@ -253,11 +253,16 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface { return arrow::Status::IOError("R connection is closed"); } + if (nbytes > std::numeric_limits::max()) { + return arrow::Status::Invalid( + "Can't read more than INT_MAX bytes from an R connection"); + } + return SafeCallIntoR( [&] { cpp11::function read_bin = cpp11::package("base")["readBin"]; cpp11::writable::raws ptype((R_xlen_t)0); - cpp11::integers n = cpp11::as_sexp(nbytes); + cpp11::integers n = cpp11::as_sexp(static_cast(nbytes)); cpp11::sexp result = read_bin(connection_sexp_, ptype, n); @@ -512,8 +517,8 @@ struct ReencodeUTF8TransformFunctionWrapper { // UTF-16, and UTF-32. while (in_bytes_left > 0) { // Make enough place in the output to hopefully consume all of the input. - RETURN_NOT_OK( - builder.Reserve(std::max(in_bytes_left * kOversizeFactor, 4))); + RETURN_NOT_OK(builder.Reserve( + std::max(static_cast(in_bytes_left * kOversizeFactor), 4))); out_buf = builder.mutable_data() + builder.length(); out_bytes_left = builder.capacity() - builder.length(); diff --git a/r/src/message.cpp b/r/src/message.cpp index d9832ddc22a74..3f21873fea3b2 100644 --- a/r/src/message.cpp +++ b/r/src/message.cpp @@ -39,8 +39,8 @@ std::shared_ptr ipc___Message__body( } // [[arrow::export]] -r_vec_size ipc___Message__Verify(const std::unique_ptr& message) { - return r_vec_size(message->Verify()); +bool ipc___Message__Verify(const std::unique_ptr& message) { + return message->Verify(); } // [[arrow::export]] diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d9bf848e24292..d2db11e14a787 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -335,7 +335,7 @@ struct RConvert { template static enable_if_integer> Convert(Type*, From from) { - return CIntFromRScalarImpl(from); + return CIntFromRScalarImpl(static_cast(from)); } // ---- convert R integer types to double @@ -461,7 +461,7 @@ class RPrimitiveConverter< if (std::is_same::value) { auto append_value = [this](r_value_type value) { - this->primitive_builder_->UnsafeAppend(value); + this->primitive_builder_->UnsafeAppend(static_cast(value)); return Status::OK(); }; return VisitVector(it, size, append_null, append_value); @@ -595,19 +595,21 @@ class RPrimitiveConverter::value>> return VisitVector(it, size, append_null, append_value); } - static int FromRDate(const Date32Type*, int from) { return from; } + static int FromRDate(const Date32Type*, double from) { return static_cast(from); } - static int64_t FromRDate(const Date64Type*, int from) { + static int64_t FromRDate(const Date64Type*, double from) { constexpr int64_t kMilliSecondsPerDay = 86400000; - return from * kMilliSecondsPerDay; + return static_cast(from * kMilliSecondsPerDay); } static int FromPosixct(const Date32Type*, double from) { constexpr int64_t kSecondsPerDay = 86400; - return from / kSecondsPerDay; + return static_cast(from / kSecondsPerDay); } - static int64_t FromPosixct(const Date64Type*, double from) { return from * 1000; } + static int64_t FromPosixct(const Date64Type*, double from) { + return static_cast(from * 1000); + } }; int64_t get_TimeUnit_multiplier(TimeUnit::type unit) { @@ -1081,7 +1083,7 @@ class RListConverter : public ListConverter { auto append_value = [this](SEXP value) { // TODO: if we decide that this can be run concurrently // we'll have to do vec_size() upfront - int n = arrow::r::vec_size(value); + R_xlen_t n = arrow::r::vec_size(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); RETURN_NOT_OK(this->list_builder_->Append()); diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index aca3a74fd81df..bf88e98ed1026 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -27,8 +27,8 @@ #include // [[arrow::export]] -r_vec_size RecordBatch__num_columns(const std::shared_ptr& x) { - return r_vec_size(x->num_columns()); +int RecordBatch__num_columns(const std::shared_ptr& x) { + return x->num_columns(); } // [[arrow::export]] @@ -80,7 +80,7 @@ cpp11::list RecordBatch__columns(const std::shared_ptr& batc // [[arrow::export]] std::shared_ptr RecordBatch__column( - const std::shared_ptr& batch, R_xlen_t i) { + const std::shared_ptr& batch, int i) { arrow::r::validate_index(i, batch->num_columns()); return batch->column(i); } @@ -106,7 +106,7 @@ bool RecordBatch__Equals(const std::shared_ptr& self, // [[arrow::export]] std::shared_ptr RecordBatch__AddColumn( - const std::shared_ptr& batch, R_xlen_t i, + const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(batch->AddColumn(i, field, column)); @@ -114,7 +114,7 @@ std::shared_ptr RecordBatch__AddColumn( // [[arrow::export]] std::shared_ptr RecordBatch__SetColumn( - const std::shared_ptr& batch, R_xlen_t i, + const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(batch->SetColumn(i, field, column)); @@ -122,14 +122,14 @@ std::shared_ptr RecordBatch__SetColumn( // [[arrow::export]] std::shared_ptr RecordBatch__RemoveColumn( - const std::shared_ptr& batch, R_xlen_t i) { + const std::shared_ptr& batch, int i) { arrow::r::validate_index(i, batch->num_columns()); return ValueOrStop(batch->RemoveColumn(i)); } // [[arrow::export]] std::string RecordBatch__column_name(const std::shared_ptr& batch, - R_xlen_t i) { + int i) { arrow::r::validate_index(i, batch->num_columns()); return batch->column_name(i); } diff --git a/r/src/schema.cpp b/r/src/schema.cpp index cf959707305a7..41d3d38d2eda3 100644 --- a/r/src/schema.cpp +++ b/r/src/schema.cpp @@ -29,14 +29,14 @@ std::shared_ptr Schema__from_fields( // [[arrow::export]] std::shared_ptr Schema__from_list(cpp11::list field_list) { - int n = field_list.size(); + R_xlen_t n = field_list.size(); bool nullable = true; cpp11::strings names(field_list.attr(R_NamesSymbol)); std::vector> fields(n); - for (int i = 0; i < n; i++) { + for (R_xlen_t i = 0; i < n; i++) { fields[i] = arrow::field( names[i], cpp11::as_cpp>(field_list[i]), nullable); diff --git a/r/src/table.cpp b/r/src/table.cpp index 04537000f5d48..04a8c7caf24fd 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -23,8 +23,8 @@ #include // [[arrow::export]] -r_vec_size Table__num_columns(const std::shared_ptr& x) { - return r_vec_size(x->num_columns()); +int Table__num_columns(const std::shared_ptr& x) { + return x->num_columns(); } // [[arrow::export]] @@ -49,14 +49,14 @@ std::shared_ptr Table__ReplaceSchemaMetadata( // [[arrow::export]] std::shared_ptr Table__column( - const std::shared_ptr& table, R_xlen_t i) { + const std::shared_ptr& table, int i) { arrow::r::validate_index(i, table->num_columns()); return table->column(i); } // [[arrow::export]] std::shared_ptr Table__field(const std::shared_ptr& table, - R_xlen_t i) { + int i) { arrow::r::validate_index(i, table->num_columns()); return table->field(i); } @@ -123,13 +123,13 @@ std::shared_ptr Table__GetColumnByName( // [[arrow::export]] std::shared_ptr Table__RemoveColumn( - const std::shared_ptr& table, R_xlen_t i) { + const std::shared_ptr& table, int i) { return ValueOrStop(table->RemoveColumn(i)); } // [[arrow::export]] std::shared_ptr Table__AddColumn( - const std::shared_ptr& table, R_xlen_t i, + const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(table->AddColumn(i, field, column)); @@ -137,7 +137,7 @@ std::shared_ptr Table__AddColumn( // [[arrow::export]] std::shared_ptr Table__SetColumn( - const std::shared_ptr& table, R_xlen_t i, + const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(table->SetColumn(i, field, column)); @@ -241,7 +241,7 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, // Remove metadata for ExtensionType columns, because these have their own mechanism for // preserving R type information - for (R_xlen_t i = 0; i < schema->num_fields(); i++) { + for (int i = 0; i < schema->num_fields(); i++) { if (schema->field(i)->type()->id() == Type::EXTENSION) { metadata_columns[i] = R_NilValue; }