Skip to content

Commit

Permalink
apacheGH-39138: [R] Fix implicit conversion warnings (apache#39250)
Browse files Browse the repository at this point in the history
### Rationale for this change

We have failing CRAN checks because this warning occurs on one check machine.

### What changes are included in this PR?

Implicit integer casts are made explicit and/or variable declarations were fixed so that fewer implicit integer casts were performed. Fully solving the warnings also requires r-lib/cpp11#349 since some errors occur in those headers.

### Are these changes tested?

This particular test we can't do on CI because the MacOS runner we have doesn't have a new enough `clang` to support the requisite `-W` flags. I tested this locally by adding `PKG_CXXFLAGS=-Wconversion -Wno-sign-conversion -Wsign-compare -Werror` to `Makevars.in`.

### Are there any user-facing changes?

No
* Closes: apache#39138

Authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
  • Loading branch information
paleolimbot authored and dgreiss committed Feb 17, 2024
1 parent eb382a4 commit 0bd4842
Show file tree
Hide file tree
Showing 18 changed files with 165 additions and 124 deletions.
56 changes: 38 additions & 18 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
auto altrep_data =
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
auto resolve = altrep_data->locate(i);
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
const auto& array =
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
auto j = resolve.index_in_chunk;

return array->IsNull(j) ? cpp11::na<c_type>()
Expand Down Expand Up @@ -466,10 +467,10 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
std::unique_ptr<arrow::DictionaryUnifier> unifier_ =
ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));

size_t n_arrays = chunked_array->num_chunks();
int n_arrays = chunked_array->num_chunks();
BufferVector arrays_transpose(n_arrays);

for (size_t i = 0; i < n_arrays; i++) {
for (int i = 0; i < n_arrays; i++) {
const auto& dict_i =
*internal::checked_cast<const DictionaryArray&>(*chunked_array->chunk(i))
.dictionary();
Expand Down Expand Up @@ -559,17 +560,14 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
return dup;
}

// The value at position i
static int Elt(SEXP alt, R_xlen_t i) {
if (Base::IsMaterialized(alt)) {
return INTEGER_ELT(Representation(alt), i);
}

// The value at position i as an int64_t (to make bounds checking less verbose)
static int64_t Elt64(SEXP alt, R_xlen_t i) {
auto altrep_data =
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
auto resolve = altrep_data->locate(i);

const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
const auto& array =
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
auto j = resolve.index_in_chunk;

if (!array->IsNull(j)) {
Expand All @@ -578,7 +576,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {

if (WasUnified(alt)) {
const auto* transpose_data = reinterpret_cast<const int32_t*>(
GetArrayTransposed(alt, resolve.chunk_index)->data());
GetArrayTransposed(alt, static_cast<int>(resolve.chunk_index))->data());

switch (indices->type_id()) {
case Type::UINT8:
Expand Down Expand Up @@ -617,7 +615,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
case Type::INT64:
return indices->data()->GetValues<int64_t>(1)[j] + 1;
case Type::UINT64:
return indices->data()->GetValues<uint64_t>(1)[j] + 1;
return static_cast<int64_t>(indices->data()->GetValues<uint64_t>(1)[j] + 1);
default:
break;
}
Expand All @@ -628,6 +626,18 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
return NA_INTEGER;
}

// The value at position i as an int (which R needs because this is a factor)
static int Elt(SEXP alt, R_xlen_t i) {
if (Base::IsMaterialized(alt)) {
return INTEGER_ELT(Representation(alt), i);
}

int64_t elt64 = Elt64(alt, i);
ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1);
ARROW_R_DCHECK(elt64 <= std::numeric_limits<int>::max());
return static_cast<int>(elt64);
}

static R_xlen_t Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) {
// If we have data2, we can just copy the region into buf
// using the standard Get_region for this R type
Expand Down Expand Up @@ -667,7 +677,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
// using the transpose data for this chunk
const auto* transpose_data =
reinterpret_cast<const int32_t*>(GetArrayTransposed(alt, j)->data());
auto transpose = [transpose_data](int x) { return transpose_data[x]; };
auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; };

GetRegionDispatch(array, indices, transpose, out);

Expand All @@ -677,7 +687,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {

} else {
// simpler case, identity transpose
auto transpose = [](int x) { return x; };
auto transpose = [](int64_t x) { return static_cast<int>(x); };

int* out = buf;
for (const auto& array : slice->chunks()) {
Expand Down Expand Up @@ -718,7 +728,13 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {

VisitArraySpanInline<Type>(
*array->data(),
/*valid_func=*/[&](index_type index) { *out++ = transpose(index) + 1; },
/*valid_func=*/
[&](index_type index) {
int64_t transposed = transpose(index) + 1;
ARROW_R_DCHECK(transposed >= 1);
ARROW_R_DCHECK(transposed <= std::numeric_limits<int>::max());
*out++ = static_cast<int>(transposed);
},
/*null_func=*/[&]() { *out++ = cpp11::na<int>(); });
}

Expand Down Expand Up @@ -765,7 +781,8 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end();

if (no_nul) {
return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8);
ARROW_R_DCHECK(view_.size() <= std::numeric_limits<int>::max());
return Rf_mkCharLenCE(view_.data(), static_cast<int>(view_.size()), CE_UTF8);
} else if (strip_out_nuls_) {
return ConvertStripNul();
} else {
Expand Down Expand Up @@ -802,7 +819,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
}

nul_was_stripped_ = true;
return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8);
ARROW_R_DCHECK(stripped_len <= std::numeric_limits<int>::max());
return Rf_mkCharLenCE(stripped_string_.data(), static_cast<int>(stripped_len),
CE_UTF8);
}

bool nul_was_stripped() const { return nul_was_stripped_; }
Expand Down Expand Up @@ -847,7 +866,8 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
auto altrep_data =
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
auto resolve = altrep_data->locate(i);
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
const auto& array =
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
auto j = resolve.index_in_chunk;

SEXP s = NA_STRING;
Expand Down
18 changes: 11 additions & 7 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ std::shared_ptr<arrow::Array> Array__Slice2(const std::shared_ptr<arrow::Array>&
return array->Slice(offset, length);
}

void arrow::r::validate_index(int i, int len) {
void arrow::r::validate_index(int64_t i, int64_t len) {
if (i == NA_INTEGER) {
cpp11::stop("'i' cannot be NA");
}
Expand All @@ -119,10 +119,14 @@ r_vec_size Array__length(const std::shared_ptr<arrow::Array>& x) {
}

// [[arrow::export]]
int Array__offset(const std::shared_ptr<arrow::Array>& x) { return x->offset(); }
r_vec_size Array__offset(const std::shared_ptr<arrow::Array>& x) {
return r_vec_size(x->offset());
}

// [[arrow::export]]
int Array__null_count(const std::shared_ptr<arrow::Array>& x) { return x->null_count(); }
r_vec_size Array__null_count(const std::shared_ptr<arrow::Array>& x) {
return r_vec_size(x->null_count());
}

// [[arrow::export]]
std::shared_ptr<arrow::DataType> Array__type(const std::shared_ptr<arrow::Array>& x) {
Expand Down Expand Up @@ -263,9 +267,9 @@ r_vec_size LargeListArray__value_length(
}

// [[arrow::export]]
r_vec_size FixedSizeListArray__value_length(
int FixedSizeListArray__value_length(
const std::shared_ptr<arrow::FixedSizeListArray>& array, int64_t i) {
return r_vec_size(array->value_length(i));
return array->value_length(i);
}

// [[arrow::export]]
Expand Down Expand Up @@ -294,10 +298,10 @@ cpp11::writable::integers ListArray__raw_value_offsets(
}

// [[arrow::export]]
cpp11::writable::integers LargeListArray__raw_value_offsets(
cpp11::writable::doubles LargeListArray__raw_value_offsets(
const std::shared_ptr<arrow::LargeListArray>& array) {
auto offsets = array->raw_value_offsets();
return cpp11::writable::integers(offsets, offsets + array->length());
return cpp11::writable::doubles(offsets, offsets + array->length());
}

// [[arrow::export]]
Expand Down
14 changes: 7 additions & 7 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ struct Converter_String : public Converter {

private:
static SEXP r_string_from_view(std::string_view view) {
return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8);
return Rf_mkCharLenCE(view.data(), static_cast<int>(view.size()), CE_UTF8);
}

static SEXP r_string_from_view_strip_nul(std::string_view view,
Expand Down Expand Up @@ -576,10 +576,10 @@ class Converter_Dictionary : public Converter {
const auto& arr_type = checked_cast<const DictionaryType&>(*chunked_array->type());
unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));

size_t n_arrays = chunked_array->num_chunks();
int n_arrays = chunked_array->num_chunks();
arrays_transpose_.resize(n_arrays);

for (size_t i = 0; i < n_arrays; i++) {
for (int i = 0; i < n_arrays; i++) {
const auto& dict_i =
*checked_cast<const DictionaryArray&>(*chunked_array->chunk(i)).dictionary();
StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i]));
Expand Down Expand Up @@ -748,15 +748,15 @@ class Converter_Struct : public Converter {
auto colnames = arrow::r::to_r_strings(
type->fields(),
[](const std::shared_ptr<Field>& field) { return field->name(); });
out.attr(symbols::row_names) = arrow::r::short_row_names(n);
out.attr(symbols::row_names) = arrow::r::short_row_names(static_cast<int>(n));
out.attr(R_NamesSymbol) = colnames;
out.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;

return out;
}

Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const {
int nf = converters.size();
int nf = static_cast<int>(converters.size());
for (int i = 0; i < nf; i++) {
SEXP data_i = VECTOR_ELT(data, i);

Expand All @@ -771,7 +771,7 @@ class Converter_Struct : public Converter {
Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
auto struct_array = checked_cast<const arrow::StructArray*>(array.get());
int nf = converters.size();
int nf = static_cast<int>(converters.size());
// Flatten() deals with merging of nulls
auto arrays = ValueOrStop(struct_array->Flatten(gc_memory_pool()));
for (int i = 0; i < nf; i++) {
Expand Down Expand Up @@ -1384,7 +1384,7 @@ cpp11::writable::list to_data_frame(const std::shared_ptr<Rectangle>& data,

tbl.attr(R_NamesSymbol) = names;
tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;
tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr);
tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(static_cast<int>(nr));

return tbl;
}
Expand Down
12 changes: 6 additions & 6 deletions r/src/arraydata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@ std::shared_ptr<arrow::DataType> ArrayData__get_type(
}

// [[arrow::export]]
int ArrayData__get_length(const std::shared_ptr<arrow::ArrayData>& x) {
return x->length;
r_vec_size ArrayData__get_length(const std::shared_ptr<arrow::ArrayData>& x) {
return r_vec_size(x->length);
}

// [[arrow::export]]
int ArrayData__get_null_count(const std::shared_ptr<arrow::ArrayData>& x) {
return x->null_count;
r_vec_size ArrayData__get_null_count(const std::shared_ptr<arrow::ArrayData>& x) {
return r_vec_size(x->null_count);
}

// [[arrow::export]]
int ArrayData__get_offset(const std::shared_ptr<arrow::ArrayData>& x) {
return x->offset;
r_vec_size ArrayData__get_offset(const std::shared_ptr<arrow::ArrayData>& x) {
return r_vec_size(x->offset);
}

// [[arrow::export]]
Expand Down
Loading

0 comments on commit 0bd4842

Please sign in to comment.