diff --git a/cpp/.clang-format b/.clang-format similarity index 100% rename from cpp/.clang-format rename to .clang-format diff --git a/cpp/.clang-tidy b/.clang-tidy similarity index 100% rename from cpp/.clang-tidy rename to .clang-tidy diff --git a/cpp/.clang-tidy-ignore b/.clang-tidy-ignore similarity index 100% rename from cpp/.clang-tidy-ignore rename to .clang-tidy-ignore diff --git a/.travis.yml b/.travis.yml index e37c9265577e8..9e347c1a13a8c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -249,11 +249,14 @@ matrix: - language: r cache: packages latex: false + before_script: + - if [ $ARROW_CI_R_AFFECTED != "1" ]; then exit; fi before_install: - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib - export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$TRAVIS_BUILD_DIR/cpp-install/lib/pkgconfig + - $TRAVIS_BUILD_DIR/ci/travis_lint.sh - pushd ${TRAVIS_BUILD_DIR}/r diff --git a/ci/detect-changes.py b/ci/detect-changes.py index df041b921c811..e9a647c5e6d9c 100644 --- a/ci/detect-changes.py +++ b/ci/detect-changes.py @@ -26,7 +26,8 @@ perr = functools.partial(print, file=sys.stderr) -LANGUAGE_TOPICS = ['c_glib', 'cpp', 'go', 'java', 'js', 'python', 'ruby', 'rust'] +LANGUAGE_TOPICS = ['c_glib', 'cpp', 'go', 'java', 'js', 'python', + 'r', 'ruby', 'rust'] ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'site', 'dev'] @@ -66,7 +67,8 @@ def get_travis_head_commit(): def get_travis_commit_range(): cr = os.environ['TRAVIS_COMMIT_RANGE'] - # See https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122 + # See + # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122 return cr.replace('...', '..') @@ -136,7 +138,7 @@ def get_affected_topics(affected_files): break elif p in ('cpp', 'format'): # Test C++ and bindings to the C++ library - for k in ('cpp', 'python', 'c_glib', 'ruby', 'integration'): + for k in ('cpp', 'python', 'c_glib', 'r', 'ruby', 'integration'): affected[k] = True elif p in ('java', 'js'): affected[p] = True @@ -144,8 +146,8 @@ def get_affected_topics(affected_files): elif p in ('c_glib'): affected[p] = True affected['ruby'] = True - elif p in ('go', 'integration', 'python', 'ruby', 'rust', 'site', - 'dev'): + elif p in ('go', 'integration', 'python', 'r', 'ruby', 'rust', + 'site', 'dev'): affected[p] = True return affected @@ -174,8 +176,8 @@ def get_windows_shell_eval(env): def run_from_travis(): if (os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and - os.environ['TRAVIS_BRANCH'] == 'master' and - os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'): + os.environ['TRAVIS_BRANCH'] == 'master' and + os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'): # Never skip anything on master builds in the official repository affected = dict.fromkeys(ALL_TOPICS, True) else: @@ -219,14 +221,14 @@ def run_from_appveyor(): if os.environ.get('TRAVIS'): try: print(run_from_travis()) - except: + except Exception: # Make sure the enclosing eval will return an error print("exit 1") raise elif os.environ.get('APPVEYOR'): try: print(run_from_appveyor()) - except: + except Exception: print("exit 1") raise else: diff --git a/ci/travis_lint.sh b/ci/travis_lint.sh index 8ebb58483f8d7..b8059f07ae94c 100755 --- a/ci/travis_lint.sh +++ b/ci/travis_lint.sh @@ -65,3 +65,9 @@ if [ "$ARROW_CI_PYTHON_AFFECTED" != "0" ]; then --config=$ARROW_PYTHON_DIR/.flake8.cython \ $ARROW_PYTHON_DIR fi + +if [ "$ARROW_CI_R_AFFECTED" != "0" ]; then + pushd $ARROW_R_DIR + ./lint.sh + popd +fi diff --git a/cpp/build-support/clang_format_exclusions.txt b/cpp/build-support/clang_format_exclusions.txt index d31d8a00d2ab7..66f35329d0c64 100644 --- a/cpp/build-support/clang_format_exclusions.txt +++ b/cpp/build-support/clang_format_exclusions.txt @@ -8,3 +8,4 @@ *thirdparty/ae/* *xxhash.cc *xxhash.h +*RcppExports.cpp* diff --git a/cpp/build-support/run_clang_format.py b/cpp/build-support/run_clang_format.py index 6edac5f0e9cbe..d0cc2b0f0f9dd 100755 --- a/cpp/build-support/run_clang_format.py +++ b/cpp/build-support/run_clang_format.py @@ -54,7 +54,9 @@ fullpaths = (os.path.join(directory, filename) for filename in filenames) source_files = [x for x in fullpaths - if x.endswith(".h") or x.endswith(".cc")] + if x.endswith(".h") or + x.endswith(".cc") or + x.endswith(".cpp")] formatted_filenames.extend( # Filter out files that match the globs in the globs file [filename for filename in source_files diff --git a/r/README.md b/r/README.md index 0ac781526a165..868fdff0a06e0 100644 --- a/r/README.md +++ b/r/README.md @@ -38,15 +38,15 @@ tf <- tempfile() #> # A tibble: 10 x 2 #> x y #> -#> 1 1 -0.255 -#> 2 2 -0.162 -#> 3 3 -0.614 -#> 4 4 -0.322 +#> 1 1 -0.255 +#> 2 2 -0.162 +#> 3 3 -0.614 +#> 4 4 -0.322 #> 5 5 0.0693 -#> 6 6 -0.920 -#> 7 7 -1.08 -#> 8 8 0.658 -#> 9 9 0.821 +#> 6 6 -0.920 +#> 7 7 -1.08 +#> 8 8 0.658 +#> 9 9 0.821 #> 10 10 0.539 arrow::write_arrow(tib, tf) @@ -56,14 +56,30 @@ as_tibble(pa$open_file(tf)$read_pandas()) #> # A tibble: 10 x 2 #> x y #> -#> 1 1 -0.255 -#> 2 2 -0.162 -#> 3 3 -0.614 -#> 4 4 -0.322 +#> 1 1 -0.255 +#> 2 2 -0.162 +#> 3 3 -0.614 +#> 4 4 -0.322 #> 5 5 0.0693 -#> 6 6 -0.920 -#> 7 7 -1.08 -#> 8 8 0.658 -#> 9 9 0.821 +#> 6 6 -0.920 +#> 7 7 -1.08 +#> 8 8 0.658 +#> 9 9 0.821 #> 10 10 0.539 ``` + +## Development + +### Code style + +We use Google C++ style in our C++ code. Check for style errors with + +``` +./lint.sh +``` + +You can fix the style issues with + +``` +./lint.sh --fix +``` \ No newline at end of file diff --git a/r/lint.sh b/r/lint.sh new file mode 100755 index 0000000000000..14e457d1647c2 --- /dev/null +++ b/r/lint.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +CPP_BUILD_SUPPORT=$SOURCE_DIR/../cpp/build-support + +LLVM_VERSION=6.0 +CLANG_FORMAT=clang-format-$LLVM_VERSION + +$CPP_BUILD_SUPPORT/run_clang_format.py $CLANG_FORMAT \ + $CPP_BUILD_SUPPORT/clang_format_exclusions.txt \ + $SOURCE_DIR/src --quiet $1 diff --git a/r/src/ArrayData.cpp b/r/src/ArrayData.cpp index fc6fba14607b5..2e284cdce825c 100644 --- a/r/src/ArrayData.cpp +++ b/r/src/ArrayData.cpp @@ -20,21 +20,22 @@ using namespace Rcpp; // [[Rcpp::export]] -std::shared_ptr ArrayData__get_type(const std::shared_ptr& x){ +std::shared_ptr ArrayData__get_type( + const std::shared_ptr& x) { return x->type; } // [[Rcpp::export]] -int ArrayData__get_length(const std::shared_ptr& x){ +int ArrayData__get_length(const std::shared_ptr& x) { return x->length; } // [[Rcpp::export]] -int ArrayData__get_null_count(const std::shared_ptr& x){ +int ArrayData__get_null_count(const std::shared_ptr& x) { return x->null_count; } // [[Rcpp::export]] -int ArrayData__get_offset(const std::shared_ptr& x){ +int ArrayData__get_offset(const std::shared_ptr& x) { return x->offset; } diff --git a/r/src/ChunkedArray.cpp b/r/src/ChunkedArray.cpp index 66b83cb2dba09..59f21f599b18d 100644 --- a/r/src/ChunkedArray.cpp +++ b/r/src/ChunkedArray.cpp @@ -21,34 +21,33 @@ using namespace Rcpp; using namespace arrow; template -inline SEXP simple_ChunkedArray_to_Vector(const std::shared_ptr& chunked_array){ +inline SEXP simple_ChunkedArray_to_Vector( + const std::shared_ptr& chunked_array) { using stored_type = typename Rcpp::Vector::stored_type; Rcpp::Vector out = no_init(chunked_array->length()); auto p = out.begin(); int k = 0; - for (int i=0; inum_chunks(); i++) { + for (int i = 0; i < chunked_array->num_chunks(); i++) { auto chunk = chunked_array->chunk(i); auto n = chunk->length(); // copy the data auto q = p; p = std::copy_n( - reinterpret_cast( - chunk->data()->buffers[1]->data() + chunk->offset() * sizeof(stored_type) - ), - n, p); + reinterpret_cast(chunk->data()->buffers[1]->data() + + chunk->offset() * sizeof(stored_type)), + n, p); // set NA using the bitmap auto bitmap_data = chunk->null_bitmap(); if (bitmap_data && RTYPE != RAWSXP) { - arrow::internal::BitmapReader bitmap_reader( - bitmap_data->data(), chunk->offset(), n - ); + arrow::internal::BitmapReader bitmap_reader(bitmap_data->data(), chunk->offset(), + n); - for (int j=0; j::get_na(); + q[k + j] = Rcpp::Vector::get_na(); } } } @@ -58,43 +57,47 @@ inline SEXP simple_ChunkedArray_to_Vector(const std::shared_ptr& chunked_array){ +int ChunkedArray__length(const std::shared_ptr& chunked_array) { return chunked_array->length(); } // [[Rcpp::export]] -int ChunkedArray__null_count(const std::shared_ptr& chunked_array){ +int ChunkedArray__null_count(const std::shared_ptr& chunked_array) { return chunked_array->null_count(); } // [[Rcpp::export]] -int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array){ +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array) { return chunked_array->num_chunks(); } // [[Rcpp::export]] -std::shared_ptr ChunkedArray__chunk(const std::shared_ptr& chunked_array, int i){ +std::shared_ptr ChunkedArray__chunk( + const std::shared_ptr& chunked_array, int i) { return chunked_array->chunk(i); } // [[Rcpp::export]] -List ChunkedArray__chunks(const std::shared_ptr& chunked_array){ +List ChunkedArray__chunks(const std::shared_ptr& chunked_array) { return wrap(chunked_array->chunks()); } // [[Rcpp::export]] -std::shared_ptr ChunkedArray__type(const std::shared_ptr& chunked_array){ +std::shared_ptr ChunkedArray__type( + const std::shared_ptr& chunked_array) { return chunked_array->type(); } // [[Rcpp::export]] -SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array){ - switch(chunked_array->type()->id()){ - case Type::INT8: return simple_ChunkedArray_to_Vector(chunked_array); - case Type::INT32: return simple_ChunkedArray_to_Vector(chunked_array); - case Type::DOUBLE: return simple_ChunkedArray_to_Vector(chunked_array); +SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array) { + switch (chunked_array->type()->id()) { + case Type::INT8: + return simple_ChunkedArray_to_Vector(chunked_array); + case Type::INT32: + return simple_ChunkedArray_to_Vector(chunked_array); + case Type::DOUBLE: + return simple_ChunkedArray_to_Vector(chunked_array); default: break; } @@ -104,19 +107,21 @@ SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked } // [[Rcpp::export]] -std::shared_ptr ChunkArray__Slice1( const std::shared_ptr& chunked_array, int offset) { +std::shared_ptr ChunkArray__Slice1( + const std::shared_ptr& chunked_array, int offset) { return chunked_array->Slice(offset); } // [[Rcpp::export]] -std::shared_ptr ChunkArray__Slice2( const std::shared_ptr& chunked_array, int offset, int length) { +std::shared_ptr ChunkArray__Slice2( + const std::shared_ptr& chunked_array, int offset, int length) { return chunked_array->Slice(offset, length); } // [[Rcpp::export]] -std::shared_ptr ChunkedArray__from_list(List chunks){ +std::shared_ptr ChunkedArray__from_list(List chunks) { std::vector> vec; - for ( SEXP chunk: chunks) { + for (SEXP chunk : chunks) { vec.push_back(Array__from_vector(chunk)); } return std::make_shared(std::move(vec)); diff --git a/r/src/Column.cpp b/r/src/Column.cpp index 4b0c597f27c24..a4e1a5d967bb6 100644 --- a/r/src/Column.cpp +++ b/r/src/Column.cpp @@ -28,11 +28,13 @@ int Column__null_count(const std::shared_ptr& column) { } // [[Rcpp::export]] -std::shared_ptr Column__type(const std::shared_ptr& column) { +std::shared_ptr Column__type( + const std::shared_ptr& column) { return column->type(); } // [[Rcpp::export]] -std::shared_ptr Column__data(const std::shared_ptr& column) { +std::shared_ptr Column__data( + const std::shared_ptr& column) { return column->data(); } diff --git a/r/src/DataType.cpp b/r/src/DataType.cpp index b038a4956e746..00e12eb1ce418 100644 --- a/r/src/DataType.cpp +++ b/r/src/DataType.cpp @@ -68,7 +68,8 @@ std::shared_ptr Date64__initialize() { return arrow::date64(); std::shared_ptr Null__initialize() { return arrow::null(); } // [[Rcpp::export]] -std::shared_ptr Decimal128Type__initialize(int32_t precision, int32_t scale) { +std::shared_ptr Decimal128Type__initialize(int32_t precision, + int32_t scale) { return arrow::decimal(precision, scale); } @@ -83,7 +84,8 @@ std::shared_ptr Timestamp__initialize1(arrow::TimeUnit::type un } // [[Rcpp::export]] -std::shared_ptr Timestamp__initialize2(arrow::TimeUnit::type unit, const std::string& timezone) { +std::shared_ptr Timestamp__initialize2(arrow::TimeUnit::type unit, + const std::string& timezone) { return arrow::timestamp(unit, timezone); } @@ -112,10 +114,10 @@ SEXP list__(SEXP x) { } template -std::vector> List_to_shared_ptr_vector(List x){ +std::vector> List_to_shared_ptr_vector(List x) { int n = x.size(); std::vector> vec; - for( SEXP element: x){ + for (SEXP element : x) { vec.push_back(as>(element)); } return vec; @@ -137,7 +139,8 @@ std::string DataType__name(const std::shared_ptr& type) { } // [[Rcpp::export]] -bool DataType__Equals(const std::shared_ptr& lhs, const std::shared_ptr& rhs) { +bool DataType__Equals(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) { return lhs->Equals(*rhs); } @@ -172,17 +175,17 @@ std::string ListType__ToString(const std::shared_ptr& type) { } // [[Rcpp::export]] -int FixedWidthType__bit_width(const std::shared_ptr& type){ +int FixedWidthType__bit_width(const std::shared_ptr& type) { return type->bit_width(); } // [[Rcpp::export]] -arrow::DateUnit DateType__unit(const std::shared_ptr& type){ +arrow::DateUnit DateType__unit(const std::shared_ptr& type) { return type->unit(); } // [[Rcpp::export]] -arrow::TimeUnit::type TimeType__unit(const std::shared_ptr& type){ +arrow::TimeUnit::type TimeType__unit(const std::shared_ptr& type) { return type->unit(); } @@ -202,11 +205,12 @@ std::string TimestampType__timezone(const std::shared_ptr& } // [[Rcpp::export]] -arrow::TimeUnit::type TimestampType__unit(const std::shared_ptr& type) { +arrow::TimeUnit::type TimestampType__unit( + const std::shared_ptr& type) { return type->unit(); } // [[Rcpp::export]] -std::string Object__pointer_address(SEXP obj){ +std::string Object__pointer_address(SEXP obj) { return tfm::format("%p", EXTPTR_PTR(obj)); } diff --git a/r/src/MemoryPool.cpp b/r/src/MemoryPool.cpp index ddc1e05485f3c..d0e4c2406d987 100644 --- a/r/src/MemoryPool.cpp +++ b/r/src/MemoryPool.cpp @@ -20,16 +20,17 @@ using namespace Rcpp; // [[Rcpp::export]] -std::shared_ptr MemoryPool__default(){ - return std::shared_ptr(arrow::default_memory_pool(), NoDelete()); +std::shared_ptr MemoryPool__default() { + return std::shared_ptr(arrow::default_memory_pool(), + NoDelete()); } // [[Rcpp::export]] -int MemoryPool__bytes_allocated(std::shared_ptr pool){ +int MemoryPool__bytes_allocated(std::shared_ptr pool) { return pool->bytes_allocated(); } // [[Rcpp::export]] -int MemoryPool__max_memory(std::shared_ptr pool){ +int MemoryPool__max_memory(std::shared_ptr pool) { return pool->max_memory(); } diff --git a/r/src/RecordBatch.cpp b/r/src/RecordBatch.cpp index cfcd2b16610bf..d4bca930693d0 100644 --- a/r/src/RecordBatch.cpp +++ b/r/src/RecordBatch.cpp @@ -15,41 +15,43 @@ // specific language governing permissions and limitations // under the License. -#include "arrow_types.h" #include -#include #include +#include +#include "arrow_types.h" using namespace Rcpp; using namespace arrow; // [[Rcpp::export]] -int RecordBatch__num_columns(const std::shared_ptr& x){ +int RecordBatch__num_columns(const std::shared_ptr& x) { return x->num_columns(); } // [[Rcpp::export]] -int RecordBatch__num_rows(const std::shared_ptr& x){ +int RecordBatch__num_rows(const std::shared_ptr& x) { return x->num_rows(); } // [[Rcpp::export]] -std::shared_ptr RecordBatch__schema(const std::shared_ptr& x){ +std::shared_ptr RecordBatch__schema( + const std::shared_ptr& x) { return x->schema(); } // [[Rcpp::export]] -std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, int i){ +std::shared_ptr RecordBatch__column( + const std::shared_ptr& batch, int i) { return batch->column(i); } // [[Rcpp::export]] -List RecordBatch__to_dataframe(const std::shared_ptr& batch){ +List RecordBatch__to_dataframe(const std::shared_ptr& batch) { int nc = batch->num_columns(); int nr = batch->num_rows(); List tbl(nc); CharacterVector names(nc); - for(int i=0; icolumn(i)); names[i] = batch->column_name(i); } @@ -75,12 +77,14 @@ std::shared_ptr read_record_batch_(std::string path) { } // [[Rcpp::export]] -int RecordBatch__to_file(const std::shared_ptr& batch, std::string path) { +int RecordBatch__to_file(const std::shared_ptr& batch, + std::string path) { std::shared_ptr stream; std::shared_ptr file_writer; R_ERROR_NOT_OK(arrow::io::FileOutputStream::Open(path, &stream)); - R_ERROR_NOT_OK(arrow::ipc::RecordBatchFileWriter::Open(stream.get(), batch->schema(), &file_writer)); + R_ERROR_NOT_OK(arrow::ipc::RecordBatchFileWriter::Open(stream.get(), batch->schema(), + &file_writer)); R_ERROR_NOT_OK(file_writer->WriteRecordBatch(*batch, true)); R_ERROR_NOT_OK(file_writer->Close()); @@ -91,16 +95,17 @@ int RecordBatch__to_file(const std::shared_ptr& batch, std:: } // [[Rcpp::export]] -std::shared_ptr RecordBatch__from_dataframe(DataFrame tbl){ +std::shared_ptr RecordBatch__from_dataframe(DataFrame tbl) { CharacterVector names = tbl.names(); std::vector> fields; std::vector> arrays; int nc = tbl.size(); - for(int i=0; i(std::string(names[i]), arrays[i]->type())); + fields.push_back( + std::make_shared(std::string(names[i]), arrays[i]->type())); } auto schema = std::make_shared(std::move(fields)); @@ -108,19 +113,22 @@ std::shared_ptr RecordBatch__from_dataframe(DataFrame tbl){ } // [[Rcpp::export]] -bool RecordBatch__Equals(const std::shared_ptr& self, const std::shared_ptr& other) { +bool RecordBatch__Equals(const std::shared_ptr& self, + const std::shared_ptr& other) { return self->Equals(*other); } // [[Rcpp::export]] -std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, int i) { +std::shared_ptr RecordBatch__RemoveColumn( + const std::shared_ptr& batch, int i) { std::shared_ptr res; R_ERROR_NOT_OK(batch->RemoveColumn(i, &res)); return res; } // [[Rcpp::export]] -std::string RecordBatch__column_name(const std::shared_ptr& batch, int i) { +std::string RecordBatch__column_name(const std::shared_ptr& batch, + int i) { return batch->column_name(i); } @@ -128,18 +136,20 @@ std::string RecordBatch__column_name(const std::shared_ptr& CharacterVector RecordBatch__names(const std::shared_ptr& batch) { int n = batch->num_columns(); CharacterVector names(n); - for (int i=0; icolumn_name(i); } return names; } // [[Rcpp::export]] -std::shared_ptr RecordBatch__Slice1(std::shared_ptr& self, int offset) { +std::shared_ptr RecordBatch__Slice1( + std::shared_ptr& self, int offset) { return self->Slice(offset); } // [[Rcpp::export]] -std::shared_ptr RecordBatch__Slice2(std::shared_ptr& self, int offset, int length) { +std::shared_ptr RecordBatch__Slice2( + std::shared_ptr& self, int offset, int length) { return self->Slice(offset, length); } diff --git a/r/src/Table.cpp b/r/src/Table.cpp index 5917818ab6c57..52227d586147f 100644 --- a/r/src/Table.cpp +++ b/r/src/Table.cpp @@ -15,35 +15,33 @@ // specific language governing permissions and limitations // under the License. -#include "arrow_types.h" #include -#include #include +#include +#include "arrow_types.h" using namespace Rcpp; using namespace arrow; // [[Rcpp::export]] -std::shared_ptr Table__from_dataframe(DataFrame tbl){ +std::shared_ptr Table__from_dataframe(DataFrame tbl) { auto rb = RecordBatch__from_dataframe(tbl); std::shared_ptr out; - R_ERROR_NOT_OK(arrow::Table::FromRecordBatches({ std::move(rb) }, &out)); + R_ERROR_NOT_OK(arrow::Table::FromRecordBatches({std::move(rb)}, &out)); return out; } // [[Rcpp::export]] -int Table__num_columns(const std::shared_ptr& x){ +int Table__num_columns(const std::shared_ptr& x) { return x->num_columns(); } // [[Rcpp::export]] -int Table__num_rows(const std::shared_ptr& x){ - return x->num_rows(); -} +int Table__num_rows(const std::shared_ptr& x) { return x->num_rows(); } // [[Rcpp::export]] -std::shared_ptr Table__schema(const std::shared_ptr& x){ +std::shared_ptr Table__schema(const std::shared_ptr& x) { return x->schema(); } @@ -53,7 +51,8 @@ int Table__to_file(const std::shared_ptr& table, std::string path) std::shared_ptr file_writer; R_ERROR_NOT_OK(arrow::io::FileOutputStream::Open(path, &stream)); - R_ERROR_NOT_OK(arrow::ipc::RecordBatchFileWriter::Open(stream.get(), table->schema(), &file_writer)); + R_ERROR_NOT_OK(arrow::ipc::RecordBatchFileWriter::Open(stream.get(), table->schema(), + &file_writer)); R_ERROR_NOT_OK(file_writer->WriteTable(*table)); R_ERROR_NOT_OK(file_writer->Close()); @@ -73,23 +72,23 @@ std::shared_ptr read_table_(std::string path) { int num_batches = rbf_reader->num_record_batches(); std::vector> batches(num_batches); - for (int i=0; iReadRecordBatch(i, &batches[i])); } std::shared_ptr table; - R_ERROR_NOT_OK(arrow::Table::FromRecordBatches(std::move(batches), &table)) ; + R_ERROR_NOT_OK(arrow::Table::FromRecordBatches(std::move(batches), &table)); R_ERROR_NOT_OK(stream->Close()); return table; } // [[Rcpp::export]] -List Table__to_dataframe(const std::shared_ptr& table){ +List Table__to_dataframe(const std::shared_ptr& table) { int nc = table->num_columns(); int nr = table->num_rows(); List tbl(nc); CharacterVector names(nc); - for(int i=0; icolumn(i); tbl[i] = ChunkedArray__as_vector(column->data()); names[i] = column->name(); @@ -101,6 +100,7 @@ List Table__to_dataframe(const std::shared_ptr& table){ } // [[Rcpp::export]] -std::shared_ptr Table__column(const std::shared_ptr& table, int i) { +std::shared_ptr Table__column(const std::shared_ptr& table, + int i) { return table->column(i); } diff --git a/r/src/array.cpp b/r/src/array.cpp index 3a6b60e569bb4..02c52aad35aec 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -20,30 +20,27 @@ using namespace Rcpp; using namespace arrow; -namespace arrow{ -namespace r{ +namespace arrow { +namespace r { -template > +template > class SimpleRBuffer : public arrow::Buffer { -public: + public: + SimpleRBuffer(Vec vec) + : Buffer(reinterpret_cast(vec.begin()), + vec.size() * sizeof(typename Vec::stored_type)), + vec_(vec) {} - SimpleRBuffer(Vec vec) : - Buffer(reinterpret_cast(vec.begin()), vec.size() * sizeof(typename Vec::stored_type)), - vec_(vec) - {} - -private: + private: // vec_ holds the memory Vec vec_; }; template -std::shared_ptr SimpleArray(SEXP x){ +std::shared_ptr SimpleArray(SEXP x) { Rcpp::Vector vec(x); - std::vector> buffers { - nullptr, - std::make_shared>(vec) - }; + std::vector> buffers{ + nullptr, std::make_shared>(vec)}; int null_count = 0; if (RTYPE != RAWSXP) { @@ -53,18 +50,19 @@ std::shared_ptr SimpleArray(SEXP x){ if (first_na < vec.end()) { R_ERROR_NOT_OK(arrow::AllocateBuffer(vec.size(), &null_bitmap)); auto null_bitmap_data = null_bitmap->mutable_data(); - arrow::internal::FirstTimeBitmapWriter bitmap_writer(null_bitmap_data, 0, vec.size()); + arrow::internal::FirstTimeBitmapWriter bitmap_writer(null_bitmap_data, 0, + vec.size()); // first loop to clear all the bits before the first NA auto j = std::distance(vec.begin(), first_na); int i = 0; - for( ; i < j; i++, bitmap_writer.Next()) { + for (; i < j; i++, bitmap_writer.Next()) { bitmap_writer.Set(); } // then finish - for( ; i < vec.size(); i++, bitmap_writer.Next()) { - if (Rcpp::Vector::is_na(vec[i]) ) { + for (; i < vec.size(); i++, bitmap_writer.Next()) { + if (Rcpp::Vector::is_na(vec[i])) { bitmap_writer.Clear(); null_count++; } else { @@ -78,31 +76,27 @@ std::shared_ptr SimpleArray(SEXP x){ } auto data = ArrayData::Make( - std::make_shared(), - LENGTH(x), - std::move(buffers), - null_count, - 0 /*offset*/ + std::make_shared(), LENGTH(x), std::move(buffers), null_count, 0 /*offset*/ ); // return the right Array class return std::make_shared>(data); } -} -} +} // namespace r +} // namespace arrow // [[Rcpp::export]] -std::shared_ptr Array__from_vector(SEXP x){ - switch(TYPEOF(x)){ +std::shared_ptr Array__from_vector(SEXP x) { + switch (TYPEOF(x)) { case INTSXP: if (Rf_isFactor(x)) { break; } - return arrow::r::SimpleArray(x); + return arrow::r::SimpleArray(x); case REALSXP: // TODO: Dates, ... - return arrow::r::SimpleArray(x); + return arrow::r::SimpleArray(x); case RAWSXP: return arrow::r::SimpleArray(x); default: @@ -113,11 +107,11 @@ std::shared_ptr Array__from_vector(SEXP x){ return nullptr; } - template -inline SEXP simple_Array_to_Vector(const std::shared_ptr& array ){ +inline SEXP simple_Array_to_Vector(const std::shared_ptr& array) { using stored_type = typename Rcpp::Vector::stored_type; - auto start = reinterpret_cast(array->data()->buffers[1]->data() + array->offset() * sizeof(stored_type)); + auto start = reinterpret_cast( + array->data()->buffers[1]->data() + array->offset() * sizeof(stored_type)); size_t n = array->length(); Rcpp::Vector vec(start, start + n); @@ -125,10 +119,9 @@ inline SEXP simple_Array_to_Vector(const std::shared_ptr& array ){ // TODO: not sure what to do with RAWSXP since // R raw vector do not have a concept of missing data - arrow::internal::BitmapReader bitmap_reader( - array->null_bitmap()->data(), array->offset(), n - ); - for (size_t i=0; i < n; i++, bitmap_reader.Next()) { + arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), + array->offset(), n); + for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { if (bitmap_reader.IsNotSet()) { vec[i] = Rcpp::Vector::get_na(); } @@ -139,13 +132,16 @@ inline SEXP simple_Array_to_Vector(const std::shared_ptr& array ){ } // [[Rcpp::export]] -SEXP Array__as_vector(const std::shared_ptr& array){ - switch(array->type_id()){ - case Type::INT8: return simple_Array_to_Vector(array); - case Type::INT32: return simple_Array_to_Vector(array); - case Type::DOUBLE: return simple_Array_to_Vector(array); - default: - break; +SEXP Array__as_vector(const std::shared_ptr& array) { + switch (array->type_id()) { + case Type::INT8: + return simple_Array_to_Vector(array); + case Type::INT32: + return simple_Array_to_Vector(array); + case Type::DOUBLE: + return simple_Array_to_Vector(array); + default: + break; } stop(tfm::format("cannot handle Array of type %d", array->type_id())); @@ -153,72 +149,71 @@ SEXP Array__as_vector(const std::shared_ptr& array){ } // [[Rcpp::export]] -std::shared_ptr Array__Slice1(const std::shared_ptr& array, int offset) { +std::shared_ptr Array__Slice1(const std::shared_ptr& array, + int offset) { return array->Slice(offset); } // [[Rcpp::export]] -std::shared_ptr Array__Slice2(const std::shared_ptr& array, int offset, int length) { +std::shared_ptr Array__Slice2(const std::shared_ptr& array, + int offset, int length) { return array->Slice(offset, length); } // [[Rcpp::export]] -bool Array__IsNull(const std::shared_ptr& x, int i){ - return x->IsNull(i); -} +bool Array__IsNull(const std::shared_ptr& x, int i) { return x->IsNull(i); } // [[Rcpp::export]] -bool Array__IsValid(const std::shared_ptr& x, int i){ +bool Array__IsValid(const std::shared_ptr& x, int i) { return x->IsValid(i); } // [[Rcpp::export]] -int Array__length(const std::shared_ptr& x){ - return x->length(); -} +int Array__length(const std::shared_ptr& x) { return x->length(); } // [[Rcpp::export]] -int Array__offset(const std::shared_ptr& x){ - return x->offset(); -} +int Array__offset(const std::shared_ptr& x) { return x->offset(); } // [[Rcpp::export]] -int Array__null_count(const std::shared_ptr& x){ - return x->null_count(); -} +int Array__null_count(const std::shared_ptr& x) { return x->null_count(); } // [[Rcpp::export]] -std::shared_ptr Array__type(const std::shared_ptr& x){ +std::shared_ptr Array__type(const std::shared_ptr& x) { return x->type(); } // [[Rcpp::export]] -std::string Array__ToString(const std::shared_ptr& x){ +std::string Array__ToString(const std::shared_ptr& x) { return x->ToString(); } // [[Rcpp::export]] -arrow::Type::type Array__type_id(const std::shared_ptr& x){ +arrow::Type::type Array__type_id(const std::shared_ptr& x) { return x->type_id(); } // [[Rcpp::export]] -bool Array__Equals(const std::shared_ptr& lhs, const std::shared_ptr& rhs){ +bool Array__Equals(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) { return lhs->Equals(rhs); } // [[Rcpp::export]] -bool Array__ApproxEquals(const std::shared_ptr& lhs, const std::shared_ptr& rhs){ +bool Array__ApproxEquals(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) { return lhs->ApproxEquals(rhs); } // [[Rcpp::export]] -std::shared_ptr Array__data(const std::shared_ptr& array){ +std::shared_ptr Array__data( + const std::shared_ptr& array) { return array->data(); } // [[Rcpp::export]] -bool Array__RangeEquals(const std::shared_ptr& self, const std::shared_ptr&other, int start_idx, int end_idx, int other_start_idx) { +bool Array__RangeEquals(const std::shared_ptr& self, + const std::shared_ptr& other, int start_idx, + int end_idx, int other_start_idx) { return self->RangeEquals(*other, start_idx, end_idx, other_start_idx); } @@ -230,8 +225,9 @@ LogicalVector Array__Mask(const std::shared_ptr& array) { auto n = array->length(); LogicalVector res(no_init(n)); - arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), n); - for (size_t i=0; i < array->length(); i++, bitmap_reader.Next()) { + arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), + array->offset(), n); + for (size_t i = 0; i < array->length(); i++, bitmap_reader.Next()) { res[i] = bitmap_reader.IsSet(); } return res; diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 66754edd36550..ad49c90a5f55d 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -23,39 +23,42 @@ #include #include -#define R_ERROR_NOT_OK(s) do { if(!s.ok()) Rcpp::stop(s.ToString()); } while (0); +#define R_ERROR_NOT_OK(s) \ + do { \ + if (!s.ok()) Rcpp::stop(s.ToString()); \ + } while (0); template -struct NoDelete{ +struct NoDelete { inline void operator()(T* ptr){}; }; -namespace Rcpp{ -namespace traits{ +namespace Rcpp { +namespace traits { -struct wrap_type_shared_ptr_tag{}; -struct wrap_type_static_ptr_tag{}; +struct wrap_type_shared_ptr_tag {}; +struct wrap_type_static_ptr_tag {}; template -struct wrap_type_traits>{ +struct wrap_type_traits> { using wrap_category = wrap_type_shared_ptr_tag; }; template class Exporter>; -} -namespace internal{ +} // namespace traits +namespace internal { template -inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag) ; +inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag); template -inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_static_ptr_tag) ; +inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_static_ptr_tag); -} +} // namespace internal -} +} // namespace Rcpp #include @@ -64,43 +67,40 @@ RCPP_EXPOSED_ENUM_NODECL(arrow::DateUnit) RCPP_EXPOSED_ENUM_NODECL(arrow::TimeUnit::type) RCPP_EXPOSED_ENUM_NODECL(arrow::StatusCode) -namespace Rcpp{ -namespace traits{ +namespace Rcpp { +namespace traits { template class Exporter> { -public: - Exporter(SEXP self) : xp(extract_xp(self)){} + public: + Exporter(SEXP self) : xp(extract_xp(self)) {} - inline std::shared_ptr get(){ - return *Rcpp::XPtr>(xp); - } + inline std::shared_ptr get() { return *Rcpp::XPtr>(xp); } -private: + private: SEXP xp; - SEXP extract_xp(SEXP self){ + SEXP extract_xp(SEXP self) { static SEXP symb_xp = Rf_install(".:xp:."); - return Rf_findVarInFrame(self, symb_xp) ; + return Rf_findVarInFrame(self, symb_xp); } - }; -} +} // namespace traits -namespace internal{ +namespace internal { template -inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag){ - return Rcpp::XPtr>(new std::shared_ptr(x)); +inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag) { + return Rcpp::XPtr>( + new std::shared_ptr(x)); } -} +} // namespace internal -} +} // namespace Rcpp SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); SEXP Array__as_vector(const std::shared_ptr& array); std::shared_ptr Array__from_vector(SEXP x); std::shared_ptr RecordBatch__from_dataframe(Rcpp::DataFrame tbl); - diff --git a/r/src/field.cpp b/r/src/field.cpp index 7ea96a33e2dee..88ae286babfa9 100644 --- a/r/src/field.cpp +++ b/r/src/field.cpp @@ -22,7 +22,9 @@ using namespace Rcpp; // [[Rcpp::export]] -std::shared_ptr Field__initialize(const std::string& name, const std::shared_ptr& type, bool nullable = true) { +std::shared_ptr Field__initialize( + const std::string& name, const std::shared_ptr& type, + bool nullable = true) { return arrow::field(name, type, nullable); } @@ -32,11 +34,7 @@ std::string Field__ToString(const std::shared_ptr& type) { } // [[Rcpp::export]] -std::string Field__name(std::shared_ptr type) { - return type->name(); -} +std::string Field__name(std::shared_ptr type) { return type->name(); } // [[Rcpp::export]] -bool Field__nullable(std::shared_ptr type) { - return type->nullable(); -} +bool Field__nullable(std::shared_ptr type) { return type->nullable(); }