Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #406 #430

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp11test/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ Suggests:
xml2
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.3.2
20 changes: 20 additions & 0 deletions cpp11test/R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,26 @@ string_push_back_ <- function() {
.Call(`_cpp11test_string_push_back_`)
}

grow_strings_cpp11_ <- function(n, seed) {
.Call(`_cpp11test_grow_strings_cpp11_`, n, seed)
}

grow_strings_rcpp_ <- function(n, seed) {
.Call(`_cpp11test_grow_strings_rcpp_`, n, seed)
}

grow_strings_manual_ <- function(n, seed) {
.Call(`_cpp11test_grow_strings_manual_`, n, seed)
}

assign_cpp11_ <- function(n, seed) {
.Call(`_cpp11test_assign_cpp11_`, n, seed)
}

assign_rcpp_ <- function(n, seed) {
.Call(`_cpp11test_assign_rcpp_`, n, seed)
}

sum_dbl_for_ <- function(x) {
.Call(`_cpp11test_sum_dbl_for_`, x)
}
Expand Down
35 changes: 35 additions & 0 deletions cpp11test/bench/strings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
pkgload::load_all("cpp11test")

bench::press(len = as.integer(10^(0:6)), {
bench::mark(
assign_cpp11_(n = len, 123L),
assign_rcpp_(n = len, 123L),
iterations = 20
)
})[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")]

# Longer benchmark, lots of gc
len <- as.integer(10^7)
bench::mark(
cpp11 = cpp11_push_and_truncate_(len),
rcpp = rcpp_push_and_truncate_(len),
min_iterations = 200
)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")]

bench::press(len = as.integer(10^(0:6)), {
bench::mark(
grow_strings_cpp11_(len, 123L),
grow_strings_rcpp_(len, 123L),
grow_strings_manual_(len, 123L),
iterations = 20
)
})[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")]

# Longer benchmark, lots of gc
len <- as.integer(10^7)
bench::mark(
cpp11 = cpp11_grow_strings_(len),
rcpp = rcpp_grow_strings_(len),
manual = manual_grow_strings_(len),
min_iterations = 200
)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")]
40 changes: 40 additions & 0 deletions cpp11test/src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,41 @@ extern "C" SEXP _cpp11test_string_push_back_() {
return cpp11::as_sexp(string_push_back_());
END_CPP11
}
// strings.cpp
cpp11::strings grow_strings_cpp11_(size_t n, int seed);
extern "C" SEXP _cpp11test_grow_strings_cpp11_(SEXP n, SEXP seed) {
BEGIN_CPP11
return cpp11::as_sexp(grow_strings_cpp11_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
END_CPP11
}
// strings.cpp
Rcpp::CharacterVector grow_strings_rcpp_(size_t n, int seed);
extern "C" SEXP _cpp11test_grow_strings_rcpp_(SEXP n, SEXP seed) {
BEGIN_CPP11
return cpp11::as_sexp(grow_strings_rcpp_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
END_CPP11
}
// strings.cpp
SEXP grow_strings_manual_(size_t n, int seed);
extern "C" SEXP _cpp11test_grow_strings_manual_(SEXP n, SEXP seed) {
BEGIN_CPP11
return cpp11::as_sexp(grow_strings_manual_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
END_CPP11
}
// strings.cpp
cpp11::strings assign_cpp11_(size_t n, int seed);
extern "C" SEXP _cpp11test_assign_cpp11_(SEXP n, SEXP seed) {
BEGIN_CPP11
return cpp11::as_sexp(assign_cpp11_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
END_CPP11
}
// strings.cpp
Rcpp::CharacterVector assign_rcpp_(size_t n, int seed);
extern "C" SEXP _cpp11test_assign_rcpp_(SEXP n, SEXP seed) {
BEGIN_CPP11
return cpp11::as_sexp(assign_rcpp_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
END_CPP11
}
// sum.cpp
double sum_dbl_for_(cpp11::doubles x);
extern "C" SEXP _cpp11test_sum_dbl_for_(SEXP x) {
Expand Down Expand Up @@ -472,6 +507,8 @@ extern "C" {
extern SEXP run_testthat_tests(SEXP);

static const R_CallMethodDef CallEntries[] = {
{"_cpp11test_assign_cpp11_", (DL_FUNC) &_cpp11test_assign_cpp11_, 2},
{"_cpp11test_assign_rcpp_", (DL_FUNC) &_cpp11test_assign_rcpp_, 2},
{"_cpp11test_col_sums", (DL_FUNC) &_cpp11test_col_sums, 1},
{"_cpp11test_cpp11_add_vec_for_", (DL_FUNC) &_cpp11test_cpp11_add_vec_for_, 2},
{"_cpp11test_cpp11_insert_", (DL_FUNC) &_cpp11test_cpp11_insert_, 1},
Expand All @@ -488,6 +525,9 @@ static const R_CallMethodDef CallEntries[] = {
{"_cpp11test_gibbs_rcpp", (DL_FUNC) &_cpp11test_gibbs_rcpp, 2},
{"_cpp11test_gibbs_rcpp2", (DL_FUNC) &_cpp11test_gibbs_rcpp2, 2},
{"_cpp11test_grow_", (DL_FUNC) &_cpp11test_grow_, 1},
{"_cpp11test_grow_strings_cpp11_", (DL_FUNC) &_cpp11test_grow_strings_cpp11_, 2},
{"_cpp11test_grow_strings_manual_", (DL_FUNC) &_cpp11test_grow_strings_manual_, 2},
{"_cpp11test_grow_strings_rcpp_", (DL_FUNC) &_cpp11test_grow_strings_rcpp_, 2},
{"_cpp11test_my_message", (DL_FUNC) &_cpp11test_my_message, 2},
{"_cpp11test_my_message_n1", (DL_FUNC) &_cpp11test_my_message_n1, 1},
{"_cpp11test_my_message_n1fmt", (DL_FUNC) &_cpp11test_my_message_n1fmt, 1},
Expand Down
94 changes: 94 additions & 0 deletions cpp11test/src/strings.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#include "cpp11/strings.hpp"
#include <random>
#include <vector>

#include <Rcpp.h>

// Test benchmark for string proxy assignment performance.
// We don't unwind_protect() before each `SET_STRING_ELT()` call,
Expand Down Expand Up @@ -33,3 +37,93 @@

return x;
}

// issue 406

std::random_device rd;
std::mt19937 gen(rd());

double random_double() {
std::uniform_real_distribution<double> dist(0.0, 1.0);
return dist(gen);
}

int random_int(int min, int max) {
std::uniform_int_distribution<int> dist(min, max);
return dist(gen);
}

std::string random_string() {
std::string s(10, '\0');
for (size_t i = 0; i < 10; i++) {
s[i] = random_int(0, 25) + 'a';
}
return s;
}

[[cpp11::register]] cpp11::strings grow_strings_cpp11_(size_t n, int seed) {
gen.seed(seed);
cpp11::writable::strings x;
for (size_t i = 0; i < n; ++i) {
x.push_back(random_string());
}
return x;
}

[[cpp11::register]] Rcpp::CharacterVector grow_strings_rcpp_(size_t n, int seed) {
gen.seed(seed);
Rcpp::CharacterVector x(n);
for (size_t i = 0; i < n; ++i) {
x[i] = random_string();
}
return x;
}

[[cpp11::register]] SEXP grow_strings_manual_(size_t n, int seed) {
gen.seed(seed);
SEXP data_ = PROTECT(Rf_allocVector(STRSXP, 0));
size_t size_ = 0;
size_t capacity_ = 0;
for (size_t i = 0; i < n; ++i) {
if (size_ == capacity_) {
capacity_ = capacity_ == 0 ? 1 : capacity_ * 2;
SEXP new_data_ = PROTECT(Rf_allocVector(STRSXP, capacity_));
for (size_t j = 0; j < size_; ++j) {
SET_STRING_ELT(new_data_, j, STRING_ELT(data_, j));
}
UNPROTECT(2);
data_ = PROTECT(new_data_);
}
SET_STRING_ELT(data_, size_++, Rf_mkChar(random_string().c_str()));
}
// copy back down to size
if (size_ < capacity_) {
SEXP new_data_ = PROTECT(Rf_allocVector(STRSXP, size_));
for (size_t j = 0; j < size_; ++j) {
SET_STRING_ELT(new_data_, j, STRING_ELT(data_, j));
}
UNPROTECT(2);
return new_data_;
} else {
UNPROTECT(1);
return data_;
}
}

[[cpp11::register]] cpp11::strings assign_cpp11_(size_t n, int seed) {
gen.seed(seed);
cpp11::writable::strings x(n);
for (size_t i = 0; i < n; ++i) {
x[i] = random_string();
}
return x;
}

[[cpp11::register]] Rcpp::CharacterVector assign_rcpp_(size_t n, int seed) {
gen.seed(seed);
Rcpp::CharacterVector x(n);
for (size_t i = 0; i < n; ++i) {
x[i] = random_string();
}
return x;
}
4 changes: 3 additions & 1 deletion inst/include/cpp11/r_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,9 @@ class r_vector : public cpp11::r_vector<T> {
proxy at(const r_string& name) const;

void push_back(T value);
/// Implemented in `strings.hpp`
template <typename U = T,
typename std::enable_if<std::is_same<U, r_string>::value>::type* = nullptr>
void push_back(const std::string& value); // Pacha: r_string only (#406)
void push_back(const named_arg& value);
void pop_back();

Expand Down
43 changes: 28 additions & 15 deletions inst/include/cpp11/strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,24 @@ typedef r_vector<r_string> strings;
namespace writable {

template <>
inline void r_vector<r_string>::set_elt(SEXP x, R_xlen_t i,
typename r_vector::underlying_type value) {
inline void r_vector<r_string>::set_elt(
SEXP x, R_xlen_t i, typename r_vector<r_string>::underlying_type value) {
// NOPROTECT: Likely too costly to unwind protect every set elt
SET_STRING_ELT(x, i, value);
}

// Pacha: Optimized push_back for std::string (borrows from @traversc' push_back_fast)
template <>
template <typename U, typename std::enable_if<std::is_same<U, r_string>::value>::type*>
inline void r_vector<r_string>::push_back(const std::string& value) {
while (this->length_ >= this->capacity_) {
this->reserve(this->capacity_ == 0 ? 1 : this->capacity_ * 2);
}
set_elt(this->data_, this->length_,
Rf_mkCharLenCE(value.c_str(), value.size(), CE_UTF8));
++this->length_;
}

inline bool operator==(const r_vector<r_string>::proxy& lhs, r_string rhs) {
return static_cast<r_string>(lhs).operator==(static_cast<std::string>(rhs).c_str());
}
Expand Down Expand Up @@ -95,17 +107,17 @@ inline SEXP alloc_if_charsxp(const SEXP data) {

template <>
inline r_vector<r_string>::r_vector(const SEXP& data)
: cpp11::r_vector<r_string>(alloc_or_copy(data)), capacity_(length_) {
: cpp11::r_vector<r_string>(alloc_or_copy(data)), capacity_(this->length_) {
if (detail::r_typeof(data) == CHARSXP) {
SET_STRING_ELT(data_, 0, data);
SET_STRING_ELT(this->data_, 0, data);
}
}

template <>
inline r_vector<r_string>::r_vector(SEXP&& data)
: cpp11::r_vector<r_string>(alloc_if_charsxp(data)), capacity_(length_) {
: cpp11::r_vector<r_string>(alloc_if_charsxp(data)), capacity_(this->length_) {
if (detail::r_typeof(data) == CHARSXP) {
SET_STRING_ELT(data_, 0, data);
SET_STRING_ELT(this->data_, 0, data);
}
}

Expand All @@ -117,14 +129,15 @@ inline r_vector<r_string>::r_vector(std::initializer_list<r_string> il)
unwind_protect([&] {
auto it = il.begin();

for (R_xlen_t i = 0; i < capacity_; ++i, ++it) {
for (R_xlen_t i = 0; i < this->capacity_; ++i, ++it) {
// i.e. to `SEXP`
underlying_type elt = static_cast<underlying_type>(*it);
typename r_vector<r_string>::underlying_type elt =
static_cast<typename r_vector<r_string>::underlying_type>(*it);

if (elt == NA_STRING) {
set_elt(data_, i, elt);
set_elt(this->data_, i, elt);
} else {
set_elt(data_, i, Rf_mkCharCE(Rf_translateCharUTF8(elt), CE_UTF8));
set_elt(this->data_, i, Rf_mkCharCE(Rf_translateCharUTF8(elt), CE_UTF8));
}
}
});
Expand All @@ -135,12 +148,12 @@ typedef r_vector<r_string> strings;
template <typename T>
inline void r_vector<T>::push_back(const named_arg& value) {
push_back(value.value());
if (Rf_xlength(names()) == 0) {
cpp11::writable::strings new_nms(size());
names() = new_nms;
if (Rf_xlength(this->names()) == 0) {
cpp11::writable::strings new_nms(this->size());
this->names() = new_nms;
}
cpp11::writable::strings nms(names());
nms[size() - 1] = value.name();
cpp11::writable::strings nms(this->names());
nms[this->size() - 1] = value.name();
}

} // namespace writable
Expand Down
Loading