Skip to content

Commit

Permalink
Migrate random.cpp to cpp11
Browse files Browse the repository at this point in the history
TODO: migrate `Range` in random.cpp

- Disambiguates `stop()` to `Rcpp::stop()` throughout. `cpp11::stop()` doesn't seem to throw errors that testthat sees as errors.
- Updates cpp11 / Rcpp setup, removing init.c
- Need to call `Rcpp::compileAttributes()` and `devtools::document()` as functions are migrated.
  • Loading branch information
jayhesselberth committed Jan 20, 2025
1 parent 5c23a55 commit 6469b35
Show file tree
Hide file tree
Showing 14 changed files with 100 additions and 166 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ Suggests:
tidyr
biocViews:
LinkingTo:
Rcpp (>= 1.0.0)
Rcpp (>= 1.0.0),
cpp11
VignetteBuilder:
knitr
Encoding: UTF-8
Expand Down
4 changes: 0 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ partition_impl <- function(gdf, max_dist = -1L) {
.Call(`_valr_partition_impl`, gdf, max_dist)
}

random_impl <- function(genome, length, n, seed = 0L) {
.Call(`_valr_random_impl`, genome, length, n, seed)
}

shuffle_impl <- function(df, incl, within = FALSE, max_tries = 1000L, seed = 0L) {
.Call(`_valr_shuffle_impl`, df, incl, within, max_tries, seed)
}
Expand Down
5 changes: 5 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Generated by cpp11: do not edit by hand

# Thin R wrapper that forwards its arguments unchanged to the registered
# native routine `_valr_random_impl` via .Call.
# All four arguments are required: this wrapper defines no default values.
random_impl <- function(genome, length, n, seed) {
.Call(`_valr_random_impl`, genome, length, n, seed)
}
2 changes: 1 addition & 1 deletion inst/include/genome.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inline genome_map_t makeChromSizes(DataFrame genome,
IntegerVector sizes = genome[col_size] ;

if (unique(refs).length() != refs.length())
stop("duplicate reference names in genome file.") ;
Rcpp::stop("duplicate reference names in genome file.") ;

Check warning on line 27 in inst/include/genome.h

View check run for this annotation

Codecov / codecov/patch

inst/include/genome.h#L27

Added line #L27 was not covered by tests

int nchrom = genome.nrows() ;
for (int i = 0; i < nchrom; ++i) {
Expand Down
2 changes: 1 addition & 1 deletion inst/include/group_apply.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ inline void GroupApply(const ValrGroupedDataFrame& x,
int ng_y = shared_grps_y.size() ;

if (ng_x != ng_y) {
stop("incompatible groups found between x and y dataframes") ;
Rcpp::stop("incompatible groups found between x and y dataframes") ;

Check warning on line 31 in inst/include/group_apply.h

View check run for this annotation

Codecov / codecov/patch

inst/include/group_apply.h#L31

Added line #L31 was not covered by tests
}

// access the group .rows list
Expand Down
5 changes: 3 additions & 2 deletions inst/include/valr.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
#ifndef valr__valr_H
#define valr__valr_H

// [[Rcpp::plugins(cpp11)]]

#include <Rcpp.h>
using namespace Rcpp ;

#include <cpp11.hpp>
using namespace cpp11;

#include "utils.h"
#include "grouped_dataframe.h"
#include "IntervalTree.h"
Expand Down
14 changes: 0 additions & 14 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,20 +166,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// random_impl
DataFrame random_impl(DataFrame genome, int length, int n, int seed);
RcppExport SEXP _valr_random_impl(SEXP genomeSEXP, SEXP lengthSEXP, SEXP nSEXP, SEXP seedSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< DataFrame >::type genome(genomeSEXP);
Rcpp::traits::input_parameter< int >::type length(lengthSEXP);
Rcpp::traits::input_parameter< int >::type n(nSEXP);
Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
rcpp_result_gen = Rcpp::wrap(random_impl(genome, length, n, seed));
return rcpp_result_gen;
END_RCPP
}
// shuffle_impl
DataFrame shuffle_impl(DataFrame df, DataFrame incl, bool within, int max_tries, int seed);
RcppExport SEXP _valr_shuffle_impl(SEXP dfSEXP, SEXP inclSEXP, SEXP withinSEXP, SEXP max_triesSEXP, SEXP seedSEXP) {
Expand Down
3 changes: 1 addition & 2 deletions src/closest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ DataFrame closest_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y,
int ng_y = grp_idx_y.size() ;

if (ng_x != ng_y) {
stop("incompatible groups found between x and y dataframes") ;
Rcpp::stop("incompatible groups found between x and y dataframes") ;

Check warning on line 236 in src/closest.cpp

View check run for this annotation

Codecov / codecov/patch

src/closest.cpp#L236

Added line #L236 was not covered by tests
}

// access the group .rows list
Expand Down Expand Up @@ -297,4 +297,3 @@ DataFrame closest_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y,
return res ;

}

57 changes: 57 additions & 0 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Generated by cpp11: do not edit by hand
// clang-format off

#include <cpp11/R.hpp>
#include <Rcpp.h>
using namespace Rcpp;
#include "cpp11/declarations.hpp"
#include <R_ext/Visibility.h>

// random.cpp
// Forward declaration of the implementation; the definition lives in random.cpp.
writable::data_frame random_impl(DataFrame genome, int length, int n, int seed);
// .Call entry point for random_impl.
// Each incoming SEXP is converted to its C++ parameter type with cpp11::as_cpp,
// random_impl is invoked, and its result is converted back with cpp11::as_sexp.
// BEGIN_CPP11 / END_CPP11 come from cpp11/declarations.hpp; presumably they
// translate C++ exceptions into R errors -- confirm against that header.
extern "C" SEXP _valr_random_impl(SEXP genome, SEXP length, SEXP n, SEXP seed) {
BEGIN_CPP11
return cpp11::as_sexp(random_impl(cpp11::as_cpp<cpp11::decay_t<DataFrame>>(genome), cpp11::as_cpp<cpp11::decay_t<int>>(length), cpp11::as_cpp<cpp11::decay_t<int>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
END_CPP11
}

// Registration table for every native routine the package exposes through .Call.
extern "C" {
/* .Call calls */
// Entry points declared here but not defined in this file; only
// _valr_random_impl is defined in this translation unit.
// NOTE(review): presumably these are the Rcpp-generated wrappers that have not
// been migrated yet -- confirm they are still emitted by RcppExports.cpp.
extern SEXP _valr_bed12toexons_impl(SEXP);
extern SEXP _valr_closest_impl(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_complement_impl(SEXP, SEXP);
extern SEXP _valr_coverage_impl(SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_dist_impl(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_flank_impl(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_gcoverage_impl(SEXP, SEXP);
extern SEXP _valr_intersect_impl(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_makewindows_impl(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_merge_impl(SEXP, SEXP, SEXP);
extern SEXP _valr_partition_impl(SEXP, SEXP);
extern SEXP _valr_shuffle_impl(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_subtract_impl(SEXP, SEXP, SEXP, SEXP);

// Each row is {routine name, function pointer, number of SEXP arguments}.
// The argument count must match the corresponding declaration above.
static const R_CallMethodDef CallEntries[] = {
{"_valr_bed12toexons_impl", (DL_FUNC) &_valr_bed12toexons_impl, 1},
{"_valr_closest_impl", (DL_FUNC) &_valr_closest_impl, 6},
{"_valr_complement_impl", (DL_FUNC) &_valr_complement_impl, 2},
{"_valr_coverage_impl", (DL_FUNC) &_valr_coverage_impl, 4},
{"_valr_dist_impl", (DL_FUNC) &_valr_dist_impl, 5},
{"_valr_flank_impl", (DL_FUNC) &_valr_flank_impl, 8},
{"_valr_gcoverage_impl", (DL_FUNC) &_valr_gcoverage_impl, 2},
{"_valr_intersect_impl", (DL_FUNC) &_valr_intersect_impl, 7},
{"_valr_makewindows_impl", (DL_FUNC) &_valr_makewindows_impl, 5},
{"_valr_merge_impl", (DL_FUNC) &_valr_merge_impl, 3},
{"_valr_partition_impl", (DL_FUNC) &_valr_partition_impl, 2},
{"_valr_random_impl", (DL_FUNC) &_valr_random_impl, 4},
{"_valr_shuffle_impl", (DL_FUNC) &_valr_shuffle_impl, 5},
{"_valr_subtract_impl", (DL_FUNC) &_valr_subtract_impl, 4},
// Sentinel row marking the end of the table.
{NULL, NULL, 0}
};
}

// Package initialization hook; R looks up R_init_<pkg> when the shared library
// is loaded.
extern "C" attribute_visible void R_init_valr(DllInfo* dll){
// Register CallEntries in the .Call slot; no .C, .Fortran or .External routines.
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
// Restrict lookup to the registered routines (no dynamic symbol search).
R_useDynamicSymbols(dll, FALSE);
// Require routines to be referenced as R symbol objects, not character strings.
R_forceSymbols(dll, TRUE);
}
28 changes: 5 additions & 23 deletions src/gcoverage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ posTracker collatePositions(const IntegerVector& starts,
auto n = starts.size() ;

if (n != ends.size()) {
stop("incompatible start and end vector supplied") ;
cpp11::stop("incompatible start and end vector supplied") ;

Check warning on line 24 in src/gcoverage.cpp

View check run for this annotation

Codecov / codecov/patch

src/gcoverage.cpp#L24

Added line #L24 was not covered by tests
}

for (int i = 0; i < n; i++) {
Expand All @@ -46,7 +46,7 @@ DataFrame gcoverage_impl(const ValrGroupedDataFrame& gdf,
ListView idx(gdf.indices()) ;

if(max_coords.size() != ng) {
stop("max_coords must equal the number of groups in data.frame");
cpp11::stop("max_coords must equal the number of groups in data.frame");

Check warning on line 49 in src/gcoverage.cpp

View check run for this annotation

Codecov / codecov/patch

src/gcoverage.cpp#L49

Added line #L49 was not covered by tests
}

std::vector<int> out_indices, depths, starts, ends;
Expand Down Expand Up @@ -74,9 +74,9 @@ DataFrame gcoverage_impl(const ValrGroupedDataFrame& gdf,

for (auto p:pos) {
if (p.first > max_coord) {
warning("Out of bounds interval detected at position: %s \n"
" Out of bounds intervals will be ignored",
p.first);
Rcpp::warning(
"Out of bounds interval detected and will be ignored"
);
break;
}

Expand Down Expand Up @@ -113,21 +113,3 @@ DataFrame gcoverage_impl(const ValrGroupedDataFrame& gdf,

return subset_x ;
}

/*** R
library(dplyr)
x <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 20, 70, 6, 25, "+",
"chr1", 50, 100, 1, 25, "-",
"chr1", 200, 250, 3, 25, "+",
"chr1", 220, 250, 3, 25, "+",
"chr2", 80, 130, 5, 25, "-",
"chr2", 150, 200, 4, 25, "+",
"chr2", 180, 230, 2, 25, "-",
"chr2", 190, 230, 2, 25, "-"
) |> group_by(chrom)
gcoverage_impl(x, max_coords = c(1000, 500)) |> as.data.frame()
*/
48 changes: 0 additions & 48 deletions src/init.c

This file was deleted.

56 changes: 22 additions & 34 deletions src/random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,27 @@

#include "valr.h"

// [[Rcpp::export]]
DataFrame random_impl(DataFrame genome, int length, int n, int seed = 0) {
[[cpp11::register]]
writable::data_frame random_impl(DataFrame genome, int length, int n, int seed = 0) {

CharacterVector chroms = genome["chrom"] ;
NumericVector sizes = genome["size"] ;
std::vector<std::string> chroms = genome["chrom"] ;
std::vector<int> sizes = genome["size"] ;

int nchrom = chroms.size() ;

if (seed == 0)
seed = round(R::runif(0, RAND_MAX)) ;
seed = round(Rf_runif(0, RAND_MAX)) ;

// seed the generator
auto generator = ENGINE(seed) ;

// calculate weights for chrom distribution
float mass = sum(sizes) ;
NumericVector weights = sizes / mass ;
double mass = std::accumulate(sizes.begin(), sizes.end(), 0); ;

std::vector<double> weights(nchrom) ;
for (int i = 0; i < nchrom; ++i) {
weights[i] = sizes[i] / mass ;
}

Range chromidx(0, nchrom) ;
PCONST_DIST chrom_dist(chromidx.begin(), chromidx.end(), weights.begin()) ;
Expand All @@ -41,8 +45,8 @@ DataFrame random_impl(DataFrame genome, int length, int n, int seed = 0) {
size_rngs.push_back(size_dist) ;
}

CharacterVector rand_chroms(n) ;
IntegerVector rand_starts(n) ;
std::vector<std::string> rand_chroms(n) ;
std::vector<int> rand_starts(n) ;

for (int i = 0; i < n; ++i) {

Expand All @@ -55,31 +59,15 @@ DataFrame random_impl(DataFrame genome, int length, int n, int seed = 0) {
rand_starts[i] = rand_start ;
}

IntegerVector rand_ends = rand_starts + length ;
std::vector<int> rand_ends(rand_starts.size()) ;
for (int i = 0; i < rand_starts.size(); ++i) {
rand_ends[i] = rand_starts[i] + length ;
}

return DataFrame::create(_("chrom") = rand_chroms,
_("start") = rand_starts,
_("end") = rand_ends,
_("stringsAsFactors") = false) ;
return writable::data_frame({
"chrom"_nm = rand_chroms,
"start"_nm = rand_starts,
"end"_nm = rand_ends,
});

}

/***R
library(dplyr)
genome <- tibble::tribble(
~chrom, ~size,
"chr1", 191822,
"chr2", 17127713,
"chr3", 11923987
)
# show chrom distribution
random_impl(genome, length = 1000, n = 1e6, seed = 0) %>%
group_by(chrom) %>% summarize(n = n())
library(microbenchmark)
microbenchmark(
random_impl(genome, length = 1000, n = 1e6, seed = 0),
times = 10
)
*/
34 changes: 1 addition & 33 deletions src/shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ DataFrame shuffle_impl(DataFrame df, DataFrame incl, bool within = false,
niter++ ;
if (niter > max_tries) {
// tried too many times to find an overlap, bail
stop("maximum iterations exceeded in bed_shuffle") ;
Rcpp::stop("maximum iterations exceeded in bed_shuffle") ;
}

// get a random interval index
Expand Down Expand Up @@ -264,35 +264,3 @@ DataFrame shuffle_impl(DataFrame df, DataFrame incl, bool within = false,
_("end") = ends_out,
_("stringsAsFactors") = false) ;
}

/***R
library(dplyr)
library(valr)
library(testthat)
library(microbenchmark)
genome <- tibble::tribble(
~chrom, ~size,
"chr1", 50000000,
"chr2", 60000000,
"chr3", 80000000
)
incl <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 1, 5000000,
"chr1", 5000000, 50000000,
"chr2", 1, 60000000,
"chr3", 1, 80000000
)
x <- bed_random(genome, n = 100) %>% bed_sort()
shuffle_impl(x, incl) %>%
group_by(chrom) %>%
summarize(count = n())
library(microbenchmark)
# microbenchmark(shuffle_impl(x, incl), n = 10, unit = 's')
*/
Loading

0 comments on commit 6469b35

Please sign in to comment.