Skip to content

Commit

Permalink
Migrate flank.cpp and makewindows.cpp
Browse files Browse the repository at this point in the history
Incorporated a cpp11-compatible subset_dataframe() overload

Removed failing flank tests that shouldn't have worked in the first place
  • Loading branch information
jayhesselberth committed Jan 22, 2025
1 parent 93ecbe1 commit 0917569
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 110 deletions.
8 changes: 0 additions & 8 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ dist_impl <- function(x, y, x_grp_indexes, y_grp_indexes, distcalc) {
.Call(`_valr_dist_impl`, x, y, x_grp_indexes, y_grp_indexes, distcalc)
}

# Thin R binding for the compiled flank routine: every argument is forwarded,
# unchanged and in order, to the native symbol `_valr_flank_impl`.
flank_impl <- function(df,
                       genome,
                       both = 0,
                       left = 0,
                       right = 0,
                       fraction = FALSE,
                       stranded = FALSE,
                       trim = FALSE) {
  .Call(
    `_valr_flank_impl`,
    df, genome, both, left, right, fraction, stranded, trim
  )
}

# Thin R binding: hands `gdf` and `max_coords` straight to the native symbol
# `_valr_gcoverage_impl`.
gcoverage_impl <- function(gdf, max_coords) {
  .Call(
    `_valr_gcoverage_impl`,
    gdf, max_coords
  )
}
Expand All @@ -33,10 +29,6 @@ intersect_impl <- function(x, y, x_grp_indexes, y_grp_indexes, invert = FALSE, s
.Call(`_valr_intersect_impl`, x, y, x_grp_indexes, y_grp_indexes, invert, suffix_x, suffix_y)
}

# Thin R binding for the compiled window generator: every argument is
# forwarded, unchanged and in order, to the native symbol
# `_valr_makewindows_impl`.
makewindows_impl <- function(df,
                             win_size = 0L,
                             num_win = 0L,
                             step_size = 0L,
                             reverse = FALSE) {
  .Call(
    `_valr_makewindows_impl`,
    df, win_size, num_win, step_size, reverse
  )
}

# Thin R binding: forwards `gdf`, `max_dist` and `collapse` to the native
# symbol `_valr_merge_impl`.
merge_impl <- function(gdf,
                       max_dist = 0L,
                       collapse = TRUE) {
  .Call(
    `_valr_merge_impl`,
    gdf, max_dist, collapse
  )
}
Expand Down
8 changes: 8 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Generated by cpp11: do not edit by hand

# Thin R binding for the compiled flank routine. All eight arguments are
# required (no defaults) and are passed in order to `_valr_flank_impl`.
flank_impl <- function(df,
                       genome,
                       both,
                       left,
                       right,
                       fraction,
                       stranded,
                       trim) {
  .Call(
    `_valr_flank_impl`,
    df, genome, both, left, right, fraction, stranded, trim
  )
}

# Thin R binding for the compiled window generator. All five arguments are
# required (no defaults) and are passed in order to `_valr_makewindows_impl`.
makewindows_impl <- function(df,
                             win_size,
                             num_win,
                             step_size,
                             reverse) {
  .Call(
    `_valr_makewindows_impl`,
    df, win_size, num_win, step_size, reverse
  )
}

# Thin R binding: passes `genome`, `length`, `n` and `seed`, in that order, to
# the native symbol `_valr_random_impl`.
random_impl <- function(genome,
                        length,
                        n,
                        seed) {
  .Call(
    `_valr_random_impl`,
    genome, length, n, seed
  )
}
3 changes: 3 additions & 0 deletions inst/include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ DataFrame subset_dataframe(const DataFrame& df,
DataFrame subset_dataframe(const DataFrame& df,
IntegerVector indices) ;

// Overload of subset_dataframe() for cpp11 types: takes a read-only
// data_frame plus a std::vector<int> of indices (passed by value) and returns
// a writable::data_frame.
// NOTE(review): presumably selects the rows of `df` named by `indices`;
// whether the indices are 0- or 1-based is not visible in this header —
// confirm against the definition.
writable::data_frame subset_dataframe(const data_frame& df,
std::vector<int> indices) ;

inline DataFrame check_is_grouped(const DataFrame& x) {
bool is_grouped(Rf_inherits(x, "grouped_df")) ;

Expand Down
33 changes: 0 additions & 33 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// flank_impl
// Forward declaration of the C++ implementation that the wrapper below calls.
DataFrame flank_impl(DataFrame df, DataFrame genome, double both, double left, double right, bool fraction, bool stranded, bool trim);
// Entry point reached from R via .Call(`_valr_flank_impl`, ...): receives all
// eight arguments as SEXPs, converts each to its C++ type, calls flank_impl(),
// and returns the wrapped result.
RcppExport SEXP _valr_flank_impl(SEXP dfSEXP, SEXP genomeSEXP, SEXP bothSEXP, SEXP leftSEXP, SEXP rightSEXP, SEXP fractionSEXP, SEXP strandedSEXP, SEXP trimSEXP) {
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject rcpp_result_gen;
// NOTE(review): presumably brackets the call with R's RNG state
// save/restore — confirm against the Rcpp documentation.
Rcpp::RNGScope rcpp_rngScope_gen;
// One converter per argument, in the same order as the signature above.
Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP);
Rcpp::traits::input_parameter< DataFrame >::type genome(genomeSEXP);
Rcpp::traits::input_parameter< double >::type both(bothSEXP);
Rcpp::traits::input_parameter< double >::type left(leftSEXP);
Rcpp::traits::input_parameter< double >::type right(rightSEXP);
Rcpp::traits::input_parameter< bool >::type fraction(fractionSEXP);
Rcpp::traits::input_parameter< bool >::type stranded(strandedSEXP);
Rcpp::traits::input_parameter< bool >::type trim(trimSEXP);
// Run the implementation and wrap its DataFrame result for R.
rcpp_result_gen = Rcpp::wrap(flank_impl(df, genome, both, left, right, fraction, stranded, trim));
return rcpp_result_gen;
END_RCPP
}
// gcoverage_impl
DataFrame gcoverage_impl(const ValrGroupedDataFrame& gdf, const IntegerVector& max_coords);
RcppExport SEXP _valr_gcoverage_impl(SEXP gdfSEXP, SEXP max_coordsSEXP) {
Expand Down Expand Up @@ -126,21 +108,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// makewindows_impl
// Forward declaration of the C++ implementation that the wrapper below calls.
DataFrame makewindows_impl(DataFrame df, int win_size, int num_win, int step_size, bool reverse);
// Entry point reached from R via .Call(`_valr_makewindows_impl`, ...):
// receives all five arguments as SEXPs, converts each to its C++ type, calls
// makewindows_impl(), and returns the wrapped result.
RcppExport SEXP _valr_makewindows_impl(SEXP dfSEXP, SEXP win_sizeSEXP, SEXP num_winSEXP, SEXP step_sizeSEXP, SEXP reverseSEXP) {
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject rcpp_result_gen;
// NOTE(review): presumably brackets the call with R's RNG state
// save/restore — confirm against the Rcpp documentation.
Rcpp::RNGScope rcpp_rngScope_gen;
// One converter per argument, in the same order as the signature above.
Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP);
Rcpp::traits::input_parameter< int >::type win_size(win_sizeSEXP);
Rcpp::traits::input_parameter< int >::type num_win(num_winSEXP);
Rcpp::traits::input_parameter< int >::type step_size(step_sizeSEXP);
Rcpp::traits::input_parameter< bool >::type reverse(reverseSEXP);
// Run the implementation and wrap its DataFrame result for R.
rcpp_result_gen = Rcpp::wrap(makewindows_impl(df, win_size, num_win, step_size, reverse));
return rcpp_result_gen;
END_RCPP
}
// merge_impl
DataFrame merge_impl(ValrGroupedDataFrame gdf, int max_dist, bool collapse);
RcppExport SEXP _valr_merge_impl(SEXP gdfSEXP, SEXP max_distSEXP, SEXP collapseSEXP) {
Expand Down
16 changes: 14 additions & 2 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ using namespace Rcpp;
#include "cpp11/declarations.hpp"
#include <R_ext/Visibility.h>

// flank.cpp
// Forward declaration of the registered implementation.
writable::data_frame flank_impl(data_frame df, data_frame genome,
                                double both, double left, double right,
                                bool fraction, bool stranded, bool trim);

// C-linkage entry point used by .Call(`_valr_flank_impl`, ...): converts each
// SEXP argument to the C++ type flank_impl() expects, calls it, and converts
// the result back to a SEXP.
extern "C" SEXP _valr_flank_impl(SEXP df, SEXP genome, SEXP both, SEXP left,
                                 SEXP right, SEXP fraction, SEXP stranded,
                                 SEXP trim) {
  BEGIN_CPP11
    return cpp11::as_sexp(flank_impl(
        cpp11::as_cpp<cpp11::decay_t<data_frame>>(df),
        cpp11::as_cpp<cpp11::decay_t<data_frame>>(genome),
        cpp11::as_cpp<cpp11::decay_t<double>>(both),
        cpp11::as_cpp<cpp11::decay_t<double>>(left),
        cpp11::as_cpp<cpp11::decay_t<double>>(right),
        cpp11::as_cpp<cpp11::decay_t<bool>>(fraction),
        cpp11::as_cpp<cpp11::decay_t<bool>>(stranded),
        cpp11::as_cpp<cpp11::decay_t<bool>>(trim)));
  END_CPP11
}
// makewindows.cpp
// Forward declaration of the registered implementation.
writable::data_frame makewindows_impl(data_frame df, int win_size,
                                      int num_win, int step_size,
                                      bool reverse);

// C-linkage entry point used by .Call(`_valr_makewindows_impl`, ...): converts
// each SEXP argument to the C++ type makewindows_impl() expects, calls it, and
// converts the result back to a SEXP.
extern "C" SEXP _valr_makewindows_impl(SEXP df, SEXP win_size, SEXP num_win,
                                       SEXP step_size, SEXP reverse) {
  BEGIN_CPP11
    return cpp11::as_sexp(makewindows_impl(
        cpp11::as_cpp<cpp11::decay_t<data_frame>>(df),
        cpp11::as_cpp<cpp11::decay_t<int>>(win_size),
        cpp11::as_cpp<cpp11::decay_t<int>>(num_win),
        cpp11::as_cpp<cpp11::decay_t<int>>(step_size),
        cpp11::as_cpp<cpp11::decay_t<bool>>(reverse)));
  END_CPP11
}
// random.cpp
writable::data_frame random_impl(data_frame genome, double length, int n, int seed);
extern "C" SEXP _valr_random_impl(SEXP genome, SEXP length, SEXP n, SEXP seed) {
Expand All @@ -22,10 +36,8 @@ extern SEXP _valr_closest_impl(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_complement_impl(SEXP, SEXP);
extern SEXP _valr_coverage_impl(SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_dist_impl(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_flank_impl(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_gcoverage_impl(SEXP, SEXP);
extern SEXP _valr_intersect_impl(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_makewindows_impl(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _valr_merge_impl(SEXP, SEXP, SEXP);
extern SEXP _valr_partition_impl(SEXP, SEXP);
extern SEXP _valr_shuffle_impl(SEXP, SEXP, SEXP, SEXP, SEXP);
Expand Down
65 changes: 31 additions & 34 deletions src/flank.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// flank.cpp
//
// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy
// Copyright (C) 2016 - 2025 Jay Hesselberth and Kent Riemondy
//
// This file is part of valr.
//
Expand All @@ -9,10 +9,10 @@

#include "valr.h"

void check_coords(int start, int end,
int chrom_size, int idx, bool trim,
std::vector<int>& starts_out,
std::vector<int>& ends_out,
void check_coords(double start, double end,
double chrom_size, int idx, bool trim,
writable::doubles& starts_out,
writable::doubles& ends_out,
std::vector<int>& df_idx) {

if (start == end) return ;
Expand Down Expand Up @@ -42,26 +42,26 @@ void check_coords(int start, int end,
} // else trim
}

//[[Rcpp::export]]
DataFrame flank_impl(DataFrame df, DataFrame genome,
[[cpp11::register]]
writable::data_frame flank_impl(data_frame df, data_frame genome,
double both = 0, double left = 0, double right = 0,
bool fraction = false, bool stranded = false, bool trim = false) {

std::vector<std::string> chroms = df["chrom"];
IntegerVector starts = df["start"];
IntegerVector ends = df["end"];
strings chroms = df["chrom"];
doubles starts = df["start"];
doubles ends = df["end"];

// storage for outputs
std::vector<int> starts_out;
std::vector<int> ends_out;
writable::doubles starts_out;
writable::doubles ends_out;
std::vector<int> df_idx;

genome_map_t chrom_sizes = makeChromSizes(genome);
int lstart, lend, rstart, rend ;

if (stranded) {

std::vector<std::string> strands = df["strand"];
strings strand = df["strand"];

for (int i = 0; i < starts.size(); i++) {

Expand All @@ -70,7 +70,7 @@ DataFrame flank_impl(DataFrame df, DataFrame genome,
double size = end - start;

if (fraction) {
if (strands[i] == "+") {
if (strand[i] == "+") {
lstart = start - std::round(size * left);
lend = start;
rstart = end;
Expand All @@ -82,7 +82,7 @@ DataFrame flank_impl(DataFrame df, DataFrame genome,
rend = start ;
}
} else {
if (strands[i] == "+") {
if (strand[i] == "+") {
lstart = start - left;
lend = start;
rstart = end;
Expand All @@ -96,7 +96,7 @@ DataFrame flank_impl(DataFrame df, DataFrame genome,
}

std::string chrom = chroms[i];
int chrom_size = chrom_sizes[chrom];
double chrom_size = chrom_sizes[chrom];

// check and save coordinates
check_coords(lstart, lend, chrom_size, i, trim,
Expand Down Expand Up @@ -136,23 +136,20 @@ DataFrame flank_impl(DataFrame df, DataFrame genome,
}
}

DataFrame out = subset_dataframe(df, df_idx) ;

out["start"] = starts_out;
out["end"] = ends_out;
writable::data_frame subset = subset_dataframe(df, df_idx) ;

return out;
if (stranded) {
return writable::data_frame({
"chrom"_nm = subset["chrom"],
"start"_nm = starts_out,
"end"_nm = ends_out,
"strand"_nm = subset["strand"]
}) ;
} else {
return writable::data_frame({
"chrom"_nm = subset["chrom"],
"start"_nm = starts_out,
"end"_nm = ends_out
}) ;
}
}


/*** R
library(valr)
library(dplyr)
genome <- read_genome(valr_example('hg19.chrom.sizes.gz'))
x <- bed_random(genome)
devtools::load_all()
flank_impl(x, genome, both = 100) %>% as_data_frame()
*/

43 changes: 15 additions & 28 deletions src/makewindows.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// makewindows.cpp
//
// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy
// Copyright (C) 2016 - 2025 Jay Hesselberth and Kent Riemondy
//
// This file is part of valr.
//
Expand All @@ -9,17 +9,17 @@

#include "valr.h"

//[[Rcpp::export]]
DataFrame makewindows_impl(DataFrame df, int win_size = 0, int num_win = 0,
[[cpp11::register]]
writable::data_frame makewindows_impl(data_frame df, int win_size = 0, int num_win = 0,
int step_size = 0, bool reverse = false) {

NumericVector starts = df["start"] ;
NumericVector ends = df["end"] ;
doubles starts = df["start"] ;
doubles ends = df["end"] ;

std::vector<int> starts_out ;
std::vector<int> ends_out ;
writable::doubles starts_out ;
writable::doubles ends_out ;
std::vector<int> df_idxs ;
std::vector<int> win_ids;
writable::integers win_ids;

for (int i = 0; i < starts.size(); ++i) {

Expand Down Expand Up @@ -72,25 +72,12 @@ DataFrame makewindows_impl(DataFrame df, int win_size = 0, int num_win = 0,
}
}

DataFrame out = subset_dataframe(df, df_idxs) ;
writable::data_frame subset = subset_dataframe(df, df_idxs) ;

// replace original starts, ends, and .win_id
out["start"] = starts_out ;
out["end"] = ends_out ;
out[".win_id"] = win_ids ;

return out ;
return writable::data_frame({
"chrom"_nm = subset["chrom"],
"start"_nm = starts_out,
"end"_nm = ends_out,
".win_id"_nm = win_ids
}) ;
}

/*** R
library(valr)
library(dplyr)
x <- trbl_interval(
~chrom, ~start, ~end,
"chr1", 100, 200
)
bed_makewindows(x, win_size = 10)
bed_makewindows(x, win_size = 10, reverse = TRUE)
*/
Loading

0 comments on commit 0917569

Please sign in to comment.