From 2ba791443855defe3f9c56c78bf91781367c613b Mon Sep 17 00:00:00 2001 From: Jay Hesselberth Date: Wed, 8 Jan 2025 07:03:41 -0700 Subject: [PATCH] Use cpp11bigwig for `read_bigwig()` (#425) * Use cpp11bigwig for `read_bigwig()` * Update news --- DESCRIPTION | 1 + NEWS.md | 43 ++++++++++++++++++---------------- R/read_bed.r | 42 ++++++++------------------------- man/read_bigwig.Rd | 25 +++++++------------- tests/testthat/test_read_bed.r | 5 ++-- 5 files changed, 45 insertions(+), 71 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b2aca895..d2db423e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Depends: Imports: broom, cli, + cpp11bigwig, dplyr (>= 0.8.0), ggplot2, lifecycle, diff --git a/NEWS.md b/NEWS.md index b97a807b..3e2040a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,20 +1,23 @@ # valr (development version) -* `read_bigwig()` and `read_gtf()` were deprecated. The rtracklayer package used -for this functionality is no longer a dependency of valr due to errors from +* `read_bigwig()` now uses cpp11bigwig on CRAN. The `set_strand` param was removed to be + more consistent with expected bigWig contents. + +* `read_gtf()` was deprecated. The rtracklayer package used +for this functionality is no longer a dependency of valr due to errors from CRAN AddressSantizer checks of the UCSC c-library code vendored in rtracklayer. * valr now depends on R >= 4.0.0. # valr 0.8.2 -* Address NOTE on CRAN about Rd link targets. +* Address NOTE on CRAN about Rd link targets. -* Change maintainer email address. +* Change maintainer email address. # valr 0.8.1 -* Make vdiffr dependency optional during package testing. +* Make vdiffr dependency optional during package testing. # valr 0.8.0 @@ -26,9 +29,9 @@ CRAN AddressSantizer checks of the UCSC c-library code vendored in rtracklayer. # valr 0.6.8 -* `bed_closest()` now reports all x intervals, even when there are no closest y intervals (e.g. when there is no matching chromosome in y intervals). These intervals are returned populated with `NA` for `.overlap`, `.dist` and y interval locations. +* `bed_closest()` now reports all x intervals, even when there are no closest y intervals (e.g. when there is no matching chromosome in y intervals). These intervals are returned populated with `NA` for `.overlap`, `.dist` and y interval locations. -* Reimplemented `bed_closest()` to use binary search rather than an interval tree search. The closest y interval can be missed with the previous search strategy in high depth interval trees. +* Reimplemented `bed_closest()` to use binary search rather than an interval tree search. The closest y interval can be missed with the previous search strategy in high depth interval trees. * Fix off by one error when using `max_dist` argument in `bed_cluster()` (#401). @@ -36,7 +39,7 @@ CRAN AddressSantizer checks of the UCSC c-library code vendored in rtracklayer. * Removed `SystemRequirements` from DESCRIPTION to eliminate a NOTE on CRAN. -* `bed_coverage()` now reports intervals from `x` with no matching group in `y` (#395). +* `bed_coverage()` now reports intervals from `x` with no matching group in `y` (#395). # valr 0.6.6 @@ -49,15 +52,15 @@ CRAN AddressSantizer checks of the UCSC c-library code vendored in rtracklayer. # valr 0.6.5 -* Handle `max_dist` for first intervals in `bed_cluster()` (#388) +* Handle `max_dist` for first intervals in `bed_cluster()` (#388) # valr 0.6.4 -* Fixed intron score numbering error in `create_introns` (#377 @sheridar) +* Fixed intron score numbering error in `create_introns` (#377 @sheridar) -* Fixed bug in handling of list inputs for `bed_intersect()`(#380 @sheridar) +* Fixed bug in handling of list inputs for `bed_intersect()`(#380 @sheridar) -* Added `read_bigwig` and `read_gtf` functions to import data into valr compatible tibbles (#379) +* Added `read_bigwig` and `read_gtf` functions to import data into valr compatible tibbles (#379) * Kent Riemondy is now maintainer. @@ -69,7 +72,7 @@ CRAN AddressSantizer checks of the UCSC c-library code vendored in rtracklayer. ## Minor changes -* `RMariaDB` has replaced the deprecated `RMySQL` package as the database backend. +* `RMariaDB` has replaced the deprecated `RMySQL` package as the database backend. * valr now imports Rcpp, which should have always been the case, but was masked by its Import by readr, which recently dropped use of Rcpp. @@ -84,9 +87,9 @@ but was masked by its Import by readr, which recently dropped use of Rcpp. ## Major changes -* `trbl_interval()` and `trbl_genome()` custom `tibble` subclasses have been deemed unnecessary and have been removed from the package. +* `trbl_interval()` and `trbl_genome()` custom `tibble` subclasses have been deemed unnecessary and have been removed from the package. -* coercing `GRanges` to a `valr` compatible data.frame now uses the `gr_to_bed()` function rather than `as.trbl_interal()` methods. +* coercing `GRanges` to a `valr` compatible data.frame now uses the `gr_to_bed()` function rather than `as.trbl_interal()` methods. ## Minor changes @@ -94,7 +97,7 @@ but was masked by its Import by readr, which recently dropped use of Rcpp. * The `sort_by` argument of `bed_random()` has been changed to `sorted`, and will now by default use `bed_sort()` to sort the output, rather than rely on naming the sorting columns. Sorting can -be suppressed by using `sorted = FALSE`. +be suppressed by using `sorted = FALSE`. * `bed_sort()` now uses base R sorting with the `radix` method for increased speed. (#353) @@ -108,13 +111,13 @@ be suppressed by using `sorted = FALSE`. # valr 0.5.0 -## Major changes +## Major changes -* Internal `Rcpp` functions have been reorganized to remove all dependencies on `dplyr` C++ functions. +* Internal `Rcpp` functions have been reorganized to remove all dependencies on `dplyr` C++ functions. ## Minor changes -* Due to internal refactoring of Rcpp functions, only data.frames containing Numeric, Logical, Integer, Character, and List column types are supported. Columns containing Raw, Complex, or other R classes are not supported and will issue an error. +* Due to internal refactoring of Rcpp functions, only data.frames containing Numeric, Logical, Integer, Character, and List column types are supported. Columns containing Raw, Complex, or other R classes are not supported and will issue an error. * Factors are now disallowed from grouping variables in multiset operations to avoid sort order discrepancies, and compatibility with factor handling in `dplyr` v.0.8.0. Factors will now be internally type-converted to character and a warning is issued. @@ -199,7 +202,7 @@ be suppressed by using `sorted = FALSE`. ## Minor changes * intervals returned from `bed_random()` are sorted by `chrom` and `start` by default. - + ## Bug fixes * Merge intervals in `bed_jaccard()` and use numeric values for calculation (fixes #204). diff --git a/R/read_bed.r b/R/read_bed.r index 371e0a2c..e9d27856 100644 --- a/R/read_bed.r +++ b/R/read_bed.r @@ -174,44 +174,22 @@ bed12_coltypes <- list( ) -#' Import and convert a bigwig file into a valr compatible tbl +#' Read a bigwig file into a valr compatible tbl #' -#' @description -#' `r lifecycle::badge("deprecated")` -#' -#' This function will output a 5 column tibble with -#' zero-based chrom, start, end, score, and strand columns. +#' This function will output a 4 column tibble with +#' zero-based chrom, start, end, value columns. #' #' @param path path to bigWig file -#' @param set_strand strand to add to output (defaults to "+") -#' -#' @note This functions uses \code{rtracklayer} to import bigwigs which -#' has unstable support for the windows platform and therefore may error -#' for windows users (particularly for 32 bit window users). +#' @param ... params for `cpp11bigwig::read_bigwig()` #' #' @examples -#' \dontrun{ -#' bw <- read_bigwig(valr_example("hg19.dnase1.bw")) -#' head(bw) -#' } +#' read_bigwig(valr_example("hg19.dnase1.bw")) +#' +#' read_bigwig(valr_example("hg19.dnase1.bw"), as = "GRanges") +#' #' @export -read_bigwig <- function(path, set_strand = "+") { - lifecycle::deprecate_stop( - when = "0.8.3", - what = "read_bigwig()", - details = c( - x = paste0( - "read_bigwig() was removed because rtracklayer does not pass ", - "CRAN AddressSantizer checks of the UCSC C-library code vendored ", - "in rtracklayer." - ), - i = paste0( - "use `bigWigToBedGraph` to convert bw to bedGraph,", - " and then `read_bedgraph()`. Alternatively use ", - "`rtracklayer::import()` then `gr_to_bed()`." - ) - ) - ) +read_bigwig <- function(path, ...) { + cpp11bigwig::read_bigwig(path, ...) } #' Import and convert a GTF/GFF file into a valr compatible bed tbl format diff --git a/man/read_bigwig.Rd b/man/read_bigwig.Rd index 069fc782..a1722c04 100644 --- a/man/read_bigwig.Rd +++ b/man/read_bigwig.Rd @@ -2,29 +2,22 @@ % Please edit documentation in R/read_bed.r \name{read_bigwig} \alias{read_bigwig} -\title{Import and convert a bigwig file into a valr compatible tbl} +\title{Read a bigwig file into a valr compatible tbl} \usage{ -read_bigwig(path, set_strand = "+") +read_bigwig(path, ...) } \arguments{ \item{path}{path to bigWig file} -\item{set_strand}{strand to add to output (defaults to "+")} +\item{...}{params for \code{cpp11bigwig::read_bigwig()}} } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} - -This function will output a 5 column tibble with -zero-based chrom, start, end, score, and strand columns. -} -\note{ -This functions uses \code{rtracklayer} to import bigwigs which -has unstable support for the windows platform and therefore may error -for windows users (particularly for 32 bit window users). +This function will output a 4 column tibble with +zero-based chrom, start, end, value columns. } \examples{ -\dontrun{ - bw <- read_bigwig(valr_example("hg19.dnase1.bw")) - head(bw) -} +read_bigwig(valr_example("hg19.dnase1.bw")) + +read_bigwig(valr_example("hg19.dnase1.bw"), as = "GRanges") + } diff --git a/tests/testthat/test_read_bed.r b/tests/testthat/test_read_bed.r index 7df22ae7..1fb30164 100644 --- a/tests/testthat/test_read_bed.r +++ b/tests/testthat/test_read_bed.r @@ -45,9 +45,8 @@ test_that("read broadPeak", { }) test_that("read bigwig", { - skip_on_os("windows") - expect_error(read_bigwig(bigwig_path)) - # expect_equal(ncol(x), 5) + x <- read_bigwig(bigwig_path) + expect_equal(ncol(x), 4) })