From ad4e708027aba989f3afd50fffcc76b2bec084ac Mon Sep 17 00:00:00 2001 From: "hpages@fhcrc.org" Date: Tue, 20 May 2014 17:29:53 +0000 Subject: [PATCH] improve unit test for "trim" method for GenomicRanges objects git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GenomicRanges@90538 bc3139a8-67e5-0310-9ffc-ced21a209358 --- DESCRIPTION | 2 +- R/GenomicRanges-class.R | 46 +++++++------- R/intra-range-methods.R | 4 +- R/setops-methods.R | 10 +-- inst/unitTests/test_intra-range-methods.R | 77 ++++++++++++----------- 5 files changed, 74 insertions(+), 65 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d098f5f..5642bd0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Author: P. Aboyoun, H. Pages and M. Lawrence Maintainer: Bioconductor Package Maintainer biocViews: Genetics, Infrastructure, Sequencing, Annotation, Coverage Depends: R (>= 2.10), methods, BiocGenerics (>= 0.7.7), - S4Vectors (>= 0.0.5), IRanges (>= 1.99.8), GenomeInfoDb (>= 1.1.3) + S4Vectors (>= 0.0.5), IRanges (>= 1.99.14), GenomeInfoDb (>= 1.1.3) Imports: methods, utils, stats, BiocGenerics, IRanges, XVector LinkingTo: S4Vectors, IRanges, XVector (>= 0.5.5) Suggests: AnnotationDbi (>= 1.21.1), AnnotationHub, diff --git a/R/GenomicRanges-class.R b/R/GenomicRanges-class.R index 99e6893..2bf83b7 100644 --- a/R/GenomicRanges-class.R +++ b/R/GenomicRanges-class.R @@ -21,27 +21,6 @@ setClassUnion("GenomicRangesORmissing", c("GenomicRanges", "missing")) ### update(x) and clone(x) are defined. -### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -### Non-exported helper function. -### -### Returns index of out-of-bound ranges located on non-circular sequences -### whose length is not NA. Works on a GenomicRanges or GAlignments object. -### - -trimIndex <- function(x) -{ - if (length(x) == 0L) - return(integer(0)) - x_seqnames_id <- as.integer(seqnames(x)) - x_seqlengths <- unname(seqlengths(x)) - seqlevel_is_circ <- unname(isCircular(x)) %in% TRUE - seqlength_is_na <- is.na(x_seqlengths) - seqlevel_has_bounds <- !(seqlevel_is_circ | seqlength_is_na) - which(seqlevel_has_bounds[x_seqnames_id] & - (start(x) < 1L | end(x) > x_seqlengths[x_seqnames_id])) -} - - ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Getters. ### @@ -52,6 +31,7 @@ setMethod("names", "GenomicRanges", function(x) names(ranges(x))) #setMethod("constraint", "GenomicRanges", function(x) x@constraint) + ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Extra column slots (implemented by subclasses) ### @@ -74,6 +54,28 @@ setMethod("fixedColumnNames", "GenomicRanges", function(x) { colnames(as.data.frame(new(class(x)))) }) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Non-exported helper function. +### +### Returns index of out-of-bound ranges located on non-circular sequences +### whose length is not NA. Works on a GenomicRanges or GAlignments object. +### + +getTrimIndex <- function(x) +{ + if (length(x) == 0L) + return(integer(0)) + x_seqnames_id <- as.integer(seqnames(x)) + x_seqlengths <- unname(seqlengths(x)) + seqlevel_is_circ <- unname(isCircular(x)) %in% TRUE + seqlength_is_na <- is.na(x_seqlengths) + seqlevel_has_bounds <- !(seqlevel_is_circ | seqlength_is_na) + which(seqlevel_has_bounds[x_seqnames_id] & + (start(x) < 1L | end(x) > x_seqlengths[x_seqnames_id])) +} + + ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity. ### @@ -164,7 +166,7 @@ valid.GenomicRanges.seqinfo <- function(x) ## TODO: This should be checked by validity method for Seqinfo objects. if (any(x_seqlengths < 0L, na.rm=TRUE)) return("'seqlengths(x)' contains negative values") - idx <- trimIndex(x) + idx <- getTrimIndex(x) if (length(idx) != 0L) warning("'ranges' contains values outside of sequence bounds. ", "See ?trim to subset ranges.") diff --git a/R/intra-range-methods.R b/R/intra-range-methods.R index 945e9dc..fe4b5c9 100644 --- a/R/intra-range-methods.R +++ b/R/intra-range-methods.R @@ -231,8 +231,8 @@ setMethod("trim", "GenomicRanges", function(x, use.names=TRUE) { ## We trim only out-of-bound ranges on non-circular sequences whose - ## length is not NA. See trimIndex() in GenomicRanges-class.R. - idx <- trimIndex(x) + ## length is not NA. See getTrimIndex() in GenomicRanges-class.R. + idx <- getTrimIndex(x) if (length(idx) == 0L) return(x) new_ranges <- ranges(x) diff --git a/R/setops-methods.R b/R/setops-methods.R index d67fb13..439e5a7 100644 --- a/R/setops-methods.R +++ b/R/setops-methods.R @@ -6,13 +6,13 @@ ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -### 2 non-exported low-level helper functions. +### 2 low-level helper functions. ### ### Both return a named integer vector where the names are guaranteed to be ### 'seqlevels(x)'. ### -minStartPerGRangesSequence <- function(x) +.minStartPerGRangesSequence <- function(x) { cil <- splitAsList(start(x), seqnames(x)) # CompressedIntegerList object ## The 4 lines below are equivalent to: @@ -31,7 +31,7 @@ minStartPerGRangesSequence <- function(x) ans } -maxEndPerGRangesSequence <- function(x) +.maxEndPerGRangesSequence <- function(x) { cil <- splitAsList(end(x), seqnames(x)) # CompressedIntegerList object ## The 4 lines below are equivalent to: @@ -83,7 +83,7 @@ setMethod("intersect", c("GRanges", "GRanges"), ## If the length of a sequence is unknown (NA), then we use ## the max end value found on that sequence in 'x' or 'y'. seqlengths[is.na(seqlengths)] <- - maxEndPerGRangesSequence(c(x, y))[is.na(seqlengths)] + .maxEndPerGRangesSequence(c(x, y))[is.na(seqlengths)] setdiff(x, gaps(y, end=seqlengths)) } ) @@ -104,7 +104,7 @@ setMethod("setdiff", c("GRanges", "GRanges"), ## If the length of a sequence is unknown (NA), then we use ## the max end value found on that sequence in 'x' or 'y'. seqlengths[is.na(seqlengths)] <- - maxEndPerGRangesSequence(c(x, y))[is.na(seqlengths)] + .maxEndPerGRangesSequence(c(x, y))[is.na(seqlengths)] gaps(union(gaps(x, end=seqlengths), y), end=seqlengths) } ) diff --git a/inst/unitTests/test_intra-range-methods.R b/inst/unitTests/test_intra-range-methods.R index daddc07..45d2976 100644 --- a/inst/unitTests/test_intra-range-methods.R +++ b/inst/unitTests/test_intra-range-methods.R @@ -79,6 +79,8 @@ test_GRangesList_shift <- function() test_GenomicRanges_flank <- function() { + checkIdentical(flank(GRanges(), 10), GRanges()) + gr_seqnames <- c("chr1", "chr2", "chr1", "chrM") gr_ranges <- IRanges(21:24, width=10) gr_strand <- strand(c("+", "-", "*", "-")) @@ -86,28 +88,28 @@ test_GenomicRanges_flank <- function() ## NO warning expected. S4Vectors:::errorIfWarning(current <- flank(gr, 10)) - checkTrue(S4Vectors:::errorIfWarning(validObject(current, complete=TRUE))) + checkTrue(S4Vectors:::errorIfWarning(validObject(current))) target_ranges <- IRanges(c(11, 32, 13, 34), width=10) target <- GRanges(gr_seqnames, target_ranges, gr_strand) checkIdentical(target, current) ## NO warning expected. S4Vectors:::errorIfWarning(current <- flank(gr, 10, start=FALSE)) - checkTrue(S4Vectors:::errorIfWarning(validObject(current, complete=TRUE))) + checkTrue(S4Vectors:::errorIfWarning(validObject(current))) target_ranges <- IRanges(c(31, 12, 33, 14), width=10) target <- GRanges(gr_seqnames, target_ranges, gr_strand) checkIdentical(target, current) ## NO warning expected. S4Vectors:::errorIfWarning(current <- flank(gr, 30)) - checkTrue(S4Vectors:::errorIfWarning(validObject(current, complete=TRUE))) + checkTrue(S4Vectors:::errorIfWarning(validObject(current))) target_ranges <- IRanges(c(-9, 32, -7, 34), width=30) target <- GRanges(gr_seqnames, target_ranges, gr_strand) checkIdentical(target, current) ## NO warning expected. S4Vectors:::errorIfWarning(current <- flank(gr, 30, start=FALSE)) - checkTrue(S4Vectors:::errorIfWarning(validObject(current, complete=TRUE))) + checkTrue(S4Vectors:::errorIfWarning(validObject(current))) target_ranges <- IRanges(c(31, -8, 33, -6), width=30) target <- GRanges(gr_seqnames, target_ranges, gr_strand) checkIdentical(target, current) @@ -121,9 +123,9 @@ test_GenomicRanges_flank <- function() suppressWarnings(current <- flank(gr, 10)) checkException(S4Vectors:::errorIfWarning( - validObject(current, complete=TRUE) + validObject(current) ), silent=TRUE) - checkTrue(suppressWarnings(validObject(current, complete=TRUE))) + checkTrue(suppressWarnings(validObject(current))) target_ranges <- IRanges(c(11, 32, 13, 34), width=10) checkIdentical(target_ranges, ranges(current)) @@ -132,7 +134,7 @@ test_GenomicRanges_flank <- function() ## NO warning expected. S4Vectors:::errorIfWarning(current <- flank(gr, 10)) - checkTrue(S4Vectors:::errorIfWarning(validObject(current, complete=TRUE))) + checkTrue(S4Vectors:::errorIfWarning(validObject(current))) target_ranges <- IRanges(c(11, 32, 13, 34), width=10) checkIdentical(target_ranges, ranges(current)) @@ -143,9 +145,9 @@ test_GenomicRanges_flank <- function() suppressWarnings(current <- flank(gr, 20)) checkException(S4Vectors:::errorIfWarning( - validObject(current, complete=TRUE) + validObject(current) ), silent=TRUE) - checkTrue(suppressWarnings(validObject(current, complete=TRUE))) + checkTrue(suppressWarnings(validObject(current))) target_ranges <- IRanges(c(1, 32, 3, 34), width=20) checkIdentical(target_ranges, ranges(current)) @@ -232,33 +234,38 @@ test_GenomicRanges_trim <- function() { checkIdentical(trim(GRanges()), GRanges()) - ## no seqlengths - gr <- make_test_GRanges() + gr_seqnames <- c("chr1", "chr2", "chr1", "chrM") + gr_ranges <- IRanges(0:3, width=30) + + ## NO warning expected. + S4Vectors:::errorIfWarning(gr <- GRanges(gr_seqnames, gr_ranges)) + checkTrue(S4Vectors:::errorIfWarning(validObject(gr))) checkIdentical(trim(gr), gr) - ## seqlengths, isCircular NA and FALSE - seqlengths(gr) <- c(10, NA, 20) - spos <- suppressWarnings(shift(gr, 5)) - tend <- end(trim(spos)) - checkIdentical(tend, c(10L, rep(15L, 3), 10L, 10L, rep(15L, 4))) - isCircular(gr)["chr1"] <- FALSE - spos <- suppressWarnings(shift(gr, 5)) - tend <- end(trim(spos)) - checkIdentical(tend, c(10L, rep(15L, 3), 10L, 10L, rep(15L, 4))) - - ## seqlengths, isCircular TRUE - gr <- make_test_GRanges() - seqlengths(gr) <- c(10, NA, 20) - isCircular(gr)["chr1"] <- TRUE - spos <- suppressWarnings(shift(gr, 5)) - tend <- end(trim(spos)) - checkIdentical(tend, end(spos)) - spos <- suppressWarnings(shift(gr, 15)) - tend <- end(trim(spos)) - checkIdentical(tend, c(rep(25L, 6), rep(20L, 4))) - isCircular(gr)["chr3"] <- TRUE - spos <- suppressWarnings(shift(gr, 15)) - tend <- end(trim(spos)) - checkIdentical(tend, end(spos)) + gr_seqlengths <- c(chr1=50, chr2=NA, chrM=NA) + + ## Warning expected. + checkException(S4Vectors:::errorIfWarning( + seqlengths(gr) <- gr_seqlengths + ), silent=TRUE) + suppressWarnings(seqlengths(gr) <- gr_seqlengths) + + checkException(S4Vectors:::errorIfWarning( + validObject(gr) + ), silent=TRUE) + checkTrue(suppressWarnings(validObject(gr))) + + gr <- trim(gr) + checkTrue(S4Vectors:::errorIfWarning(validObject(gr))) + target_ranges <- IRanges(c(1, 1, 2, 3), width=c(29, 30, 30, 30)) + checkIdentical(target_ranges, ranges(gr)) + + isCircular(gr) <- c(chr1=FALSE, chr2=FALSE, chrM=TRUE) + + ## NO warning expected. + gr_seqlengths <- c(chr1=50, chr2=NA, chrM=15) + S4Vectors:::errorIfWarning(seqlengths(gr) <- gr_seqlengths) + checkTrue(S4Vectors:::errorIfWarning(validObject(gr))) + checkIdentical(trim(gr), gr) }