From f85dd91d7d8eb0d20d1e6102f4c1dbd0e02d3ced Mon Sep 17 00:00:00 2001 From: "hpages@fhcrc.org" Date: Fri, 1 May 2015 01:12:01 +0000 Subject: [PATCH] Rolling back commits 103209 and 103207. mapCoords() and pmapCoords() are defunct in BioC 3.2 but should not be removed yet (not before BioC 3.3). git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GenomicRanges@103332 bc3139a8-67e5-0310-9ffc-ced21a209358 --- DESCRIPTION | 3 +- NAMESPACE | 1 + R/mapCoords-methods.R | 117 ++++++++++++++++++++++++++++++++++++++ man/mapCoords-methods.Rd | 118 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 R/mapCoords-methods.R create mode 100644 man/mapCoords-methods.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 483336d..e14836d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Description: The ability to efficiently represent and manipulate genomic intervals. Specialized containers for representing and manipulating short alignments against a reference genome are defined in the GenomicAlignments package. -Version: 1.21.6 +Version: 1.21.7 Author: P. Aboyoun, H. Pages and M. 
Lawrence Maintainer: Bioconductor Package Maintainer biocViews: Genetics, Infrastructure, Sequencing, Annotation, Coverage, @@ -50,6 +50,7 @@ Collate: utils.R findOverlaps-methods.R findOverlaps-GIntervalTree-methods.R nearest-methods.R + mapCoords-methods.R absoluteRanges.R tileGenome.R tile-methods.R diff --git a/NAMESPACE b/NAMESPACE index 12ddb9a..15ebcd9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ exportMethods( punion, pintersect, psetdiff, pgap, findOverlaps, countOverlaps, overlapsAny, subsetByOverlaps, precede, follow, nearest, distance, distanceToNearest, + mapCoords, pmapCoords, tile, subset, subjectHits, queryHits, diff --git a/R/mapCoords-methods.R b/R/mapCoords-methods.R new file mode 100644 index 0000000..06f8635 --- /dev/null +++ b/R/mapCoords-methods.R @@ -0,0 +1,117 @@ +### ========================================================================= +### 'mapCoords' and 'pmapCoords' methods +### ------------------------------------------------------------------------- +### + +### Generics are in IRanges. + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Helpers +### + +### 'x' is a GRangesList +### Returns a GRangesList with sorted elements. This method differs from +### sort() in that "-" strand elements are returned highest value to lowest. 
+.orderElementsByTranscription <- function(x, ignore.strand) { + original <- unlist(sapply(elementLengths(x), function(xx) 1:xx), + use.names=FALSE) + ## order by position + gr <- unlist(x, use.names = FALSE) + idx <- order(togroup(x), start(gr)) + gr <- gr[idx] + part <- PartitioningByWidth(x) + ## handle zero-width ranges + pstart <- start(part)[width(part) != 0L] + pend <- end(part)[width(part) != 0L] + + if (ignore.strand) { + ord <- S4Vectors:::mseq(pstart, pend) + } else { + neg <- strand(gr)[pstart] == "-" + ord <- S4Vectors:::mseq(ifelse(neg, pend, pstart), + ifelse(neg, pstart, pend)) + } + res <- relist(gr[ord], x) + res@unlistData$unordered <- original[idx[ord]] + res +} + +### 'x' is an IntegerList or NumericList +### Returns a numeric vector of cumulative sums within list elements. +.listCumsumShifted <- function(x) { + cs <- unlist(cumsum(x), use.names=FALSE) + shifted <- c(0L, head(cs, -1)) + shifted[start(PartitioningByWidth(elementLengths(x)))] <- 0L + shifted +} + +.mapCoords <- function(from, to, ..., ignore.strand, elt.hits, p=FALSE) { + if (ignore.strand) + strand(to) <- "*" + + ## sort elements of 'to' by chrom, position and strand + to <- .orderElementsByTranscription(to, ignore.strand=ignore.strand) + gr <- unlist(to, use.names = FALSE) + + ## overlaps + ol <- findOverlaps(from, gr, type="within", ignore.strand=ignore.strand) + if (p) { + ith_hits <- queryHits(ol) == togroup(to)[subjectHits(ol)] + ol <- ol[ith_hits] + } + + sHits <- subjectHits(ol) + qHits <- queryHits(ol) + eltPosition <- ranges(from)[qHits] + bounds <- ranges(gr)[sHits] + + ## location wrt start of individual list elements + if (ignore.strand) { + eltPosition <- shift(eltPosition, - start(bounds)) + } else { + neg <- as.vector(strand(gr)[sHits] == "-") + eltPosition[!neg] <- shift(eltPosition[!neg], - start(bounds)[!neg]) + eltPosition[neg] <- IRanges(end(bounds)[neg] - end(eltPosition)[neg], + width=width(eltPosition)[neg]) + } + ## location wrt start of combined list 
elements (e.g., transcript-level) + shifted <- .listCumsumShifted(width(to)) + cumPosition <- shift(eltPosition, 1L + shifted[sHits]) + + toInd <- togroup(to)[sHits] + if (elt.hits) + mcols <- DataFrame(fromHits=qHits, toHits=toInd, + eltHits=mcols(gr)$unordered[subjectHits(ol)]) + else mcols <- DataFrame(fromHits=qHits, toHits=toInd) + + GRanges(seqnames(gr)[sHits], cumPosition, strand = strand(gr[sHits]), mcols) +} + +### mapCoords: + +.msg <- c("'mapCoords' is defunct. ", + "Use 'mapToTranscripts' from the GenomicFeatures package ", + "or 'mapToAlignments' from the GenomicAlignments package ", + "instead.") + +setMethod("mapCoords", c("GenomicRanges", "GRangesList"), + function(from, to, ..., ignore.strand=TRUE, elt.hits=FALSE) + .Defunct(msg=wmsg(.msg)) +) + +setMethod("mapCoords", c("GenomicRanges", "GenomicRanges"), + function(from, to, ..., ignore.strand=TRUE, elt.hits=FALSE) + .Defunct(msg=wmsg(.msg)) +) + +### pmapCoords: + +setMethod("pmapCoords", c("GenomicRanges", "GRangesList"), + function(from, to, ..., ignore.strand=TRUE, elt.hits=FALSE) +{ + msg <- c("'pmapCoords' is defunct. ", + "Use 'pmapToTranscripts' from the GenomicFeatures package ", + "or 'pmapToAlignments' from the GenomicAlignments package ", + "instead.") + .Defunct(msg=wmsg(msg)) +}) diff --git a/man/mapCoords-methods.Rd b/man/mapCoords-methods.Rd new file mode 100644 index 0000000..e4f09b4 --- /dev/null +++ b/man/mapCoords-methods.Rd @@ -0,0 +1,118 @@ +\name{mapCoords-methods} + +\alias{mapCoords} +\alias{mapCoords-methods} +\alias{mapCoords,GenomicRanges,GRangesList-method} +\alias{mapCoords,GenomicRanges,GenomicRanges-method} +\alias{pmapCoords} +\alias{pmapCoords,GenomicRanges,GRangesList-method} + + +\title{Mapping ranges between sequences} + +\description{ + These functions are defunct. 
Use + \code{\link[GenomicFeatures]{mapToTranscripts}} from the + \pkg{GenomicFeatures} package or + \code{\link[GenomicAlignments]{mapToAlignments}} from the + \pkg{GenomicAlignments} package instead. + + A method for translating a set of input ranges through a + \link{GRangesList} object. Returns a \linkS4class{GenomicRanges} + object. + + The generics for \code{mapCoords} and \code{pmapCoords} are defined in the + \pkg{IRanges} package. A method for translating a set of input ranges through + a \link[GenomicAlignments]{GAlignments} object is defined in the + \pkg{GenomicAlignments} package. +} + +\usage{ +\S4method{mapCoords}{GenomicRanges,GRangesList}(from, to, ..., + ignore.strand = TRUE, elt.hits = FALSE) + +\S4method{pmapCoords}{GenomicRanges,GRangesList}(from, to, ..., + ignore.strand = TRUE, elt.hits = FALSE) +} + +\arguments{ + \item{from}{The input ranges to map, usually a \code{\linkS4class{GRanges}}.} + + \item{to}{The alignment between the sequences in \code{from} and the + sequences in the result, usually a \code{\linkS4class{GRangesList}}.} + + \item{ignore.strand}{\code{logical}; When TRUE strand is ignored in + overlap operations.} + + \item{elt.hits}{\code{logical}; When TRUE, the output includes a metadata + column, eltHits, with indices of the inner list elements of \code{to} + hit by \code{from}. Useful for identifying elements of \code{to} hit + by \code{from}. See examples.} + + \item{\dots}{Arguments passed to other methods. Currently not used.} +} + +\value{ + A \code{GRanges} object of mapped coordinates with matching data as metadata + columns \code{fromHits} and \code{toHits}. The ranges in the output + \code{GRanges} are positions relative to the outer list element of \code{to}; + all individual list elements are concatenated and counting starts at the 5' or + 3' end depending on strand. 
+ + Matching data are the result of calling \code{findOverlaps} with type `within` + on ranges in \code{from} (the query) and the ranges in \code{to} (the + subject). In the case of \code{mapCoords} matching can be many-to-one or + one-to-many; one row is reported for each match. For \code{pmapCoords} + matching is one-to-one as the i-th element in \code{from} is only mapped to + the i-th element in \code{to}. + + When \code{elt.hits} is TRUE, the \code{eltHits} metadata column includes + the index of inner list elements in \code{to} hit by \code{from}. In + some applications it may be useful to identify the exact list element that + was overlapped. These elements can be extracted with the combination of + \code{toHits} (outer list index) and \code{eltHits} (inner list index). +} + +\details{ + DEFUNCT! Use \code{\link[GenomicFeatures]{mapToTranscripts}} from the + \pkg{GenomicFeatures} package or + \code{\link[GenomicAlignments]{mapToAlignments}} from the + \pkg{GenomicAlignments} package instead. + + Each element in \code{to} is taken to represent an alignment of a sequence on + a genome. The typical case is a set of transcript models, as might be obtained + via \code{GenomicFeatures::exonsBy}. Each outer list element of the + GRangesList represents a transcript while each individual element is an + exon in the transcript. + + \code{mapCoords} and \code{pmapCoords} translate the ranges in \code{from} + relative to the transcript start (i.e., start of all ranges in \code{to}). The + widths of the individual elements (exons in this example) are concatenated and + counting starts at the 5' or 3' end depending on strand. Translated + coordinates are only reported for ranges in \code{from} that fall completely + `within` ranges in \code{to}. + + The transcript-centric coordinates are useful, for example, when + predicting coding consequences of changes to the genomic sequence. 
+ + \code{mapCoords} maps the i-th element in \code{from} to each element in + \code{to} resulting in a many-to-many mapping. In contrast, \code{pmapCoords} + treats the two inputs as parallel vectors and maps the i-th element of + \code{from} to the i-th element of \code{to} returning a maximum of one result + per input element. +} + +\seealso{ + \itemize{ + \item{The generic \link[IRanges]{mapCoords-methods} in the IRanges package.} + \item{Additional methods in the GenomicAlignments package + \link[GenomicAlignments]{mapCoords-methods}.} + } +} + +\examples{ +## DEFUNCT! See ?mapToTranscripts in the GenomicFeatures package and +## ?mapToAlignments in the GenomicAlignments package. +} + +\author{M. Lawrence and V. Obenchain \url{vobencha@fhcrc.org}}