Rolling back commits 103209 and 103207. mapCoords() and pmapCoords() are

defunct in BioC 3.2 but should not be removed yet (not before BioC 3.3). git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GenomicRanges@103332 bc3139a8-67e5-0310-9ffc-ced21a209358
RamsinghLab · May 1, 2015 · f85dd91 · f85dd91
1 parent 7ebacb1
commit f85dd91
Show file tree

Hide file tree

Showing 4 changed files with 238 additions and 1 deletion.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -7,7 +7,7 @@ Description: The ability to efficiently represent and manipulate genomic
 	intervals. Specialized containers for representing and manipulating
 	short alignments against a reference genome are defined in the
 	GenomicAlignments package.
-Version: 1.21.6
+Version: 1.21.7
 Author: P. Aboyoun, H. Pages and M. Lawrence
 Maintainer: Bioconductor Package Maintainer <[email protected]>
 biocViews: Genetics, Infrastructure, Sequencing, Annotation, Coverage,
@@ -50,6 +50,7 @@ Collate: utils.R
 	findOverlaps-methods.R
 	findOverlaps-GIntervalTree-methods.R
 	nearest-methods.R
+	mapCoords-methods.R
 	absoluteRanges.R
 	tileGenome.R
 	tile-methods.R

diff --git a/NAMESPACE b/NAMESPACE
@@ -86,6 +86,7 @@ exportMethods(
     punion, pintersect, psetdiff, pgap,
     findOverlaps, countOverlaps, overlapsAny, subsetByOverlaps,
     precede, follow, nearest, distance, distanceToNearest,
+    mapCoords, pmapCoords,
     tile,
     subset,
     subjectHits, queryHits,

diff --git a/R/mapCoords-methods.R b/R/mapCoords-methods.R
@@ -0,0 +1,117 @@
+### =========================================================================
+### 'mapCoords' and 'pmapCoords' methods
+### -------------------------------------------------------------------------
+###
+
+### Generics are in IRanges.
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Helpers
+###
+
+### Order the ranges inside each element of a GRangesList by transcription.
+###
+### 'x' is a GRangesList; 'ignore.strand' is a single logical.
+### Returns a GRangesList with the same shape as 'x' whose elements are sorted
+### by start position. This differs from sort() in that, unless
+### 'ignore.strand' is TRUE, "-" strand elements are returned highest value
+### to lowest. The strand of an element is read from its first range (after
+### sorting by start). The within-element index each range had in 'x' is
+### stored in the 'unordered' metadata column of the unlisted ranges.
+.orderElementsByTranscription <- function(x, ignore.strand) {
+    ## Within-element index of every range before reordering. seq_len() is
+    ## used rather than 1:n so that an empty list element contributes no
+    ## index at all (1:0 would contribute c(1, 0) and misalign every index
+    ## that follows); lapply() keeps the result a list whatever the lengths.
+    original <- unlist(lapply(elementLengths(x), seq_len), use.names=FALSE)
+    ## order by position
+    gr <- unlist(x, use.names = FALSE)
+    idx <- order(togroup(x), start(gr))
+    gr <- gr[idx]
+    part <- PartitioningByWidth(x)
+    ## handle zero-width ranges: empty list elements have no first/last range
+    nonEmpty <- width(part) != 0L
+    pstart <- start(part)[nonEmpty]
+    pend <- end(part)[nonEmpty]
+
+    ## NOTE(review): mseq(from, to) is presumably the concatenation of the
+    ## sequences from[i]:to[i] -- confirm against S4Vectors.
+    if (ignore.strand) {
+        ord <- S4Vectors:::mseq(pstart, pend)
+    } else {
+        ## walk "-" strand elements from their last range to their first
+        neg <- strand(gr)[pstart] == "-"
+        ord <- S4Vectors:::mseq(ifelse(neg, pend, pstart),
+                                ifelse(neg, pstart, pend))
+    }
+    res <- relist(gr[ord], x)
+    ## compose both permutations to recover the original inner index
+    res@unlistData$unordered <- original[idx[ord]]
+    res
+}
+
+### 'x' is an IntegerList or NumericList
+### Returns a numeric vector parallel to unlist(x): for each value, the sum of
+### the values that precede it within the same list element (so the first
+### value of every list element maps to 0).
+.listCumsumShifted <- function(x) {
+    cs <- unlist(cumsum(x), use.names=FALSE)
+    ## nothing to shift; keeps the result parallel to unlist(x)
+    if (length(cs) == 0L)
+        return(cs)
+    shifted <- c(0L, head(cs, -1))
+    ## Reset at the first value of each list element. Empty list elements are
+    ## skipped: the start of a zero-width partition points at the next
+    ## element's first value or, for a trailing empty element, one past the
+    ## end of 'shifted', which would silently lengthen the result.
+    part <- PartitioningByWidth(elementLengths(x))
+    shifted[start(part)[width(part) != 0L]] <- 0L
+    shifted
+}
+
+### Workhorse translating the ranges in 'from' into coordinates relative to
+### the start of the outer list elements of 'to'.
+###
+### 'from' holds the ranges to map and 'to' is a list of ranges (see
+### .orderElementsByTranscription). 'ignore.strand' and 'elt.hits' are single
+### logicals; when 'p' is TRUE only hits between the i-th range of 'from' and
+### the i-th list element of 'to' are kept. '...' is not used.
+### Returns a GRanges with one range per hit and metadata columns 'fromHits',
+### 'toHits' and, when 'elt.hits' is TRUE, 'eltHits'.
+.mapCoords <- function(from, to, ..., ignore.strand, elt.hits, p=FALSE) {
+    if (ignore.strand)
+        strand(to) <- "*"
+
+    ## put the ranges of every list element of 'to' in transcription order
+    to <- .orderElementsByTranscription(to, ignore.strand=ignore.strand)
+    flat <- unlist(to, use.names = FALSE)
+    outerIdx <- togroup(to)
+
+    ## only ranges of 'from' lying entirely within a range of 'to' are mapped
+    hits <- findOverlaps(from, flat, type="within",
+                         ignore.strand=ignore.strand)
+    if (p) {
+        ## parallel mode: the query index must equal the outer list index
+        isParallel <- queryHits(hits) == outerIdx[subjectHits(hits)]
+        hits <- hits[isParallel]
+    }
+
+    fromIdx <- queryHits(hits)
+    flatIdx <- subjectHits(hits)
+    local <- ranges(from)[fromIdx]
+    container <- ranges(flat)[flatIdx]
+
+    ## position relative to the start of the individual range that was hit
+    if (ignore.strand) {
+        local <- shift(local, - start(container))
+    } else {
+        onMinus <- as.vector(strand(flat)[flatIdx] == "-")
+        onPlus <- !onMinus
+        local[onPlus] <- shift(local[onPlus], - start(container)[onPlus])
+        ## on the "-" strand the offset is measured from the end of the range
+        local[onMinus] <- IRanges(end(container)[onMinus] - end(local)[onMinus],
+                                  width=width(local)[onMinus])
+    }
+
+    ## position relative to the start of the concatenated ranges of the outer
+    ## list element (e.g., transcript-level), 1-based
+    offsets <- .listCumsumShifted(width(to))
+    mapped <- shift(local, 1L + offsets[flatIdx])
+
+    toIdx <- outerIdx[flatIdx]
+    hitCols <- if (elt.hits) {
+        DataFrame(fromHits=fromIdx, toHits=toIdx,
+                  eltHits=mcols(flat)$unordered[flatIdx])
+    } else {
+        DataFrame(fromHits=fromIdx, toHits=toIdx)
+    }
+
+    GRanges(seqnames(flat)[flatIdx], mapped, strand = strand(flat)[flatIdx],
+            hitCols)
+}
+
+### mapCoords:
+
+### Pieces of the message passed to .Defunct() by every 'mapCoords' method
+### below; wmsg() is applied to them at call time.
+.msg <- c("'mapCoords' is defunct. ",
+          "Use 'mapToTranscripts' from the GenomicFeatures package ",
+          "or 'mapToAlignments' from the GenomicAlignments package ",
+          "instead.")
+
+### GenomicRanges mapped through a GRangesList: defunct, arguments are ignored.
+setMethod("mapCoords", signature("GenomicRanges", "GRangesList"),
+    function(from, to, ..., ignore.strand=TRUE, elt.hits=FALSE)
+    {
+        .Defunct(msg=wmsg(.msg))
+    }
+)
+
+### GenomicRanges mapped through a GenomicRanges: defunct, arguments are
+### ignored.
+setMethod("mapCoords", signature("GenomicRanges", "GenomicRanges"),
+    function(from, to, ..., ignore.strand=TRUE, elt.hits=FALSE)
+    {
+        .Defunct(msg=wmsg(.msg))
+    }
+)
+
+### pmapCoords:
+
+### GenomicRanges mapped in parallel through a GRangesList: defunct, arguments
+### are ignored and the replacement functions are named in the message.
+setMethod("pmapCoords", signature("GenomicRanges", "GRangesList"),
+    function(from, to, ..., ignore.strand=TRUE, elt.hits=FALSE)
+    {
+        defunctText <- c(
+            "'pmapCoords' is defunct. ",
+            "Use 'pmapToTranscripts' from the GenomicFeatures package ",
+            "or 'pmapToAlignments' from the GenomicAlignments package ",
+            "instead."
+        )
+        .Defunct(msg=wmsg(defunctText))
+    }
+)
diff --git a/man/mapCoords-methods.Rd b/man/mapCoords-methods.Rd
@@ -0,0 +1,118 @@
+\name{mapCoords-methods}
+
+\alias{mapCoords}
+\alias{mapCoords-methods}
+\alias{mapCoords,GenomicRanges,GRangesList-method}
+\alias{mapCoords,GenomicRanges,GenomicRanges-method}
+\alias{pmapCoords}
+\alias{pmapCoords,GenomicRanges,GRangesList-method}
+
+
+\title{Mapping ranges between sequences}
+
+\description{
+  These functions are defunct. Use 
+  \code{\link[GenomicFeatures]{mapToTranscripts}} from the
+  \pkg{GenomicFeatures} package or
+  \code{\link[GenomicAlignments]{mapToAlignments}} from the
+  \pkg{GenomicAlignments} package instead.
+
+  A method for translating a set of input ranges through a
+  \link{GRangesList} object. Returns a \linkS4class{GenomicRanges}
+  object.
+
+  The generics for \code{mapCoords} and \code{pmapCoords} are defined in the
+  \pkg{IRanges} package. A method for translating a set of input ranges through
+  a \link[GenomicAlignments]{GAlignments} object is defined in the
+  \pkg{GenomicAlignments} package.
+}
+
+\usage{
+\S4method{mapCoords}{GenomicRanges,GRangesList}(from, to, ..., 
+          ignore.strand = TRUE, elt.hits = FALSE)
+
+\S4method{pmapCoords}{GenomicRanges,GRangesList}(from, to, ..., 
+          ignore.strand = TRUE, elt.hits = FALSE)
+}
+
+\arguments{
+  \item{from}{The input ranges to map, usually a \code{\linkS4class{GRanges}}.}
+
+  \item{to}{The alignment between the sequences in \code{from} and the
+    sequences in the result, usually a \code{\linkS4class{GRangesList}}.}
+
+  \item{ignore.strand}{\code{logical}; When TRUE strand is ignored in 
+    overlap operations.}
+
+  \item{elt.hits}{\code{logical}; When TRUE, the output includes a metadata 
+    column, eltHits, with indices of the inner list elements of \code{to}
+    hit by \code{from}. Useful for identifying elements of \code{to} hit
+    by \code{from}. See examples.}
+
+  \item{\dots}{Arguments passed to other methods. Currently not used.}
+}
+
+\value{
+  A \code{GRanges} object of mapped coordinates with matching data as metadata
+  columns \code{fromHits} and \code{toHits}.  The ranges in the output
+  \code{GRanges} are positions relative to the outer list element of \code{to};
+  all individual list elements are concatenated and counting starts at the 5' or
+  3' end depending on strand.
+
+  Matching data are the result of calling \code{findOverlaps} with type `within`
+  on  ranges in \code{from} (the query) and the ranges in \code{to} (the
+  subject).  In the case of \code{mapCoords} matching can be many-to-one or
+  one-to-many; one row is reported for each match. For \code{pmapCoords}
+  matching is one-to-one as the i-th element in \code{from} is only mapped to
+  the i-th element in \code{to}.
+
+  When \code{elt.hits} is TRUE, the \code{eltHits} metadata column includes 
+  the index of inner list elements in \code{to} hit by \code{from}. In
+  some applications it may be useful to identify the exact list element that
+  was overlapped. These elements can be extracted with the combination of
+  \code{toHits} (outer list index) and \code{eltHits} (inner list index). 
+}
+
+\details{
+  DEFUNCT! Use \code{\link[GenomicFeatures]{mapToTranscripts}} from the
+  \pkg{GenomicFeatures} package or
+  \code{\link[GenomicAlignments]{mapToAlignments}} from the
+  \pkg{GenomicAlignments} package instead.
+
+  Each element in \code{to} is taken to represent an alignment of a sequence on
+  a genome. The typical case is a set of transcript models, as might be obtained
+  via \code{GenomicFeatures::exonsBy}.  Each outer list element of the
+  GRangesList represents a transcript while each individual element is an
+  exon in the transcript.
+
+  \code{mapCoords} and \code{pmapCoords} translate the ranges in \code{from}
+  relative to the transcript start (i.e., start of all ranges in \code{to}). The
+  widths of the individual elements (exons in this example) are concatenated and
+  counting starts at the 5' or 3' end depending on strand. Translated
+  coordinates are only reported for ranges in \code{from} that fall completely
+  `within` ranges in \code{to}.
+
+  The transcript-centric coordinates are useful, for example, when
+  predicting coding consequences of changes to the genomic sequence.
+
+  \code{mapCoords} maps the i-th element in \code{from} to each element in
+  \code{to} resulting in a many-to-many mapping. In contrast, \code{pmapCoords}
+  treats the two inputs as parallel vectors and maps the i-th element of
+  \code{from} to the i-th element of \code{to} returning a maximum of one result
+  per input element.
+}
+
+\seealso{
+  \itemize{
+    \item{The generic \link[IRanges]{mapCoords-methods} in the IRanges package.}
+    \item{Additional methods in the GenomicAlignments package 
+          \link[GenomicAlignments]{mapCoords-methods}.}
+  }
+}
+
+\examples{
+## DEFUNCT! See ?mapToTranscripts in the GenomicFeatures package and
+## ?mapToAlignments in the GenomicAlignments package.
+}
+
+\author{M. Lawrence and V. Obenchain \email{vobencha@fhcrc.org}}