Skip to content

Commit

Permalink
new functionality for mapping assays to states
Browse files Browse the repository at this point in the history
  • Loading branch information
ttriche committed May 6, 2022
1 parent fdd63f2 commit 665f193
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 5 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
Package: chromophobe
Type: Package
Title: Explore genomic state models, predictions, sample compositions & overlaps
Version: 0.99.503
Date: 2022-05-03
Version: 0.99.505
Date: 2022-05-05
Author: Tim Triche, Jr. <[email protected]>
Maintainer: Tim Triche, Jr. <[email protected]>
Description: Import, simplify, and compare ChromHMM and related segmentations
Depends: rtracklayer
Imports: BiocParallel,
GenomicFeatures,
RaggedExperiment,
matrixStats,
segmenter,
ggplot2
Suggests: seqHMM,
nullranges,
RaggedExperiment,
excluderanges,
nullranges,
Gviz
License: GPL
RoxygenNote: 7.1.2
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Generated by roxygen2: do not edit by hand

export(GenomicSegmentation)
export(GenomicSegmentationList)
export(addRoadmapColors)
export(aggregateStates)
export(asRaggedExperiment)
export(collapseAt)
export(colorHMM)
export(compressAndExportHMM)
export(donutPlot)
export(genomeFrac)
export(getConditions)
Expand All @@ -21,6 +24,7 @@ export(stackedPlot)
import(BiocParallel)
import(GenomeInfoDb)
import(GenomicRanges)
import(RaggedExperiment)
import(ggplot2)
import(matrixStats)
import(rtracklayer)
28 changes: 27 additions & 1 deletion R/AllClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,33 @@ setValidity("scChromHMM", function(object) TRUE )
setClass("GenomicSegmentationList",
representation(trackLines = "List"),
contains = "List")
setValidity("GenomicSegmentationList", function(object) TRUE )
setValidity("GenomicSegmentationList", function(object) TRUE)


#' Like GRangesList, but with a trackLine slot, similar to UCSCData
#'
#' And also a friendly constructor for such things
#'
#' @param grl a GRangesList, with names that become trackLine names
#' @param ... additional arguments, currently ignored
#'
#' @return a GenomicSegmentationList object
#'
#' @import GenomicRanges
#' @import rtracklayer
#'
#' @export
GenomicSegmentationList <- function(grl, ...) {

lst <- lapply(grl, as, "GenomicSegmentation")
for (i in names(grl)) trackName(lst[[i]]) <- i

# add a validity method to match length(gsl@trackLines) and length(gsl)
trackLines <- List(lapply(lst, slot, "trackLine"))
gsl <- new("GenomicSegmentationList", trackLines=trackLines)
stop("GenomicSegmentationList coercion is incompletely implemented :-/")

}


# somewhat of a placeholder class for now
Expand Down
89 changes: 89 additions & 0 deletions R/asRaggedExperiment.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#' Take a set of segmentations and make them into a RaggedExperiment.
#'
#' This seems pretty banal, and in some respects it is, but there are
#' some fiddly bits to computing on segmentations, and this function
#' attempts to ease them. For example, it simply isn't possible to use
#' sparseAssay or disjoinAssay with character, integer, or factor matrices.
#' If all states are represented in all segmentations, this reduces to
#' generating and keeping a state-mapping key in metadata(), then referring
#' to it on the way out of the RaggedExperiment (usually this will employ
#' disjoinSummarizedExperiment to map a bunch of regions across a bunch of
#' segmentation models). Yet another possibility is that each segmentation
#' has a probability for each possible state, and a collection of segmentations
#' would be better stored as a tensor (stack of arrays). This function attempts
#' to finesse the most painful parts of the user experience (based on mine).
#' Irritating problems like missing states can make life difficult, so this
#' function deals with them up-front by enumerating states, then keying them.
#'
#' The function will probably be wrapped by a coercion before too much longer.
#'
#' @param segs segmentations, usually a GRangesList or GenomicSegmentations
#' @param scHMM scChromHMM-like posterior probabilities? (different tricks)
#' @param keycol mcols column to find the states that comprise a key ("name")
#' @param to what to call this keycol to in the RaggedExperiment? ("state")
#'
#' @return a RaggedExperiment RE, with `key` and `yek` in metadata(RE)
#'
#' @details The `states` method uses `key` on RE assays.
#' metadata(RE)$yek, as its name implies, reverses `key`.
#'
#'
#' @examples
#'
#' # some simplified chrY tracks
#' data(simpleY, package="chromophobe")
#' asRaggedExperiment(simpleY)
#'
#' @seealso states
#'
#' @import RaggedExperiment
#'
#' @export
asRaggedExperiment <- function(segs, scHMM=FALSE, keycol="name", to="state") {

if (scHMM) stop("scChromHMM support is not yet complete.")
if (elementType(segs) %in% c("GRanges", "GenomicSegmentation")) {
key <- levels(factor(mcols(unlist(segs))[, keycol]))
} else {
key <- levels(factor(mcols(segs)[, keycol]))
}
yek <- seq_along(key)
names(yek) <- key

RE <- RaggedExperiment(lapply(segs, .encode, yek=yek, keycol=keycol, to=to))
metadata(RE)$key <- key
metadata(RE)$yek <- yek
return(RE)

}


# helper fn
.encode <- function(seg, yek, keycol="name", to="state") {

mcols(seg)[, keycol] <- as.numeric(yek[mcols(seg)[, keycol]])
mcols(seg) <- mcols(seg)[, keycol, drop=FALSE]
names(mcols(seg)) <- to
return(seg)

}


# helper fn for testing
.decode <- function(asy, key) {

res <- apply(asy, 2, function(x) key[x])
rownames(res) <- rownames(asy)
return(res)

}


# helper fn for testing
.recode <- function(asy, yek) {

res <- apply(asy, 2, function(x) yek[x])
rownames(res) <- rownames(asy)
return(res)

}
35 changes: 35 additions & 0 deletions R/compressAndExportHMM.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' Reduce a (usually simplified) segmentation, export to BED, compress, index.
#'
#' @param HMM a GenomicSegmentation or GRanges object
#' @param name a name to include as a trackLine in the BED file
#' @param filename the output (BED) file path (default: ./name.genome.bed)
#'
#' @import rtracklayer
#'
#' @examples
#'
#' data(chr19_HMM, package="chromophobe")
#' data(remc18state, package="chromophobe")
#'
#' simpler <- remc18state
#' simpler$SIMPLE <- sub("(Promoter|Enhancer)", "Active", simpler$SIMPLE)
#' simpler[simpler$SIMPLE == "Active", "RGBSIMPLE"] <- "255,0,0" # Red
#' simpler$SIMPLE <- sub("(Transcribed|Het_Rpt_Qui)", "Other", simpler$SIMPLE)
#' simpler[simpler$SIMPLE == "Other", "RGBSIMPLE"] <- "255,255,255" # White
#'
#' tmpdir <- tempdir()
#' setwd(tmpdir)
#' simplerHMM <- simplifyHMM(chr19_HMM, cols=simpler)
#' compressAndExportHMM(simplerHMM, name="chr19_HMM_simple",
#' filename=file.path(tmpdir, "chr19_HMM.simple.mm10.bed"))
#' list.files(tmpdir)
#'
#' @export
compressAndExportHMM <- function(HMM, name, filename=NULL) {

trackGenome <- unique(genome(HMM))
trackLine <- new("TrackLine", name=name)
if (is.null(filename)) filename <- paste(name, trackGenome, bed, sep=".")
export(aggregateStates(HMM), filename, index=TRUE)

}
Binary file added data/simpleY.rda
Binary file not shown.
19 changes: 19 additions & 0 deletions man/GenomicSegmentationList.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions man/asRaggedExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions man/compressAndExportHMM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 665f193

Please sign in to comment.