diff --git a/NAMESPACE b/NAMESPACE index 28ac309..8a273de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method(changeUniprot,CytoSignal) -S3method(changeUniprot,dgCMatrix) +S3method(changeUniprot,matrix_like) S3method(findNNDT,CytoSignal) S3method(findNNDT,matrix) S3method(findNNGauEB,CytoSignal) @@ -14,13 +14,19 @@ S3method(inferSignif,CytoSignal) S3method(inferSignif,dgCMatrix) S3method(normCounts,CytoSignal) S3method(normCounts,dgCMatrix) +S3method(veloNicheLR,CytoSignal) +S3method(veloNicheLR,matrix_like) export(addIntrDB) +export(addVelo) export(changeUniprot) export(createCytoSignal) export(findNNDT) export(findNNGauEB) export(findNNRaw) export(graphNicheLR) +export(hex_bin) +export(hex_coord) +export(hex_pos) export(imputeNiche) export(inferEpsParams) export(inferSignif) @@ -35,8 +41,16 @@ export(showImp) export(showLog) export(showScore) export(showUnpt) +export(showVelo) export(suggestInterval) +export(veloNicheLR) exportClasses(CytoSignal) exportClasses(ImpData) exportClasses(lrScores) +exportClasses(lrVelo) exportMethods(show) +importFrom(RTriangle,pslg) +importFrom(RTriangle,triangulate) +importFrom(Rcpp,evalCpp) +importFrom(plyr,count) +useDynLib(cytosignal) diff --git a/R/analysis.r b/R/analysis.r index 2071288..648e3ef 100644 --- a/R/analysis.r +++ b/R/analysis.r @@ -188,7 +188,7 @@ findNNDT <- function( #' #' @param cells.loc A matrix of cells location #' @param weight.sum The sum of the weights -#' +#' @importFrom RTriangle triangulate pslg #' @return A list of neighbors #' @export findNNDT.matrix <- function( @@ -559,6 +559,13 @@ normCounts.CytoSignal <- function( # return(object) # } + +# set a new class union containing dgCMatrix and matrix +setClassUnion( + "matrix_like", + c("dgCMatrix", "matrix") +) + #' Subset the imputed data (intr.data) by the specified intr index #' #' @param object A Cytosignal object @@ -584,7 +591,7 @@ changeUniprot <- function( #' @return A sparse matrix #' @export #' -changeUniprot.dgCMatrix <- function( +changeUniprot.matrix_like <- function( dge.raw, gene_to_uniprot, # mode = "unpt", @@ -1204,3 +1211,200 @@ runPears.std <- function( return(object) } + + +#' Compute the LR velo for specific ligand-receptor imputation obj pairs +#' +#' @param object A Cytosignal object +#' @param lig.slot The ligand slot to use +#' @param recep.slot The receptor slot to use +#' @param intr.db.name The intr database name to use +#' @param nn.use The neighbor index as niche +#' +#' @return A Cytosignal object +#' @export +#' +veloNicheLR <- function( + object, + ... +) { + UseMethod(generic = 'veloNicheLR', object = object) +} + +#' Sub function for veloNicheLR, input a sparse matrix +#' +#' @param dge.lig A sparse matrix for ligand +#' @param dge.recep A sparse matrix for receptor +#' @param nb.id.fac A factor of neighbor indices +#' @param lig.fac A factor of ligand indices +#' @param recep.fac A factor of receptor indices +#' +#' @return A sparse matrix +#' @export +#' +veloNicheLR.matrix_like <- function( + dge.lig, + dge.recep, + velo.mtx, + nb.id.fac, + lig.fac, + recep.fac +){ + + ### cavaet: remember to convert the Uniprot ids to indices! + # convert nb fac + nb.id.fac = sort(nb.id.fac) + nb.index = facToIndex(nb.id.fac) + + if (max(as.integer(names(lig.fac))) > nrow(dge.lig)){ + stop("Intr index out of dge bounds.") + } + + lig.index = facToIndex(lig.fac) + recep.index = facToIndex(recep.fac) + + # compute velos + res.mtx = VelographNicheLR_cpp( + unname(as.matrix(dge.lig)), + unname(as.matrix(dge.recep)), + unname(as.matrix(velo.mtx)), + lig.index[[1]], lig.index[[2]], + recep.index[[1]], recep.index[[2]], + nb.index[[1]], nb.index[[2]] + ) + + dimnames(res.mtx) = list(colnames(dge.lig)[as.integer(levels(nb.id.fac))], levels(lig.fac)) + # dimnames(res.mtx) = list(colnames(dge.lig), levels(lig.fac)) + # res.mtx = Matrix(res.mtx, sparse = T) + + return(res.mtx) +} + + + +#' Sub function for veloNicheLR, input a CytoSignal object +#' +#' @param object A Cytosignal object +#' @param lig.slot The ligand slot to use +#' @param recep.slot The receptor slot to use +#' @param intr.db.name The intr database name to use +#' @param nn.use slot that the neighbor index should be taken from, by default is the same as +#' the recep.slot. For example, if velo.obj = GauEps-DT, then nn.use = "DT". +#' nn.use could also be a user-defind factor. +#' +#' @return A Cytosignal object +#' @export +veloNicheLR.CytoSignal <- function( + object, + lig.slot, + recep.slot, + intr.db.name, + tag = NULL +){ + if (!lig.slot %in% names(object@imputation)){ + stop("Ligand slot not found.") + } + + if (!recep.slot %in% names(object@imputation)){ + stop("Receptor slot not found.") + } + + message("Computing velos using ", intr.db.name, " database.") + message("Ligand: ", lig.slot, ", Receptor: ", recep.slot, ".") + + if (!intr.db.name %in% c("diff_dep", "cont_dep")) { + stop("intr.db.name must be either 'diff_dep' or 'cont_dep'.") + } + + if (is.null(tag)) { + tag <- paste0(lig.slot, "-", recep.slot) + } + + if (tag %in% names(object@lrvelo)) { + stop("Tag already exists.") + } + + object@lrvelo[["default"]] <- tag + + # if (is.null(slot.use)) { + # slot.use <- object@lrscore[["default"]] + # } + + # if (!slot.use %in% names(object@lrscore)) { + # stop("lrvelo slot not found.") + # } + + # if (is.null(nn.use)) { + # nn.use <- recep.slot + # } + + # if (is.character(nn.use)) { + # if (!nn.use %in% names(object@imputation)){ + # stop("Imputation slot not found.") + # } + # nb.id.fac <- object@imputation[[nn.use]]@nn.id + # } else if (is.factor(nn.use)) { + # if (length(nn.use) != ncol(object@imputation[[lig.slot]]@intr.data)) + # stop("nn.use must have the same length as the number of cells.") + # nb.id.fac <- nn.use + # } else { + # stop("nn.use must be either a factor or a character.") + # } + + dge.lig <- object@imputation[[lig.slot]]@intr.data + dge.recep <- object@imputation[[recep.slot]]@intr.data + + if (!all.equal(dim(dge.lig), dim(dge.recep))){ + stop("dge.lig and dge.recep must have the same dimension.") + } + + if (!all.equal(object@intr.valid[["symbols"]][[lig.slot]], + object@intr.valid[["symbols"]][[recep.slot]])){ + stop("Unpt symbols generated from imputations not equal!") + } + + # velo and dge should have the same cells and genes + use.cells <- intersect(colnames(dge.lig), colnames(object@velo[["velo.s"]])) + use.genes <- intersect(rownames(dge.lig), rownames(object@velo[["velo.s"]])) + + # infer new neighbor index since the cells are filtered + new.cells.loc <- object@cells.loc[use.cells, ] + velo.nn.list <- findNNDT.matrix(new.cells.loc) + + # dge and velo should have the same cells and genes + dge.lig <- dge.lig[use.genes, use.cells] + dge.recep <- dge.recep[use.genes, use.cells] + velo.s <- object@velo[["velo.s"]][use.genes, use.cells] + velo.u <- object@velo[["velo.u"]][use.genes, use.cells] + + message("Number of velo cells: ", ncol(dge.lig), " / ", ncol(object@imputation[[lig.slot]]@intr.data)) + message("Number of velo genes: ", nrow(dge.lig), " / ", nrow(object@imputation[[lig.slot]]@intr.data)) + + intr.db.list <- checkIntr(use.genes, object@intr.valid[[intr.db.name]]) + + res.mtx <- veloNicheLR.matrix_like(dge.lig, dge.recep, + (velo.s + velo.u), velo.nn.list[["id"]], + intr.db.list[["ligands"]], intr.db.list[["receptors"]]) + + lrvelo.obj <- new( + "lrVelo", + lig.slot = lig.slot, + recep.slot = recep.slot, + intr.slot = intr.db.name, + intr.list = intr.db.list, + dim.valid = list( + "cells" = use.cells, + "genes" = use.genes), + intr.velo = res.mtx, + nn.id = velo.nn.list[["id"]], + nn.dist = velo.nn.list[["dist"]], + log = list( + "Used slot" = c(lig.slot, recep.slot) + ) + ) + + object@lrvelo[[tag]] <- lrvelo.obj + + return(object) +} + diff --git a/R/objects.r b/R/objects.r index 5594b8c..d1e3b00 100644 --- a/R/objects.r +++ b/R/objects.r @@ -25,7 +25,8 @@ #' @rdname CytoSignal-class #' @aliases CytoSignal-class #' @exportClass CytoSignal - +#' @importFrom Rcpp evalCpp +#' @useDynLib cytosignal CytoSignal <- setClass( Class = "CytoSignal", @@ -36,6 +37,7 @@ CytoSignal <- setClass( imputation = "list", lrscore = "list", velo = "list", + lrvelo = "list", intr.valid = "list", parameters = "list", log = "list", @@ -43,6 +45,7 @@ CytoSignal <- setClass( ) ) + #' The ImpData Class #' #' The ImpData object is created from one ST dataset. User could choose a preferred imputation method @@ -104,7 +107,6 @@ ImpData <- setClass( #' @aliases lrScores-class #' @exportClass lrScores - lrScores <- setClass( Class = "lrScores", slots = c( @@ -120,6 +122,48 @@ lrScores <- setClass( ) + +#' The lrvelo Class +#' +#' The lrvelo object is created from one ST dataset. User could choose two imputation methods to calculate +#' the ligand-receptor scores. The class stores the index of ligand, receptor, and interaction database, inferred +#' lrvelo for each interaction, and the log of each main steps. +#' +#' The key slots used in the lrVelo object are described below. +#' +#' @slot lig.slot ligand database +#' @slot recep.slot receptor database +#' @slot intr.slot interaction database +#' @slot intr.list list of interaction database +#' @slot velo.s A matrix of spliced velo for each gene +#' @slot velo.u A matrix of unspliced velo for each gene +#' @slot velo.intr A sparse matrix of velo for each intr +#' @slot nn.id A factor of nearest neighbor id. Re-do findNN since the order of cells may change! +#' @slot nn.dist A factor of nearest neighbor distance. Re-do findNN since the order of cells may change! +#' @slot log Log of each main steps +#' +#' @name lrVelo-class +#' @rdname lrVelo-class +#' @aliases lrVelo-class +#' @exportClass lrVelo + +lrVelo <- setClass( + Class = "lrVelo", + slots = c( + lig.slot = "character", + recep.slot = "character", + intr.slot = "character", + intr.list = "list", + dim.valid = "list", + intr.velo = "matrix_like", + nn.id = "factor", + nn.dist = "factor", + log = "list" + ) +) + + + #' show method for CytoSignal #' #' @param object CytoSignal object @@ -226,6 +270,35 @@ showScore <- function(object, slot.use = NULL){ } } + +#' show method for lrVelo +#' +#' @param object CytoSignal object +#' @param slot.use slot to use +#' +#' @export +showVelo <- function(object, slot.use = NULL) { + if (is.null(slot.use)){ + slot.use <- object@lrvelo[["default"]] + } + + cat( + "Default: Velo data using: \n", + slot.use, "\n", + "Number of intrs in the mtx:", ncol(object@lrvelo[[slot.use]]@intr.velo), "\n", + "Processing Log: \n" + ) + + print(object@lrvelo[[slot.use]]@log) + + if (ncol(object@lrvelo[[slot.use]]@intr.velo) > 0){ + print(object@lrvelo[[slot.use]]@intr.velo[1:5,1:5]) + } else { + cat("No velo data available.\n") + } +} + + #' show all current logs #' #' @param object CytoSignal object @@ -243,6 +316,11 @@ showLog <- function(object){ cat("Score Log: \n") print(object@lrscore[[2]]@log) + + if (length(object@lrscore) > 1){ + cat("Score Log: \n") + print(object@lrscore[[2]]@log) + } } #' show method for cytosignal obj @@ -465,3 +543,44 @@ suggestInterval <- function(object) { return(nn.dist.min) } + + +#' Add velocity data to CytoSignal object +#' +#' @param object CytoSignal object +#' @param velo.s matrix of spliced velo +#' @param velo.u matrix of unspliced velo +#' +#' @export +#' +addVelo <- function( + object, + velo.s, + velo.u +) { + if (!is.matrix(velo.s) | !is.matrix(velo.u)) { + stop("velo.s and velo.u must be matrix.") + } + + # check whether dimnames are the same + if (!all.equal(rownames(velo.s), rownames(velo.u))) { + stop("velo.s and velo.u must have the same rownames.") + } + + if (!all.equal(colnames(velo.s), colnames(velo.u))) { + stop("velo.s and velo.u must have the same colnames.") + } + + velo.s.intr = changeUniprot.matrix_like(velo.s, object@intr.valid[["gene_to_uniprot"]]) + velo.u.intr = changeUniprot.matrix_like(velo.u, object@intr.valid[["gene_to_uniprot"]]) + + object@velo <- list( + "velo.s" = velo.s.intr[[1]], + "velo.u" = velo.u.intr[[1]] + ) + + object@intr.valid[["symbols"]][["velo"]] = velo.s.intr[[2]] + + return(object) +} + diff --git a/R/plotting.r b/R/plotting.r index f655b7c..4697313 100644 --- a/R/plotting.r +++ b/R/plotting.r @@ -268,3 +268,187 @@ plotSignif <- function(object, num.plot, res_dir, plot.details = T, slot.use = N +#' Plot the lrvelo results +#' +#' + + +plotVelo <- function( + object, + num.plot, + plot_dir, + plot.fmt, + slot.use = NULL, + use.rank = NULL, + plot.clusters = TRUE, + colors.list = NULL, + z.scaler = 0.03, + use.cex = 0.1, + use.shape = 16, + use_xbins = 15, + use_ybins = 15, + arrow.line.width = 0.6, + arrow.width = 0.06, + width = 3, + height = 3, + use.phi = 45, + use.theta = -17 +) { + if (is.null(slot.use)){ + slot.use = object@lrvelo[["default"]] + } + + velo.obj <- object@lrvelo[[slot.use]] + + if (is.null(colors.list)){ + levels.use <- levels(object@clusters) + colors.list = as.character(paletteer::paletteer_d("ggsci::default_igv", + n = length(levels.use))) + names(colors.list) = levels.use + } + + if (length(colors.list) != length(levels(object@clusters))){ + stop("The length of colors.list is not equal to the length of levels(clusters).\n") + } + + col.fac = object@clusters + levels(col.fac) = colors.list + + if (is.null(use.rank)){ + use.rank <- object@lrscore[["default"]] + use.res.list <- object@lrscore[[use.rank]]@res.list[["result.hq.pear"]] + } + + if (use.rank %in% names(object@lrscore)){ + use.res.list <- object@lrscore[[use.rank]]@res.list[["result.hq.pear"]] + } else{ + stop("The slot is not in the object@lrscore slot.\n") + } + + # intrs that are in the lrscore@result.list + # velo may lack some genes so the intrs may be different + + velo.intr.index <- which(names(use.res.list) %in% colnames(velo.obj@intr.velo)) + + if (!plot.fmt %in% c("png", "pdf")){ + stop("The plot.fmt is not supported.\n") + } + + cat("Plotting the results...\n") + + plot.list <- lapply(num.plot, function(i){ + cat("No.", i, ", ", sep = "") + intr.rank <- velo.intr.index[i] + use.intr = names(use.res.list)[intr.rank] + + # get value for the ranked #1 intr + plot.df = as.data.frame(object@cells.loc[velo.obj@dim.valid$cells, ]) + plot.df$velo = velo.obj@intr.velo[rownames(plot.df), use.intr] + + pt.df = plot.df[, c(1,2)] + pt.df$z = 0 + + pt.df$col = as.character(col.fac[rownames(pt.df)]) + x.scale = max(pt.df$x) - min(pt.df$x) + y.scale = max(pt.df$y) - min(pt.df$y) + + #------------------------------------- plotting df for arrows -------------------------------------# + # weight -> cell counts in each bin; var4 -> average velo in each bin + bin = hex_bin(plot.df$x, plot.df$y, var4=plot.df$velo, var4.to.color = T, xbins = use_xbins, ybins = use_ybins) + + arrows.df = as.data.frame(bin[, c(1,2,3)]) + colnames(arrows.df) = c("x", "y", "bin_velo") + + ars.pos = arrows.df[arrows.df$bin_velo > 0, ] + ars.neg = arrows.df[arrows.df$bin_velo < 0, ] + ars.zero = arrows.df[arrows.df$bin_velo == 0, ] + # use.scale = max(abs(arrows.df$bin_velo)) + z.scale = max(arrows.df$bin_velo) - min(arrows.df$bin_velo) + + # scale the length of each arrow by the maximum abs(bin_velo) + # overall, scale by each intr + + z.hold = z.scale*z.scaler # set the interval between arrows and points + + if (nrow(ars.pos) > 0){ + ars.pos$length = abs(ars.pos$bin_velo)/z.scale + ars.pos.plot.df = data.frame( + x0 = ars.pos$x, y0 = ars.pos$y, z0 = z.hold, + x1 = ars.pos$x, y1 = ars.pos$y, z1 = ars.pos$length+z.hold + ) + } + + if (nrow(ars.neg) > 0){ + ars.neg$length = abs(ars.neg$bin_velo)/z.scale + ars.neg.plot.df = data.frame( + x0 = ars.neg$x, y0 = ars.neg$y, z0 = ars.neg$length+z.hold, + x1 = ars.neg$x, y1 = ars.neg$y, z1 = z.hold + ) + } + + if (nrow(ars.zero) > 0) { + ars.zero.plot.df = data.frame( + x0 = ars.zero$x, y0 = ars.zero$y, z0 = z.hold, + col = "#f7f7f7" + ) + } + + intr.name = getIntrNames(use.intr, object@intr.valid$intr.index) + + if (plot.fmt == "png") { + png(paste0(plot_dir, "/", "Rank_", intr.rank, "_", intr.name, "_3d.png"), width = width, height = height, units = "in", res = 400) + } else if (plot.fmt == "pdf") { + pdf(paste0(plot_dir, "/", "Rank_", intr.rank, "_", intr.name, "_3d.pdf"), width = width, height = height) + } else { + stop("Plotting format not supported.\n") + } + + # cex: control size of points + plot3D::points3D(pt.df$x, pt.df$y, pt.df$z, + xlim = c(min(pt.df$x) - x.scale*0.025, max(pt.df$x) + x.scale*0.025), + ylim = c(min(pt.df$y) - y.scale*0.025, max(pt.df$y) + y.scale*0.025), + zlim = c(-0.01, 1.1), expand = 0.3, + theta = use.theta, phi = use.phi, d = 2, + colvar = NULL, col = pt.df$col, + colkey = FALSE, pch = use.shape, cex = use.cex, + main = "CytoSignalVelo", zlab = "velocity", + xlab = "", ylab = "" + ) + + # plot points with velo = 0 to be grey points + if (nrow(ars.zero) > 0) { + ars.zero.plot.df = data.frame( + x0 = ars.zero$x, y0 = ars.zero$y, z0 = z.hold + ) + plot3D::points3D(ars.zero.plot.df$x0, ars.zero.plot.df$y0, ars.zero.plot.df$z0, + colvar = NULL, col = "#f7f7f7", + colkey = FALSE, pch = 19, cex = use.cex, add = T) + } + + ### parameters for plotting arrows + ### lwd: line width; length: arrow edge length; angle: arrow angle + + # positive arrows + if (nrow(ars.pos) > 0 ){ + plot3D::arrows3D( + x0 = ars.pos.plot.df$x0, y0 = ars.pos.plot.df$y0, z0 = ars.pos.plot.df$z0, + x1 = ars.pos.plot.df$x1, y1 = ars.pos.plot.df$y1, z1 = ars.pos.plot.df$z1, + colvar = NULL, col = "#d73027", lwd = arrow.line.width, length = arrow.width, + clab = intr.name, d = 3, add = T + ) + } + + # negative arrows + if (nrow(ars.neg) > 0){ + plot3D::arrows3D( + x0 = ars.neg.plot.df$x0, y0 = ars.neg.plot.df$y0, z0 = ars.neg.plot.df$z0, + x1 = ars.neg.plot.df$x1, y1 = ars.neg.plot.df$y1, z1 = ars.neg.plot.df$z1, + colvar = NULL, col = "#3c24f1", lwd = arrow.line.width, length = arrow.width, + clab = intr.name, d = 3, add = T + ) + } + + dev.off() + + }) +} diff --git a/R/plotting_utility.r b/R/plotting_utility.r new file mode 100644 index 0000000..191742d --- /dev/null +++ b/R/plotting_utility.r @@ -0,0 +1,305 @@ +#' GGPLOT2 FUNCTIONALITY FOR MAPPING TO HEXAGON SIZE AND COLOUR AESTHETICS +#' by Robin Edwards, 2013 (geotheory.co.uk, @geotheory) +#' This has been adapted from the ggplot bin_hex.R script that underpins geom_hex, etc +#' (see https://github.com/hadley/densityvis/blob/master/R/bin-hex.r). +#' +#' These functions implement aesthetic mapping to hexagon size (area), in addition to the existing +#' colour-mapping functionality. The key change is the addition of a new fourth variable (var4) +#' to hex_bin(), which complements the inbuilt hexagon binning functionality. The 'frequency.to.area' +#' argument enables the default mappings of binned data to colour and var4 to size to be interchanged. +#' The hmin/hmax arguments [0,1] set area mapping constraints (hmax can exceed 1). +#' xlim/xlat enable hexagon tesselation to be constrained independently of data range. +#' There may be some bugs in the implementation. A legend for hexagon size has not been implemented. + +# require(ggplot2) +# require(plyr) + +#' Bin data into hexagons (2d). +#' +#' @param x a numeric vector of x positions +#' @param y a numeric vector of y positions +#' @param weight \code{NULL} or a numeric vector providing weights for each +#' observation, replace `counts`, mapped to color +#' @param var4 \code{NULL} or a numeric vector providing weights for each +#' observation, averaged within each bin, mapped to hex bin size +#' @param height height of each hexagon, if \code{NULL} computed from ybins +#' @param width width of each hexagon, if \code{NULL} computed from ybins +#' @param xbins number of horizontal bins, if \code{width} unspecified +#' @param ybins number of vertical bins, if \code{height} unspecified +#' @param na.rm If \code{TRUE} missing values will be silently removed, +#' otherwise they will be removed with a warning. +#' @export +#' @seealso \code{\link{hex_pos}} for algorithm that finds hexagon center +#' closest to each point and \code{\link{hex_coord}} that generates +#' coordinates of each hexagon. +#' @return A data frame with columns \code{x}, \code{y} and \code{freq}, +#' and attributes \code{width} and \code{height}. + +#' @importFrom plyr count +#' +#' @examples +#' plot(hex_bin(runif(1e4), runif(1e4))) +#' plot(hex_bin(rnorm(1e4), rnorm(1e4))) +#' +#' data(baseball, package = "plyr") +#' bin <- hex_bin(baseball$g, baseball$ab) +hex_bin <- function(x, y, weight = NULL, var4 = NULL, width = NULL, height = NULL, + xbins = 20, ybins = 20, var4.to.color = FALSE, na.rm = FALSE, + hmin = 0, hmax = 1, xlim = NULL, ylim = NULL, ...) { + if(hmax > 1) warning("hmax > 1 is likely to result in some hexagon overplotting") + + cleaned <- clean_xy(x, y, weight, var4, xlim=xlim, ylim=ylim) + + if (is.null(xlim)) xlim <- c(min(cleaned$x), max(cleaned$x)) + if (is.null(ylim)) ylim <- c(min(cleaned$y), max(cleaned$y)) + if (is.null(width)) width <- diff(xlim) / xbins + if (is.null(height)) height <- diff(ylim) / ybins + height <- height * sqrt(3) + + pos <- hex_pos(cleaned$x, cleaned$y, width, height) + cleaned$x <- pos[,1]; cleaned$y <- pos[,2] + + # bin values by hexagon + binned <- plyr::count(cleaned, c("x", "y"), "weight") + + #### var4: another variable to map to size, and will be averaged within each hex bin #### + var4_sum <- aggregate(cleaned$var4, by=list(cleaned$x, cleaned$y), FUN=mean) + names(var4_sum) = c('x','y','var4') + # cols must match order of binned + binned$var4 <- var4_sum$var4[match(paste(binned$x, binned$y), paste(var4_sum$x, var4_sum$y))] + + # swap the mappings of weight/var4 from color/size to size/color + names(binned) <- c('x','y','col','size') + if(var4.to.color) binned <- transform(binned, size=col, col=size) + + # 'size' field now definitely maps to hex area + if(!is.null(var4) & min(binned$size)<0) warning("size vector cannot include negative values") + + # scale size variable and min-max parameters from hexagon area to side + binned$size = hex_side(binned$size) + hmax_a = hex_side(hmax)/hex_side(1) + hmin_a = hex_side(hmin)/hex_side(1) + hrange = hmax_a - hmin_a + + # normalise and rescale to custom min-max parameters + binned$size <- binned$size * hrange / max(binned$size) + hmin_a + + structure( + binned, + width = width, + height = height, + class = c("bin_hex", "data.frame") + ) +} + + + +clean_xy <- function(x, y, weight=NULL, var4=NULL, na.rm=TRUE, xlim=NULL, ylim=NULL) { + # If !na.rm, remove missing values with a warning. + # Otherwise just remove them + missing <- !is.finite(x) | !is.finite(y) + nmissing <- sum(missing) + + if (na.rm && nmissing > 0) { + warning("Removing ", nmissing, " missing values") + } + + # Check weights, and throw out missing values and zero-weight observations + if (is.null(weight)) { + weight <- rep.int(1, length(x)) + } else { + weight[is.na(weight)] <- 0 + } + + # Check sizes, and throw out missing values and zero-weight observations + if (is.null(var4)) { + var4 <- rep.int(1, length(x)) + } else { + var4[is.na(var4)] <- 0 + } + +# ok <- !missing & weight > 0 & var4 > 0 + ok <- !missing + + # if (all(ok)) { data.frame(x = x, y = y, weight = weight, var4 = var4) + # } else { + # x <- x[ok] + # y <- y[ok] + # var4 <- var4[ok] + # weight <- weight[ok] + # } + + if (!all(ok)){ + x <- x[ok] + y <- y[ok] + var4 <- var4[ok] + weight <- weight[ok] + } + + out <- data.frame(x = x, y = y, weight = weight, var4 = var4) + + if (!is.null(xlim)){ + out <- out[out$x >= min(xlim) & out$x <= max(xlim), ] + } + + if (!is.null(ylim)){ + out <- out[out$y >= min(ylim) & out$y <= max(ylim), ] + } + + return(out) +} + + +# plot.bin_hex <- function(x, ...) { +# if (!require("scales")) { +# message("Scales package required for plotting 2d densities") +# return() +# } + +# if(all(x$col == x$col[1])) { # no variation in colour variable +# col <- rep("black", nrow(x)) +# } else col <- cscale(x$col, seq_gradient_pal(low = "grey70", high = "red")) + +# hexes <- hex_coord(x=x$x, y=x$y, width=attr(x, "width"), height=attr(x, "height"), +# size=x$size, ...) + +# plot(hexes[,1], hexes[,2], type = "n", ...) +# polygon(hexes, col = col, border = NA) +# } + + +# Binning algorithms are available for various lattices in dimensions 2-8 +# (Conway and Sloane 1982). The following subroutine is a fast FORTRAN +# implementation of hexagonal binning. The key observation is that hexagon +# centers fall on two staggered lattices whose cells are rectangles. Presuming +# the long side of the rectangle is in the y direction, dividing the y +# coordinates by square root (3) [SQRT(3)] makes the cells square. Thus the +# algorithm uses two lattices with square cells. The first lattice has points +# at the integers with [0, 0] as the lower left value. The second lattice is +# shifted so that the lower left value is at [.5 , .5]. The x and y vectors +# are scaled into [0, SIZE] and [0, SIZE / SQRT(3)], respectively. SIZE +# determines the portions of the lattices that are used. For each data point, +# binning consists of finding one candidate lattice point from each lattice +# and then selecting the nearest of the two candidates. + +#' Find centre of closest hexagon. +#' +#' @param x numeric x position +#' @param y numeric y position +#' @param width of hexagon +#' @param height of hexagon +#' @return matrix giving position of closest hexagon center +#' @keywords internal +#' @export +#' @examples +#' x <- runif(1e4) +#' y <- runif(1e4) +#' res <- hex_pos(x, y, 0.5, 0.5) +#' plot(x, y, type = "n") +#' segments(x, y, res[, 1], res[, 2], col = "grey80") +#' points(unique(res), pch = 20, cex = 2) + + +hex_pos <- function(x, y, width, height) { + height <- height / sqrt(3) + + minx <- min(x, na.rm = TRUE) + miny <- min(y, na.rm = TRUE) + + # Scale to [0, nrows/ncols] + sx <- (x - minx) / width + sy <- (y - miny) / height + + # Find closest center: [0, 0] or [0.5, 0.5]? + fx <- round(sx) + fy <- round(sy) + + dist_0 <- 3 * (sx - fx)^2 + (sy - fy)^2 + dist_1 <- 3 * (sx - fx + 0.5)^2 + (sy - fy + 0.5)^2 + dist_2 <- 3 * (sx - fx + 0.5)^2 + (sy - fy - 0.5)^2 + dist_3 <- 3 * (sx - fx - 0.5)^2 + (sy - fy + 0.5)^2 + dist_4 <- 3 * (sx - fx - 0.5)^2 + (sy - fy - 0.5)^2 + dist_smallest <- pmin(dist_0, dist_1, dist_2, dist_3, dist_4) + + x_offset <- rep(0, length(x)) + x_offset[dist_smallest == dist_1] <- +0.5 + x_offset[dist_smallest == dist_2] <- +0.5 + x_offset[dist_smallest == dist_3] <- -0.5 + x_offset[dist_smallest == dist_4] <- -0.5 + + y_offset <- rep(0, length(y)) + y_offset[dist_smallest == dist_1] <- +0.5 + y_offset[dist_smallest == dist_2] <- -0.5 + y_offset[dist_smallest == dist_3] <- +0.5 + y_offset[dist_smallest == dist_4] <- -0.5 + + # Transform back to original coordinates + cbind(x = (fx - x_offset) * width + minx, y = (fy - y_offset) * height + miny) +} + +#' Generate hexagon coordinates. +#' +#' Long axis is horizontal. Edges clock-wise from far-left, separated by +#' row of missing values. +#' +#' @param x horizontal position of center +#' @param y vertical position of center +#' @param width hex width +#' @param height hex height +#' @export +#' @keywords internal +#' @return A two column matrix with 7 times as many rows as input. +#' @examples +#' x <- runif(1000) +#' y <- runif(1000) +#' res <- unique(hex_pos(x, y, 0.5, 0.5)) +#' hexes <- hex_coord(res[, 1], res[, 2], 0.6, 0.5) +#' +#' hexes <- hex_coord(res[, 1], res[, 2], rnorm(1000,.5,.3), rnorm(1000,.5,.3)) +#' +#' plot(hexes, type = "n") +#' polygon(hexes) +#' points(res) + + +hex_coord <- function(x, y, width, height, size = 1) { + dx <- size * width / 6 + dy <- size * height / 2 / sqrt(3) + + hex_x <- rbind(x - 2 * dx, x - dx, x + dx, x + 2 * dx, x + dx, x - dx, NA) + hex_y <- rbind(y, y + dy, y + dy, y, y - dy, y - dy, NA) + + cbind(as.vector(hex_x), as.vector(hex_y)) +} + + +hex_coord_df <- function(x, y, width, height, size = 1) { + # like hex_coord but returns a dataframe of vertices grouped by an id variable + dx <- size * width / 6 + dy <- size * height / 2 / sqrt(3) + + hex_x <- rbind(x - 2 * dx, x - dx, x + dx, x + 2 * dx, x + dx, x - dx) + hex_y <- rbind(y, y + dy, y + dy, y, y - dy, y - dy) + id <- rep(1:length(x), each=6) + + data.frame(cbind(x=as.vector(hex_x), y=as.vector(hex_y), id)) +} + + +## Functions for calculating hexagon geometries + +# hexagon side from area +hex_side = function(area) sqrt(2 * area / (sqrt(3)*3)) + +# hexagon area from side (not used) +hex_area = function(side) side^2 * sqrt(3) * 3/2 + +# quick function for gg-plotting with 1 line (limited functionality) +qplothex = function(x, y, var4 = NULL, f.to.a = FALSE, ...){ + bin = hex_bin(x=x, y=y, var4=var4, frequency.to.area=f.to.a, ...) + hexes = hex_coord_df(x=bin$x, y=bin$y, width=attr(bin, "width"), height=attr(bin, "height"), size=bin$size) + hexes$col = rep(bin$col, each=6) + ggplot(hexes, aes(x=x, y=y)) + geom_polygon(aes(fill=col, group=id)) +} + + diff --git a/R/utility.r b/R/utility.r index 84829ef..a334dc7 100644 --- a/R/utility.r +++ b/R/utility.r @@ -340,3 +340,26 @@ dataImpKNN <- function( } +getIntrNames <- function(res.intr.list, inter.index){ + intr.names = sapply(seq_along(res.intr.list), function(i){ + pair.index = which(inter.index$id_cp_interaction == res.intr.list[i]) + + ligand.name = inter.index[pair.index, 4] + if (ligand.name == ""){ # if complex + ligand.name = inter.index[pair.index, 2] + } + ligand.name = gsub("_HUMAN", "", ligand.name) + + receptor.name = inter.index[pair.index, 5] + if (receptor.name == ""){ + receptor.name = inter.index[pair.index, 3] + } + receptor.name = gsub("_HUMAN", "", receptor.name) + + # generate interaction names + # intr.name = paste0(gsub("_HUMAN", "", ligand.name), "-", gsub("_HUMAN", "", receptor.name)) + intr.name = paste0(ligand.name, "-", receptor.name) + }) + + return(intr.names) +} diff --git a/man/addVelo.Rd b/man/addVelo.Rd new file mode 100644 index 0000000..8ce6153 --- /dev/null +++ b/man/addVelo.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.r +\name{addVelo} +\alias{addVelo} +\title{Add velocity data to CytoSignal object} +\usage{ +addVelo(object, velo.s, velo.u) +} +\arguments{ +\item{object}{CytoSignal object} + +\item{velo.s}{matrix of spliced velo} + +\item{velo.u}{matrix of unspliced velo} +} +\description{ +Add velocity data to CytoSignal object +} diff --git a/man/changeUniprot.dgCMatrix.Rd b/man/changeUniprot.matrix_like.Rd similarity index 74% rename from man/changeUniprot.dgCMatrix.Rd rename to man/changeUniprot.matrix_like.Rd index d0bfced..8cc85f1 100644 --- a/man/changeUniprot.dgCMatrix.Rd +++ b/man/changeUniprot.matrix_like.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/analysis.r -\name{changeUniprot.dgCMatrix} -\alias{changeUniprot.dgCMatrix} +\name{changeUniprot.matrix_like} +\alias{changeUniprot.matrix_like} \title{Sub function for changeUniprot, input a sparse matrix} \usage{ -\method{changeUniprot}{dgCMatrix}(dge.raw, gene_to_uniprot, verbose = T) +\method{changeUniprot}{matrix_like}(dge.raw, gene_to_uniprot, verbose = T) } \arguments{ \item{dge.raw}{A sparse matrix} diff --git a/man/hex_bin.Rd b/man/hex_bin.Rd new file mode 100644 index 0000000..fb9e4a5 --- /dev/null +++ b/man/hex_bin.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utility.r +\name{hex_bin} +\alias{hex_bin} +\title{GGPLOT2 FUNCTIONALITY FOR MAPPING TO HEXAGON SIZE AND COLOUR AESTHETICS +by Robin Edwards, 2013 (geotheory.co.uk, @geotheory) +This has been adapted from the ggplot bin_hex.R script that underpins geom_hex, etc +(see https://github.com/hadley/densityvis/blob/master/R/bin-hex.r).} +\usage{ +hex_bin( + x, + y, + weight = NULL, + var4 = NULL, + width = NULL, + height = NULL, + xbins = 20, + ybins = 20, + var4.to.color = FALSE, + na.rm = FALSE, + hmin = 0, + hmax = 1, + xlim = NULL, + ylim = NULL, + ... +) +} +\arguments{ +\item{x}{a numeric vector of x positions} + +\item{y}{a numeric vector of y positions} + +\item{weight}{\code{NULL} or a numeric vector providing weights for each +observation, replace \code{counts}, mapped to color} + +\item{var4}{\code{NULL} or a numeric vector providing weights for each +observation, averaged within each bin, mapped to hex bin size} + +\item{width}{width of each hexagon, if \code{NULL} computed from ybins} + +\item{height}{height of each hexagon, if \code{NULL} computed from ybins} + +\item{xbins}{number of horizontal bins, if \code{width} unspecified} + +\item{ybins}{number of vertical bins, if \code{height} unspecified} + +\item{na.rm}{If \code{TRUE} missing values will be silently removed, +otherwise they will be removed with a warning.} +} +\value{ +A data frame with columns \code{x}, \code{y} and \code{freq}, +and attributes \code{width} and \code{height}. +} +\description{ +These functions implement aesthetic mapping to hexagon size (area), in addition to the existing +colour-mapping functionality. The key change is the addition of a new fourth variable (var4) +to hex_bin(), which complements the inbuilt hexagon binning functionality. The 'frequency.to.area' +argument enables the default mappings of binned data to colour and var4 to size to be interchanged. +The hmin/hmax arguments \link{0,1} set area mapping constraints (hmax can exceed 1). +xlim/xlat enable hexagon tesselation to be constrained independently of data range. +There may be some bugs in the implementation. A legend for hexagon size has not been implemented. +Bin data into hexagons (2d). +} +\examples{ +plot(hex_bin(runif(1e4), runif(1e4))) +plot(hex_bin(rnorm(1e4), rnorm(1e4))) + +data(baseball, package = "plyr") +bin <- hex_bin(baseball$g, baseball$ab) +} +\seealso{ +\code{\link{hex_pos}} for algorithm that finds hexagon center +closest to each point and \code{\link{hex_coord}} that generates +coordinates of each hexagon. +} diff --git a/man/hex_coord.Rd b/man/hex_coord.Rd new file mode 100644 index 0000000..b82c88b --- /dev/null +++ b/man/hex_coord.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utility.r +\name{hex_coord} +\alias{hex_coord} +\title{Generate hexagon coordinates.} +\usage{ +hex_coord(x, y, width, height, size = 1) +} +\arguments{ +\item{x}{horizontal position of center} + +\item{y}{vertical position of center} + +\item{width}{hex width} + +\item{height}{hex height} +} +\value{ +A two column matrix with 7 times as many rows as input. +} +\description{ +Long axis is horizontal. Edges clock-wise from far-left, separated by +row of missing values. +} +\examples{ +x <- runif(1000) +y <- runif(1000) +res <- unique(hex_pos(x, y, 0.5, 0.5)) +hexes <- hex_coord(res[, 1], res[, 2], 0.6, 0.5) + +hexes <- hex_coord(res[, 1], res[, 2], rnorm(1000,.5,.3), rnorm(1000,.5,.3)) + +plot(hexes, type = "n") +polygon(hexes) +points(res) +} +\keyword{internal} diff --git a/man/hex_pos.Rd b/man/hex_pos.Rd new file mode 100644 index 0000000..181d30d --- /dev/null +++ b/man/hex_pos.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting_utility.r +\name{hex_pos} +\alias{hex_pos} +\title{Find centre of closest hexagon.} +\usage{ +hex_pos(x, y, width, height) +} +\arguments{ +\item{x}{numeric x position} + +\item{y}{numeric y position} + +\item{width}{of hexagon} + +\item{height}{of hexagon} +} +\value{ +matrix giving position of closest hexagon center +} +\description{ +Find centre of closest hexagon. +} +\examples{ +x <- runif(1e4) +y <- runif(1e4) +res <- hex_pos(x, y, 0.5, 0.5) +plot(x, y, type = "n") +segments(x, y, res[, 1], res[, 2], col = "grey80") +points(unique(res), pch = 20, cex = 2) +} +\keyword{internal} diff --git a/man/lrVelo-class.Rd b/man/lrVelo-class.Rd new file mode 100644 index 0000000..2ee5c0f --- /dev/null +++ b/man/lrVelo-class.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.r +\docType{class} +\name{lrVelo-class} +\alias{lrVelo-class} +\alias{lrVelo} +\title{The lrvelo Class} +\description{ +The lrvelo object is created from one ST dataset. User could choose two imputation methods to calculate +the ligand-receptor scores. The class stores the index of ligand, receptor, and interaction database, inferred +lrvelo for each interaction, and the log of each main steps. +} +\details{ +The key slots used in the lrVelo object are described below. +} +\section{Slots}{ + +\describe{ +\item{\code{lig.slot}}{ligand database} + +\item{\code{recep.slot}}{receptor database} + +\item{\code{intr.slot}}{interaction database} + +\item{\code{intr.list}}{list of interaction database} + +\item{\code{velo.s}}{A matrix of spliced velo for each gene} + +\item{\code{velo.u}}{A matrix of unspliced velo for each gene} + +\item{\code{velo.intr}}{A sparse matrix of velo for each intr} + +\item{\code{nn.id}}{A factor of nearest neighbor id. Re-do findNN since the order of cells may change!} + +\item{\code{nn.dist}}{A factor of nearest neighbor distance. Re-do findNN since the order of cells may change!} + +\item{\code{log}}{Log of each main steps} +}} + diff --git a/man/plotVelo.Rd b/man/plotVelo.Rd new file mode 100644 index 0000000..12732bf --- /dev/null +++ b/man/plotVelo.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.r +\name{plotVelo} +\alias{plotVelo} +\title{Plot the lrvelo results} +\usage{ +plotVelo( + object, + num.plot, + plot_dir, + plot.fmt, + slot.use = NULL, + use.rank = NULL, + plot.clusters = TRUE, + colors.list = NULL, + z.scaler = 0.03, + use.cex = 0.1, + use.shape = 16, + use_xbins = 15, + use_ybins = 15, + arrow.line.width = 0.6, + arrow.width = 0.06, + width = 3, + height = 3, + use.phi = 45, + use.theta = -17 +) +} +\description{ +Plot the lrvelo results +} diff --git a/man/showVelo.Rd b/man/showVelo.Rd new file mode 100644 index 0000000..df30e13 --- /dev/null +++ b/man/showVelo.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.r +\name{showVelo} +\alias{showVelo} +\title{show method for lrVelo} +\usage{ +showVelo(object, slot.use = NULL) +} +\arguments{ +\item{object}{CytoSignal object} + +\item{slot.use}{slot to use} +} +\description{ +show method for lrVelo +} diff --git a/man/veloNicheLR.CytoSignal.Rd b/man/veloNicheLR.CytoSignal.Rd new file mode 100644 index 0000000..a519cff --- /dev/null +++ b/man/veloNicheLR.CytoSignal.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analysis.r +\name{veloNicheLR.CytoSignal} +\alias{veloNicheLR.CytoSignal} +\title{Sub function for veloNicheLR, input a CytoSignal object} +\usage{ +\method{veloNicheLR}{CytoSignal}(object, lig.slot, recep.slot, intr.db.name, tag = NULL) +} +\arguments{ +\item{object}{A Cytosignal object} + +\item{lig.slot}{The ligand slot to use} + +\item{recep.slot}{The receptor slot to use} + +\item{intr.db.name}{The intr database name to use} + +\item{nn.use}{slot that the neighbor index should be taken from, by default is the same as +the recep.slot. For example, if velo.obj = GauEps-DT, then nn.use = "DT". +nn.use could also be a user-defind factor.} +} +\value{ +A Cytosignal object +} +\description{ +Sub function for veloNicheLR, input a CytoSignal object +} diff --git a/man/veloNicheLR.Rd b/man/veloNicheLR.Rd new file mode 100644 index 0000000..fe42519 --- /dev/null +++ b/man/veloNicheLR.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analysis.r +\name{veloNicheLR} +\alias{veloNicheLR} +\title{Compute the LR velo for specific ligand-receptor imputation obj pairs} +\usage{ +veloNicheLR(object, ...) +} +\arguments{ +\item{object}{A Cytosignal object} + +\item{lig.slot}{The ligand slot to use} + +\item{recep.slot}{The receptor slot to use} + +\item{intr.db.name}{The intr database name to use} + +\item{nn.use}{The neighbor index as niche} +} +\value{ +A Cytosignal object +} +\description{ +Compute the LR velo for specific ligand-receptor imputation obj pairs +} diff --git a/man/veloNicheLR.matrix_like.Rd b/man/veloNicheLR.matrix_like.Rd new file mode 100644 index 0000000..0f80603 --- /dev/null +++ b/man/veloNicheLR.matrix_like.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analysis.r +\name{veloNicheLR.matrix_like} +\alias{veloNicheLR.matrix_like} +\title{Sub function for veloNicheLR, input a sparse matrix} +\usage{ +\method{veloNicheLR}{matrix_like}(dge.lig, dge.recep, velo.mtx, nb.id.fac, lig.fac, recep.fac) +} +\arguments{ +\item{dge.lig}{A sparse matrix for ligand} + +\item{dge.recep}{A sparse matrix for receptor} + +\item{nb.id.fac}{A factor of neighbor indices} + +\item{lig.fac}{A factor of ligand indices} + +\item{recep.fac}{A factor of receptor indices} +} +\value{ +A sparse matrix +} +\description{ +Sub function for veloNicheLR, input a sparse matrix +} diff --git a/src/mat_exp.cpp b/src/mat_exp.cpp index d5767ad..fbafc49 100644 --- a/src/mat_exp.cpp +++ b/src/mat_exp.cpp @@ -1,8 +1,9 @@ -// [[Rcpp::depends(RcppArmadillo)]] - #include +// #include #include +// [[Rcpp::depends(RcppArmadillo)]] + using namespace std; using namespace Rcpp; using namespace arma; diff --git a/src/utils_velo.cpp b/src/utils_velo.cpp index 6b11bc5..93e0caf 100644 --- a/src/utils_velo.cpp +++ b/src/utils_velo.cpp @@ -1,5 +1,5 @@ #include -#include +// #include #include // #include // #include @@ -10,8 +10,6 @@ using namespace std; using namespace Rcpp; using namespace arma; - - // // [[Rcpp::export]] // arma::mat lrScore_cpp( // const arma::mat& dge,