diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml new file mode 100644 index 000000000..56fe182c3 --- /dev/null +++ b/.github/workflows/R_CMD_check.yaml @@ -0,0 +1,30 @@ +on: + push: + branches: + - master + - develop + pull_request: + +jobs: + r-cmd-check: + + if: "!contains(github.event.head_commit.message, 'ci-skip')" + + name: R CMD check + container: + image: satijalab/seurat:develop + runs-on: self-hosted + + steps: + - uses: actions/checkout@v2 + + - name: Check + run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: results + path: check diff --git a/DESCRIPTION b/DESCRIPTION index 746f644fc..d53ab9e81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.2.0 -Date: 2020-07-15 +Version: 3.2.1 +Date: 2020-09-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Authors@R: c( @@ -14,20 +14,19 @@ Authors@R: c( person(given = 'Patrick', family = 'Roelli', email = 'proelli@nygenome.org', role = 'ctb'), person(given = "Yuhan", family = "Hao", email = 'yhao@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0002-1810-0822')) ) -URL: http://www.satijalab.org/seurat, https://github.com/satijalab/seurat +URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues Additional_repositories: https://mojaveazure.github.io/loomR Depends: - R (>= 3.4.0), + R (>= 3.6.0), methods, Imports: - ape, cluster, cowplot, fitdistrplus, future, future.apply, - ggplot2 (>= 3.0.0), + ggplot2 (>= 3.3.0), ggrepel, ggridges, graphics, @@ -43,6 +42,7 @@ Imports: lmtest, MASS, Matrix (>= 1.2-14), + matrixStats, miniUI, patchwork, pbapply, @@ -88,6 +88,7 @@ RoxygenNote: 7.1.1 Encoding: UTF-8 Suggests: loomR, + ape, testthat, hdf5r, S4Vectors, diff --git a/NAMESPACE b/NAMESPACE index 1cb4e1524..3f7a75a59 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ S3method("DefaultAssay<-",Graph) S3method("DefaultAssay<-",Seurat) S3method("DefaultAssay<-",SpatialImage) S3method("Idents<-",Seurat) +S3method("Index<-",Neighbor) S3method("JS<-",DimReduc) S3method("JS<-",JackStrawData) S3method("Key<-",Assay) @@ -41,6 +42,7 @@ S3method(.DollarNames,SeuratCommand) S3method(AddMetaData,Assay) S3method(AddMetaData,Seurat) S3method(Cells,DimReduc) +S3method(Cells,Neighbor) S3method(Cells,STARmap) S3method(Cells,SlideSeq) S3method(Cells,SpatialImage) @@ -55,6 +57,7 @@ S3method(DefaultAssay,Graph) S3method(DefaultAssay,Seurat) S3method(DefaultAssay,SeuratCommand) S3method(DefaultAssay,SpatialImage) +S3method(Distances,Neighbor) S3method(Embeddings,DimReduc) S3method(Embeddings,Seurat) S3method(FindClusters,Seurat) @@ -87,6 +90,8 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,Assay) S3method(HVFInfo,Seurat) S3method(Idents,Seurat) +S3method(Index,Neighbor) +S3method(Indices,Neighbor) S3method(IsGlobal,DimReduc) S3method(IsGlobal,SpatialImage) S3method(IsGlobal,default) @@ -115,6 +120,7 @@ S3method(ReadH5AD,H5File) S3method(ReadH5AD,character) S3method(RenameCells,Assay) S3method(RenameCells,DimReduc) +S3method(RenameCells,Neighbor) S3method(RenameCells,STARmap) S3method(RenameCells,Seurat) S3method(RenameCells,SlideSeq) @@ -169,7 +175,9 @@ S3method(WhichCells,Seurat) S3method(WriteH5AD,Seurat) S3method(as.CellDataSet,Seurat) S3method(as.Graph,Matrix) +S3method(as.Graph,Neighbor) S3method(as.Graph,matrix) +S3method(as.Neighbor,Graph) S3method(as.Seurat,CellDataSet) S3method(as.Seurat,SingleCellExperiment) S3method(as.Seurat,loom) @@ -184,6 +192,7 @@ S3method(as.sparse,data.frame) S3method(as.sparse,matrix) S3method(dim,Assay) S3method(dim,DimReduc) +S3method(dim,Neighbor) S3method(dim,STARmap) S3method(dim,Seurat) S3method(dim,SlideSeq) @@ -209,6 +218,7 @@ S3method(subset,SpatialImage) S3method(subset,VisiumV1) export("DefaultAssay<-") export("Idents<-") +export("Index<-") export("JS<-") export("Key<-") export("Loadings<-") @@ -252,6 +262,7 @@ export(DietSeurat) export(DimHeatmap) export(DimPlot) export(DiscretePalette) +export(Distances) export(DoHeatmap) export(DotPlot) export(ElbowPlot) @@ -260,6 +271,7 @@ export(ExpMean) export(ExpSD) export(ExpVar) export(ExportToCellbrowser) +export(FastRowScale) export(FeatureLocator) export(FeaturePlot) export(FeatureScatter) @@ -294,6 +306,8 @@ export(ISpatialDimPlot) export(ISpatialFeaturePlot) export(Idents) export(Images) +export(Index) +export(Indices) export(IntegrateData) export(Intensity) export(IsGlobal) @@ -308,6 +322,7 @@ export(LabelPoints) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) +export(LoadAnnoyIndex) export(LoadSTARmap) export(Loadings) export(LocalStruct) @@ -320,6 +335,7 @@ export(MetaFeature) export(MinMax) export(Misc) export(MixingMetric) +export(Neighbors) export(NoAxes) export(NoGrid) export(NoLegend) @@ -367,6 +383,7 @@ export(RunUMAP) export(SCTransform) export(SVFInfo) export(SampleUMI) +export(SaveAnnoyIndex) export(ScaleData) export(ScaleFactors) export(ScoreJackStraw) @@ -392,6 +409,7 @@ export(TSNEPlot) export(Tool) export(TopCells) export(TopFeatures) +export(TopNeighbors) export(TransferData) export(UMAPPlot) export(UpdateSeuratObject) @@ -404,6 +422,7 @@ export(WhichCells) export(WhiteBackground) export(as.CellDataSet) export(as.Graph) +export(as.Neighbor) export(as.Seurat) export(as.SingleCellExperiment) export(as.loom) @@ -415,6 +434,7 @@ exportClasses(DimReduc) exportClasses(Graph) exportClasses(IntegrationData) exportClasses(JackStrawData) +exportClasses(Neighbor) exportClasses(Seurat) exportClasses(SeuratCommand) exportClasses(SpatialImage) @@ -442,11 +462,6 @@ importFrom(RcppAnnoy,AnnoyEuclidean) importFrom(RcppAnnoy,AnnoyHamming) importFrom(RcppAnnoy,AnnoyManhattan) importFrom(Rtsne,Rtsne) -importFrom(ape,Moran.I) -importFrom(ape,as.phylo) -importFrom(ape,drop.tip) -importFrom(ape,nodelabels) -importFrom(ape,plot.phylo) importFrom(cluster,clara) importFrom(cowplot,get_legend) importFrom(cowplot,plot_grid) @@ -485,6 +500,7 @@ importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_polygon) importFrom(ggplot2,geom_raster) +importFrom(ggplot2,geom_rect) importFrom(ggplot2,geom_smooth) importFrom(ggplot2,geom_text) importFrom(ggplot2,geom_tile) @@ -579,6 +595,9 @@ importFrom(irlba,irlba) importFrom(jsonlite,fromJSON) importFrom(leiden,leiden) importFrom(lmtest,lrtest) +importFrom(matrixStats,rowMeans2) +importFrom(matrixStats,rowSds) +importFrom(matrixStats,rowSums2) importFrom(methods,"slot<-") importFrom(methods,.hasSlot) importFrom(methods,as) diff --git a/NEWS.md b/NEWS.md index 1495bc47f..4406d7625 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,32 @@ # News All notable changes to Seurat will be documented in this file. -The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) + +## [3.2.1] - 2020-09-04 +### Added +- Added support for nearest neighbor input and `return.model` parameter in `RunUMAP()` +- Enable named color vectors in `DoHeatmap()` +- Add `label.color` and `label.box` parameters to `DimPlot` +- Added `shuffle` and `seed` parameters to `DimPlot()` to help with overplotting +- Added new stacked violin plot functionality + +### Changes +- Allow setting `slot` parameter in `RunUMAP` +- Added support for FIt-SNE v1.2+ +- Fix for `Spatial*Plot` when running with interactive=TRUE +- Set max for number of items returned by `Top` and remove duplicate items when balanced=TRUE +- Fix logging bug when functions were run via `do.call()` +- Fix handling of weight.by.var parameter when approx=FALSE in `RunPCA()` +- Fix issue where feature names with dashes crashed `CellSelector` +- Fix issue where errors in subsetting were being swallowed +- Fix issue where labeling uncropped spatial plots was broken + +### Deprecated +- `CreateActivityMatrix` deprecated in favor of `Signac::GeneActivity` +- `ReadAlevin` and `ReadAlevinCsv` deprecated in favor of `SeuratWrappers::ReadAlevin` +- `ExportToCellbrowser` and `StopCellbrowser` deprecated in favor of `SeuratWrappers::ExportToCellbrowser` and `SeuratWrappers::StopCellbrowser` +- `ReadH5AD` and `WriteH5AD` deprecated in favor of h5Seurat/H5AD functionality found in SeuratDisk +- `as.loom` and `as.Seurat.loom` deprecated in favor of functionality found in SeuratDisk ## [3.2.0] - 2020-07-15 ### Added @@ -8,14 +34,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) object inheriting from the Assay class - Added ability to cluster idents and group features in `DotPlot` - Added ability to use RColorBrewer plaettes for split `DotPlots` -- Added visualization and analysis functionality for spatially resolved datasets (Visium, Slide-seq). +- Added visualization and analysis functionality for spatially resolved datasets (Visium, Slide-seq). ### Changes - Removed `add.iter` parameter from `RunTSNE` function - Fixed integer overflow error in the WilcoxDETest function - Minor visual fixes in `DoHeatmap` group bar + labels - Efficiency improvements in anchor scoring (`ScoreAnchors`) -- Fix bug in `FindClusters()` when the last node has no edges +- Fix bug in `FindClusters()` when the last node has no edges - Default to weighted = TRUE when constructing igraph objects in `RunLeiden`. Remove corresponding weights parameter from `FindClusters()`. - Fix handling of keys in `FeatureScatter()` - Change `CellSelector` to use Shiny gadgets instead of SDMTools @@ -25,7 +51,7 @@ object inheriting from the Assay class ## [3.1.5] - 2020-04-14 ### Added -- New `scale` parameter in `DotPlot` +- New `scale` parameter in `DotPlot` - New `keep.sparse parameter in `CreateGeneActivityMatrix` for a more memory efficient option - Added ability to store model learned by UMAP and project new data - New `strip.suffix` option in `Read10X`. **This changes the default behavior of `Read10X`**. @@ -46,7 +72,7 @@ object inheriting from the Assay class - Update `assay.used` slot for `DimReduc`s when Assay is renamed ## [3.1.4] - 2020-02-20 -### Changes +### Changes - Fixes to `DoHeatmap` to remain compatible with ggplot2 v3.3 - Adoption of `patchwork` framework to replace `CombinePlots` @@ -61,7 +87,7 @@ object inheriting from the Assay class - Fixes for keys with underscores - Fix issue with leiden option for `FindClusters` - Fix for data transfer when using sctransform -- SDMTools moved to Suggests as package is orphaned +- SDMTools moved to Suggests as package is orphaned ## [3.1.2] - 2019-12-11 ### Added @@ -110,7 +136,7 @@ object inheriting from the Assay class - Updates `ReadH5AD` to distinguish FVF methods - Fixes to UpdateSeuratObject for v2 objects - Sink all output from stdout to stderr -- Fix to scale.data cell ordering after subsetting +- Fix to scale.data cell ordering after subsetting - Enable `Assay` specification in `BuildClusterTree` - Fix `FeaturePlot` when using both `blend` and `split.by` - Fix to `WhichCells` when passing `cells` and `invert` @@ -181,7 +207,7 @@ object inheriting from the Assay class ### Changed - DiffusionMap dependency replaced with destiny to avoid archival -- Java dependency removed and functionality rewritten in Rcpp +- Java dependency removed and functionality rewritten in Rcpp - Speed and efficiency improvements for Rcpp code - More robust duplicate handling in CellCycleScoring diff --git a/R/RcppExports.R b/R/RcppExports.R index 572786cd2..9b9297dbf 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -17,12 +17,12 @@ RowMergeMatrices <- function(mat1, mat2, mat1_rownames, mat2_rownames, all_rowna .Call('_Seurat_RowMergeMatrices', PACKAGE = 'Seurat', mat1, mat2, mat1_rownames, mat2_rownames, all_rownames) } -LogNorm <- function(data, scale_factor, display_progress = TRUE) { - .Call('_Seurat_LogNorm', PACKAGE = 'Seurat', data, scale_factor, display_progress) +RowMergeMatricesList <- function(mat_list, mat_rownames, all_rownames) { + .Call('_Seurat_RowMergeMatricesList', PACKAGE = 'Seurat', mat_list, mat_rownames, all_rownames) } -FastRowScale <- function(mat, scale = TRUE, center = TRUE, scale_max = 10, display_progress = TRUE) { - .Call('_Seurat_FastRowScale', PACKAGE = 'Seurat', mat, scale, center, scale_max, display_progress) +LogNorm <- function(data, scale_factor, display_progress = TRUE) { + .Call('_Seurat_LogNorm', PACKAGE = 'Seurat', data, scale_factor, display_progress) } Standardize <- function(mat, display_progress = TRUE) { @@ -77,6 +77,10 @@ ReplaceColsC <- function(mat, col_idx, replacement) { .Call('_Seurat_ReplaceColsC', PACKAGE = 'Seurat', mat, col_idx, replacement) } +GraphToNeighborHelper <- function(mat) { + .Call('_Seurat_GraphToNeighborHelper', PACKAGE = 'Seurat', mat) +} + FindWeightsC <- function(integration_matrix, cells2, distances, anchor_cells2, integration_matrix_rownames, cell_index, anchor_score, min_dist, sd, display_progress) { .Call('_Seurat_FindWeightsC', PACKAGE = 'Seurat', integration_matrix, cells2, distances, anchor_cells2, integration_matrix_rownames, cell_index, anchor_score, min_dist, sd, display_progress) } diff --git a/R/clustering.R b/R/clustering.R index b1d0aed12..ca85cf50f 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -281,7 +281,7 @@ FindNeighbors.default <- function( searchtype = "standard", eps = nn.eps, metric = annoy.metric) - nn.ranked <- nn.ranked$nn.idx + nn.ranked <- Indices(object = nn.ranked) } else { if (verbose) { message("Building SNN based on a provided distance matrix") @@ -384,8 +384,10 @@ FindNeighbors.dist <- function( )) } -#' @param assay Assay to use in construction of SNN -#' @param features Features to use as input for building the SNN +#' @param assay Assay to use in construction of SNN; used only when \code{dims} +#' is \code{NULL} +#' @param features Features to use as input for building the SNN; used only when +#' \code{dims} is \code{NULL} #' @param reduction Reduction to use as input for building the SNN #' @param dims Dimensions of reduction to use as input #' @param do.plot Plot SNN graph on tSNE coordinates @@ -418,7 +420,6 @@ FindNeighbors.Seurat <- function( ) { CheckDots(...) if (!is.null(x = dims)) { - # assay <- assay %||% DefaultAssay(object = object) assay <- DefaultAssay(object = object[[reduction]]) data.use <- Embeddings(object = object[[reduction]]) if (max(dims) > ncol(x = data.use)) { @@ -514,6 +515,8 @@ AnnoyNN <- function(data, query = data, metric = "euclidean", n.trees = 50, k, k = k, search.k = search.k, include.distance = include.distance) + nn$idx <- idx + nn$alg.info <- list(metric = metric, ndim = ncol(x = data)) return(nn) } @@ -633,12 +636,13 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = TRUE) { # @param query Data to query against data # @param k Number of nearest neighbors to compute # @param method Nearest neighbor method to use: "rann", "annoy" +# @param cache.index Store algorithm index with results for reuse # @param ... additional parameters to specific neighbor finding method # -NNHelper <- function(data, query = data, k, method, ...) { +NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) { args <- as.list(x = sys.frame(which = sys.nframe())) args <- c(args, list(...)) - return( + results <- ( switch( EXPR = method, "rann" = { @@ -652,6 +656,16 @@ NNHelper <- function(data, query = data, k, method, ...) { stop("Invalid method. Please choose one of 'rann', 'annoy'") ) ) + n.ob <- Neighbor( + nn.idx = results$nn.idx, + nn.dist = results$nn.dists, + alg.info = results$alg.info %||% list(), + cell.names = rownames(x = query) + ) + if (isTRUE(x = cache.index) && !is.null(x = results$idx)) { + slot(object = n.ob, name = "alg.idx") <- results$idx + } + return(n.ob) } # Run Leiden clustering algorithm @@ -724,7 +738,7 @@ RunLeiden <- function( object } else { stop( - "Method for Leiden not found for class", class(x = object), + "Method for Leiden not found for class", class(x = object), call. = FALSE ) } diff --git a/R/data.R b/R/data.R index 2a80e6d85..be7eba463 100644 --- a/R/data.R +++ b/R/data.R @@ -7,7 +7,7 @@ #' \item{s.genes}{Genes associated with S-phase} #' \item{g2m.genes}{Genes associated with G2M-phase} #' } -#' @source \url{http://science.sciencemag.org/content/352/6282/189} +#' @source \url{https://science.sciencemag.org/content/352/6282/189} #' "cc.genes" @@ -39,7 +39,7 @@ #' \item{s.genes}{Genes associated with S-phase} #' \item{g2m.genes}{Genes associated with G2M-phase} #' } -#' @source \url{http://science.sciencemag.org/content/352/6282/189} +#' @source \url{https://science.sciencemag.org/content/352/6282/189} #' #' @seealso \code{\link{cc.genes}} #' diff --git a/R/differential_expression.R b/R/differential_expression.R index 5333fbfd8..e3949568a 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -23,7 +23,6 @@ globalVariables( #' @return Matrix containing a ranked list of putative markers, and associated #' statistics (p-values, ROC score, etc.) #' -#' @importFrom ape drop.tip #' @importFrom stats setNames #' #' @export @@ -73,6 +72,9 @@ FindAllMarkers <- function( if (is.null(x = node)) { idents.all <- sort(x = unique(x = Idents(object = object))) } else { + if (!PackageCheck('ape', error = FALSE)) { + stop(cluster.ape, call. = FALSE) + } tree <- Tool(object = object, slot = 'BuildClusterTree') if (is.null(x = tree)) { stop("Please run 'BuildClusterTree' before finding markers on nodes") @@ -90,7 +92,7 @@ FindAllMarkers <- function( node, as.numeric(x = setdiff(x = descendants, y = keep.children)) ) - tree <- drop.tip(phy = tree, tip = drop.children) + tree <- ape::drop.tip(phy = tree, tip = drop.children) new.nodes <- unique(x = tree$edge[, 1, drop = TRUE]) idents.all <- (tree$Nnode + 2):max(tree$edge) } diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 66480d135..3566347b0 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -749,7 +749,7 @@ RunLSI.Seurat <- function( verbose = TRUE, ... ) { - assay <- assay %||% DefaultAssay(object) + assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) reduction.data <- RunLSI( object = assay.data, @@ -836,9 +836,9 @@ RunPCA.default <- function( feature.loadings <- pca.results$rotation sdev <- pca.results$sdev if (weight.by.var) { - cell.embeddings <- pca.results$x %*% diag(pca.results$sdev[1:npcs]^2) - } else { cell.embeddings <- pca.results$x + } else { + cell.embeddings <- pca.results$x / (pca.results$sdev[1:npcs] * sqrt(x = ncol(x = object) - 1)) } } } @@ -989,6 +989,7 @@ RunTSNE.matrix <- function( 'Rtsne' = Rtsne( X = object, dims = dim.embed, + pca = FALSE, ... # PCA/is_distance )$Y, 'FIt-SNE' = fftRtsne(X = object, dims = dim.embed, rand_seed = seed.use, ...), @@ -1099,7 +1100,6 @@ RunTSNE.Seurat <- function( tsne.method = tsne.method, dim.embed = dim.embed, reduction.key = reduction.key, - pca = FALSE, ... ) } else if (!is.null(x = features)) { @@ -1110,7 +1110,6 @@ RunTSNE.Seurat <- function( tsne.method = tsne.method, dim.embed = dim.embed, reduction.key = reduction.key, - pca = FALSE, ... ) } else { @@ -1131,8 +1130,10 @@ RunTSNE.Seurat <- function( #' RunUMAP.default <- function( object, - reduction.model = NULL, + reduction.key = 'UMAP_', assay = NULL, + reduction.model = NULL, + return.model = FALSE, umap.method = 'uwot', n.neighbors = 30L, n.components = 2L, @@ -1151,7 +1152,6 @@ RunUMAP.default <- function( seed.use = 42, metric.kwds = NULL, angular.rp.forest = FALSE, - reduction.key = 'UMAP_', verbose = TRUE, ... ) { @@ -1169,6 +1169,26 @@ RunUMAP.default <- function( ) options(Seurat.warn.umap.uwot = FALSE) } + if (umap.method == 'uwot-learn') { + warning("'uwot-learn' is deprecated. Set umap.method = 'uwot' and return.model = TRUE") + umap.method <- "uwot" + return.model <- TRUE + } + if (return.model) { + if (verbose) { + message("UMAP will return its model") + } + umap.method = "uwot" + } + if (is.list(x = object)) { + names(x = object) <- c("idx", "dist") + } + if (!is.null(x = reduction.model)) { + if (verbose) { + message("Running UMAP projection") + } + umap.method <- "uwot-predict" + } umap.output <- switch( EXPR = umap.method, 'umap-learn' = { @@ -1211,55 +1231,49 @@ RunUMAP.default <- function( ) metric <- 'cosine' } - umap( - X = object, - n_threads = nbrOfWorkers(), - n_neighbors = as.integer(x = n.neighbors), - n_components = as.integer(x = n.components), - metric = metric, - n_epochs = n.epochs, - learning_rate = learning.rate, - min_dist = min.dist, - spread = spread, - set_op_mix_ratio = set.op.mix.ratio, - local_connectivity = local.connectivity, - repulsion_strength = repulsion.strength, - negative_sample_rate = negative.sample.rate, - a = a, - b = b, - fast_sgd = uwot.sgd, - verbose = verbose - ) - }, - 'uwot-learn' = { - if (metric == 'correlation') { - warning( - "UWOT does not implement the correlation metric, using cosine instead", - call. = FALSE, - immediate. = TRUE + if (is.list(x = object)) { + umap( + X = NULL, + nn_method = object, + n_threads = nbrOfWorkers(), + n_components = as.integer(x = n.components), + metric = metric, + n_epochs = n.epochs, + learning_rate = learning.rate, + min_dist = min.dist, + spread = spread, + set_op_mix_ratio = set.op.mix.ratio, + local_connectivity = local.connectivity, + repulsion_strength = repulsion.strength, + negative_sample_rate = negative.sample.rate, + a = a, + b = b, + fast_sgd = uwot.sgd, + verbose = verbose, + ret_model = return.model + ) + } else { + umap( + X = object, + n_threads = nbrOfWorkers(), + n_neighbors = as.integer(x = n.neighbors), + n_components = as.integer(x = n.components), + metric = metric, + n_epochs = n.epochs, + learning_rate = learning.rate, + min_dist = min.dist, + spread = spread, + set_op_mix_ratio = set.op.mix.ratio, + local_connectivity = local.connectivity, + repulsion_strength = repulsion.strength, + negative_sample_rate = negative.sample.rate, + a = a, + b = b, + fast_sgd = uwot.sgd, + verbose = verbose, + ret_model = return.model ) - metric <- 'cosine' } - umap( - X = object, - n_threads = nbrOfWorkers(), - n_neighbors = as.integer(x = n.neighbors), - n_components = as.integer(x = n.components), - metric = metric, - n_epochs = n.epochs, - learning_rate = learning.rate, - min_dist = min.dist, - spread = spread, - set_op_mix_ratio = set.op.mix.ratio, - local_connectivity = local.connectivity, - repulsion_strength = repulsion.strength, - negative_sample_rate = negative.sample.rate, - a = a, - b = b, - fast_sgd = uwot.sgd, - verbose = verbose, - ret_model = TRUE - ) }, 'uwot-predict' = { if (metric == 'correlation') { @@ -1272,11 +1286,11 @@ RunUMAP.default <- function( } if (is.null(x = reduction.model) || !inherits(x = reduction.model, what = 'DimReduc')) { stop( - "If using uwot-predict, please pass a DimReduc object with the model stored to reduction.model.", + "If running projection UMAP, please pass a DimReduc object with the model stored to reduction.model.", call. = FALSE ) } - model <- reduction.model %||% Misc( + model <- Misc( object = reduction.model, slot = "model" ) @@ -1286,23 +1300,41 @@ RunUMAP.default <- function( call. = FALSE ) } - umap_transform( - X = object, - model = model, - n_threads = nbrOfWorkers(), - n_epochs = n.epochs, - verbose = verbose - ) + if (is.list(x = object)) { + if (packageVersion(pkg = "uwot") <= '0.1.8.9000') { + stop("This uwot functionality requires uwot version >= 0.1.8.9000", + "Installing the latest version from github can be done with", + "remotes::install_github('jlmelville/uwot')") + } + uwot::umap_transform( + X = NULL, + nn_method = object, + model = model, + n_threads = nbrOfWorkers(), + n_epochs = n.epochs, + verbose = verbose + ) + } else { + umap_transform( + X = object, + model = model, + n_threads = nbrOfWorkers(), + n_epochs = n.epochs, + verbose = verbose + ) + } }, stop("Unknown umap method: ", umap.method, call. = FALSE) ) - if (umap.method == 'uwot-learn') { + if (return.model) { umap.model <- umap.output umap.output <- umap.output$embedding } colnames(x = umap.output) <- paste0(reduction.key, 1:ncol(x = umap.output)) if (inherits(x = object, what = 'dist')) { rownames(x = umap.output) <- attr(x = object, "Labels") + } else if (is.list(x = object)) { + rownames(x = umap.output) <- rownames(x = object$idx) } else { rownames(x = umap.output) <- rownames(x = object) } @@ -1312,7 +1344,7 @@ RunUMAP.default <- function( assay = assay, global = TRUE ) - if (umap.method == 'uwot-learn') { + if (return.model) { Misc(umap.reduction, slot = "model") <- umap.model } return(umap.reduction) @@ -1417,6 +1449,8 @@ RunUMAP.Graph <- function( #' @param graph Name of graph on which to run UMAP #' @param assay Assay to pull data for when using \code{features}, or assay used to construct Graph #' if running UMAP on a Graph +#' @param nn.name Name of knn output on which to run UMAP +#' @param slot The slot used to pull data for when using \code{features}. data slot is by default. #' @param umap.method UMAP implementation to run. Can be #' \describe{ #' \item{\code{uwot}:}{Runs umap via the uwot R package} @@ -1471,6 +1505,7 @@ RunUMAP.Graph <- function( #' @param reduction.name Name to store dimensional reduction under in the Seurat object #' @param reduction.key dimensional reduction key, specifies the string before #' the number for the dimension names. UMAP by default +#' @param return.model whether UMAP will return the uwot model #' @param seed.use Set a random seed. By default, sets the seed to 42. Setting #' NULL will not set a seed #' @param verbose Controls verbosity @@ -1481,13 +1516,16 @@ RunUMAP.Graph <- function( #' RunUMAP.Seurat <- function( object, - reduction.model = NULL, dims = NULL, reduction = 'pca', features = NULL, graph = NULL, - assay = 'RNA', + assay = DefaultAssay(object = object), + nn.name = NULL, + slot = 'data', umap.method = 'uwot', + reduction.model = NULL, + return.model = FALSE, n.neighbors = 30L, n.components = 2L, metric = 'cosine', @@ -1515,7 +1553,7 @@ RunUMAP.Seurat <- function( stop("Please specify only one of the following arguments: dims, features, or graph") } if (!is.null(x = features)) { - data.use <- as.matrix(x = t(x = GetAssayData(object = object, slot = 'data', assay = assay)[features, , drop = FALSE])) + data.use <- as.matrix(x = t(x = GetAssayData(object = object, slot = slot, assay = assay)[features, , drop = FALSE])) if (ncol(x = data.use) < n.components) { stop( "Please provide as many or more features than n.components: ", @@ -1539,6 +1577,8 @@ RunUMAP.Seurat <- function( call. = FALSE ) } + } else if (!is.null( x = nn.name)){ + data.use <- object[[nn.name]] } else if (!is.null(x = graph)) { data.use <- object[[graph]] } else { @@ -1547,6 +1587,7 @@ RunUMAP.Seurat <- function( object[[reduction.name]] <- RunUMAP( object = data.use, reduction.model = reduction.model, + return.model = return.model, assay = assay, umap.method = umap.method, n.neighbors = n.neighbors, @@ -1734,8 +1775,8 @@ EmpiricalP <- function(x, nullval) { # FIt-SNE helper function for calling fast_tsne from R # -# Based on Kluger Lab FIt-SNE v1.1.0 code on https://github.com/KlugerLab/FIt-SNE/blob/master/fast_tsne.R -# commit d2cf403 on Feb 8, 2019 +# Based on Kluger Lab FIt-SNE v1.2.1 code on https://github.com/KlugerLab/FIt-SNE/blob/master/fast_tsne.R +# commit 601608ed42e4be2765970910927da20f0b0bf9b9 on June 25, 2020 # #' @importFrom utils file_test # @@ -1743,19 +1784,18 @@ fftRtsne <- function(X, dims = 2, perplexity = 30, theta = 0.5, - check_duplicates = TRUE, - max_iter = 1000, + max_iter = 750, fft_not_bh = TRUE, ann_not_vptree = TRUE, stop_early_exag_iter = 250, exaggeration_factor = 12.0, no_momentum_during_exag = FALSE, - start_late_exag_iter = -1.0, + start_late_exag_iter = -1, late_exag_coeff = 1.0, mom_switch_iter = 250, momentum = 0.5, final_momentum = 0.8, - learning_rate = 200, + learning_rate = 'auto', n_trees = 50, search_k = -1, rand_seed = -1, @@ -1764,7 +1804,8 @@ fftRtsne <- function(X, min_num_intervals = 50, K = -1, sigma = -30, - initialization = NULL, + initialization = 'pca', + max_step_norm = 5, data_path = NULL, result_path = NULL, load_affinities = NULL, @@ -1799,19 +1840,19 @@ fftRtsne <- function(X, } # check fast_tsne version ft.out <- suppressWarnings(expr = system2(command = fast_tsne_path, stdout = TRUE)) - if (grepl(pattern = '= t-SNE v1.1', x = ft.out[1])) { - version_number <- '1.1.0' - } else if (grepl(pattern = '= t-SNE v1.0', x = ft.out[1])) { - version_number <- '1.0' - } else { + version_number <- regmatches(ft.out[1], regexpr('= t-SNE v[0-9.]+', ft.out[1])) + if (is.null(version_number)){ message("First line of fast_tsne output is") message(ft.out[1]) - stop("Our FIt-SNE wrapper requires FIt-SNE v1.X.X, please install the appropriate version from github.com/KlugerLab/FIt-SNE and have fast_tsne_path point to it if it's not in your path") + stop("Our FIt-SNE wrapper requires FIt-SNE v1.0+, please install the appropriate version from github.com/KlugerLab/FIt-SNE and have fast_tsne_path point to it if it's not in your path") + } else { + version_number <- gsub('= t-SNE v', '', version_number) } + is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) { return(abs(x = x - round(x = x)) < tol) } - if (version_number == '1.0' && df != 1.0) { + if (version_number == '1.0.0' && df != 1.0) { stop("This version of FIt-SNE does not support df!=1. Please install the appropriate version from github.com/KlugerLab/FIt-SNE") } if (!is.numeric(x = theta) || (theta < 0.0) || (theta > 1.0) ) { @@ -1832,6 +1873,9 @@ fftRtsne <- function(X, if (!is.numeric(x = exaggeration_factor)) { stop("exaggeration_factor should be numeric") } + if (!is.numeric(df)) { + stop("df should be numeric") + } if (!is.wholenumber(x = dims) || dims <= 0) { stop("Incorrect dimensionality.") } @@ -1841,10 +1885,52 @@ fftRtsne <- function(X, } else if (perplexity == 0) { search_k <- n_trees * max(perplexity_list) * 3 } else { - search_k <- n_trees * K * 3 + search_k <- n_trees * K + } + } + + if (is.character(learning_rate) && learning_rate =='auto') { + learning_rate = max(200, nrow(X)/exaggeration_factor) + } + if (is.character(start_late_exag_iter) && start_late_exag_iter =='auto') { + if (late_exag_coeff > 0) { + start_late_exag_iter = stop_early_exag_iter + } else { + start_late_exag_iter = -1 } } + + if (is.character(initialization) && initialization =='pca') { + if (rand_seed != -1) { + set.seed(rand_seed) + } + if (requireNamespace("rsvd")) { + message('Using rsvd() to compute the top PCs for initialization.') + X_c <- scale(X, center=T, scale=F) + rsvd_out <- rsvd(X_c, k=dims) + X_top_pcs <- rsvd_out$u %*% diag(rsvd_out$d, nrow=dims) + } else if(requireNamespace("irlba")) { + message('Using irlba() to compute the top PCs for initialization.') + X_colmeans <- colMeans(X) + irlba_out <- irlba(X,nv=dims, center=X_colmeans) + X_top_pcs <- irlba_out$u %*% diag(irlba_out$d, nrow=dims) + }else{ + stop("By default, FIt-SNE initializes the embedding with the + top PCs. We use either rsvd or irlba for fast computation. + To use this functionality, please install the rsvd package + with install.packages('rsvd') or the irlba package with + install.packages('ilrba'). Otherwise, set initialization + to NULL for random initialization, or any N by dims matrix + for custom initialization.") + } + initialization <- 0.0001*(X_top_pcs/sd(X_top_pcs[,1])) + + } else if (is.character(initialization) && initialization == 'random'){ + message('Random initialization') + initialization = NULL + } nbody_algo <- ifelse(test = fft_not_bh, yes = 2, no = 1) + if (is.null(load_affinities)) { load_affinities <- 0 } else { @@ -1856,18 +1942,23 @@ fftRtsne <- function(X, load_affinities <- 0 } } + knn_algo <- ifelse(test = ann_not_vptree, yes = 1, no = 2) + tX <- as.numeric(t(X)) + f <- file(description = data_path, open = "wb") n = nrow(x = X) D = ncol(x = X) writeBin(object = as.integer(x = n), con = f, size = 4) writeBin(object = as.integer(x = D), con = f, size = 4) - writeBin(object = as.numeric(x = theta), con = f, size = 8) #theta - writeBin(object = as.numeric(x = perplexity), con = f, size = 8) #theta + writeBin(object = as.numeric(x = theta), con = f, size = 8) + writeBin(object = as.numeric(x = perplexity), con = f, size = 8) + if (perplexity == 0) { writeBin(object = as.integer(x = length(x = perplexity_list)), con = f, size = 4) writeBin(object = perplexity_list, con = f) } + writeBin(object = as.integer(x = dims), con = f, size = 4) #theta writeBin(object = as.integer(x = max_iter), con = f, size = 4) writeBin(object = as.integer(x = stop_early_exag_iter), con = f, size = 4) @@ -1875,6 +1966,9 @@ fftRtsne <- function(X, writeBin(object = as.numeric(x = momentum), con = f, size = 8) writeBin(object = as.numeric(x = final_momentum), con = f, size = 8) writeBin(object = as.numeric(x = learning_rate), con = f, size = 8) + if (!(version_number %in% c('1.1.0', '1.0.0'))) { + writeBin(object = as.numeric(x = max_step_norm), f, size = 8) + } writeBin(object = as.integer(x = K), con = f, size = 4) #K writeBin(object = as.numeric(x = sigma), con = f, size = 8) #sigma writeBin(object = as.integer(x = nbody_algo), con = f, size = 4) #not barnes hut @@ -1888,10 +1982,9 @@ fftRtsne <- function(X, writeBin(object = as.integer(x = nterms), con = f, size = 4) writeBin(object = as.numeric(x = intervals_per_integer), con = f, size = 8) writeBin(object = as.integer(x = min_num_intervals), con = f, size = 4) - tX = c(t(X)) writeBin(object = tX, con = f) writeBin(object = as.integer(x = rand_seed), con = f, size = 4) - if (version_number != "1.0") { + if (version_number != "1.0.0") { writeBin(object = as.numeric(x = df), con = f, size = 8) } writeBin(object = as.integer(x = load_affinities), con = f, size = 4) @@ -1899,10 +1992,11 @@ fftRtsne <- function(X, writeBin(object = c(t(x = initialization)), con = f) } close(con = f) - if (version_number == "1.0") { + + if (version_number == "1.0.0") { flag <- system2( - command = fast_tsne_path, - args = c(data_path, result_path, nthreads) + command = fast_tsne_path, + args = c(data_path, result_path, nthreads) ) } else { flag <- system2( @@ -1910,9 +2004,11 @@ fftRtsne <- function(X, args = c(version_number, data_path, result_path, nthreads) ) } + if (flag != 0) { stop('tsne call failed') } + f <- file(description = result_path, open = "rb") n <- readBin(con = f, what = integer(), n = 1, size = 4) d <- readBin(con = f, what = integer(), n = 1, size = 4) diff --git a/R/generics.R b/R/generics.R index ee3adefda..ead6e3245 100644 --- a/R/generics.R +++ b/R/generics.R @@ -71,6 +71,18 @@ as.loom <- function(x, ...) { UseMethod(generic = 'as.loom', object = x) } +#' Convert objects to Neighbor ojbects +#' +#' @param x An object to convert to \code{Neighbor} +#' @param ... Arguments passed to other methods +#' +#' @rdname as.Neighbor +#' @export as.Neighbor +#' +as.Neighbor <- function(x, ...) { + UseMethod(generic = 'as.Neighbor', object = x) +} + #' Convert objects to Seurat objects #' #' @param x An object to convert to class \code{Seurat} @@ -224,6 +236,18 @@ DefaultAssay <- function(object, ...) { UseMethod(generic = 'DefaultAssay<-', object = object) } +#' Get the Neighbor nearest neighbors distance matrix +#' +#' @param object An object +#' @param ... Arguments passed to other methods +#' +#' @rdname Distances +#' @export Distances +#' +Distances <- function(object, ...) { + UseMethod(generic = 'Distances', object = object) +} + #' Get cell embeddings #' #' @param object An object @@ -526,6 +550,46 @@ Idents <- function(object, ... ) { UseMethod(generic = 'Idents<-', object = object) } +#' Get Neighbor algorithm index +#' +#' @param object An object +#' @param ... Arguments passed to other methods; +#' +#' @return Returns the value in the alg.idx slot of the Neighbor object +#' +#' @rdname Index +#' @export Index +#' +Index <- function(object, ...) { + UseMethod(generic = "Index", object = object) +} + +#' @inheritParams Index +#' @param value The index to store +#' +#' @return \code{Idents<-}: A Neighbor bject with the index stored +#' +#' @rdname Index +#' @export Index<- +#' +"Index<-" <- function(object, ..., value) { + UseMethod(generic = 'Index<-', object = object) +} + +#' Get Neighbor nearest neighbor index matrices +#' +#' @param object An object +#' @param ... Arguments passed to other methods; +#' +#' @return A matrix with the nearest neighbor indices +#' +#' @rdname Indices +#' @export Indices +#' +Indices <- function(object, ...) { + UseMethod(generic = "Indices", object = object) +} + #' Is an object global/persistent? #' #' Typically, when removing \code{Assay} objects from an \code{Seurat} object, diff --git a/R/integration.R b/R/integration.R index ef52009c5..d16d3d6ff 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1485,10 +1485,10 @@ SelectIntegrationFeatures <- function( } tie.val <- var.features[min(nfeatures, length(x = var.features))] features <- names(x = var.features[which(x = var.features > tie.val)]) + vf.list <- lapply(X = object.list, FUN = VariableFeatures) if (length(x = features) > 0) { feature.ranks <- sapply(X = features, FUN = function(x) { - ranks <- sapply(X = object.list, FUN = function(y) { - vf <- VariableFeatures(object = y) + ranks <- sapply(X = vf.list, FUN = function(vf) { if (x %in% vf) { return(which(x = x == vf)) } @@ -1500,8 +1500,7 @@ SelectIntegrationFeatures <- function( } features.tie <- var.features[which(x = var.features == tie.val)] tie.ranks <- sapply(X = names(x = features.tie), FUN = function(x) { - ranks <- sapply(X = object.list, FUN = function(y) { - vf <- VariableFeatures(object = y) + ranks <- sapply(X = vf.list, FUN = function(vf) { if (x %in% vf) { return(which(x = x == vf)) } @@ -2020,7 +2019,7 @@ FilterAnchors <- function( anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { - which(x = anchors[x, "cell2"] == nn$nn.idx[anchors[x, "cell1"], ])[1] + which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] }) anchors <- anchors[!is.na(x = position), ] if (verbose) { @@ -2128,7 +2127,7 @@ FindAnchorPairs <- function( verbose = TRUE ) { neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') - max.nn <- c(ncol(x = neighbors$nnab$nn.idx), ncol(x = neighbors$nnba$nn.idx)) + max.nn <- c(ncol(x = neighbors$nnab), ncol(x = neighbors$nnba)) if (any(k.anchor > max.nn)) { message(paste0('warning: requested k.anchor = ', k.anchor, ', only ', min(max.nn), ' in dataset')) k.anchor <- min(max.nn) @@ -2140,7 +2139,7 @@ FindAnchorPairs <- function( nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 cell1.index <- suppressWarnings(which(colnames(x = object) == nn.cells1, arr.ind = TRUE)) - ncell <- 1:nrow(x = neighbors$nnab$nn.idx) + ncell <- 1:nrow(x = neighbors$nnab) ncell <- ncell[ncell %in% cell1.index] anchors <- list() # pre allocate vector @@ -2148,10 +2147,12 @@ FindAnchorPairs <- function( anchors$cell2 <- anchors$cell1 anchors$score <- anchors$cell1 + 1 idx <- 0 + indices.ab <- Indices(object = neighbors$nnab) + indices.ba <- Indices(object = neighbors$nnba) for (cell in ncell) { - neighbors.ab <- neighbors$nnab$nn.idx[cell, 1:k.anchor] + neighbors.ab <- indices.ab[cell, 1:k.anchor] mutual.neighbors <- which( - x = neighbors$nnba$nn.idx[neighbors.ab, 1:k.anchor, drop = FALSE] == cell, + x = indices.ba[neighbors.ab, 1:k.anchor, drop = FALSE] == cell, arr.ind = TRUE )[, 1] for (i in neighbors.ab[mutual.neighbors]){ @@ -2370,9 +2371,9 @@ FindWeights <- function( method = nn.method, eps = eps ) - distances <- knn_2_2$nn.dists[, -1] + distances <- Distances(object = knn_2_2)[, -1] distances <- 1 - (distances / distances[, ncol(x = distances)]) - cell.index <- knn_2_2$nn.idx[, -1] + cell.index <- Indices(object = knn_2_2)[, -1] integration.matrix <- GetIntegrationData( object = object, integration.name = integration.name, @@ -3094,8 +3095,12 @@ ScoreAnchors <- function( anchor.df <- as.data.frame(x = GetIntegrationData(object = object, integration.name = integration.name, slot = 'anchors')) neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "neighbors") offset <- length(x = neighbors$cells1) - nbrsetA <- function(x) c(neighbors$nnaa$nn.idx[x, 1:k.score], neighbors$nnab$nn.idx[x, 1:k.score] + offset) - nbrsetB <- function(x) c(neighbors$nnba$nn.idx[x, 1:k.score], neighbors$nnbb$nn.idx[x, 1:k.score] + offset) + indices.aa <- Indices(object = neighbors$nnaa) + indices.bb <- Indices(object = neighbors$nnbb) + indices.ab <- Indices(object = neighbors$nnab) + indices.ba <- Indices(object = neighbors$nnba) + nbrsetA <- function(x) c(indices.aa[x, 1:k.score], indices.ab[x, 1:k.score] + offset) + nbrsetB <- function(x) c(indices.ba[x, 1:k.score], indices.bb[x, 1:k.score] + offset) # score = number of shared neighbors anchor.new <- data.frame( 'cell1' = anchor.df[, 1], diff --git a/R/objects.R b/R/objects.R index 51175ce19..ece1d889f 100644 --- a/R/objects.R +++ b/R/objects.R @@ -221,6 +221,36 @@ scalefactors <- function(spot, fiducial, hires, lowres) { setOldClass(Classes = c('scalefactors')) +#' The Neighbor class +#' +#' The Neighbor class is used to store the results of neighbor finding +#' algorithms +#' +#' @slot nn.idx Matrix containing the nearest neighbor indices +#' @slot nn.dist Matrix containing the nearest neighbor distances +#' @slot alg.idx The neighbor finding index (if applicable). E.g. the annoy +#' index +#' @slot alg.info Any information associated with the algorithm that may be +#' needed downstream (e.g. distance metric used with annoy is needed when +#' reading in from stored file). +#' @slot cell.names Names of the cells for which the neighbors have been +#' computed. +#' +#' @name Neighbor-class +#' @rdname Neighbor-class +#' @exportClass Neighbor +#' +Neighbor <- setClass( + Class = 'Neighbor', + slots = c( + nn.idx = 'matrix', + nn.dist = 'matrix', + alg.idx = 'ANY', + alg.info = 'list', + cell.names = 'character' + ) +) + #' The SeuratCommand Class #' #' The SeuratCommand is used for logging commands that are run on a SeuratObject. It stores parameters and timestamps @@ -1460,10 +1490,15 @@ LogSeuratCommand <- function(object, return.command = FALSE) { if (which.frame < 1) { stop("'LogSeuratCommand' cannot be called at the top level", call. = FALSE) } - command.name <- as.character(x = deparse(expr = sys.calls()[[which.frame]])) - command.name <- gsub(pattern = "\\.Seurat", replacement = "", x = command.name) - call.string <- command.name - command.name <- ExtractField(string = command.name, field = 1, delim = "\\(") + if (as.character(x = sys.calls()[[1]])[1] == "do.call") { + call.string <- deparse(expr = sys.calls()[[1]]) + command.name <- as.character(x = sys.calls()[[1]])[2] + } else { + command.name <- as.character(x = deparse(expr = sys.calls()[[which.frame]])) + command.name <- gsub(pattern = "\\.Seurat", replacement = "", x = command.name) + call.string <- command.name + command.name <- ExtractField(string = command.name, field = 1, delim = "\\(") + } #capture function arguments argnames <- names(x = formals(fun = sys.function(which = sys.parent(n = 1)))) argnames <- grep(pattern = "object", x = argnames, invert = TRUE, value = TRUE) @@ -1515,6 +1550,36 @@ LogSeuratCommand <- function(object, return.command = FALSE) { return(object) } +#' Pull Neighbor or Neighbor names +#' +#' Lists the names of \code{\link{Neighbor}} objects present in +#' a Seurat object. If slot is provided, pulls specified Neighbors object. +#' +#' @param object A Seurat object +#' @param slot Name of Neighbor object +#' +#' @return If \code{slot} is \code{NULL}, the names of all \code{Neighbor} objects +#' in this Seurat object. Otherwise, the \code{Neighbor} object requested +#' +#' @export +#' +Neighbors <- function(object, slot = NULL) { + neighbors <- FilterObjects(object = object, classes.keep = "Neighbor") + if (is.null(x = slot)) { + return(neighbors) + } + if (!slot %in% neighbors) { + warning( + "Cannot find a Neighbor object of name ", + slot, + " in this Seurat object", + call. = FALSE, + immediate. = TRUE + ) + } + return(slot(object = object, name = 'neighbors')[[slot]]) +} + #' Pull DimReducs or DimReduc names #' #' Lists the names of \code{\link{DimReduc}} objects present in @@ -1758,6 +1823,23 @@ TopCells <- function(object, dim = 1, ncells = 20, balanced = FALSE, ...) { )) } +#' Get nearest neighbors for given cell +#' +#' Return a vector of cell names of the nearest n cells. +#' +#' @param object \code{\link{Neighbor}} object +#' @param cell Cell of interest +#' @param n Number of neighbors to return +#' +#' @return Returns a vector of cell names +#' +#' @export +#' +TopNeighbors <- function(object, cell, n = 5) { + indices <- Indices(object = object)[cell, 1:n] + return(Cells(x = object)[indices]) +} + #' Update old Seurat object to accomodate new features #' #' Updates Seurat objects to new structure for storing data/calculations. @@ -2094,6 +2176,28 @@ as.Graph.matrix <- function(x, ...) { return(as.Graph.Matrix(x = as(object = x, Class = 'Matrix'))) } +#' @param weighted If TRUE, fill entries in Graph matrix with value from the +#' nn.dist slot of the Neighbor object +#' @rdname as.Graph +#' @export +#' @method as.Graph Neighbor +#' +as.Graph.Neighbor <- function(x, weighted = TRUE, ...) { + CheckDots(...) + j <- as.integer(x = Indices(object = x) - 1) + i <- as.integer(x = rep(x = (1:nrow(x = x)) - 1, times = ncol(x = x))) + if (weighted) { + vals <- as.vector(x = Distances(object = x)) + } else { + vals <- 1 + } + graph <- new(Class = "dgTMatrix", i = i, j = j, x = vals, Dim = as.integer(x = c(nrow(x = x), nrow(x = x)))) + rownames(x = graph) <- Cells(x = x) + colnames(x = graph) <- Cells(x = x) + graph <- as.Graph.Matrix(x = graph) + return(graph) +} + #' @details #' The Seurat method for \code{as.loom} will try to automatically fill in datasets based on data presence. #' For example, if an assay's scaled data slot isn't filled, then dimensional reduction and graph information @@ -2140,11 +2244,19 @@ as.loom.Seurat <- function( verbose = TRUE, ... ) { + .Deprecated( + new = "SeuratDisk::as.loom", + msg = paste( + "as.loom is being moved to SeuratDisk", + "For more details, please see https://github.com/mojaveazure/seurat-disk/tree/feat/loom", + sep = '\n' + ) + ) if (!PackageCheck('loomR', error = FALSE)) { stop("Please install loomR from GitHub before converting to a loom object") } CheckDots(..., fxns = 'loomR::create') - object <- UpdateSlots(object = x) + x <- UpdateSlots(object = x) # Set the default assay to make life easy assay <- assay %||% DefaultAssay(object = x) DefaultAssay(object = x) <- assay @@ -2267,6 +2379,22 @@ as.loom.Seurat <- function( return(lfile) } +#' @rdname as.Neighbor +#' @export +#' @method as.Neighbor Graph +#' +as.Neighbor.Graph <- function( + x, + ... +) { + nn.mats <- GraphToNeighborHelper(mat = x) + return(Neighbor( + nn.idx = nn.mats[[1]], + nn.dist = nn.mats[[2]], + cell.names = rownames(x = x) + )) +} + #' @param slot Slot to store expression data as #' #' @importFrom utils packageVersion @@ -2470,6 +2598,14 @@ as.Seurat.loom <- function( verbose = TRUE, ... ) { + .Deprecated( + package = "SeuratDisk", + msg = paste( + "Functionality for loading loom files to Seurat objects is being moved to SeuratDisk", + "For more details, please see https://github.com/mojaveazure/seurat-disk/tree/feat/loom", + sep = "\n" + ) + ) CheckDots(...) # Shouldn't be necessary if (!PackageCheck('loomR', error = FALSE)) { @@ -3035,6 +3171,14 @@ Command.Seurat <- function(object, command = NULL, value = NULL, ...) { return(params[[value]]) } +#' @rdname Cells +#' @method Cells Neighbor +#' @export +#' +Cells.Neighbor <- function(x) { + return(slot(object = x, name = "cell.names")) +} + #' @rdname Cells #' @method Cells SlideSeq #' @export @@ -3195,6 +3339,17 @@ DefaultAssay.SpatialImage <- function(object, ...) { return(object) } +#' @rdname Distances +#' @export +#' @method Distances Neighbor +#' +Distances.Neighbor <- function(object, ...) { + object <- UpdateSlots(object = object) + distances <- slot(object = object, name = "nn.dist") + rownames(x = distances) <- slot(object = object, name = "cell.names") + return(distances) +} + #' @rdname Embeddings #' @export #' @method Embeddings DimReduc @@ -3641,6 +3796,40 @@ Idents.Seurat <- function(object, ...) { return(object) } +#' @rdname Index +#' @export +#' @method Index Neighbor +Index.Neighbor <- function(object, ...) { + object <- UpdateSlots(object = object) + index <- slot(object = object, name = "alg.idx") + if (is.null(x = index)) { + return(NULL) + } + if (is.null.externalptr(index$.pointer)) { + return(NULL) + } + return(index) +} + +#' @rdname Index +#' @export +#' @method Index<- Neighbor +"Index<-.Neighbor" <- function(object, ..., value) { + CheckDots(...) + slot(object = object, name = "alg.idx") <- value + return(object) +} + +#' @rdname Indices +#' @export +#' @method Indices Neighbor +Indices.Neighbor <- function(object, ...) { + object <- UpdateSlots(object = object) + indices <- slot(object = object, name = "nn.idx") + rownames(x = indices) <- slot(object = object, name = "cell.names") + return(indices) +} + #' @rdname IsGlobal #' @export #' @method IsGlobal default @@ -4304,6 +4493,15 @@ ReadH5AD.H5File <- function( ... ) { CheckDots(...) + .Deprecated( + package = 'SeuratDisk', + msg = paste( + "Functionality for reading and writing H5AD files is being moved to SeuratDisk", + "For more details, please see https://github.com/mojaveazure/seurat-disk", + "and https://mojaveazure.github.io/seurat-disk/index.html", + sep = "\n" + ) + ) # Pull assay data # If X is an H5D, assume scaled # Otherwise, if file$exists(name = 'raw'), assume X is normalized @@ -4817,6 +5015,20 @@ RenameCells.DimReduc <- function(object, new.names = NULL, ...) { return(object) } +#' @param old.names vector of old cell names +#' @rdname RenameCells +#' @export +#' @method RenameCells Neighbor +#' +RenameCells.Neighbor <- function(object, old.names = NULL, new.names = NULL, ...) { + CheckDots(...) + neighbor.names <- Cells(x = object) + names(x = new.names) <- old.names + slot(object = object, name = "cell.names") <- unname(obj = new.names[neighbor.names]) + return(object) +} + + #' @param for.merge Only rename slots needed for merging Seurat objects. #' Currently only renames the raw.data and meta.data slots. #' @param add.cell.id prefix to add cell names @@ -4903,6 +5115,14 @@ RenameCells.Seurat <- function( new.names = unname(obj = new.cell.names[Cells(x = object[[i]])]) ) } + # Rename the Neighbor + for(i in Neighbors(object = object)) { + object[[i]] <- RenameCells( + object = object[[i]], + old.names = old.names, + new.names = new.cell.names + ) + } return(object) } @@ -5074,7 +5294,16 @@ SetAssayData.Assay <- function(object, slot, new.data, ...) { call. = FALSE ) } - new.data <- new.data[new.features, colnames(x = object), drop = FALSE] + new.cells <- na.omit(object = match( + x = new.cells, + table = colnames(x = object) + )) + if (is.unsorted(x = new.features)) { + new.data <- new.data[new.features, , drop = FALSE] + } + if (is.unsorted(x = new.cells)) { + new.data <- new.data[, new.cells, drop = FALSE] + } if (slot %in% c('counts', 'data') && !all(dim(x = new.data) == dim(x = object))) { stop( "Attempting to add a different number of cells and/or features", @@ -5797,6 +6026,15 @@ WriteH5AD.Seurat <- function( overwrite = FALSE, ... ) { + .Deprecated( + package = 'SeuratDisk', + msg = paste( + "Functionality for reading and writing H5AD files is being moved to SeuratDisk", + "For more details, please see https://github.com/mojaveazure/seurat-disk", + "and https://mojaveazure.github.io/seurat-disk/index.html", + sep = "\n" + ) + ) message("WriteH5AD is not currently operational, please use as.loom") .NotYetImplemented() if (!PackageCheck('hdf5r', error = FALSE)) { @@ -6417,6 +6655,13 @@ dim.DimReduc <- function(x) { return(dim(x = Embeddings(object = x))) } +#' @export +#' @method dim Neighbor +#' +dim.Neighbor <- function(x) { + return(dim(x = Indices(object = x))) +} + #' @export #' @method dim Seurat #' @@ -6591,17 +6836,12 @@ merge.Assay <- function( } } # Merge the counts (if present) - merged.counts <- ValidateDataForMerge(assay = assays[[1]], slot = "counts") - keys <- Key(object = assays[[1]]) - for (i in 2:length(x = assays)) { - merged.counts <- RowMergeSparseMatrices( - mat1 = merged.counts, - mat2 = ValidateDataForMerge(assay = assays[[i]], slot = "counts") - ) - if (length(Key(object = assays[[i]]) > 0)) { - keys[i] <- Key(object = assays[[i]]) - } - } + counts.mats <- lapply(X = assays, FUN = ValidateDataForMerge, slot = "counts") + keys <- sapply(X = assays, FUN = Key) + merged.counts <- RowMergeSparseMatrices( + mat1 = counts.mats[[1]], + mat2 = counts.mats[2:length(x = counts.mats)] + ) combined.assay <- CreateAssayObject( counts = merged.counts, min.cells = -1, @@ -6611,15 +6851,15 @@ merge.Assay <- function( Key(object = combined.assay) <- keys[1] } if (merge.data) { - merged.data <- ValidateDataForMerge(assay = assays[[1]], slot = "data") - for (i in 2:length(x = assays)) { - merged.data <- RowMergeSparseMatrices( - mat1 = merged.data, - mat2 = ValidateDataForMerge(assay = assays[[i]], slot = "data") - ) - } + data.mats <- lapply(X = assays, FUN = ValidateDataForMerge, slot = "data") + merged.data <- RowMergeSparseMatrices( + mat1 = data.mats[[1]], + mat2 = data.mats[2:length(x = data.mats)] + ) # only keep cells that made it through counts filtering params - merged.data <- merged.data[, colnames(x = combined.assay)] + if (!all.equal(target = colnames(x = combined.assay), current = colnames(x = merged.data))) { + merged.data <- merged.data[, colnames(x = combined.assay)] + } combined.assay <- SetAssayData( object = combined.assay, slot = "data", @@ -7116,7 +7356,11 @@ subset.Seurat <- function(x, subset, cells = NULL, features = NULL, idents = NUL slot(object = x, name = 'assays')[[assay]] <- tryCatch( expr = subset.Assay(x = x[[assay]], cells = cells, features = assay.features), error = function(e) { - return(NULL) + if (e$message == "Cannot find features provided") { + return(NULL) + } else { + stop(e) + } } ) } @@ -7132,7 +7376,11 @@ subset.Seurat <- function(x, subset, cells = NULL, features = NULL, idents = NUL x[[dimreduc]] <- tryCatch( expr = subset.DimReduc(x = x[[dimreduc]], cells = cells, features = features), error = function(e) { - return(NULL) + if (e$message %in% c("Cannot find cell provided", "Cannot find features provided")) { + return(NULL) + } else { + stop(e) + } } ) } @@ -7147,6 +7395,7 @@ subset.Seurat <- function(x, subset, cells = NULL, features = NULL, idents = NUL } } slot(object = x, name = 'graphs') <- list() + slot(object = x, name = 'neighbors') <- list() Idents(object = x, drop = TRUE) <- Idents(object = x)[cells] # subset images for (image in Images(object = x)) { @@ -7353,6 +7602,21 @@ setMethod( # because R doesn't allow S3-style [[<- for S4 classes slot(object = value, name = 'cell.embeddings') <- value[[Cells(x = x), ]] } 'reductions' + } else if (inherits(x = value, what = "Neighbor")) { + # Ensure all cells are present in the Seurat object + if (length(x = Cells(x = value)) > length(x = Cells(x = x))) { + stop( + "Cannot have more cells in the Neighbor object than are present in the Seurat object.", + call. = FALSE + ) + } + if (!all(Cells(x = value) %in% Cells(x = x))) { + stop( + "Cannot add cells in the Neighbor object that aren't present in the Seurat object.", + call. = FALSE + ) + } + 'neighbors' } else if (inherits(x = value, what = 'SeuratCommand')) { # Ensure Assay that SeuratCommand is associated with is present in the Seurat object if (is.null(x = DefaultAssay(object = value))) { @@ -7410,7 +7674,7 @@ setMethod( # because R doesn't allow S3-style [[<- for S4 classes } else { # Add other object to Seurat object # Ensure cells match in value and order - if (!inherits(x = value, what = c('SeuratCommand', 'NULL', 'SpatialImage')) && !all(Cells(x = value) == Cells(x = x))) { + if (!inherits(x = value, what = c('SeuratCommand', 'NULL', 'SpatialImage', 'Neighbor')) && !all(Cells(x = value) == Cells(x = x))) { stop("All cells in the object being added must match the cells in this object", call. = FALSE) } # Ensure we're not duplicating object names @@ -7692,6 +7956,20 @@ setMethod( } ) +setMethod( + f = 'show', + signature = 'Neighbor', + definition = function(object) { + cat( + "A Neighbor object containing the", + ncol(x = object), + "nearest neighbors for", + nrow(x = object), + "cells" + ) + } +) + setMethod( f = "show", signature = "Seurat", @@ -8011,11 +8289,25 @@ SubsetVST <- function(sct.info, cells, features) { # @seealso \{code{\link{TopCells}}} \{code{\link{TopFeatures}}} # Top <- function(data, num, balanced) { + nr <- nrow(x = data) + if (num > nr) { + warning("Requested number is larger than the number of available items (", + nr, "). Setting to ", nr , ".", call. = FALSE) + num <- nr + } + if (num == 1) { + balanced <- FALSE + } top <- if (balanced) { num <- round(x = num / 2) data <- data[order(data, decreasing = TRUE), , drop = FALSE] positive <- head(x = rownames(x = data), n = num) negative <- rev(x = tail(x = rownames(x = data), n = num)) + + # remove duplicates + if (positive[num] == negative[num]) { + negative <- negative[-num] + } list(positive = positive, negative = negative) } else { data <- data[rev(x = order(abs(x = data))), , drop = FALSE] diff --git a/R/preprocessing.R b/R/preprocessing.R index 1c78f7f16..f56f035c4 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -184,6 +184,15 @@ CreateGeneActivityMatrix <- function( keep.sparse = FALSE, verbose = TRUE ) { + .Deprecated( + new = 'Signac::GeneActivity', + msg = paste( + "CreateGeneActivityMatrix functionality is being moved to Signac.", + "Equivalent functionality can be", + "achieved via the Signac::GeneActivity function; for", + "more information on Signac, please see https://github.com/timoast/Signac" + ) + ) if (!PackageCheck('GenomicRanges', error = FALSE)) { stop("Please install GenomicRanges from Bioconductor.") } @@ -754,7 +763,7 @@ LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { #' #' @export #' -#' @references \url{https://www.biorxiv.org/content/early/2018/08/08/387241} +#' @references \url{https://www.biorxiv.org/content/10.1101/387241v1} #' #' @examples #' \dontrun{ @@ -864,6 +873,14 @@ MULTIseqDemux <- function( #' } #' ReadAlevinCsv <- function(base.path) { + .Deprecated( + new = "SeuratWrappers::ReadAlevin", + msg = paste( + "Reading data from Alevin files is being moved to SeuratWrappers", + "Details can be found at https://github.com/satijalab/seurat-wrappers", + sep = '\n' + ) + ) if (!dir.exists(base.path)) { stop("Directory provided does not exist") } @@ -911,6 +928,14 @@ ReadAlevinCsv <- function(base.path) { #' } #' ReadAlevin <- function(base.path) { + .Deprecated( + new = "SeuratWrappers::ReadAlevin", + msg = paste( + "Reading data from Alevin files is being moved to SeuratWrappers", + "Details can be found at https://github.com/satijalab/seurat-wrappers", + sep = '\n' + ) + ) if (!dir.exists(base.path)) { stop("Directory provided does not exist") } @@ -1356,7 +1381,6 @@ RunMarkVario <- function( #' @param verbose Display messages/progress #' #' @importFrom stats dist -#' @importFrom ape Moran.I #' #' @export #' @@ -1369,6 +1393,11 @@ RunMoransI <- function(data, pos, verbose = TRUE) { Rfast2.installed <- PackageCheck("Rfast2", error = FALSE) if (Rfast2.installed) { MyMoran <- Rfast2::moranI + } else if (!PackageCheck('ape', error = FALSE)) { + stop( + "'RunMoransI' requires either Rfast2 or ape to be installed", + call. = FALSE + ) } else { MyMoran <- ape::Moran.I if (getOption('Seurat.Rfast2.msg', TRUE)) { @@ -2793,13 +2822,15 @@ ScaleData.default <- function( row <- chunks[index, ] group <- row[[1]] block <- as.vector(x = blocks[, as.numeric(x = row[[2]])]) - data.scale <- scale.function( + arg.list <- list( mat = object[features[block[1]:block[2]], split.cells[[group]], drop = FALSE], scale = do.scale, center = do.center, scale_max = scale.max, display_progress = FALSE ) + arg.list <- arg.list[intersect(x = names(x = arg.list), y = names(x = formals(fun = scale.function)))] + data.scale <- do.call(what = scale.function, args = arg.list) dimnames(x = data.scale) <- dimnames(x = object[features[block[1]:block[2]], split.cells[[group]]]) suppressWarnings(expr = data.scale[is.na(x = data.scale)] <- 0) CheckGC() @@ -2841,13 +2872,15 @@ ScaleData.default <- function( for (i in 1:max.block) { my.inds <- ((block.size * (i - 1)):(block.size * i - 1)) + 1 my.inds <- my.inds[my.inds <= length(x = features)] - data.scale <- scale.function( + arg.list <- list( mat = object[features[my.inds], split.cells[[x]], drop = FALSE], scale = do.scale, center = do.center, scale_max = scale.max, display_progress = FALSE ) + arg.list <- arg.list[intersect(x = names(x = arg.list), y = names(x = formals(fun = scale.function)))] + data.scale <- do.call(what = scale.function, args = arg.list) dimnames(x = data.scale) <- dimnames(x = object[features[my.inds], split.cells[[x]]]) scaled.data[features[my.inds], split.cells[[x]]] <- data.scale rm(data.scale) diff --git a/R/tree.R b/R/tree.R index a20a3b92e..26640267e 100644 --- a/R/tree.R +++ b/R/tree.R @@ -2,6 +2,11 @@ #' NULL +cluster.ape <- paste( + "Cluster tree functionality requires 'ape'", + "please install with 'install.packages('ape')'" +) + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -33,7 +38,6 @@ NULL #' #' @return A Seurat object where the cluster tree can be accessed with \code{\link{Tool}} #' -#' @importFrom ape as.phylo #' @importFrom pbapply pblapply #' @importFrom stats dist hclust #' @importFrom utils txtProgressBar setTxtProgressBar @@ -56,6 +60,9 @@ BuildClusterTree <- function( reorder.numeric = FALSE, verbose = TRUE ) { + if (!PackageCheck('ape', error = FALSE)) { + stop(cluster.ape, call. = FALSE) + } assay <- assay %||% DefaultAssay(object = object) if (!is.null(x = graph)) { idents <- levels(x = object) @@ -127,7 +134,7 @@ BuildClusterTree <- function( )[[1]] data.dist <- dist(x = t(x = data.avg[features, ])) } - data.tree <- as.phylo(x = hclust(d = data.dist)) + data.tree <- ape::as.phylo(x = hclust(d = data.dist)) Tool(object = object) <- data.tree if (reorder) { if (verbose) { @@ -154,9 +161,6 @@ BuildClusterTree <- function( verbose = verbose ) } - # if (do.plot) { - # PlotClusterTree(object) - # } return(object) } diff --git a/R/utilities.R b/R/utilities.R index 8b63f64ef..f2b7b3ac3 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -8,26 +8,35 @@ NULL #' Calculate module scores for feature expression programs in single cells #' -#' Calculate the average expression levels of each program (cluster) on single cell level, -#' subtracted by the aggregated expression of control feature sets. -#' All analyzed features are binned based on averaged expression, and the control features are -#' randomly selected from each bin. +#' Calculate the average expression levels of each program (cluster) on single +#' cell level, subtracted by the aggregated expression of control feature sets. +#' All analyzed features are binned based on averaged expression, and the +#' control features are randomly selected from each bin. #' #' @param object Seurat object -#' @param features Feature expression programs in list -#' @param pool List of features to check expression levels agains, defaults to \code{rownames(x = object)} -#' @param nbin Number of bins of aggregate expression levels for all analyzed features -#' @param ctrl Number of control features selected from the same bin per analyzed feature +#' @param features A list of vectors of features for expression programs; each +#' entry should be a vector of feature names +#' @param pool List of features to check expression levels agains, defaults to +#' \code{rownames(x = object)} +#' @param nbin Number of bins of aggregate expression levels for all +#' analyzed features +#' @param ctrl Number of control features selected from the same bin per +#' analyzed feature #' @param k Use feature clusters returned from DoKMeans #' @param assay Name of assay to use -#' @param name Name for the expression programs +#' @param name Name for the expression programs; will append a number to the +#' end for each entry in \code{features} (eg. if \code{features} has three +#' programs, the results will be stored as \code{name1}, \code{name2}, +#' \code{name3}, respectively) #' @param seed Set a random seed. If NULL, seed is not set. #' @param search Search for symbol synonyms for features in \code{features} that -#' don't match features in \code{object}? Searches the HGNC's gene names database; -#' see \code{\link{UpdateSymbolList}} for more details +#' don't match features in \code{object}? Searches the HGNC's gene names +#' database; see \code{\link{UpdateSymbolList}} for more details #' @param ... Extra parameters passed to \code{\link{UpdateSymbolList}} #' -#' @return Returns a Seurat object with module scores added to object meta data +#' @return Returns a Seurat object with module scores added to object meta data; +#' each module is stored as \code{name#} for each module program present in +#' \code{features} #' #' @importFrom ggplot2 cut_number #' @importFrom Matrix rowMeans colMeans @@ -530,6 +539,29 @@ CollapseSpeciesExpressionMatrix <- function( return(object) } +# Create an Annoy index +# +# @note Function exists because it's not exported from \pkg{uwot} +# +# @param name Distance metric name +# @param ndim Number of dimensions +# +# @return An nn index object +# +#' @importFrom methods new +#' @importFrom RcppAnnoy AnnoyAngular AnnoyManhattan AnnoyEuclidean AnnoyHamming +# +CreateAnn <- function(name, ndim) { + return(switch( + EXPR = name, + cosine = new(Class = AnnoyAngular, ndim), + manhattan = new(Class = AnnoyManhattan, ndim), + euclidean = new(Class = AnnoyEuclidean, ndim), + hamming = new(Class = AnnoyHamming, ndim), + stop("BUG: unknown Annoy metric '", name, "'") + )) +} + #' Run a custom distance function on an input data matrix #' #' @author Jean Fan @@ -643,6 +675,14 @@ ExportToCellbrowser <- function( skip.reductions = FALSE, ... ) { + .Deprecated( + new = "SeuratWrappers::ExportToCellbrowser", + msg = paste( + "Cell browser functionality is moving to SeuratWrappers", + "For more details, please see https://github.com/satijalab/seurat-wrappers", + sep = '\n' + ) + ) vars <- c(...) if (is.null(x = vars)) { vars <- c("nCount_RNA", "nFeature_RNA") @@ -862,6 +902,52 @@ ExpVar <- function(x) { return(log1p(x = var(x = expm1(x = x)))) } +#' Scale and/or center matrix rowwise +#' +#' Performs row scaling and/or centering. Equivalent to using t(scale(t(mat))) +#' in R except in the case of NA values. +#' +#' @param mat A matrix +#' @param center a logical value indicating whether to center the rows +#' @param scale a logical value indicating whether to scale the rows +#' @param scale_max clip all values greater than scale_max to scale_max. Don't +#' clip if Inf. +#' @return Returns the center/scaled matrix +#' +#' @importFrom matrixStats rowMeans2 rowSds rowSums2 +#' +#' @export +#' +FastRowScale <- function( + mat, + center = TRUE, + scale = TRUE, + scale_max = 10 +) { + # inspired by https://www.r-bloggers.com/a-faster-scale-function/ + if (center) { + rm <- rowMeans2(x = mat, na.rm = TRUE) + } + if (scale) { + if (center) { + rsd <- rowSds(mat, center = rm) + } else { + rsd <- sqrt(x = rowSums2(x = mat^2)/(ncol(x = mat) - 1)) + } + } + if (center) { + mat <- mat - rm + } + if (scale) { + mat <- mat / rsd + } + if (scale_max != Inf) { + mat[mat > scale_max] <- scale_max + } + return(mat) +} + + #' Get updated synonyms for gene symbols #' #' Find current gene symbols based on old or alias symbols using the gene @@ -884,7 +970,7 @@ ExpVar <- function(x) { #' where each entry is the current symbol found for each symbol provided and the #' names are the provided symbols. Otherwise, a named vector with the same information. #' -#' @source \url{https://www.genenames.org/} \url{http://rest.genenames.org/} +#' @source \url{https://www.genenames.org/} \url{https://www.genenames.org/help/rest/} #' #' @importFrom utils txtProgressBar setTxtProgressBar #' @importFrom httr GET accept_json timeout status_code content @@ -1188,6 +1274,44 @@ PercentageFeatureSet <- function( return(percent.featureset) } +#' Load the Annoy index file +#' +#' @param object Neighbor object +#' @param file Path to file with annoy index +#' +#' @return Returns the Neighbor object with the index stored +#' @export +#' +LoadAnnoyIndex <- function(object, file){ + metric <- slot(object = object, name = "alg.info")$metric + ndim <- slot(object = object, name = "alg.info")$ndim + if (is.null(x = metric)) { + stop("Provided Neighbor object wasn't generated with annoy") + } + annoy.idx <- CreateAnn(name = metric, ndim = ndim) + annoy.idx$load(path.expand(path = file)) + Index(object = object) <- annoy.idx + return(object) +} + +#' Save the Annoy index +#' +#' @param object A Neighbor object with the annoy index stored +#' @param file Path to file to write index to +#' +#' @export +#' +SaveAnnoyIndex <- function( + object, + file +) { + index <- Index(object = object) + if (is.null(x = index)) { + stop("Index for provided Neighbor object is NULL") + } + index$save(path.expand(path = file)) +} + #' Regroup idents based on meta.data info #' #' For cells in each ident, set a new identity based on the most common value @@ -1227,7 +1351,7 @@ RegroupIdents <- function(object, metadata) { #' with zeros in the matrix not containing the row. #' #' @param mat1 First matrix -#' @param mat2 Second matrix +#' @param mat2 Second matrix or list of matrices #' #' @return A merged matrix #' @@ -1237,34 +1361,37 @@ RegroupIdents <- function(object, metadata) { # #' @export #' -RowMergeSparseMatrices <- function(mat1, mat2){ - if (inherits(x = mat1, what = "data.frame")) { - mat1 <- as.matrix(x = mat1) - } - if (inherits(x = mat2, what = "data.frame")) { - mat2 <- as.matrix(x = mat2) +RowMergeSparseMatrices <- function(mat1, mat2) { + all.mat <- c(list(mat1), mat2) + all.rownames <- list() + all.colnames <- list() + use.cbind <- TRUE + for(i in 1:length(x = all.mat)) { + if (inherits(x = all.mat[[i]], what = "data.frame")) { + all.mat[[i]] <- as.matrix(x = all.mat[[i]]) + } + all.rownames[[i]] <- rownames(x = all.mat[[i]]) + all.colnames[[i]] <- colnames(x = all.mat[[i]]) + # use cbind if all matrices have the same rownames + if (i > 1) { + if (!isTRUE(x = all.equal(target = all.rownames[[i]], current = all.rownames[[i - 1]]))) { + use.cbind <- FALSE + } + } } - mat1.names <- rownames(x = mat1) - mat2.names <- rownames(x = mat2) - if (length(x = mat1.names) == length(x = mat2.names) && all(mat1.names == mat2.names)) { - new.mat <- cbind(mat1, mat2) + if (isTRUE(x = use.cbind)) { + new.mat <- do.call(what = cbind, args = all.mat) } else { - mat1 <- as(object = mat1, Class = "RsparseMatrix") - mat2 <- as(object = mat2, Class = "RsparseMatrix") - all.names <- union(x = mat1.names, y = mat2.names) - new.mat <- RowMergeMatrices( - mat1 = mat1, - mat2 = mat2, - mat1_rownames = mat1.names, - mat2_rownames = mat2.names, + all.mat <- lapply(X = all.mat, FUN = as, Class = "RsparseMatrix") + all.names <- unique(x = do.call(what = c, args = all.rownames)) + new.mat <- RowMergeMatricesList( + mat_list = all.mat, + mat_rownames = all.rownames, all_rownames = all.names ) rownames(x = new.mat) <- make.unique(names = all.names) } - colnames(x = new.mat) <- make.unique(names = c( - colnames(x = mat1), - colnames(x = mat2) - )) + colnames(x = new.mat) <- make.unique(names = c(do.call(what = c, all.colnames))) return(new.mat) } @@ -1281,6 +1408,14 @@ RowMergeSparseMatrices <- function(mat1, mat2){ #' } #' StopCellbrowser <- function() { + .Deprecated( + new = "SeuratWrappers::StopCellbrowser", + msg = paste( + "Cell browser functionality is moving to SeuratWrappers", + "For more details, please see https://github.com/satijalab/seurat-wrappers", + sep = '\n' + ) + ) if (py_module_available(module = "cellbrowser")) { cb <- import(module = "cellbrowser") cb$cellbrowser$stop() @@ -1691,6 +1826,14 @@ IsMatrixEmpty <- function(x) { return(all(matrix.dims == 0) || matrix.na) } +# Check if externalptr is null +# From https://stackoverflow.com/questions/26666614/how-do-i-check-if-an-externalptr-is-null-from-within-r +# +is.null.externalptr <- function(pointer) { + stopifnot(is(pointer, "externalptr")) + .Call("isnull", pointer) +} + # Check whether an assay has been processed by sctransform # # @param assay assay to check diff --git a/R/visualization.R b/R/visualization.R index 9f71b9573..3b9f7ae82 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -311,7 +311,11 @@ DoHeatmap <- function( if (group.bar) { # TODO: Change group.bar to annotation.bar default.colors <- c(hue_pal()(length(x = levels(x = group.use)))) - cols <- group.colors[1:length(x = levels(x = group.use))] %||% default.colors + if (!is.null(x = names(x = group.colors))) { + cols <- unname(obj = group.colors[levels(x = group.use)]) + } else { + cols <- group.colors[1:length(x = levels(x = group.use))] %||% default.colors + } if (any(is.na(x = cols))) { cols[is.na(x = cols)] <- default.colors[is.na(x = cols)] cols <- Col2Hex(cols) @@ -481,8 +485,10 @@ HTOHeatmap <- function( #' @param log plot the feature axis on log scale #' @param ncol Number of columns if multiple plots are displayed #' @param slot Use non-normalized counts data for plotting +#' @param stack Horizontally stack plots for each feature #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} -#' ggplot object. If \code{FALSE}, return a list of ggplot objects +#' ggplot object. If \code{FALSE}, return a list of ggplot +#' @param fill.by Color violins/ridges based on either 'feature' or 'ident' #' #' @return A \code{\link[patchwork]{patchwork}ed} ggplot object if #' \code{combine = TRUE}; otherwise, a list of ggplot objects @@ -505,7 +511,9 @@ RidgePlot <- function( log = FALSE, ncol = NULL, slot = 'data', - combine = TRUE + stack = FALSE, + combine = TRUE, + fill.by = 'feature' ) { return(ExIPlot( object = object, @@ -521,7 +529,9 @@ RidgePlot <- function( group.by = group.by, log = log, slot = slot, - combine = combine + stack = stack, + combine = combine, + fill.by = fill.by )) } @@ -536,6 +546,7 @@ RidgePlot <- function( #' @param split.plot plot each group of the split violin plots by multiple or #' single violin shapes. #' @param adjust Adjust parameter for geom_violin +#' @param flip flip plot orientation (identities on x-axis) #' #' @return A \code{\link[patchwork]{patchwork}ed} ggplot object if #' \code{combine = TRUE}; otherwise, a list of ggplot objects @@ -565,7 +576,10 @@ VlnPlot <- function( ncol = NULL, slot = 'data', split.plot = FALSE, - combine = TRUE + stack = FALSE, + combine = TRUE, + fill.by = 'feature', + flip = FALSE ) { if ( !is.null(x = split.by) & @@ -597,7 +611,10 @@ VlnPlot <- function( split.by = split.by, log = log, slot = slot, - combine = combine + stack = stack, + combine = combine, + fill.by = fill.by, + flip = flip )) } @@ -700,8 +717,14 @@ ColorDimSplit <- function( #' @param order Specify the order of plotting for the idents. This can be #' useful for crowded plots if points of interest are being buried. Provide #' either a full list of valid idents or a subset to be plotted last (on top) +#' @param shuffle Whether to randomly shuffle the order of points. This can be +#' useful for crowded plots if points of interest are being buried. (default is FALSE) +#' @param seed Sets the seed if randomly shuffling the order of points. #' @param label Whether to label the clusters #' @param label.size Sets size of labels +#' @param label.color Sets the color of the label text +#' @param label.box Whether to put a box around the label text (geom_text vs +#' geom_label) #' @param repel Repel labels #' @param cells.highlight A list of character or numeric vectors of cells to #' highlight. If only one group of cells desired, can simply @@ -748,8 +771,12 @@ DimPlot <- function( split.by = NULL, shape.by = NULL, order = NULL, + shuffle = FALSE, + seed = 1, label = FALSE, label.size = 4, + label.color = 'black', + label.box = FALSE, repel = FALSE, cells.highlight = NULL, cols.highlight = '#DE2D26', @@ -763,6 +790,10 @@ DimPlot <- function( } reduction <- reduction %||% DefaultDimReduc(object = object) cells <- cells %||% colnames(x = object) + if (isTRUE(x = shuffle)) { + set.seed(seed = seed) + cells <- sample(x = cells) + } data <- Embeddings(object = object[[reduction]])[cells, dims] data <- as.data.frame(x = data) dims <- paste0(Key(object = object[[reduction]]), dims) @@ -804,7 +835,9 @@ DimPlot <- function( id = x, repel = repel, size = label.size, - split.by = split.by + split.by = split.by, + box = label.box, + color = label.color ) } if (!is.null(x = split.by)) { @@ -2784,7 +2817,7 @@ SpatialPlot <- function( if (interactive) { return(ISpatialDimPlot( object = object, - image = image, + image = images[1], group.by = group.by, alpha = alpha )) @@ -3619,13 +3652,11 @@ JackStrawPlot <- function( #' Plots previously computed tree (from BuildClusterTree) #' #' @param object Seurat object -#' @param \dots Additional arguments to ape::plot.phylo +#' @param \dots Additional arguments to +#' \code{\link[ape:plot.phylo]{ape::plot.phylo}} #' #' @return Plots dendogram (must be precomputed using BuildClusterTree), returns no value #' -#' @importFrom ape plot.phylo -#' @importFrom ape nodelabels -#' #' @export #' #' @examples @@ -3633,12 +3664,15 @@ JackStrawPlot <- function( #' PlotClusterTree(object = pbmc_small) #' PlotClusterTree <- function(object, ...) { + if (!PackageCheck('ape', error = FALSE)) { + stop(cluster.ape, call. = FALSE) + } if (is.null(x = Tool(object = object, slot = "BuildClusterTree"))) { stop("Phylogenetic tree does not exist, build using BuildClusterTree") } data.tree <- Tool(object = object, slot = "BuildClusterTree") - plot.phylo(x = data.tree, direction = "downwards", ...) - nodelabels() + ape::plot.phylo(x = data.tree, direction = "downwards", ...) + ape::nodelabels() } #' Visualize Dimensional Reduction genes @@ -3680,16 +3714,18 @@ VizDimLoadings <- function( ncol = NULL, combine = TRUE ) { - ncol <- ncol %||% 2 - if (length(x = dims) == 1) { - ncol <- 1 - } - if (length(x = dims) > 6) { - ncol <- 3 - } - if (length(x = dims) > 9) { - ncol <- 4 - } + if (is.null(x = ncol)) { + ncol <- 2 + if (length(x = dims) == 1) { + ncol <- 1 + } + if (length(x = dims) > 6) { + ncol <- 3 + } + if (length(x = dims) > 9) { + ncol <- 4 + } + } loadings <- Loadings(object = object[[reduction]], projected = projected) features <- lapply( X = dims, @@ -3943,6 +3979,11 @@ CellSelector <- function(plot, object = NULL, ident = 'SelectedCells', ...) { plot.data <- GGpointToBase(plot = plot, do.plot = FALSE) plot.data$selected_ <- FALSE rownames(x = plot.data) <- rownames(x = plot$data) + colnames(x = plot.data) <- gsub( + pattern = '-', + replacement = '.', + x = colnames(x = plot.data) + ) # Server function server <- function(input, output, session) { plot.env <- reactiveValues(data = plot.data) @@ -4483,30 +4524,15 @@ LabelClusters <- function( if (any(!groups %in% possible.clusters)) { stop("The following clusters were not found: ", paste(groups[!groups %in% possible.clusters], collapse = ",")) } + pb <- ggplot_build(plot = plot) if (geom == 'GeomSpatial') { - pb <- ggplot_build(plot = plot) data[, xynames["y"]] = max(data[, xynames["y"]]) - data[, xynames["y"]] + min(data[, xynames["y"]]) if (!pb$plot$plot_env$crop) { - # pretty hacky solution to learn the linear transform to put the data into - # the rescaled coordinates when not cropping in. Probably a better way to - # do this via ggplot y.transform <- c(0, nrow(x = pb$plot$plot_env$image)) - pb$layout$panel_params[[1]]$y.range data[, xynames["y"]] <- data[, xynames["y"]] + sum(y.transform) - data$x <- data[, xynames["x"]] - data$y <- data[, xynames["y"]] - panel_params_image <- c() - panel_params_image$x.range <- c(0, ncol(x = pb$plot$plot_env$image)) - panel_params_image$y.range <- c(0, nrow(x = pb$plot$plot_env$image)) - suppressWarnings(panel_params_image$x$continuous_range <- c(0, ncol(x = pb$plot$plot_env$image))) - suppressWarnings(panel_params_image$y$continuous_range <- c(0, nrow(x = pb$plot$plot_env$image))) - image.xform <- pb$layout$coord$transform(data, panel_params_image)[, c("x", "y")] - plot.xform <- pb$layout$coord$transform(data, pb$layout$panel_params[[1]])[, c("x", "y")] - x.xform <- lm(data$x ~ plot.xform$x) - y.xform <- lm(data$y ~ plot.xform$y) - data[, xynames['y']] <- image.xform$y * y.xform$coefficients[2] + y.xform$coefficients[1] - data[, xynames['x']] <- image.xform$x *x.xform$coefficients[2] + x.xform$coefficients[1] } } + data <- cbind(data, color = pb$data[[1]][[1]]) labels.loc <- lapply( X = groups, FUN = function(group) { @@ -4538,6 +4564,7 @@ LabelClusters <- function( ))) } data.medians[, id] <- group + data.medians$color <- data.use$color[1] return(data.medians) } ) @@ -4566,7 +4593,7 @@ LabelClusters <- function( mapping = aes_string(x = xynames['x'], y = xynames['y'], label = id, fill = id), show.legend = FALSE, ... - ) + ) + scale_fill_manual(values = labels.loc$color[order(labels.loc[, id])]) } else { geom.use <- ifelse(test = repel, yes = geom_text_repel, no = geom_text) plot <- plot + geom.use( @@ -4576,7 +4603,6 @@ LabelClusters <- function( ... ) } - return(plot) } @@ -5331,7 +5357,8 @@ DefaultDimReduc <- function(object, assay = NULL) { # anything that can be retreived by FetchData) # @param idents Which classes to include in the plot (default is all) # @param ncol Number of columns if multiple plots are displayed -# @param sort Sort identity classes (on the x-axis) by the average expression of the attribute being potted +# @param sort Sort identity classes (on the x-axis) by the average expression of the attribute being potted, +# or, if stack is True, sort both identity classes and features by hierarchical clustering # @param y.max Maximum y axis value # @param same.y.lims Set all the y-axis limits to the same values # @param adjust Adjust parameter for geom_violin @@ -5341,8 +5368,11 @@ DefaultDimReduc <- function(object, assay = NULL) { # @param split.by A variable to split the plot by # @param log plot Y axis on log scale # @param slot Use non-normalized counts data for plotting +# @param stack Horizontally stack plots for multiple feature # @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} # ggplot object. If \code{FALSE}, return a list of ggplot objects +# @param fill.by Color violins/ridges based on either 'feature' or 'ident' +# @param flip flip plot orientation (identities on x-axis) # # @return A \code{\link[patchwork]{patchwork}ed} ggplot object if # \code{combine = TRUE}; otherwise, a list of ggplot objects @@ -5368,15 +5398,35 @@ ExIPlot <- function( split.by = NULL, log = FALSE, slot = 'data', - combine = TRUE + stack = FALSE, + combine = TRUE, + fill.by = NULL, + flip = FALSE ) { assay <- assay %||% DefaultAssay(object = object) DefaultAssay(object = object) <- assay - ncol <- ncol %||% ifelse( - test = length(x = features) > 9, - yes = 4, - no = min(length(x = features), 3) - ) + if (isTRUE(x = stack)) { + if (!is.null(x = ncol)) { + warning( + "'ncol' is ignored with 'stack' is TRUE", + call. = FALSE, + immediate. = TRUE + ) + } + if (!is.null(x = y.max)) { + warning( + "'y.max' is ignored when 'stack' is TRUE", + call. = FALSE, + immediate. = TRUE + ) + } + } else { + ncol <- ncol %||% ifelse( + test = length(x = features) > 9, + yes = 4, + no = min(length(x = features), 3) + ) + } data <- FetchData(object = object, vars = features, slot = slot) features <- colnames(x = data) if (is.null(x = idents)) { @@ -5413,7 +5463,7 @@ ExIPlot <- function( cols <- Interleave(cols, InvertHex(hexadecimal = cols)) } cols <- rep_len(x = cols, length.out = length(x = levels(x = split))) - names(x = cols) <- sort(x = levels(x = split)) + names(x = cols) <- levels(x = split) if ((length(x = cols) > 2) & (type == "splitViolin")) { warning("Split violin is only supported for <3 groups, using multi-violin.") type <- "violin" @@ -5422,6 +5472,22 @@ ExIPlot <- function( if (same.y.lims && is.null(x = y.max)) { y.max <- max(data) } + if (isTRUE(x = stack)) { + return(MultiExIPlot( + type = type, + data = data, + idents = idents, + split = split, + sort = sort, + same.y.lims = same.y.lims, + adjust = adjust, + cols = cols, + pt.size = pt.size, + log = log, + fill.by = fill.by, + flip = flip + )) + } plots <- lapply( X = features, FUN = function(x) { @@ -5441,8 +5507,16 @@ ExIPlot <- function( ) label.fxn <- switch( EXPR = type, - 'violin' = ylab, - "splitViolin" = ylab, + 'violin' = if (stack) { + xlab + } else { + ylab + }, + "splitViolin" = if (stack) { + xlab + } else { + ylab + }, 'ridge' = xlab, stop("Unknown ExIPlot type ", type, call. = FALSE) ) @@ -6100,6 +6174,230 @@ MakeLabels <- function(data) { return(labels) } + +# Plot expression of multiple features by identity on a plot +# +# @param data Data to plot +# @param idents Idents to use +# @param type Make either a 'ridge' or 'violin' plot +# @param sort Sort identity classes and features based on hierarchical clustering +# @param same.y.lims Indicates whether to use the same ylim for each feature +# @param adjust Adjust parameter for geom_violin +# @param cols Colors to use for plotting +# @param log plot Y axis on log scale +# @param fill.by Color violins/ridges based on either 'feature' or 'ident' +# @param seed.use Random seed to use. If NULL, don't set a seed +# @param flip flip plot orientation (identities on x-axis) +# +# @return A ggplot-based Expression-by-Identity plot +# +#' @importFrom cowplot theme_cowplot +#' @importFrom utils globalVariables +#' @importFrom stats rnorm dist hclust +#' @importFrom ggridges geom_density_ridges theme_ridges +#' @importFrom ggplot2 ggplot aes_string facet_grid theme labs geom_rect +#' geom_violin geom_jitter ylim position_jitterdodge scale_fill_manual +#' scale_y_log10 scale_x_log10 scale_y_discrete scale_x_continuous +#' scale_y_continuous waiver +#' +MultiExIPlot <- function( + data, + idents, + split = NULL, + type = 'violin', + sort = FALSE, + same.y.lims = same.y.lims, + adjust = 1, + pt.size = 0, + cols = NULL, + seed.use = 42, + log = FALSE, + fill.by = NULL, + flip = NULL +) { + if (!(fill.by %in% c("feature", "ident"))) { + stop("`fill.by` must be either `feature` or `ident`") + } + if (!is.null(x = seed.use)) { + set.seed(seed = seed.use) + } + if (!is.data.frame(x = data) || ncol(x = data) < 2) { + stop("MultiExIPlot requires a data frame with >1 column") + } + data <- Melt(x = data) + data <- data.frame( + feature = data$cols, + expression = data$vals, + ident = rep_len(x = idents, length.out = nrow(x = data)) + ) + if ((is.character(x = sort) && nchar(x = sort) > 0) || sort) { + data$feature <- as.vector(x = data$feature) + data$ident <- as.vector(x = data$ident) + # build matrix of average expression (#-features by #-idents), lexical ordering + avgs.matrix <- sapply( + X = split(x = data, f = data$ident), + FUN = function(df) { + return(tapply( + X = df$expression, + INDEX = df$feature, + FUN = mean + )) + } + ) + idents.order <- hclust(d = dist(x = t(x = L2Norm(mat = avgs.matrix, MARGIN = 2))))$order + avgs.matrix <- avgs.matrix[,idents.order] + avgs.matrix <- L2Norm(mat = avgs.matrix, MARGIN = 1) + # order feature clusters by position of their "rank-1 idents" + position <- apply(X = avgs.matrix, MARGIN = 1, FUN = which.max) + mat <- hclust(d = dist(x = avgs.matrix))$merge + orderings <- list() + for (i in 1:nrow(mat)) { + x <- if (mat[i,1] < 0) -mat[i,1] else orderings[[mat[i,1]]] + y <- if (mat[i,2] < 0) -mat[i,2] else orderings[[mat[i,2]]] + x.pos <- min(x = position[x]) + y.pos <- min(x = position[y]) + orderings[[i]] <- if (x.pos < y.pos) { + c(x, y) + } else { + c(y, x) + } + } + features.order <- orderings[[length(x = orderings)]] + data$feature <- factor( + x = data$feature, + levels = unique(x = sort(x = data$feature))[features.order] + ) + data$ident <- factor( + x = data$ident, + levels = unique(x = sort(x = data$ident))[rev(x = idents.order)] + ) + } else { + data$feature <- factor(x = data$feature, levels = unique(x = data$feature)) + } + if (log) { + noise <- rnorm(n = nrow(x = data)) / 200 + data$expression <- data$expression + 1 + } else { + noise <- rnorm(n = nrow(x = data)) / 100000 + } + for (f in unique(x = data$feature)) { + if (all(data$expression[(data$feature == f)] == data$expression[(data$feature == f)][1])) { + warning( + "All cells have the same value of ", + f, + call. = FALSE, + immediate. = TRUE + ) + } else { + data$expression[(data$feature == f)] <- data$expression[(data$feature == f)] + noise[(data$feature == f)] + } + } + if (type == 'violin' && !is.null(x = split)) { + data$split <- rep_len(x = split, length.out = nrow(data)) + vln.geom <- geom_violin + fill.by <- 'split' + } else if (type == 'splitViolin' && !is.null(x = split)) { + data$split <- rep_len(x = split, length.out = nrow(data)) + vln.geom <- geom_split_violin + fill.by <- 'split' + type <- 'violin' + } else { + vln.geom <- geom_violin + } + switch( + EXPR = type, + 'violin' = { + geom <- list(vln.geom(scale = 'width', adjust = adjust, trim = TRUE)) + }, + 'ridge' = { + geom <- list( + geom_density_ridges(scale = 4), + theme_ridges(), + scale_y_discrete(expand = c(0.01, 0)) + ) + }, + stop("Unknown plot type: ", type) + ) + if (flip) { + x <- 'ident' + x.label <- 'Identity' + y <- 'expression' + y.label <- 'Expression Level' + } else { + y <- 'ident' + y.label <- 'Identity' + x <- 'expression' + x.label <- 'Expression Level' + } + plot <- ggplot( + data = data, + mapping = aes_string(x = x, y = y, fill = fill.by)[c(2, 3, 1)] + ) + + labs(x = x.label, y = y.label, fill = NULL) + + theme_cowplot() + plot <- do.call(what = '+', args = list(plot, geom)) + if (flip) { + plot <- plot + + scale_y_continuous( + expand = c(0, 0), + labels = function(x) c(rep(x = '', times = length(x)-2), x[length(x) - 1], '')) + + facet_grid(feature ~ ., scales = (if (same.y.lims) 'fixed' else 'free')) + + FacetTheme( + panel.spacing = unit(0, 'lines'), + panel.background = element_rect(fill = NA, color = "black"), + axis.text.y = element_text(size = 7), + axis.text.x = element_text(angle = 45, hjust = 1), + strip.text.y.right = element_text(angle = 0)) + } else { + plot <- plot + + scale_x_continuous( + expand = c(0, 0), + labels = function(x) c(rep(x = '', times = length(x)-2), x[length(x) - 1], '')) + + facet_grid(. ~ feature, scales = (if (same.y.lims) 'fixed' else 'free')) + + FacetTheme( + panel.spacing = unit(0, 'lines'), + panel.background = element_rect(fill = NA, color = "black"), + axis.text.x = element_text(size = 7), + strip.text.x = element_text(angle = -90)) + } + if (log) { + plot <- plot + scale_x_log10() + } + if (!is.null(x = cols)) { + if (!is.null(x = split)) { + idents <- unique(x = as.vector(x = data$ident)) + splits <- unique(x = as.vector(x = data$split)) + labels <- if (length(x = splits) == 2) { + splits + } else { + unlist(x = lapply( + X = idents, + FUN = function(pattern, x) { + x.mod <- gsub( + pattern = paste0(pattern, '.'), + replacement = paste0(pattern, ': '), + x = x, + fixed = TRUE + ) + x.keep <- grep(pattern = ': ', x = x.mod, fixed = TRUE) + x.return <- x.mod[x.keep] + names(x = x.return) <- x[x.keep] + return(x.return) + }, + x = unique(x = as.vector(x = data$split)) + )) + } + if (is.null(x = names(x = labels))) { + names(x = labels) <- labels + } + } else { + labels <- levels(x = droplevels(data$ident)) + } + plot <- plot + scale_fill_manual(values = cols, labels = labels) + } + return(plot) +} + # Create a scatterplot with data from a ggplot2 scatterplot # # @param plot.data The original ggplot2 scatterplot data diff --git a/README.md b/README.md index 2149c20c8..ec419e930 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat) [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat) -# Seurat v3.2.0 +# Seurat v3.2.1 Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC. diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index d631b7d42..000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,26 +0,0 @@ - -# Build and test changes to SeuratWrappers -# Inspired by Jim Hester's Azure Pipelines tests -# https://github.com/jimhester/azuretest - -trigger: -- master -- develop - -pr: -- master -- develop -- release/* - -jobs: - - job: 'Linux' - timeoutInMinutes: 30 - pool: Default - variables: - R_LIBS_USER: '$(Agent.BuildDirectory)/R/library' - container: satijalab/seurat:develop - steps: - - script: | - R CMD build . - R CMD check --as-cran --no-manual Seurat_* - displayName: 'R CMD Check' diff --git a/cran-comments.md b/cran-comments.md index 2ebbb72a1..697bf709f 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -3,6 +3,7 @@ ## Test environments * local Ubuntu 16.04.6 install, R 3.6.1 * local Ubuntu 18.04.4 install, R 4.0.1 +* local Ubuntu 20.04 install, R 4.0.2 * local Windows 10 install, R 4.0.0 * Ubuntu 16.04.6 (on travis-ci), R 4.0.0, R devel * macOS 10.13.6 (on travis-ci), R 4.0.2 @@ -38,4 +39,4 @@ There were 3 NOTEs: There are three pacakges that imports Seurat: scMappR, Signac, and SoupX; this update does not impact their functionality -There are four packages that suggest Seurat: BisqueRNA, clustree, nanny, Rmagic, singleCellHaystack, treefit; this update does not impact their functionality. +There are eight packages that suggest Seurat: BisqueRNA, clustree, DIscBIO, nanny, Rmagic, scSorter, singleCellHaystack, treefit; this update does not impact their functionality. diff --git a/man/AddModuleScore.Rd b/man/AddModuleScore.Rd index 0ca38567b..b58b35a08 100644 --- a/man/AddModuleScore.Rd +++ b/man/AddModuleScore.Rd @@ -21,36 +21,45 @@ AddModuleScore( \arguments{ \item{object}{Seurat object} -\item{features}{Feature expression programs in list} +\item{features}{A list of vectors of features for expression programs; each +entry should be a vector of feature names} -\item{pool}{List of features to check expression levels agains, defaults to \code{rownames(x = object)}} +\item{pool}{List of features to check expression levels agains, defaults to +\code{rownames(x = object)}} -\item{nbin}{Number of bins of aggregate expression levels for all analyzed features} +\item{nbin}{Number of bins of aggregate expression levels for all +analyzed features} -\item{ctrl}{Number of control features selected from the same bin per analyzed feature} +\item{ctrl}{Number of control features selected from the same bin per +analyzed feature} \item{k}{Use feature clusters returned from DoKMeans} \item{assay}{Name of assay to use} -\item{name}{Name for the expression programs} +\item{name}{Name for the expression programs; will append a number to the +end for each entry in \code{features} (eg. if \code{features} has three +programs, the results will be stored as \code{name1}, \code{name2}, +\code{name3}, respectively)} \item{seed}{Set a random seed. If NULL, seed is not set.} \item{search}{Search for symbol synonyms for features in \code{features} that -don't match features in \code{object}? Searches the HGNC's gene names database; -see \code{\link{UpdateSymbolList}} for more details} +don't match features in \code{object}? Searches the HGNC's gene names +database; see \code{\link{UpdateSymbolList}} for more details} \item{...}{Extra parameters passed to \code{\link{UpdateSymbolList}}} } \value{ -Returns a Seurat object with module scores added to object meta data +Returns a Seurat object with module scores added to object meta data; +each module is stored as \code{name#} for each module program present in +\code{features} } \description{ -Calculate the average expression levels of each program (cluster) on single cell level, -subtracted by the aggregated expression of control feature sets. -All analyzed features are binned based on averaged expression, and the control features are -randomly selected from each bin. +Calculate the average expression levels of each program (cluster) on single +cell level, subtracted by the aggregated expression of control feature sets. +All analyzed features are binned based on averaged expression, and the +control features are randomly selected from each bin. } \examples{ \dontrun{ diff --git a/man/Cells.Rd b/man/Cells.Rd index f4a8b8eef..38109b472 100644 --- a/man/Cells.Rd +++ b/man/Cells.Rd @@ -4,6 +4,7 @@ \alias{Cells} \alias{Cells.default} \alias{Cells.DimReduc} +\alias{Cells.Neighbor} \alias{Cells.SlideSeq} \alias{Cells.STARmap} \alias{Cells.VisiumV1} @@ -15,6 +16,8 @@ Cells(x) \method{Cells}{DimReduc}(x) +\method{Cells}{Neighbor}(x) + \method{Cells}{SlideSeq}(x) \method{Cells}{STARmap}(x) diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd index 07ee07db5..eb335ec88 100644 --- a/man/ColorDimSplit.Rd +++ b/man/ColorDimSplit.Rd @@ -45,8 +45,14 @@ different colors and different shapes on cells} \item{\code{order}}{Specify the order of plotting for the idents. This can be useful for crowded plots if points of interest are being buried. Provide either a full list of valid idents or a subset to be plotted last (on top)} + \item{\code{shuffle}}{Whether to randomly shuffle the order of points. This can be +useful for crowded plots if points of interest are being buried. (default is FALSE)} + \item{\code{seed}}{Sets the seed if randomly shuffling the order of points.} \item{\code{label}}{Whether to label the clusters} \item{\code{label.size}}{Sets size of labels} + \item{\code{label.color}}{Sets the color of the label text} + \item{\code{label.box}}{Whether to put a box around the label text (geom_text vs +geom_label)} \item{\code{repel}}{Repel labels} \item{\code{cells.highlight}}{A list of character or numeric vectors of cells to highlight. If only one group of cells desired, can simply diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd index a67940df6..e6a27f025 100644 --- a/man/DimPlot.Rd +++ b/man/DimPlot.Rd @@ -19,8 +19,12 @@ DimPlot( split.by = NULL, shape.by = NULL, order = NULL, + shuffle = FALSE, + seed = 1, label = FALSE, label.size = 4, + label.color = "black", + label.box = FALSE, repel = FALSE, cells.highlight = NULL, cols.highlight = "#DE2D26", @@ -66,10 +70,20 @@ different colors and different shapes on cells} useful for crowded plots if points of interest are being buried. Provide either a full list of valid idents or a subset to be plotted last (on top)} +\item{shuffle}{Whether to randomly shuffle the order of points. This can be +useful for crowded plots if points of interest are being buried. (default is FALSE)} + +\item{seed}{Sets the seed if randomly shuffling the order of points.} + \item{label}{Whether to label the clusters} \item{label.size}{Sets size of labels} +\item{label.color}{Sets the color of the label text} + +\item{label.box}{Whether to put a box around the label text (geom_text vs +geom_label)} + \item{repel}{Repel labels} \item{cells.highlight}{A list of character or numeric vectors of cells to diff --git a/man/Distances.Rd b/man/Distances.Rd new file mode 100644 index 000000000..60bac4db1 --- /dev/null +++ b/man/Distances.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/objects.R +\name{Distances} +\alias{Distances} +\alias{Distances.Neighbor} +\title{Get the Neighbor nearest neighbors distance matrix} +\usage{ +Distances(object, ...) + +\method{Distances}{Neighbor}(object, ...) +} +\arguments{ +\item{object}{An object} + +\item{...}{Arguments passed to other methods} +} +\description{ +Get the Neighbor nearest neighbors distance matrix +} diff --git a/man/FastRowScale.Rd b/man/FastRowScale.Rd new file mode 100644 index 000000000..a44eb4f8b --- /dev/null +++ b/man/FastRowScale.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{FastRowScale} +\alias{FastRowScale} +\title{Scale and/or center matrix rowwise} +\usage{ +FastRowScale(mat, center = TRUE, scale = TRUE, scale_max = 10) +} +\arguments{ +\item{mat}{A matrix} + +\item{center}{a logical value indicating whether to center the rows} + +\item{scale}{a logical value indicating whether to scale the rows} + +\item{scale_max}{clip all values greater than scale_max to scale_max. Don't +clip if Inf.} +} +\value{ +Returns the center/scaled matrix +} +\description{ +Performs row scaling and/or centering. Equivalent to using t(scale(t(mat))) +in R except in the case of NA values. +} diff --git a/man/FindNeighbors.Rd b/man/FindNeighbors.Rd index 38c8e1986..237641fd7 100644 --- a/man/FindNeighbors.Rd +++ b/man/FindNeighbors.Rd @@ -102,13 +102,15 @@ default of 0.0 implies exact nearest neighbor search} \item{force.recalc}{Force recalculation of SNN.} -\item{features}{Features to use as input for building the SNN} +\item{features}{Features to use as input for building the SNN; used only when +\code{dims} is \code{NULL}} \item{reduction}{Reduction to use as input for building the SNN} \item{dims}{Dimensions of reduction to use as input} -\item{assay}{Assay to use in construction of SNN} +\item{assay}{Assay to use in construction of SNN; used only when \code{dims} +is \code{NULL}} \item{do.plot}{Plot SNN graph on tSNE coordinates} diff --git a/man/Index.Rd b/man/Index.Rd new file mode 100644 index 000000000..4f5cf13f1 --- /dev/null +++ b/man/Index.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/objects.R +\name{Index} +\alias{Index} +\alias{Index<-} +\alias{Index.Neighbor} +\alias{Index<-.Neighbor} +\title{Get Neighbor algorithm index} +\usage{ +Index(object, ...) + +Index(object, ...) <- value + +\method{Index}{Neighbor}(object, ...) + +\method{Index}{Neighbor}(object, ...) <- value +} +\arguments{ +\item{object}{An object} + +\item{...}{Arguments passed to other methods;} + +\item{value}{The index to store} +} +\value{ +Returns the value in the alg.idx slot of the Neighbor object + +\code{Idents<-}: A Neighbor bject with the index stored +} +\description{ +Get Neighbor algorithm index +} diff --git a/man/Indices.Rd b/man/Indices.Rd new file mode 100644 index 000000000..daeb9cde2 --- /dev/null +++ b/man/Indices.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/objects.R +\name{Indices} +\alias{Indices} +\alias{Indices.Neighbor} +\title{Get Neighbor nearest neighbor index matrices} +\usage{ +Indices(object, ...) + +\method{Indices}{Neighbor}(object, ...) +} +\arguments{ +\item{object}{An object} + +\item{...}{Arguments passed to other methods;} +} +\value{ +A matrix with the nearest neighbor indices +} +\description{ +Get Neighbor nearest neighbor index matrices +} diff --git a/man/LoadAnnoyIndex.Rd b/man/LoadAnnoyIndex.Rd new file mode 100644 index 000000000..5589c6dcd --- /dev/null +++ b/man/LoadAnnoyIndex.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{LoadAnnoyIndex} +\alias{LoadAnnoyIndex} +\title{Load the Annoy index file} +\usage{ +LoadAnnoyIndex(object, file) +} +\arguments{ +\item{object}{Neighbor object} + +\item{file}{Path to file with annoy index} +} +\value{ +Returns the Neighbor object with the index stored +} +\description{ +Load the Annoy index file +} diff --git a/man/MULTIseqDemux.Rd b/man/MULTIseqDemux.Rd index 3b032e144..e21ee2a03 100644 --- a/man/MULTIseqDemux.Rd +++ b/man/MULTIseqDemux.Rd @@ -42,5 +42,5 @@ object <- MULTIseqDemux(object) } \references{ -\url{https://www.biorxiv.org/content/early/2018/08/08/387241} +\url{https://www.biorxiv.org/content/10.1101/387241v1} } diff --git a/man/Neighbor-class.Rd b/man/Neighbor-class.Rd new file mode 100644 index 000000000..4e7730b40 --- /dev/null +++ b/man/Neighbor-class.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.R +\docType{class} +\name{Neighbor-class} +\alias{Neighbor-class} +\alias{Neighbor} +\title{The Neighbor class} +\description{ +The Neighbor class is used to store the results of neighbor finding +algorithms +} +\section{Slots}{ + +\describe{ +\item{\code{nn.idx}}{Matrix containing the nearest neighbor indices} + +\item{\code{nn.dist}}{Matrix containing the nearest neighbor distances} + +\item{\code{alg.idx}}{The neighbor finding index (if applicable). E.g. the annoy +index} + +\item{\code{alg.info}}{Any information associated with the algorithm that may be +needed downstream (e.g. distance metric used with annoy is needed when +reading in from stored file).} + +\item{\code{cell.names}}{Names of the cells for which the neighbors have been +computed.} +}} + diff --git a/man/Neighbors.Rd b/man/Neighbors.Rd new file mode 100644 index 000000000..054e53eb7 --- /dev/null +++ b/man/Neighbors.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.R +\name{Neighbors} +\alias{Neighbors} +\title{Pull Neighbor or Neighbor names} +\usage{ +Neighbors(object, slot = NULL) +} +\arguments{ +\item{object}{A Seurat object} + +\item{slot}{Name of Neighbor object} +} +\value{ +If \code{slot} is \code{NULL}, the names of all \code{Neighbor} objects +in this Seurat object. Otherwise, the \code{Neighbor} object requested +} +\description{ +Lists the names of \code{\link{Neighbor}} objects present in +a Seurat object. If slot is provided, pulls specified Neighbors object. +} diff --git a/man/PlotClusterTree.Rd b/man/PlotClusterTree.Rd index 0ad645c41..36fb311bc 100644 --- a/man/PlotClusterTree.Rd +++ b/man/PlotClusterTree.Rd @@ -9,7 +9,8 @@ PlotClusterTree(object, ...) \arguments{ \item{object}{Seurat object} -\item{\dots}{Additional arguments to ape::plot.phylo} +\item{\dots}{Additional arguments to +\code{\link[ape:plot.phylo]{ape::plot.phylo}}} } \value{ Plots dendogram (must be precomputed using BuildClusterTree), returns no value diff --git a/man/RenameCells.Rd b/man/RenameCells.Rd index 9de23c626..52fce4116 100644 --- a/man/RenameCells.Rd +++ b/man/RenameCells.Rd @@ -4,6 +4,7 @@ \alias{RenameCells} \alias{RenameCells.Assay} \alias{RenameCells.DimReduc} +\alias{RenameCells.Neighbor} \alias{RenameCells.Seurat} \alias{RenameCells.VisiumV1} \title{Rename cells} @@ -14,6 +15,8 @@ RenameCells(object, ...) \method{RenameCells}{DimReduc}(object, new.names = NULL, ...) +\method{RenameCells}{Neighbor}(object, old.names = NULL, new.names = NULL, ...) + \method{RenameCells}{Seurat}( object, add.cell.id = NULL, @@ -31,6 +34,8 @@ RenameCells(object, ...) \item{new.names}{vector of new cell names} +\item{old.names}{vector of old cell names} + \item{add.cell.id}{prefix to add cell names} \item{for.merge}{Only rename slots needed for merging Seurat objects. diff --git a/man/RidgePlot.Rd b/man/RidgePlot.Rd index 063cda6c2..a3cfb3798 100644 --- a/man/RidgePlot.Rd +++ b/man/RidgePlot.Rd @@ -17,7 +17,9 @@ RidgePlot( log = FALSE, ncol = NULL, slot = "data", - combine = TRUE + stack = FALSE, + combine = TRUE, + fill.by = "feature" ) } \arguments{ @@ -47,8 +49,12 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{slot}{Use non-normalized counts data for plotting} +\item{stack}{Horizontally stack plots for each feature} + \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} -ggplot object. If \code{FALSE}, return a list of ggplot objects} +ggplot object. If \code{FALSE}, return a list of ggplot} + +\item{fill.by}{Color violins/ridges based on either 'feature' or 'ident'} } \value{ A \code{\link[patchwork]{patchwork}ed} ggplot object if diff --git a/man/RowMergeSparseMatrices.Rd b/man/RowMergeSparseMatrices.Rd index b1667a0da..9cb3585b8 100644 --- a/man/RowMergeSparseMatrices.Rd +++ b/man/RowMergeSparseMatrices.Rd @@ -9,7 +9,7 @@ RowMergeSparseMatrices(mat1, mat2) \arguments{ \item{mat1}{First matrix} -\item{mat2}{Second matrix} +\item{mat2}{Second matrix or list of matrices} } \value{ A merged matrix diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd index 5d5d3c8bb..920bed977 100644 --- a/man/RunUMAP.Rd +++ b/man/RunUMAP.Rd @@ -11,8 +11,10 @@ RunUMAP(object, ...) \method{RunUMAP}{default}( object, - reduction.model = NULL, + reduction.key = "UMAP_", assay = NULL, + reduction.model = NULL, + return.model = FALSE, umap.method = "uwot", n.neighbors = 30L, n.components = 2L, @@ -31,7 +33,6 @@ RunUMAP(object, ...) seed.use = 42, metric.kwds = NULL, angular.rp.forest = FALSE, - reduction.key = "UMAP_", verbose = TRUE, ... ) @@ -60,13 +61,16 @@ RunUMAP(object, ...) \method{RunUMAP}{Seurat}( object, - reduction.model = NULL, dims = NULL, reduction = "pca", features = NULL, graph = NULL, - assay = "RNA", + assay = DefaultAssay(object = object), + nn.name = NULL, + slot = "data", umap.method = "uwot", + reduction.model = NULL, + return.model = FALSE, n.neighbors = 30L, n.components = 2L, metric = "cosine", @@ -95,11 +99,16 @@ RunUMAP(object, ...) \item{...}{Arguments passed to other methods and UMAP} -\item{reduction.model}{\code{DimReduc} object that contains the umap model} +\item{reduction.key}{dimensional reduction key, specifies the string before +the number for the dimension names. UMAP by default} \item{assay}{Assay to pull data for when using \code{features}, or assay used to construct Graph if running UMAP on a Graph} +\item{reduction.model}{\code{DimReduc} object that contains the umap model} + +\item{return.model}{whether UMAP will return the uwot model} + \item{umap.method}{UMAP implementation to run. Can be \describe{ \item{\code{uwot}:}{Runs umap via the uwot R package} @@ -171,9 +180,6 @@ approximate nearest neighbor search. This can be faster, but is mostly on useful use an angular style distance such as cosine, correlation etc. In the case of those metrics angular forests will be chosen automatically.} -\item{reduction.key}{dimensional reduction key, specifies the string before -the number for the dimension names. UMAP by default} - \item{verbose}{Controls verbosity} \item{dims}{Which dimensions to use as input features, used only if @@ -188,6 +194,10 @@ on features} \item{graph}{Name of graph on which to run UMAP} +\item{nn.name}{Name of knn output on which to run UMAP} + +\item{slot}{The slot used to pull data for when using \code{features}. data slot is by default.} + \item{reduction.name}{Name to store dimensional reduction under in the Seurat object} } \value{ diff --git a/man/SaveAnnoyIndex.Rd b/man/SaveAnnoyIndex.Rd new file mode 100644 index 000000000..4109370d4 --- /dev/null +++ b/man/SaveAnnoyIndex.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{SaveAnnoyIndex} +\alias{SaveAnnoyIndex} +\title{Save the Annoy index} +\usage{ +SaveAnnoyIndex(object, file) +} +\arguments{ +\item{object}{A Neighbor object with the annoy index stored} + +\item{file}{Path to file to write index to} +} +\description{ +Save the Annoy index +} diff --git a/man/TopNeighbors.Rd b/man/TopNeighbors.Rd new file mode 100644 index 000000000..9e7a52a1d --- /dev/null +++ b/man/TopNeighbors.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.R +\name{TopNeighbors} +\alias{TopNeighbors} +\title{Get nearest neighbors for given cell} +\usage{ +TopNeighbors(object, cell, n = 5) +} +\arguments{ +\item{object}{\code{\link{Neighbor}} object} + +\item{cell}{Cell of interest} + +\item{n}{Number of neighbors to return} +} +\value{ +Returns a vector of cell names +} +\description{ +Return a vector of cell names of the nearest n cells. +} diff --git a/man/UpdateSymbolList.Rd b/man/UpdateSymbolList.Rd index aca1e2339..2d4a92893 100644 --- a/man/UpdateSymbolList.Rd +++ b/man/UpdateSymbolList.Rd @@ -5,7 +5,7 @@ \alias{GeneSymbolThesarus} \title{Get updated synonyms for gene symbols} \source{ -\url{https://www.genenames.org/} \url{http://rest.genenames.org/} +\url{https://www.genenames.org/} \url{https://www.genenames.org/help/rest/} } \usage{ GeneSymbolThesarus( diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 81d45c731..7edff2643 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -21,7 +21,10 @@ VlnPlot( ncol = NULL, slot = "data", split.plot = FALSE, - combine = TRUE + stack = FALSE, + combine = TRUE, + fill.by = "feature", + flip = FALSE ) } \arguments{ @@ -60,8 +63,14 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{split.plot}{plot each group of the split violin plots by multiple or single violin shapes.} +\item{stack}{Horizontally stack plots for each feature} + \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} -ggplot object. If \code{FALSE}, return a list of ggplot objects} +ggplot object. If \code{FALSE}, return a list of ggplot} + +\item{fill.by}{Color violins/ridges based on either 'feature' or 'ident'} + +\item{flip}{flip plot orientation (identities on x-axis)} } \value{ A \code{\link[patchwork]{patchwork}ed} ggplot object if diff --git a/man/as.Graph.Rd b/man/as.Graph.Rd index a584fd308..b73db3822 100644 --- a/man/as.Graph.Rd +++ b/man/as.Graph.Rd @@ -4,6 +4,7 @@ \alias{as.Graph} \alias{as.Graph.Matrix} \alias{as.Graph.matrix} +\alias{as.Graph.Neighbor} \title{Convert a matrix (or Matrix) to the Graph class.} \usage{ as.Graph(x, ...) @@ -11,11 +12,16 @@ as.Graph(x, ...) \method{as.Graph}{Matrix}(x, ...) \method{as.Graph}{matrix}(x, ...) + +\method{as.Graph}{Neighbor}(x, weighted = TRUE, ...) } \arguments{ \item{x}{The matrix to convert} \item{...}{Arguments passed to other methods (ignored for now)} + +\item{weighted}{If TRUE, fill entries in Graph matrix with value from the +nn.dist slot of the Neighbor object} } \description{ Convert a matrix (or Matrix) to the Graph class. diff --git a/man/as.Neighbor.Rd b/man/as.Neighbor.Rd new file mode 100644 index 000000000..8d8845c69 --- /dev/null +++ b/man/as.Neighbor.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/objects.R +\name{as.Neighbor} +\alias{as.Neighbor} +\alias{as.Neighbor.Graph} +\title{Convert objects to Neighbor ojbects} +\usage{ +as.Neighbor(x, ...) + +\method{as.Neighbor}{Graph}(x, ...) +} +\arguments{ +\item{x}{An object to convert to \code{Neighbor}} + +\item{...}{Arguments passed to other methods} +} +\description{ +Convert objects to Neighbor ojbects +} diff --git a/man/cc.genes.Rd b/man/cc.genes.Rd index 995b97dbc..c49a1f43e 100644 --- a/man/cc.genes.Rd +++ b/man/cc.genes.Rd @@ -12,7 +12,7 @@ A list of two vectors } } \source{ -\url{http://science.sciencemag.org/content/352/6282/189} +\url{https://science.sciencemag.org/content/352/6282/189} } \usage{ cc.genes diff --git a/man/cc.genes.updated.2019.Rd b/man/cc.genes.updated.2019.Rd index 377a91e6c..64f5dff4a 100644 --- a/man/cc.genes.updated.2019.Rd +++ b/man/cc.genes.updated.2019.Rd @@ -12,7 +12,7 @@ A list of two vectors } } \source{ -\url{http://science.sciencemag.org/content/352/6282/189} +\url{https://science.sciencemag.org/content/352/6282/189} } \usage{ cc.genes.updated.2019 diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d46af4e38..1b161732b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -68,31 +68,29 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// LogNorm -Eigen::SparseMatrix LogNorm(Eigen::SparseMatrix data, int scale_factor, bool display_progress); -RcppExport SEXP _Seurat_LogNorm(SEXP dataSEXP, SEXP scale_factorSEXP, SEXP display_progressSEXP) { +// RowMergeMatricesList +Eigen::SparseMatrix RowMergeMatricesList(List mat_list, List mat_rownames, std::vector< std::string > all_rownames); +RcppExport SEXP _Seurat_RowMergeMatricesList(SEXP mat_listSEXP, SEXP mat_rownamesSEXP, SEXP all_rownamesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type data(dataSEXP); - Rcpp::traits::input_parameter< int >::type scale_factor(scale_factorSEXP); - Rcpp::traits::input_parameter< bool >::type display_progress(display_progressSEXP); - rcpp_result_gen = Rcpp::wrap(LogNorm(data, scale_factor, display_progress)); + Rcpp::traits::input_parameter< List >::type mat_list(mat_listSEXP); + Rcpp::traits::input_parameter< List >::type mat_rownames(mat_rownamesSEXP); + Rcpp::traits::input_parameter< std::vector< std::string > >::type all_rownames(all_rownamesSEXP); + rcpp_result_gen = Rcpp::wrap(RowMergeMatricesList(mat_list, mat_rownames, all_rownames)); return rcpp_result_gen; END_RCPP } -// FastRowScale -Eigen::MatrixXd FastRowScale(Eigen::MatrixXd mat, bool scale, bool center, double scale_max, bool display_progress); -RcppExport SEXP _Seurat_FastRowScale(SEXP matSEXP, SEXP scaleSEXP, SEXP centerSEXP, SEXP scale_maxSEXP, SEXP display_progressSEXP) { +// LogNorm +Eigen::SparseMatrix LogNorm(Eigen::SparseMatrix data, int scale_factor, bool display_progress); +RcppExport SEXP _Seurat_LogNorm(SEXP dataSEXP, SEXP scale_factorSEXP, SEXP display_progressSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Eigen::MatrixXd >::type mat(matSEXP); - Rcpp::traits::input_parameter< bool >::type scale(scaleSEXP); - Rcpp::traits::input_parameter< bool >::type center(centerSEXP); - Rcpp::traits::input_parameter< double >::type scale_max(scale_maxSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type data(dataSEXP); + Rcpp::traits::input_parameter< int >::type scale_factor(scale_factorSEXP); Rcpp::traits::input_parameter< bool >::type display_progress(display_progressSEXP); - rcpp_result_gen = Rcpp::wrap(FastRowScale(mat, scale, center, scale_max, display_progress)); + rcpp_result_gen = Rcpp::wrap(LogNorm(data, scale_factor, display_progress)); return rcpp_result_gen; END_RCPP } @@ -265,6 +263,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// GraphToNeighborHelper +List GraphToNeighborHelper(Eigen::SparseMatrix mat); +RcppExport SEXP _Seurat_GraphToNeighborHelper(SEXP matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type mat(matSEXP); + rcpp_result_gen = Rcpp::wrap(GraphToNeighborHelper(mat)); + return rcpp_result_gen; +END_RCPP +} // FindWeightsC Eigen::SparseMatrix FindWeightsC(Eigen::SparseMatrix integration_matrix, NumericVector cells2, Eigen::MatrixXd distances, std::vector anchor_cells2, std::vector integration_matrix_rownames, Eigen::MatrixXd cell_index, Eigen::VectorXd anchor_score, double min_dist, double sd, bool display_progress); RcppExport SEXP _Seurat_FindWeightsC(SEXP integration_matrixSEXP, SEXP cells2SEXP, SEXP distancesSEXP, SEXP anchor_cells2SEXP, SEXP integration_matrix_rownamesSEXP, SEXP cell_indexSEXP, SEXP anchor_scoreSEXP, SEXP min_distSEXP, SEXP sdSEXP, SEXP display_progressSEXP) { @@ -337,13 +346,15 @@ BEGIN_RCPP END_RCPP } +RcppExport SEXP isnull(SEXP); + static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, {"_Seurat_RunUMISampling", (DL_FUNC) &_Seurat_RunUMISampling, 4}, {"_Seurat_RunUMISamplingPerCell", (DL_FUNC) &_Seurat_RunUMISamplingPerCell, 4}, {"_Seurat_RowMergeMatrices", (DL_FUNC) &_Seurat_RowMergeMatrices, 5}, + {"_Seurat_RowMergeMatricesList", (DL_FUNC) &_Seurat_RowMergeMatricesList, 3}, {"_Seurat_LogNorm", (DL_FUNC) &_Seurat_LogNorm, 3}, - {"_Seurat_FastRowScale", (DL_FUNC) &_Seurat_FastRowScale, 5}, {"_Seurat_Standardize", (DL_FUNC) &_Seurat_Standardize, 2}, {"_Seurat_FastSparseRowScale", (DL_FUNC) &_Seurat_FastSparseRowScale, 5}, {"_Seurat_FastSparseRowScaleWithKnownStats", (DL_FUNC) &_Seurat_FastSparseRowScaleWithKnownStats, 7}, @@ -357,11 +368,13 @@ static const R_CallMethodDef CallEntries[] = { {"_Seurat_RowVar", (DL_FUNC) &_Seurat_RowVar, 1}, {"_Seurat_SparseRowVar", (DL_FUNC) &_Seurat_SparseRowVar, 2}, {"_Seurat_ReplaceColsC", (DL_FUNC) &_Seurat_ReplaceColsC, 3}, + {"_Seurat_GraphToNeighborHelper", (DL_FUNC) &_Seurat_GraphToNeighborHelper, 1}, {"_Seurat_FindWeightsC", (DL_FUNC) &_Seurat_FindWeightsC, 10}, {"_Seurat_IntegrateDataC", (DL_FUNC) &_Seurat_IntegrateDataC, 3}, {"_Seurat_ComputeSNN", (DL_FUNC) &_Seurat_ComputeSNN, 2}, {"_Seurat_WriteEdgeFile", (DL_FUNC) &_Seurat_WriteEdgeFile, 3}, {"_Seurat_DirectSNNToFile", (DL_FUNC) &_Seurat_DirectSNNToFile, 4}, + {"isnull", (DL_FUNC) &isnull, 1}, {NULL, NULL, 0} }; diff --git a/src/data_manipulation.cpp b/src/data_manipulation.cpp index 94c68e539..b9ea829d0 100644 --- a/src/data_manipulation.cpp +++ b/src/data_manipulation.cpp @@ -4,6 +4,7 @@ #include #include #include +#include using namespace Rcpp; // [[Rcpp::depends(RcppEigen)]] @@ -96,12 +97,12 @@ Eigen::SparseMatrix RowMergeMatrices(Eigen::SparseMatrix::InnerIterator it1(mat1, mat1_map[key]); it1; ++it1){ - tripletList.push_back(T(i, it1.col(), it1.value())); + tripletList.emplace_back(i, it1.col(), it1.value()); } } if (mat2_map.count(key)){ for(Eigen::SparseMatrix::InnerIterator it2(mat2, mat2_map[key]); it2; ++it2){ - tripletList.push_back(T(i, num_col1 + it2.col(), it2.value())); + tripletList.emplace_back(i, num_col1 + it2.col(), it2.value()); } } } @@ -110,6 +111,57 @@ Eigen::SparseMatrix RowMergeMatrices(Eigen::SparseMatrix RowMergeMatricesList( + List mat_list, + List mat_rownames, + std::vector< std::string > all_rownames +) { + // Convert Rcpp lists to c++ vectors + std::vector> mat_vec; + mat_vec.reserve(mat_list.size()); + std::vector> rownames_vec; + rownames_vec.reserve(mat_rownames.size()); + std::vector> map_vec; + map_vec.reserve(mat_list.size()); + int num_cols = 0; + int num_nZero = 0; + // offsets keep track of which column to add in to + std::vector offsets; + for (unsigned int i = 0; i < mat_list.size(); i++) { + mat_vec.emplace_back(Rcpp::as>(mat_list.at(i))); + rownames_vec.push_back(mat_rownames[i]); + // Set up hash maps for rowname based lookup + std::unordered_map mat_map; + for (unsigned int j = 0; j < rownames_vec[i].size(); j++) { + mat_map[rownames_vec[i][j]] = j; + } + map_vec.emplace_back(mat_map); + offsets.push_back(num_cols); + num_cols += mat_vec[i].cols(); + num_nZero += mat_vec[i].nonZeros(); + } + // set up tripletList for new matrix creation + std::vector tripletList; + int num_rows = all_rownames.size(); + tripletList.reserve(num_nZero); + // loop over all rows and add nonzero entries to tripletList + for(int i = 0; i < num_rows; i++) { + std::string key = all_rownames[i]; + for(int j = 0; j < mat_vec.size(); j++) { + if (map_vec[j].count(key)) { + for(Eigen::SparseMatrix::InnerIterator it1(mat_vec[j], map_vec[j][key]); it1; ++it1){ + tripletList.emplace_back(i, it1.col() + offsets[j], it1.value()); + } + } + } + } + Eigen::SparseMatrix combined_mat(num_rows, num_cols); + combined_mat.setFromTriplets(tripletList.begin(), tripletList.end()); + return combined_mat; +} + + // [[Rcpp::export]] Eigen::SparseMatrix LogNorm(Eigen::SparseMatrix data, int scale_factor, bool display_progress = true){ Progress p(data.outerSize(), display_progress); @@ -123,40 +175,6 @@ Eigen::SparseMatrix LogNorm(Eigen::SparseMatrix data, int scale_ return data; } -/* Performs row scaling and/or centering. Equivalent to using t(scale(t(mat))) in R. - Note: Doesn't handle NA/NaNs in the same way the R implementation does, */ - -// [[Rcpp::export]] -Eigen::MatrixXd FastRowScale(Eigen::MatrixXd mat, bool scale = true, bool center = true, - double scale_max = 10, bool display_progress = true){ - Progress p(mat.rows(), display_progress); - Eigen::MatrixXd scaled_mat(mat.rows(), mat.cols()); - for(int i=0; i < mat.rows(); ++i){ - p.increment(); - Eigen::ArrayXd r = mat.row(i).array(); - double rowMean = r.mean(); - double rowSdev = 1; - if(scale == true){ - if(center == true){ - rowSdev = sqrt((r - rowMean).square().sum() / (mat.cols() - 1)); - } - else{ - rowSdev = sqrt(r.square().sum() / (mat.cols() - 1)); - } - } - if(center == false){ - rowMean = 0; - } - scaled_mat.row(i) = (r - rowMean) / rowSdev; - for(int s=0; s scale_max){ - scaled_mat(i, s) = scale_max; - } - } - } - return scaled_mat; -} - /* Performs column scaling and/or centering. Equivalent to using scale(mat, TRUE, apply(x,2,sd)) in R. Note: Doesn't handle NA/NaNs in the same way the R implementation does, */ @@ -445,4 +463,53 @@ Eigen::SparseMatrix ReplaceColsC(Eigen::SparseMatrix mat, Numeri return(mat); } +template +std::vector sort_indexes(const std::vector &v) { + // initialize original index locations + std::vector idx(v.size()); + std::iota(idx.begin(), idx.end(), 0); + std::stable_sort(idx.begin(), idx.end(), + [&v](size_t i1, size_t i2) {return v[i1] < v[i2];}); + return idx; +} + +//[[Rcpp::export]] +List GraphToNeighborHelper(Eigen::SparseMatrix mat) { + mat = mat.transpose(); + //determine the number of neighbors + int n = 0; + for(Eigen::SparseMatrix::InnerIterator it(mat, 0); it; ++it) { + n += 1; + } + Eigen::MatrixXd nn_idx(mat.rows(), n); + Eigen::MatrixXd nn_dist(mat.rows(), n); + + for (int k=0; k row_idx; + std::vector row_dist; + row_idx.reserve(n); + row_dist.reserve(n); + for (Eigen::SparseMatrix::InnerIterator it(mat,k); it; ++it) { + if (n_k > (n-1)) { + Rcpp::stop("Not all cells have an equal number of neighbors."); + } + row_idx.push_back(it.row() + 1); + row_dist.push_back(it.value()); + n_k += 1; + } + if (n_k != n) { + Rcpp::Rcout << n << ":::" << n_k << std::endl; + Rcpp::stop("Not all cells have an equal number of neighbors."); + } + //order the idx based on dist + std::vector idx_order = sort_indexes(row_dist); + for(int i = 0; i < n; ++i) { + nn_idx(k, i) = row_idx[idx_order[i]]; + nn_dist(k, i) = row_dist[idx_order[i]]; + } + } + List neighbors = List::create(nn_idx, nn_dist); + return(neighbors); +} diff --git a/src/data_manipulation.h b/src/data_manipulation.h index 0cf525f3f..892df0a8f 100644 --- a/src/data_manipulation.h +++ b/src/data_manipulation.h @@ -21,10 +21,9 @@ Eigen::SparseMatrix RowMergeMatrices(Eigen::SparseMatrix mat1_rownames, std::vector< std::string > mat2_rownames, std::vector< std::string > all_rownames); +Eigen::SparseMatrix RowMergeMatricesList(List mat_list, List mat_rownames, std::vector< std::string > all_rownames); Eigen::SparseMatrix LogNorm(Eigen::SparseMatrix data, int scale_factor, bool display_progress ); -Eigen::MatrixXd FastRowScale(Eigen::MatrixXd mat, bool scale, bool center, double scale_max, - bool display_progress); NumericMatrix Standardize(const Eigen::Map mat, bool display_progress); Eigen::MatrixXd FastSparseRowScale(Eigen::SparseMatrix mat, bool scale, bool center, double scale_max, bool display_progress); diff --git a/src/valid_pointer.c b/src/valid_pointer.c new file mode 100644 index 000000000..f71829828 --- /dev/null +++ b/src/valid_pointer.c @@ -0,0 +1,6 @@ +#include + +// helper to determine if external c++ pointer is valid +SEXP isnull(SEXP pointer) { + return Rf_ScalarLogical(!R_ExternalPtrAddr(pointer)); +} \ No newline at end of file diff --git a/tests/testthat/test_data_manipulation.R b/tests/testthat/test_data_manipulation.R index 1d97e8f4b..48c8415f9 100644 --- a/tests/testthat/test_data_manipulation.R +++ b/tests/testthat/test_data_manipulation.R @@ -27,6 +27,14 @@ test_that("Row merging done correctly", { expect_equal(length(m3), 280) }) +test_that("Row merging with a list done correctly", { + m3 <- RowMergeMatricesList(mat_list = list(m1, m2), mat_rownames = list(m1.names, m2.names), all_rownames = all.names) + expect_equal(m3[1, 14], -0.17) + expect_equal(m3[3, 2], -1.4) + expect_equal(m3[14, 18], -0.43) + expect_equal(length(m3), 280) +}) + # Tests for log normalization # -------------------------------------------------------------------------------- context("Log Normalization") @@ -44,18 +52,18 @@ test_that("Log Normalization returns expected values", { # -------------------------------------------------------------------------------- context("Fast Scale Data Functions") -mat <- matrix(seq(0.001, 0.1, 0.001), nrow = 10, ncol = 10) +mat <- matrix(rnorm(n = 10*15), nrow = 10, ncol = 15) # should be the equivalent of t(scale(t(mat))) test_that("Fast implementation of row scaling returns expected values", { - expect_equal(t(scale(t(mat))[1:10, 1:10]), FastRowScale(mat, display_progress = FALSE)) - expect_equal(t(scale(t(mat), center = FALSE))[1:10, 1:10], - FastRowScale(mat, center = FALSE, display_progress = FALSE)) - expect_equal(t(scale(t(mat), scale = FALSE))[1:10, 1:10], - FastRowScale(mat, scale = FALSE, display_progress = FALSE)) - expect_equal(t(scale(t(mat), scale = FALSE, center = F))[1:10, 1:10], - FastRowScale(mat, scale = FALSE, center = F, display_progress = FALSE)) - mat.clipped <- FastRowScale(mat, scale_max = 0.2, display_progress = F) + expect_equal(t(scale(t(mat)))[1:10, 1:15], FastRowScale(mat)) + expect_equal(t(scale(t(mat), center = FALSE))[1:10, 1:15], + FastRowScale(mat, center = FALSE)) + expect_equal(t(scale(t(mat), scale = FALSE))[1:10, 1:15], + FastRowScale(mat, scale = FALSE)) + expect_equal(t(scale(t(mat), scale = FALSE, center = F))[1:10, 1:15], + FastRowScale(mat, scale = FALSE, center = F)) + mat.clipped <- FastRowScale(mat, scale_max = 0.2) expect_true(max(mat.clipped, na.rm = T) >= 0.2) }) diff --git a/tests/testthat/test_objects.R b/tests/testthat/test_objects.R index 8c0e0fc46..9f8f1fb84 100644 --- a/tests/testthat/test_objects.R +++ b/tests/testthat/test_objects.R @@ -198,6 +198,57 @@ test_that("Merging Assays handles case when data not present", { expect_equal(nnzero(x = GetAssayData(object = z, slot = "data")), 0) }) +# Tests for Neighbor object +# ------------------------------------------------------------------------------ +context("Neighbor") + +# converting to Graph and back + +n.rann.ob <- NNHelper( + data = Embeddings(object = pbmc_small[["pca"]]), + query = Embeddings(object = pbmc_small[["pca"]]), + k = 10, + method = "rann") + +test_that("Neighbor object methods work", { + expect_equal(dim(x = Indices(object = n.rann.ob)), c(80, 10)) + expect_equal(dim(x = n.rann.ob), c(80, 10)) + expect_equal(as.numeric(Indices(object = n.rann.ob)[1, 7]), 45, ) + expect_equal(dim(x = Distances(object = n.rann.ob)), c(80, 10)) + expect_equal(as.numeric(Distances(object = n.rann.ob)[2, 2]), 2.643759, tolerance = 1e-6) + expect_equal(length(x = Cells(x = n.rann.ob)), 80) + expect_equal(Cells(x = n.rann.ob)[c(1, 20, 80)], c("ATGCCAGAACGACT", "TACATCACGCTAAC", "CTTGATTGATCTTC")) + pbmc_small[["n.ob"]] <- n.rann.ob + pbmc_small <- RenameCells(object = pbmc_small, add.cell.id = "test") + expect_equal(Cells(x = pbmc_small[['n.ob']])[1], c("test_ATGCCAGAACGACT")) + expect_equal(TopNeighbors(object = n.rann.ob, cell = "ATGCCAGAACGACT", n = 5)[5], "GATATAACACGCAT") + expect_equal(length(TopNeighbors(object = n.rann.ob, cell = "ATGCCAGAACGACT", n = 7)), 7) + nrg <- as.Graph(x = n.rann.ob) + expect_true(inherits(x = nrg, what = "Graph")) + expect_equal(as.numeric(Distances(object = n.rann.ob)[2, 3]), nrg[2, Indices(object = n.rann.ob)[2, 3]]) + nro2 <- as.Neighbor(x = nrg) + expect_true(inherits(x = nro2, what = "Neighbor")) + expect_equal(Distances(object = n.rann.ob)[2, 3], Distances(object = nro2)[2, 3]) + expect_equal(Indices(object = n.rann.ob)[1, 6], Indices(object = nro2)[1, 6]) +}) + +n.annoy.ob <- NNHelper( + data = Embeddings(object = pbmc_small[["pca"]]), + query = Embeddings(object = pbmc_small[["pca"]]), + k = 10, + method = "annoy", + cache.index = TRUE) +idx.file <- tempfile() +SaveAnnoyIndex(object = n.annoy.ob, file = idx.file) +nao2 <- LoadAnnoyIndex(object = n.annoy.ob, file = idx.file) + +test_that("Saving/Loading annoy index", { + expect_error(SaveAnnoyIndex(object = n.rann.ob, file = idx.file)) + expect_equal(head(Indices(n.annoy.ob)), head(Indices(nao2))) + expect_equal(head(Distances(n.annoy.ob)), head(Distances(nao2))) + expect_false(is.null(x = Index(nao2))) +}) + # Tests for FetchData # ------------------------------------------------------------------------------ context("FetchData") @@ -265,4 +316,25 @@ test_that("passing an expression works", { expect_equal(lyz.pos[30], "CTTGATTGATCTTC") }) - +# Tests for small other functions +# ------------------------------------------------------------------------------ +test_that("Top works", { + dat <- Embeddings(object = pbmc_small[['pca']])[, 1, drop = FALSE] + expect_warning(Top(data = dat, num = 1000, balanced = FALSE)) + tpc1 <- Top(data = dat, num = 20, balanced = FALSE) + expect_equal(length(x = tpc1), 20) + expect_equal(tpc1[1], "ACGTGATGCCATGA") + expect_equal(tpc1[20], "GTCATACTTCGCCT") + tpc1b <- Top(data = dat, num = 20, balanced = TRUE) + expect_equal(length(x = tpc1b), 2) + expect_equal(names(tpc1b), c("positive", "negative")) + expect_equal(length(tpc1b[[1]]), 10) + expect_equal(length(tpc1b[[2]]), 10) + expect_equal(tpc1b[[1]][1], "GTCATACTTCGCCT") + expect_equal(tpc1b[[1]][10], "CTTGATTGATCTTC") + expect_equal(tpc1b[[2]][1], "ACGTGATGCCATGA") + expect_equal(tpc1b[[2]][10], "ATTGTAGATTCCCG") + tpc1.sub <- Top(data = dat[1:79, , drop = FALSE], num = 79, balanced = TRUE) + expect_equal(length(tpc1.sub[[1]]), 40) + expect_equal(length(tpc1.sub[[2]]), 39) +})