diff --git a/DESCRIPTION b/DESCRIPTION index de9208a01..e3a1fb92c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: clustifyr Title: Classifier for Single-cell RNA-seq Using Cell Clusters -Version: 1.15.1 +Version: 1.15.2 Description: Package designed to aid in classifying cells from single-cell RNA sequencing data using external reference data (e.g., bulk RNA-seq, scRNA-seq, microarray, gene lists). A variety of correlation based methods and gene list enrichment methods are provided to assist cell @@ -57,6 +57,7 @@ Imports: methods, SingleCellExperiment, SummarizedExperiment, + SeuratObject, matrixStats, S4Vectors, proxy, @@ -73,7 +74,6 @@ Suggests: BiocManager, remotes, shiny, - SeuratObject, gprofiler2, purrr, data.table, @@ -87,7 +87,7 @@ VignetteBuilder: ByteCompile: true Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 LazyData: true Config/Needs/website: pkgdown, diff --git a/NAMESPACE b/NAMESPACE index c07f854aa..ecc466191 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ export(make_comb_ref) export(marker_select) export(matrixize_markers) export(object_data) +export(object_loc_lookup) export(object_ref) export(overcluster) export(overcluster_test) @@ -68,8 +69,10 @@ export(ref_marker_select) export(reverse_marker_matrix) export(run_clustifyr_app) export(run_gsea) +export(sce_pbmc) export(seurat_meta) export(seurat_ref) +export(so_pbmc) export(write_meta) import(Matrix) import(dplyr) @@ -78,9 +81,16 @@ import(ggplot2) import(scales) import(tibble) importFrom(S4Vectors,DataFrame) +importFrom(SeuratObject,"DefaultAssay<-") +importFrom(SeuratObject,"Key<-") +importFrom(SeuratObject,CreateDimReducObject) +importFrom(SeuratObject,CreateSeuratObject) +importFrom(SeuratObject,DefaultAssay) +importFrom(SeuratObject,Key) +importFrom(SeuratObject,VariableFeatures) importFrom(SingleCellExperiment,colData) importFrom(SingleCellExperiment,logcounts) -importFrom(SummarizedExperiment,`colData<-`) +importFrom(SummarizedExperiment,"colData<-") importFrom(cowplot,theme_cowplot) importFrom(fgsea,fgsea) importFrom(httr,build_url) @@ -98,4 +108,5 @@ importFrom(stats,p.adjust) importFrom(stats,prcomp) importFrom(stats,quantile) importFrom(tidyr,gather) +importFrom(utils,packageVersion) importFrom(utils,read.csv) diff --git a/NEWS b/NEWS index 27b0c73c4..a098568e2 100644 --- a/NEWS +++ b/NEWS @@ -52,4 +52,7 @@ Changes in version 1.7.3 (2022-03-09) + Maintainer change Changes in version 1.15.1 (2023-10-31) -+ Replace `Seurat` dependency with `SeuratObject` \ No newline at end of file ++ Replace `Seurat` dependency with `SeuratObject` + +Changes in version 1.15.2 (2024-04-03) ++ Add support for `Seurat` version 5 objects diff --git a/R/clustifyR-package.R b/R/clustifyR-package.R index 4b7dd68ac..482ce87b7 100644 --- a/R/clustifyR-package.R +++ b/R/clustifyR-package.R @@ -17,5 +17,6 @@ #' @importFrom cowplot theme_cowplot #' @importFrom fgsea fgsea #' @importFrom methods is +#' @importFrom SeuratObject Key Key<- DefaultAssay DefaultAssay<- ## usethis namespace: end NULL diff --git a/R/data.R b/R/data.R index b937d6f4a..9ea010d5b 100644 --- a/R/data.R +++ b/R/data.R @@ -43,24 +43,6 @@ #' @source `[pbmc_matrix]` processed by Seurat "pbmc_vargenes" -#' Small clustered Seurat2 object -#' -#' @family data -#' @source `[pbmc_small]` processed by seurat -"s_small" - -#' Small clustered Seurat3 object -#' -#' @family data -#' @source `[pbmc_small]` processed by Seurat -"s_small3" - -#' Small SingleCellExperiment object -#' -#' @family data -#' @source \url{https://github.com/hemberg-lab/scRNA.seq.datasets/blob/master/R/segerstolpe.R} -"sce_small" - #' reference matrix from seurat citeseq CBMC tutorial #' #' @family data @@ -73,12 +55,6 @@ #' @source \url{https://satijalab.org/seurat/v3.0/multimodal_vignette.html#identify-differentially-expressed-proteins-between-clusters} "cbmc_m" -#' lookup table for single cell object structures -#' -#' @family data -#' @source various packages -"object_loc_lookup" - #' table of references stored in clustifyrdata #' #' @family data diff --git a/R/main.R b/R/main.R index ce066faa8..b468f0261 100644 --- a/R/main.R +++ b/R/main.R @@ -59,7 +59,7 @@ clustify <- function(input, ...) { #' metadata = pbmc_meta, #' ref_mat = cbmc_ref, #' query_genes = pbmc_vargenes, -#' cluster_col = "classified", +#' cluster_col = "RNA_snn_res.0.5", #' verbose = TRUE #' ) #' @@ -69,28 +69,40 @@ clustify <- function(input, ...) { #' metadata = pbmc_meta, #' ref_mat = cbmc_ref, #' query_genes = pbmc_vargenes, -#' cluster_col = "classified", +#' cluster_col = "RNA_snn_res.0.5", #' compute_method = "cosine" #' ) -#' +#' +#' # Annotate a SingleCellExperiment object +#' sce <- sce_pbmc() +#' clustify( +#' sce, +#' cbmc_ref, +#' cluster_col = "clusters", +#' obj_out = TRUE, +#' per_cell = FALSE, +#' dr = "umap" +#' ) +#' #' # Annotate a Seurat object +#' so <- so_pbmc() #' clustify( -#' s_small3, +#' so, #' cbmc_ref, -#' cluster_col = "RNA_snn_res.1", +#' cluster_col = "seurat_clusters", #' obj_out = TRUE, #' per_cell = FALSE, -#' dr = "tsne" +#' dr = "umap" #' ) #' #' # Annotate (and return) a Seurat object per-cell #' clustify( -#' input = s_small3, +#' input = so, #' ref_mat = cbmc_ref, -#' cluster_col = "RNA_snn_res.1", +#' cluster_col = "seurat_clusters", #' obj_out = TRUE, #' per_cell = TRUE, -#' dr = "tsne" +#' dr = "umap" #' ) #' @export clustify.default <- function(input, @@ -107,7 +119,7 @@ clustify.default <- function(input, lookuptable = NULL, rm0 = FALSE, obj_out = TRUE, - seurat_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, rename_prefix = NULL, threshold = "auto", @@ -161,8 +173,9 @@ clustify.default <- function(input, stop("given `cluster_col` is not a column in `metadata`", call. = FALSE) } - if (length(query_genes) == 0) { - message("var.features not found, using all genes instead") + if (is.null(query_genes) || length(query_genes) == 0) { + message("Variable features not available, using all genes instead\n", + "consider supplying variable features to `query_genes` argument.") query_genes <- NULL } @@ -248,8 +261,8 @@ clustify.default <- function(input, message("similarity computation completed, matrix of ", dim(res)[1], " x ", dim(res)[2], ", preparing output") } - if ((obj_out && - seurat_out) && + obj_out <- seurat_out + if (obj_out && !inherits(input_original, c( "matrix", "Matrix", @@ -324,8 +337,8 @@ clustify.Seurat <- function(input, pseudobulk_method = "mean", use_var_genes = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = "auto", verbose = TRUE, @@ -380,8 +393,8 @@ clustify.Seurat <- function(input, if (n_perm != 0) { res <- -log(res$p_val + .01, 10) } - - if (!(seurat_out && obj_out) && !vec_out || vec) { + obj_out <- seurat_out + if (!obj_out && !vec_out || vec) { res } else { df_temp <- cor_to_call( @@ -449,8 +462,8 @@ clustify.SingleCellExperiment <- function(input, pseudobulk_method = "mean", use_var_genes = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = "auto", verbose = TRUE, @@ -501,8 +514,8 @@ clustify.SingleCellExperiment <- function(input, if (n_perm != 0) { res <- -log(res$p_val + .01, 10) } - - if (!(seurat_out && obj_out) && !vec_out) { + obj_out <- seurat_out + if (!obj_out && !vec_out) { res } else { df_temp <- cor_to_call( @@ -588,7 +601,7 @@ clustify_lists <- function(input, ...) { } #' @rdname clustify_lists -#' @param input single-cell expression matrix or Seurat object +#' @param input single-cell expression matrix, Seurat object, or SingleCellExperiment #' @param marker matrix or dataframe of candidate genes for each cluster #' @param marker_inmatrix whether markers genes are already in preprocessed #' matrix form @@ -658,7 +671,7 @@ clustify_lists.default <- function(input, output_high = TRUE, lookuptable = NULL, obj_out = TRUE, - seurat_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, rename_prefix = NULL, threshold = 0, @@ -778,10 +791,9 @@ clustify_lists.default <- function(input, if (verbose) { message("similarity computation completed, matrix of ", dim(res)[1], " x ", dim(res)[2], ", preparing output") } - + obj_out <- seurat_out if ((!inherits(input_original, c("matrix", "Matrix", "data.frame")) && - obj_out && - seurat_out) || (vec_out && + obj_out ) || (vec_out && inherits(input_original, c( "matrix", "Matrix", @@ -840,8 +852,8 @@ clustify_lists.Seurat <- function(input, metric = "hyper", output_high = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = 0, rename_prefix = NULL, @@ -885,8 +897,8 @@ clustify_lists.Seurat <- function(input, details_out = details_out, ... ) - - if (!(seurat_out && obj_out) && !vec_out || vec) { + obj_out <- seurat_out + if (!obj_out && !vec_out || vec) { res } else { if (metric != "consensus") { @@ -943,8 +955,8 @@ clustify_lists.SingleCellExperiment <- function(input, metric = "hyper", output_high = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = 0, rename_prefix = NULL, @@ -986,7 +998,7 @@ clustify_lists.SingleCellExperiment <- function(input, ... ) - if (!(seurat_out && obj_out) && !vec_out || vec) { + if (!obj_out && !vec_out || vec) { res } else { df_temp <- cor_to_call( diff --git a/R/seurat_wrapper.R b/R/object_access.R similarity index 66% rename from R/seurat_wrapper.R rename to R/object_access.R index c79a88aac..5fb46a91e 100644 --- a/R/seurat_wrapper.R +++ b/R/object_access.R @@ -1,3 +1,73 @@ +#' An example Seurat object +#' +#' @return a Seurat object populated with data +#' from the [pbmc_matrix_small] scRNA-seq dataset, additionally +#' annotated with cluster assignments. +#' +#' @importFrom SeuratObject CreateSeuratObject CreateDimReducObject VariableFeatures +#' @export +so_pbmc <- function() { + x <- pbmc_example_data() + so <- SeuratObject::CreateSeuratObject(x$mat, + meta.data = x$metadata) + umap_dr <- SeuratObject::CreateDimReducObject(embeddings = x$umap, + key = "umap_", + assay = "RNA") + if(is_seurat_v5()) { + so <- SeuratObject::SetAssayData(so, + "data", + SeuratObject::LayerData(so, layer = "counts") + ) + } else { + so <- SeuratObject::SetAssayData(so, + "data", + SeuratObject::GetAssayData(so, slot = "counts") + ) + } + so[["umap"]] <- umap_dr + SeuratObject::VariableFeatures(so) <- x$vargenes + so +} + +#' An example SingleCellExperiment object +#' +#' @return a SingleCellExperiment object populated with data +#' from the [pbmc_matrix_small] scRNA-seq dataset, additionally +#' annotated with cluster assignments. +#' +#' @export +sce_pbmc <- function() { + x <- pbmc_example_data() + md <- x$metadata[, c(1:5, 7)] + # rename to more sce-like names + colnames(md) <- c("cell_source", + "sum", + "detected", + "subsets_Mito_percent", + "clusters", + "cell_type") + SingleCellExperiment::SingleCellExperiment(list(counts = x$mat, + logcounts = x$mat), + colData = md, + reducedDims = list( + UMAP = x$umap + )) +} + +pbmc_example_data <- function() { + mat <- clustifyr::pbmc_matrix_small + md <- clustifyr::pbmc_meta + umap_cols <- c("UMAP_1", "UMAP_2") + umap <- as.matrix(md[, umap_cols]) + md <- md[, setdiff(colnames(md), umap_cols)] + vargenes <- clustifyr::pbmc_vargenes + + list(mat = mat, + metadata = md, + umap = umap, + vargenes = vargenes) +} + #' Function to access object data #' @return expression matrix, with genes as row names, #' and cell types as column names @@ -7,15 +77,15 @@ object_data <- function(object, ...) { } #' @rdname object_data -#' @param object object after tsne or umap projections -#' and clustering +#' @param object SingleCellExperiment or Seurat object #' @param slot data to access #' @param n_genes number of genes limit for Seurat variable genes, by default 1000, #' set to 0 to use all variable genes (generally not recommended) #' @param ... additional arguments #' @examples +#' so <- so_pbmc() #' mat <- object_data( -#' object = s_small3, +#' object = so, #' slot = "data" #' ) #' mat[1:3, 1:3] @@ -25,35 +95,14 @@ object_data.Seurat <- function(object, n_genes = 1000, ...) { if (slot == "data") { - temp <- tryCatch(object@assays$RNA@data, - error = function(e) { - message("detected spatial data, using raw counts") - object@assays$Spatial@counts - }) + temp <- get_seurat_matrix(object, ...) return(temp) } else if (slot == "meta.data") { return(object@meta.data) } else if (slot == "var.genes") { - vars <- tryCatch(object@assays$RNA@var.features, - error = function(e) { - object@assays$SCT@var.features - }) - if (length(vars) <= 1) { - message("trying to find variable genes in SCT assay") - vars <- tryCatch(object@assays$SCT@var.features, - error = function(e) {NA}) - } - if (length(vars) <= 1) { - message("trying to find variable genes in integrated assay") - vars <- tryCatch(object@assays$integrated@var.features, - error = function(e) {NA}) - } - if (length(vars) <= 1) { - message("trying to find variable genes in Spatial assay") - vars <- tryCatch(object@assays$Spatial@var.features, - error = function(e) {NA}) - } - if (length(vars) <= 1) { + vars <- SeuratObject::VariableFeatures(object) + + if (is.null(vars) || length(vars) <= 1) { message("variable genes not found, please manually specify with query_genes argument") } if ((length(vars) > n_genes) & (n_genes > 0)) { @@ -61,11 +110,59 @@ object_data.Seurat <- function(object, } return(vars) - } else if (slot == "pca") { - return(object@reductions$pca@feature.loadings) + } else { + stop(slot, " access method not implemented") } } +#' @importFrom utils packageVersion +is_seurat_v5 <- function() { + utils::packageVersion("SeuratObject") >= '5.0.0' +} + +extract_v5_matrix <- function(x, ...) { + ob_layers <- SeuratObject::Layers(x) + if("data" %in% ob_layers) { + res <- SeuratObject::LayerData(x, layer = "data", ...) + } else if ("counts" %in% ob_layers) { + message("Unable to find 'data' layer, using 'count' layer instead") + res <- SeuratObject::LayerData(x, layer = "counts", ...) + } else { + da <- DefaultAssay(x) + stop("\nUnable to find data or count layer in ", da, " Assay of SeuratObject\n", + "Extracting data from V5 objects with multiple count\n", + "or data layers is not supported") + } + res +} + +extract_v4_matrix <- function(x) { + res <- SeuratObject::GetAssayData(x, layer = "data") + + if(length(res) == 0) { + message("Unable to find 'data' slot, using 'count' slot instead") + res <- SeuratObject::GetAssayData(x, layer = "count") + } + + res +} + +get_seurat_matrix <- function(x, warn = TRUE) { + + ob_assay <- SeuratObject::DefaultAssay(x) + if(warn && ob_assay != "RNA") { + warning("Default assay of input Seurat object is ", ob_assay, "\n", + "Data will be used from this assay rather than RNA") + } + + if(is_seurat_v5()) { + res <- extract_v5_matrix(x) + } else { + res <- extract_v4_matrix(x) + } + res +} + #' @rdname object_data #' @param object object after tsne or umap projections #' and clustering @@ -73,8 +170,9 @@ object_data.Seurat <- function(object, #' @param ... additional arguments #' @importFrom SingleCellExperiment logcounts colData #' @examples +#' sce <- sce_pbmc() #' mat <- object_data( -#' object = sce_small, +#' object = sce, #' slot = "data" #' ) #' mat[1:3, 1:3] @@ -86,6 +184,8 @@ object_data.SingleCellExperiment <- function(object, return(SingleCellExperiment::logcounts(object)) } else if (slot == "meta.data") { return(as.data.frame(SingleCellExperiment::colData(object))) + } else { + stop(slot, " access method not implemented") } } @@ -102,21 +202,18 @@ write_meta <- function(object, ...) { #' @param meta new metadata dataframe #' @param ... additional arguments #' @examples +#' so <- so_pbmc() #' obj <- write_meta( -#' object = s_small3, -#' meta = seurat_meta(s_small3) +#' object = so, +#' meta = seurat_meta(so) #' ) #' @export write_meta.Seurat <- function(object, meta, ...) { - if ("SeuratObject" %in% loadedNamespaces()) { - object_new <- object - object_new@meta.data <- meta - return(object_new) - } else { - message("SeuratObject not loaded") - } + object_new <- object + object_new@meta.data <- meta + object_new } #' @rdname write_meta @@ -126,29 +223,27 @@ write_meta.Seurat <- function(object, #' @param ... additional arguments #' @importFrom SingleCellExperiment colData #' @importFrom S4Vectors DataFrame -#' @importFrom SummarizedExperiment `colData<-` +#' @importFrom SummarizedExperiment colData<- #' @examples +#' sce <- sce_pbmc() #' obj <- write_meta( -#' object = sce_small, -#' meta = object_data(sce_small, "meta.data") +#' object = sce, +#' meta = object_data(sce, "meta.data") #' ) #' @export write_meta.SingleCellExperiment <- function(object, meta, ...) { - if ("SingleCellExperiment" %in% loadedNamespaces()) { - colData(object) <- S4Vectors::DataFrame(meta) - return(object) - } else { - message("SingleCellExperiment not loaded") - } + colData(object) <- S4Vectors::DataFrame(meta) + object } #' Function to convert labelled seurat object to avg expression matrix #' @return reference expression matrix, with genes as row names, #' and cell types as column names #' @examples -#' ref <- seurat_ref(s_small3, cluster_col = "RNA_snn_res.1") +#' so <- so_pbmc() +#' ref <- seurat_ref(so, cluster_col = "seurat_clusters") #' @export seurat_ref <- function(seurat_object, ...) { UseMethod("seurat_ref", seurat_object) @@ -189,11 +284,15 @@ seurat_ref.Seurat <- function(seurat_object, } if (!is.null(assay_name)) { + og_assay <- SeuratObject::DefaultAssay(seurat_object) + assay_name <- setdiff(assay_name, og_assay) temp_mat <- temp_mat[0, ] for (element in assay_name) { - temp_mat2 <- seurat_object@assays[[element]]@counts + SeuratObject::DefaultAssay(seurat_object) <- element + temp_mat2 <- object_data(seurat_object, "data", warn = FALSE) temp_mat <- rbind(temp_mat, as.matrix(temp_mat2)) } + SeuratObject::DefaultAssay(seurat_object) <- og_assay } } else { stop("warning, not seurat3 object") @@ -214,7 +313,8 @@ seurat_ref.Seurat <- function(seurat_object, #' Function to convert labelled seurat object to fully prepared metadata #' @return dataframe of metadata, including dimension reduction plotting info #' @examples -#' m <- seurat_meta(s_small3) +#' so <- so_pbmc() +#' m <- seurat_meta(so) #' @export seurat_meta <- function(seurat_object, ...) { UseMethod("seurat_meta", seurat_object) @@ -278,9 +378,10 @@ object_ref <- function(input, ...) { #' averaging will be done on unlogged data #' @param ... additional arguments #' @examples +#' so <- so_pbmc() #' object_ref( -#' s_small3, -#' cluster_col = "RNA_snn_res.1" +#' so, +#' cluster_col = "seurat_clusters" #' ) #' @export object_ref.default <- function(input, diff --git a/R/plot.R b/R/plot.R index d1902d5dc..1fe570187 100644 --- a/R/plot.R +++ b/R/plot.R @@ -310,7 +310,8 @@ plot_cor <- function(cor_mat, if (cor_df[[cluster_col]][1] %in% metadata[[cluster_col]]) { plt_data <- dplyr::left_join(cor_df_long, metadata, - by = cluster_col + by = cluster_col, + relationship = "many-to-many" ) } else { plt_data <- dplyr::left_join(cor_df_long, diff --git a/R/utils.R b/R/utils.R index 88e4dd3ba..d45bf8fe7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,32 @@ +#' Check package is installed +#' @param pkg package to query +#' @return logical(1) indicating if package is available. +#' @noRd +is_pkg_available <- function(pkg, + action = c("none", "message", "warn", "error"), + msg = "") { + has_pkg <- requireNamespace(pkg, quietly = TRUE) + action <- match.arg(action) + + if(!has_pkg) { + switch(action, + message = message(pkg, + " not installed ", + msg), + warn = warning(pkg, + " not installed ", + msg, + call. = FALSE), + error = stop(pkg, + " not installed and is required for this function ", + msg, + call. = FALSE), + ) + } + has_pkg +} + + #' Overcluster by kmeans per cluster #' #' @param mat expression matrix @@ -372,7 +401,7 @@ get_best_str <- function(name, #' Find entries shared in all vectors #' @description return entries found in all supplied vectors. #' If the vector supplied is NULL or NA, then it will be excluded -#' from the comparision. +#' from the comparison. #' @param ... vectors #' @return vector of shared elements get_common_elements <- function(...) { @@ -569,7 +598,8 @@ cor_to_call_topn <- function(cor_mat, dplyr::select(df_temp_full, -c( !!dplyr::sym("type"), !!dplyr::sym("r") )), - by = stats::setNames(collapse_to_cluster, "type2") + by = stats::setNames(collapse_to_cluster, "type2"), + relationship = "many-to-many" ) df_temp_full <- dplyr::mutate(df_temp_full2, type = tidyr::replace_na( @@ -720,16 +750,17 @@ gene_pct_markerm <- function(matrix, #' #' @examples #' -#' # Seurat3 +#' # Seurat +#' so <- so_pbmc() #' clustify_nudge( -#' input = s_small3, +#' input = so, #' ref_mat = cbmc_ref, #' marker = cbmc_m, -#' cluster_col = "RNA_snn_res.1", +#' cluster_col = "seurat_clusters", #' threshold = 0.8, -#' seurat_out = FALSE, +#' obj_out = FALSE, #' mode = "pct", -#' dr = "tsne" +#' dr = "umap" #' ) #' #' # Matrix @@ -775,7 +806,7 @@ clustify_nudge <- function(input, ...) { #' in preprocessed matrix form #' @param mode use marker expression pct or ranked cor score for nudging #' @param obj_out whether to output object instead of cor matrix -#' @param seurat_out output cor matrix or called seurat object +#' @param seurat_out output cor matrix or called seurat object (deprecated, use obj_out) #' @param rename_prefix prefix to add to type and r column names #' @param lookuptable if not supplied, will look in built-in #' table for object parsing @@ -793,7 +824,6 @@ clustify_nudge.default <- function(input, query_genes = NULL, compute_method = "spearman", weight = 1, - seurat_out = FALSE, threshold = -Inf, dr = "umap", norm = "diff", @@ -801,6 +831,7 @@ clustify_nudge.default <- function(input, marker_inmatrix = TRUE, mode = "rank", obj_out = FALSE, + seurat_out = obj_out, rename_prefix = NULL, lookuptable = NULL, ...) { @@ -843,7 +874,7 @@ clustify_nudge.default <- function(input, metadata = metadata, cluster_col = cluster_col, query_genes = query_genes, - seurat_out = FALSE, + obj_out = FALSE, per_cell = FALSE ) @@ -877,9 +908,8 @@ clustify_nudge.default <- function(input, res <- resa[order(rownames(resa)), order(colnames(resa))] + resb[order(rownames(resb)), order(colnames(resb))] * weight - - if ((obj_out || - seurat_out) && + obj_out <- seurat_out + if (obj_out && !inherits(input_original, c("matrix", "Matrix", "data.frame"))) { df_temp <- cor_to_call( res, @@ -924,8 +954,8 @@ clustify_nudge.Seurat <- function(input, query_genes = NULL, compute_method = "spearman", weight = 1, - seurat_out = TRUE, - obj_out = FALSE, + obj_out = TRUE, + seurat_out = obj_out, threshold = -Inf, dr = "umap", norm = "diff", @@ -944,7 +974,7 @@ clustify_nudge.Seurat <- function(input, ref_mat = ref_mat, cluster_col = cluster_col, query_genes = query_genes, - seurat_out = FALSE, + obj_out = FALSE, per_cell = FALSE, dr = dr ) @@ -981,8 +1011,8 @@ clustify_nudge.Seurat <- function(input, res <- resa[order(rownames(resa)), order(colnames(resa))] + resb[order(rownames(resb)), order(colnames(resb))] * weight - - if (!(seurat_out || obj_out)) { + obj_out <- seurat_out + if (!obj_out) { res } else { df_temp <- cor_to_call( @@ -1010,19 +1040,77 @@ clustify_nudge.Seurat <- function(input, input } } +#' lookup table for single cell object structures +#' @importFrom SummarizedExperiment colData<- +#' @export +object_loc_lookup <- function() { + l <- list() + + l$SingleCellExperiment <- c( + expr = function(x) object_data(x, "data"), + meta = function(x) object_data(x, "meta.data"), + add_meta = function(x, md) { + colData(x) <- md + x}, + var = NULL, + col = "cell_type1" + ) + + l$Seurat <- c( + expr = function(x) object_data(x, "data"), + meta = function(x) object_data(x, "meta.data"), + add_meta = function(x, md) { + x@meta.data <- md + x}, + var = function(x) object_data(x, "var.genes"), + col = "RNA_snn_res.1" + ) + + l$URD <- c( + expr = function(x) x@logupx.data, + meta = function(x) x@meta, + add_meta = function(x, md) { + x@meta <- md + x}, + var = function(x) x@var.genes, + col = "cluster" + ) + + l$FunctionalSingleCellExperiment <- c( + expr = function(x) x@ExperimentList$rnaseq@assays$data$logcounts, + meta = function(x) x@ExperimentList$rnaseq@colData, + add_meta = function(x, md) { + x@ExperimentList$rnaseq@colData <- md + x}, + var = NULL, + col = "leiden_cluster" + ) + + l$CellDataSet <- c( + expr = function(x) do.call(function(x) {row.names(x) <- x@featureData@data$gene_short_name; return(x)}, list(x@assayData$exprs)), + meta = function(x) as.data.frame(x@phenoData@data), + add_meta = function(x, md) { + x@phenoData@data <- md + x}, + var = function(x) as.character(x@featureData@data$gene_short_name[x@featureData@data$use_for_ordering == T]), + col = "Main_Cluster" + ) + l +} #' more flexible parsing of single cell objects #' #' @param input input object #' @param type look up predefined slots/loc -#' @param expr_loc expression matrix location -#' @param meta_loc metadata location -#' @param var_loc variable genes location +#' @param expr_loc function that extracts expression matrix +#' @param meta_loc function that extracts metadata +#' @param var_loc function that extracts variable genes #' @param cluster_col column of clustering from metadata -#' @param lookuptable if not supplied, will look in built-in table for object parsing +#' @param lookuptable if not supplied, will use object_loc_lookup() for parsing. #' @return list of expression, metadata, vargenes, cluster_col info from object #' @examples -#' obj <- parse_loc_object(s_small3) +#' so <- so_pbmc() +#' obj <- parse_loc_object(so) #' length(obj) #' @export parse_loc_object <- function(input, @@ -1032,19 +1120,25 @@ parse_loc_object <- function(input, var_loc = NULL, cluster_col = NULL, lookuptable = NULL) { + if(!type %in% c("SingleCellExperiment", "Seurat")) { + warning("Support for ", type, " objects is deprecated ", + "and will be removed from clustifyr in the next version") + } + if (is.null(lookuptable)) { - object_loc_lookup1 <- clustifyr::object_loc_lookup + lookup <- object_loc_lookup() } else { - object_loc_lookup1 <- lookuptable + warning("Support for supplying custom objects is deprecated ", + "and will be removed from clustifyr in the next version") + lookup <- lookuptable } - if (length(intersect(type, colnames(object_loc_lookup1))) > 0) { - type <- intersect(type, colnames(object_loc_lookup1))[1] + if (type %in% names(lookup)) { parsed <- list( - eval(parse(text = object_loc_lookup1[[type]][1])), - as.data.frame(eval(parse(text = object_loc_lookup1[[type]][2]))), - eval(parse(text = object_loc_lookup1[[type]][3])), - object_loc_lookup1[[type]][4] + expr = lookup[[type]]$expr(input), + meta = as.data.frame(lookup[[type]]$meta(input)), + var = lookup[[type]]$var(input), + col = lookup[[type]]$col ) } else { parsed <- list(NULL, NULL, NULL, NULL) @@ -1053,18 +1147,15 @@ parse_loc_object <- function(input, names(parsed) <- c("expr", "meta", "var", "col") if (!(is.null(expr_loc))) { - parsed[["expr"]] <- eval(parse(text = paste0("input", expr_loc))) + parsed[["expr"]] <- expr_loc(input) } if (!(is.null(meta_loc))) { - parsed[["meta"]] <- - as.data.frame(eval(parse(text = paste0( - "input", meta_loc - )))) + parsed[["meta"]] <- as.data.frame(meta_loc(input)) } if (!(is.null(var_loc))) { - parsed[["var"]] <- eval(parse(text = paste0("input", var_loc))) + parsed[["var"]] <- var_loc(input) } if (!(is.null(cluster_col))) { @@ -1084,9 +1175,8 @@ parse_loc_object <- function(input, #' will look in built-in table for object parsing #' @return new object with new metadata inserted #' @examples -#' \dontrun{ -#' insert_meta_object(s_small3, seurat_meta(s_small3, dr = "tsne")) -#' } +#' so <- so_pbmc() +#' insert_meta_object(so, seurat_meta(so, dr = "umap")) #' @export insert_meta_object <- function(input, new_meta, @@ -1094,16 +1184,15 @@ insert_meta_object <- function(input, meta_loc = NULL, lookuptable = NULL) { if (is.null(lookuptable)) { - object_loc_lookup1 <- clustifyr::object_loc_lookup + lookup <- object_loc_lookup() } else { - object_loc_lookup1 <- lookuptable + lookup <- lookuptable } - if (!type %in% colnames(object_loc_lookup1)) { + if (!type %in% names(lookup)) { stop("unrecognized object type", call. = FALSE) } else { - text1 <- paste0(object_loc_lookup1[[type]][2], " <- ", "new_meta") - eval(parse(text = text1)) + input <- lookup[[type]]$add_meta(input, new_meta) return(input) } } @@ -1180,7 +1269,7 @@ overcluster_test <- function(expr, metadata, query_genes = genes, cluster_col = cluster_col, - seurat_out = FALSE + obj_out = FALSE ) res2 <- clustify( expr, @@ -1188,7 +1277,7 @@ overcluster_test <- function(expr, metadata, query_genes = genes, cluster_col = "new_clusters", - seurat_out = FALSE + obj_out = FALSE ) o1 <- plot_dims( metadata, @@ -1993,38 +2082,20 @@ plot_rank_bias <- function( #' ) #' @export append_genes <- function(gene_vector, ref_matrix) { - rownamesGSEMatrix <- rownames(ref_matrix) - # Get rownames from GSEMatrix (new GSE file) - - rowCountHumanGenes <- length(gene_vector) - # Calculate number of rows from list of full human genes - rowCountNewGSEFile <- nrow(ref_matrix) - # Calculate number of rows of GSE matrix - - missing_rows <- setdiff(gene_vector, rownamesGSEMatrix) - # Use setdiff function to figure out rows which are different/missing - # from GSE matrix - + missing_rows <- setdiff(gene_vector, rownames(ref_matrix)) + zeroExpressionMatrix <- matrix( 0, nrow = length(missing_rows), ncol = ncol(ref_matrix) ) - # Create a placeholder matrix with zeroes and missing_rows length rownames(zeroExpressionMatrix) <- missing_rows - # Assign row names colnames(zeroExpressionMatrix) <- colnames(ref_matrix) - # Assign column names - + full_matrix <- rbind(ref_matrix, zeroExpressionMatrix) - # Bind GSEMatrix and zeroExpressionMatrix together - - # Reorder matrix full_matrix <- full_matrix[gene_vector, ] - # Reorder fullMatrix to preserve gene order - return(full_matrix) - # Return fullMatrix + full_matrix } #' Given a count matrix, determine if the matrix has been either diff --git a/data-raw/object_loc_lookup.R b/data-raw/object_loc_lookup.R index 88bc4ad93..acb222ca8 100644 --- a/data-raw/object_loc_lookup.R +++ b/data-raw/object_loc_lookup.R @@ -1,38 +1,5 @@ library(usethis) -object_loc_lookup$SingleCellExperiment <- c( - expr = "input@assays$data$logcounts", - meta = "as.data.frame(input@colData)", - var = NULL, - col = "cell_type1" -) -object_loc_lookup$URD <- c( - expr = "input@logupx.data", - meta = "input@meta", - var = "input@var.genes", - col = "cluster" -) - -object_loc_lookup$FunctionalSingleCellExperiment <- c( - expr = "input@ExperimentList$rnaseq@assays$data$logcounts", - meta = "input@ExperimentList$rnaseq@colData", - var = NULL, - col = "leiden_cluster" -) - -object_loc_lookup$Seurat <- c( - expr = "input@assays$RNA@data", - meta = "input@meta.data", - var = "input@assays$RNA@var.features", - col = "RNA_snn_res.1" -) - -object_loc_lookup$CellDataSet <- c( - expr = "do.call(function(x) {row.names(x) <- input@featureData@data$gene_short_name; return(x)}, list(input@assayData$exprs))", - meta = "as.data.frame(input@phenoData@data)", - var = "as.character(input@featureData@data$gene_short_name[input@featureData@data$use_for_ordering == T])", - col = "Main_Cluster" -) usethis::use_data(object_loc_lookup, compress = "xz", overwrite = TRUE) diff --git a/data-raw/s_small.R b/data-raw/s_small.R deleted file mode 100644 index a0dd242b3..000000000 --- a/data-raw/s_small.R +++ /dev/null @@ -1,11 +0,0 @@ -library(usethis) - -# need seurat v2 -s_small <- Seurat::pbmc_small -attr(attr(s_small, "class"), "package") <- NULL -attr(attr(attr(s, "dr")[["pca"]], "class"), "package") <- NULL -attr(attr(attr(s, "dr")[["tsne"]], "class"), "package") <- NULL -attr(attr(attr(s, "spatial"), "class"), "package") <- NULL -attr(attr(attr(s@dr$pca, "jackstraw"), "class"), "package") <- NULL - -usethis::use_data(s_small, compress = "xz", overwrite = TRUE) diff --git a/data-raw/s_small3.R b/data-raw/s_small3.R deleted file mode 100644 index c60fe4162..000000000 --- a/data-raw/s_small3.R +++ /dev/null @@ -1,25 +0,0 @@ -library(usethis) - -s <- Seurat::pbmc_small -attr(attr(s, "class"), "package") <- NULL - -attr(attr(s@commands$NormalizeData.RNA, "class"), "package") <- NULL -attr(attr(s@commands$FindVariableFeatures.RNA, "class"), "package") <- NULL -attr(attr(s@commands$ScaleData.RNA, "class"), "package") <- NULL -attr(attr(s@commands$RunPCA.RNA, "class"), "package") <- NULL -attr(attr(s@commands$BuildSNN.RNA.pca, "class"), "package") <- NULL -attr(attr(s@commands$FindClusters, "class"), "package") <- NULL -attr(attr(s@commands$RunTSNE.pca, "class"), "package") <- NULL -attr(attr(s@commands$JackStraw.RNA.pca, "class"), "package") <- NULL -attr(attr(s@commands$ScoreJackStraw.pca, "class"), "package") <- NULL -attr(attr(s@commands$ProjectDim.RNA.pca, "class"), "package") <- NULL -attr(attr(attr(s@reductions$pca, "jackstraw"), "class"), "package") <- NULL -attr(attr(s@reductions$pca, "class"), "package") <- NULL -attr(attr(s@reductions$tsne, "class"), "package") <- NULL -attr(attr(s@graphs$RNA_snn, "class"), "package") <- NULL -attr(attr(s@assays$RNA, "class"), "package") <- NULL -attr(attr(s@reductions$tsne@jackstraw, "class"), "package") <- NULL -s@commands <- list() -s_small3 <- s - -usethis::use_data(s_small3, compress = "xz", overwrite = TRUE) diff --git a/data-raw/sce_small.R b/data-raw/sce_small.R deleted file mode 100644 index 347cc477b..000000000 --- a/data-raw/sce_small.R +++ /dev/null @@ -1,7 +0,0 @@ -library(usethis) -library(SingleCellExperiment) - -s <- readRDS(url("https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/segerstolpe.rds")) -sce_small <- s[1:200, 1:200] - -use_data(sce_small, compress = "xz", overwrite = TRUE) diff --git a/data/s_small.rda b/data/s_small.rda deleted file mode 100644 index ad31a0dee..000000000 Binary files a/data/s_small.rda and /dev/null differ diff --git a/data/s_small3.rda b/data/s_small3.rda deleted file mode 100644 index 02e0d4202..000000000 Binary files a/data/s_small3.rda and /dev/null differ diff --git a/data/sce_small.rda b/data/sce_small.rda deleted file mode 100644 index 9a88c1937..000000000 Binary files a/data/sce_small.rda and /dev/null differ diff --git a/man/cbmc_m.Rd b/man/cbmc_m.Rd index 52a1aac6d..22d91557e 100644 --- a/man/cbmc_m.Rd +++ b/man/cbmc_m.Rd @@ -22,15 +22,11 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/cbmc_ref.Rd b/man/cbmc_ref.Rd index 5fe0f3563..fb52df60d 100644 --- a/man/cbmc_ref.Rd +++ b/man/cbmc_ref.Rd @@ -22,15 +22,11 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/clustify.Rd b/man/clustify.Rd index 689dd1533..d210de233 100644 --- a/man/clustify.Rd +++ b/man/clustify.Rd @@ -24,7 +24,7 @@ clustify(input, ...) lookuptable = NULL, rm0 = FALSE, obj_out = TRUE, - seurat_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, rename_prefix = NULL, threshold = "auto", @@ -50,8 +50,8 @@ clustify(input, ...) pseudobulk_method = "mean", use_var_genes = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = "auto", verbose = TRUE, @@ -77,8 +77,8 @@ clustify(input, ...) pseudobulk_method = "mean", use_var_genes = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = "auto", verbose = TRUE, @@ -178,7 +178,7 @@ clustify( metadata = pbmc_meta, ref_mat = cbmc_ref, query_genes = pbmc_vargenes, - cluster_col = "classified", + cluster_col = "RNA_snn_res.0.5", verbose = TRUE ) @@ -188,27 +188,39 @@ clustify( metadata = pbmc_meta, ref_mat = cbmc_ref, query_genes = pbmc_vargenes, - cluster_col = "classified", + cluster_col = "RNA_snn_res.0.5", compute_method = "cosine" ) +# Annotate a SingleCellExperiment object +sce <- sce_pbmc() +clustify( + sce, + cbmc_ref, + cluster_col = "clusters", + obj_out = TRUE, + per_cell = FALSE, + dr = "umap" +) + # Annotate a Seurat object +so <- so_pbmc() clustify( - s_small3, + so, cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", obj_out = TRUE, per_cell = FALSE, - dr = "tsne" + dr = "umap" ) # Annotate (and return) a Seurat object per-cell clustify( - input = s_small3, + input = so, ref_mat = cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", obj_out = TRUE, per_cell = TRUE, - dr = "tsne" + dr = "umap" ) } diff --git a/man/clustify_lists.Rd b/man/clustify_lists.Rd index 260efd645..3a2b86e63 100644 --- a/man/clustify_lists.Rd +++ b/man/clustify_lists.Rd @@ -24,7 +24,7 @@ clustify_lists(input, ...) output_high = TRUE, lookuptable = NULL, obj_out = TRUE, - seurat_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, rename_prefix = NULL, threshold = 0, @@ -49,8 +49,8 @@ clustify_lists(input, ...) metric = "hyper", output_high = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = 0, rename_prefix = NULL, @@ -73,8 +73,8 @@ clustify_lists(input, ...) metric = "hyper", output_high = TRUE, dr = "umap", - seurat_out = TRUE, obj_out = TRUE, + seurat_out = obj_out, vec_out = FALSE, threshold = 0, rename_prefix = NULL, @@ -84,7 +84,7 @@ clustify_lists(input, ...) ) } \arguments{ -\item{input}{single-cell expression matrix or Seurat object} +\item{input}{single-cell expression matrix, Seurat object, or SingleCellExperiment} \item{...}{passed to matrixize_markers} diff --git a/man/clustify_nudge.Rd b/man/clustify_nudge.Rd index 1a65a461a..a180a5119 100644 --- a/man/clustify_nudge.Rd +++ b/man/clustify_nudge.Rd @@ -18,7 +18,6 @@ clustify_nudge(input, ...) query_genes = NULL, compute_method = "spearman", weight = 1, - seurat_out = FALSE, threshold = -Inf, dr = "umap", norm = "diff", @@ -26,6 +25,7 @@ clustify_nudge(input, ...) marker_inmatrix = TRUE, mode = "rank", obj_out = FALSE, + seurat_out = obj_out, rename_prefix = NULL, lookuptable = NULL, ... @@ -39,8 +39,8 @@ clustify_nudge(input, ...) query_genes = NULL, compute_method = "spearman", weight = 1, - seurat_out = TRUE, - obj_out = FALSE, + obj_out = TRUE, + seurat_out = obj_out, threshold = -Inf, dr = "umap", norm = "diff", @@ -77,8 +77,6 @@ the expr_mat and ref_mat will be used for comparision.} \item{weight}{relative weight for the gene list scores, when added to correlation score} -\item{seurat_out}{output cor matrix or called seurat object} - \item{threshold}{identity calling minimum score threshold, only used when obj_out = T} @@ -95,6 +93,8 @@ in preprocessed matrix form} \item{obj_out}{whether to output object instead of cor matrix} +\item{seurat_out}{output cor matrix or called seurat object (deprecated, use obj_out)} + \item{rename_prefix}{prefix to add to type and r column names} \item{lookuptable}{if not supplied, will look in built-in @@ -110,16 +110,17 @@ bulk RNA-seq data and marker list } \examples{ -# Seurat3 +# Seurat +so <- so_pbmc() clustify_nudge( - input = s_small3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", threshold = 0.8, - seurat_out = FALSE, + obj_out = FALSE, mode = "pct", - dr = "tsne" + dr = "umap" ) # Matrix diff --git a/man/clustifyr-package.Rd b/man/clustifyr-package.Rd index 036b805fc..7aac7efd5 100644 --- a/man/clustifyr-package.Rd +++ b/man/clustifyr-package.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/clustifyR-package.R \docType{package} \name{clustifyr-package} +\alias{clustifyr} \alias{clustifyr-package} -\alias{_PACKAGE} \title{clustifyr: Classifier for Single-cell RNA-seq Using Cell Clusters} \description{ Package designed to aid in classifying cells from single-cell RNA sequencing data using external reference data (e.g., bulk RNA-seq, scRNA-seq, microarray, gene lists). A variety of correlation based methods and gene list enrichment methods are provided to assist cell type assignment. diff --git a/man/downrefs.Rd b/man/downrefs.Rd index 3805c4052..3bf25a34d 100644 --- a/man/downrefs.Rd +++ b/man/downrefs.Rd @@ -22,15 +22,11 @@ Other data: \code{\link{cbmc_ref}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/get_common_elements.Rd b/man/get_common_elements.Rd index 03f4142de..122f04582 100644 --- a/man/get_common_elements.Rd +++ b/man/get_common_elements.Rd @@ -15,5 +15,5 @@ vector of shared elements \description{ return entries found in all supplied vectors. If the vector supplied is NULL or NA, then it will be excluded -from the comparision. +from the comparison. } diff --git a/man/human_genes_10x.Rd b/man/human_genes_10x.Rd index c06ceb9e8..074602ff0 100644 --- a/man/human_genes_10x.Rd +++ b/man/human_genes_10x.Rd @@ -22,15 +22,11 @@ Other data: \code{\link{cbmc_ref}}, \code{\link{downrefs}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/insert_meta_object.Rd b/man/insert_meta_object.Rd index 793624862..d73a1fb90 100644 --- a/man/insert_meta_object.Rd +++ b/man/insert_meta_object.Rd @@ -31,7 +31,6 @@ new object with new metadata inserted more flexible metadata update of single cell objects } \examples{ -\dontrun{ -insert_meta_object(s_small3, seurat_meta(s_small3, dr = "tsne")) -} +so <- so_pbmc() +insert_meta_object(so, seurat_meta(so, dr = "umap")) } diff --git a/man/mouse_genes_10x.Rd b/man/mouse_genes_10x.Rd index 1df6a511a..6f2e4d0f8 100644 --- a/man/mouse_genes_10x.Rd +++ b/man/mouse_genes_10x.Rd @@ -22,15 +22,11 @@ Other data: \code{\link{cbmc_ref}}, \code{\link{downrefs}}, \code{\link{human_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/object_data.Rd b/man/object_data.Rd index 03e361d59..6f6716c4a 100644 --- a/man/object_data.Rd +++ b/man/object_data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seurat_wrapper.R +% Please edit documentation in R/object_access.R \name{object_data} \alias{object_data} \alias{object_data.Seurat} @@ -31,13 +31,15 @@ and cell types as column names Function to access object data } \examples{ +so <- so_pbmc() mat <- object_data( - object = s_small3, + object = so, slot = "data" ) mat[1:3, 1:3] +sce <- sce_pbmc() mat <- object_data( - object = sce_small, + object = sce, slot = "data" ) mat[1:3, 1:3] diff --git a/man/object_loc_lookup.Rd b/man/object_loc_lookup.Rd index b56353146..bc1b2656a 100644 --- a/man/object_loc_lookup.Rd +++ b/man/object_loc_lookup.Rd @@ -1,36 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} +% Please edit documentation in R/utils.R \name{object_loc_lookup} \alias{object_loc_lookup} \title{lookup table for single cell object structures} -\format{ -An object of class \code{data.frame} with 4 rows and 6 columns. -} -\source{ -various packages -} \usage{ -object_loc_lookup +object_loc_lookup() } \description{ lookup table for single cell object structures } -\seealso{ -Other data: -\code{\link{cbmc_m}}, -\code{\link{cbmc_ref}}, -\code{\link{downrefs}}, -\code{\link{human_genes_10x}}, -\code{\link{mouse_genes_10x}}, -\code{\link{pbmc_markers_M3Drop}}, -\code{\link{pbmc_markers}}, -\code{\link{pbmc_matrix_small}}, -\code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} -} -\concept{data} -\keyword{datasets} diff --git a/man/object_ref.Rd b/man/object_ref.Rd index fffbf6eb6..d289bf280 100644 --- a/man/object_ref.Rd +++ b/man/object_ref.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seurat_wrapper.R +% Please edit documentation in R/object_access.R \name{object_ref} \alias{object_ref} \alias{object_ref.default} @@ -72,8 +72,9 @@ and cell types as column names Function to convert labelled object to avg expression matrix } \examples{ +so <- so_pbmc() object_ref( - s_small3, - cluster_col = "RNA_snn_res.1" + so, + cluster_col = "seurat_clusters" ) } diff --git a/man/parse_loc_object.Rd b/man/parse_loc_object.Rd index 7d1efe580..35c6ba42d 100644 --- a/man/parse_loc_object.Rd +++ b/man/parse_loc_object.Rd @@ -19,15 +19,15 @@ parse_loc_object( \item{type}{look up predefined slots/loc} -\item{expr_loc}{expression matrix location} +\item{expr_loc}{function that extracts expression matrix} -\item{meta_loc}{metadata location} +\item{meta_loc}{function that extracts metadata} -\item{var_loc}{variable genes location} +\item{var_loc}{function that extracts variable genes} \item{cluster_col}{column of clustering from metadata} -\item{lookuptable}{if not supplied, will look in built-in table for object parsing} +\item{lookuptable}{if not supplied, will use object_loc_lookup() for parsing.} } \value{ list of expression, metadata, vargenes, cluster_col info from object @@ -36,6 +36,7 @@ list of expression, metadata, vargenes, cluster_col info from object more flexible parsing of single cell objects } \examples{ -obj <- parse_loc_object(s_small3) +so <- so_pbmc() +obj <- parse_loc_object(so) length(obj) } diff --git a/man/pbmc_markers.Rd b/man/pbmc_markers.Rd index 22d691a8b..a4639a806 100644 --- a/man/pbmc_markers.Rd +++ b/man/pbmc_markers.Rd @@ -23,14 +23,10 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, \code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/pbmc_markers_M3Drop.Rd b/man/pbmc_markers_M3Drop.Rd index 6e5d84e4e..9611090ff 100644 --- a/man/pbmc_markers_M3Drop.Rd +++ b/man/pbmc_markers_M3Drop.Rd @@ -23,14 +23,10 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, \code{\link{pbmc_markers}}, \code{\link{pbmc_matrix_small}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/pbmc_matrix_small.Rd b/man/pbmc_matrix_small.Rd index 0fd6d7db4..d2eed8e4e 100644 --- a/man/pbmc_matrix_small.Rd +++ b/man/pbmc_matrix_small.Rd @@ -23,14 +23,10 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/pbmc_meta.Rd b/man/pbmc_meta.Rd index 726e3a117..a08e6edfe 100644 --- a/man/pbmc_meta.Rd +++ b/man/pbmc_meta.Rd @@ -23,14 +23,10 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_vargenes}} } \concept{data} \keyword{datasets} diff --git a/man/pbmc_vargenes.Rd b/man/pbmc_vargenes.Rd index e6beac4b2..4d79040c2 100644 --- a/man/pbmc_vargenes.Rd +++ b/man/pbmc_vargenes.Rd @@ -23,14 +23,10 @@ Other data: \code{\link{downrefs}}, \code{\link{human_genes_10x}}, \code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_markers}}, +\code{\link{pbmc_markers_M3Drop}}, \code{\link{pbmc_matrix_small}}, -\code{\link{pbmc_meta}}, -\code{\link{s_small3}}, -\code{\link{s_small}}, -\code{\link{sce_small}} +\code{\link{pbmc_meta}} } \concept{data} \keyword{datasets} diff --git a/man/s_small.Rd b/man/s_small.Rd deleted file mode 100644 index 61e2eb624..000000000 --- a/man/s_small.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{s_small} -\alias{s_small} -\title{Small clustered Seurat2 object} -\format{ -An object of class \code{seurat} of length 1. -} -\source{ -\verb{[pbmc_small]} processed by seurat -} -\usage{ -s_small -} -\description{ -Small clustered Seurat2 object -} -\seealso{ -Other data: -\code{\link{cbmc_m}}, -\code{\link{cbmc_ref}}, -\code{\link{downrefs}}, -\code{\link{human_genes_10x}}, -\code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, -\code{\link{pbmc_markers}}, -\code{\link{pbmc_matrix_small}}, -\code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{sce_small}} -} -\concept{data} -\keyword{datasets} diff --git a/man/s_small3.Rd b/man/s_small3.Rd deleted file mode 100644 index 3ad0b5702..000000000 --- a/man/s_small3.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{s_small3} -\alias{s_small3} -\title{Small clustered Seurat3 object} -\format{ -An object of class \code{Seurat} of length 1. -} -\source{ -\verb{[pbmc_small]} processed by Seurat -} -\usage{ -s_small3 -} -\description{ -Small clustered Seurat3 object -} -\seealso{ -Other data: -\code{\link{cbmc_m}}, -\code{\link{cbmc_ref}}, -\code{\link{downrefs}}, -\code{\link{human_genes_10x}}, -\code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, -\code{\link{pbmc_markers}}, -\code{\link{pbmc_matrix_small}}, -\code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small}}, -\code{\link{sce_small}} -} -\concept{data} -\keyword{datasets} diff --git a/man/sce_pbmc.Rd b/man/sce_pbmc.Rd new file mode 100644 index 000000000..a2563c2df --- /dev/null +++ b/man/sce_pbmc.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/object_access.R +\name{sce_pbmc} +\alias{sce_pbmc} +\title{An example SingleCellExperiment object} +\usage{ +sce_pbmc() +} +\value{ +a SingleCellExperiment object populated with data +from the \link{pbmc_matrix_small} scRNA-seq dataset, additionally +annotated with cluster assignments. +} +\description{ +An example SingleCellExperiment object +} diff --git a/man/sce_small.Rd b/man/sce_small.Rd deleted file mode 100644 index 5513c3129..000000000 --- a/man/sce_small.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{sce_small} -\alias{sce_small} -\title{Small SingleCellExperiment object} -\format{ -An object of class \code{SingleCellExperiment} with 200 rows and 200 columns. -} -\source{ -\url{https://github.com/hemberg-lab/scRNA.seq.datasets/blob/master/R/segerstolpe.R} -} -\usage{ -sce_small -} -\description{ -Small SingleCellExperiment object -} -\seealso{ -Other data: -\code{\link{cbmc_m}}, -\code{\link{cbmc_ref}}, -\code{\link{downrefs}}, -\code{\link{human_genes_10x}}, -\code{\link{mouse_genes_10x}}, -\code{\link{object_loc_lookup}}, -\code{\link{pbmc_markers_M3Drop}}, -\code{\link{pbmc_markers}}, -\code{\link{pbmc_matrix_small}}, -\code{\link{pbmc_meta}}, -\code{\link{pbmc_vargenes}}, -\code{\link{s_small3}}, -\code{\link{s_small}} -} -\concept{data} -\keyword{datasets} diff --git a/man/seurat_meta.Rd b/man/seurat_meta.Rd index 39fc9137c..dd020be63 100644 --- a/man/seurat_meta.Rd +++ b/man/seurat_meta.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seurat_wrapper.R +% Please edit documentation in R/object_access.R \name{seurat_meta} \alias{seurat_meta} \alias{seurat_meta.Seurat} @@ -24,5 +24,6 @@ dataframe of metadata, including dimension reduction plotting info Function to convert labelled seurat object to fully prepared metadata } \examples{ -m <- seurat_meta(s_small3) +so <- so_pbmc() +m <- seurat_meta(so) } diff --git a/man/seurat_ref.Rd b/man/seurat_ref.Rd index 0c0c4462d..7e47391f2 100644 --- a/man/seurat_ref.Rd +++ b/man/seurat_ref.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seurat_wrapper.R +% Please edit documentation in R/object_access.R \name{seurat_ref} \alias{seurat_ref} \alias{seurat_ref.Seurat} @@ -49,5 +49,6 @@ and cell types as column names Function to convert labelled seurat object to avg expression matrix } \examples{ -ref <- seurat_ref(s_small3, cluster_col = "RNA_snn_res.1") +so <- so_pbmc() +ref <- seurat_ref(so, cluster_col = "seurat_clusters") } diff --git a/man/so_pbmc.Rd b/man/so_pbmc.Rd new file mode 100644 index 000000000..6e47c0965 --- /dev/null +++ b/man/so_pbmc.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/object_access.R +\name{so_pbmc} +\alias{so_pbmc} +\title{An example Seurat object} +\usage{ +so_pbmc() +} +\value{ +a Seurat object populated with data +from the \link{pbmc_matrix_small} scRNA-seq dataset, additionally +annotated with cluster assignments. +} +\description{ +An example Seurat object +} diff --git a/man/write_meta.Rd b/man/write_meta.Rd index 8e8aceaf5..06da349dc 100644 --- a/man/write_meta.Rd +++ b/man/write_meta.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seurat_wrapper.R +% Please edit documentation in R/object_access.R \name{write_meta} \alias{write_meta} \alias{write_meta.Seurat} @@ -27,12 +27,14 @@ object with newly inserted metadata columns Function to write metadata to object } \examples{ +so <- so_pbmc() obj <- write_meta( - object = s_small3, - meta = seurat_meta(s_small3) + object = so, + meta = seurat_meta(so) ) +sce <- sce_pbmc() obj <- write_meta( - object = sce_small, - meta = object_data(sce_small, "meta.data") + object = sce, + meta = object_data(sce, "meta.data") ) } diff --git a/tests/testthat/test_cor.R b/tests/testthat/test_cor.R index 72d38f1df..f8fe0862b 100644 --- a/tests/testthat/test_cor.R +++ b/tests/testthat/test_cor.R @@ -115,140 +115,74 @@ test_that("test permutation", { expect_true(all(res_full$p_val >= 0 | res_full$p_val <= 0)) }) -# test_that("seurat object clustifying", { -# res <- clustify(s_small, -# cbmc_ref, -# cluster_col = "res.1", -# dr = "tsne" -# ) -# -# res <- clustify(s_small, -# cbmc_ref, -# cluster_col = "res.1", -# seurat_out = FALSE, -# per_cell = TRUE, -# dr = "tsne" -# ) -# -# res <- clustify(s_small, -# cbmc_ref, -# cluster_col = "res.1", -# seurat_out = FALSE, -# dr = "tsne" -# ) -# g <- plot_best_call( -# res, -# seurat_meta(s_small, -# dr = "tsne" -# ), -# cluster_col = "res.1", -# plot_r = TRUE, -# x = "tSNE_1", -# y = "tSNE_2" -# ) -# expect_true(ggplot2::is.ggplot(g[[1]])) -# }) - -# test_that("clustify reinserts seurat metadata correctly", { -# res <- clustify(s_small, -# cbmc_ref, -# cluster_col = "res.1", -# seurat_out = TRUE, -# per_cell = TRUE, -# dr = "tsne" -# ) -# res2 <- clustify(s_small, -# cbmc_ref, -# cluster_col = "res.1", -# seurat_out = TRUE, -# dr = "tsne" -# ) -# if ("Seurat" %in% loadedNamespaces()) { -# expect_true(class(res) %in% c("seurat")) -# } else { -# expect_true(is.matrix(res)) -# } -# }) - -test_that("seurat3 object clustifying", { - res <- clustify(s_small3, +so <- so_pbmc() + +test_that("seurat object clustifying", { + + res <- clustify(so, cbmc_ref, - cluster_col = "RNA_snn_res.1", - dr = "tsne" + cluster_col = "seurat_clusters", + dr = "umap" ) - res <- clustify(s_small3, + res <- clustify(so, cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = FALSE, per_cell = TRUE, - dr = "tsne" + dr = "umap" ) - res <- clustify(s_small3, + res <- clustify(so, cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = FALSE, - dr = "tsne" + dr = "umap" ) g <- plot_best_call(res, - seurat_meta(s_small3, - dr = "tsne" + seurat_meta(so, + dr = "umap" ), - cluster_col = "RNA_snn_res.1", - plot_r = TRUE + cluster_col = "seurat_clusters", + plot_r = TRUE, + x = "umap_1", + y = "umap_2" ) expect_true(ggplot2::is.ggplot(g[[1]])) }) test_that("object with passing vector as metadata", { - res <- clustify(s_small3, + res <- clustify(so, cbmc_ref, - metadata = s_small3@meta.data$RNA_snn_res.1, - dr = "tsne" + metadata = so$seurat_clusters, + dr = "umap" ) res <- clustify_lists( - s_small3, + so, marker = cbmc_m, - metadata = s_small3@meta.data$RNA_snn_res.1, - dr = "tsne", + metadata = so$seurat_clusters, + dr = "umap", metric = "posneg", seurat_out = FALSE ) - # res <- clustify(s_small, - # cbmc_ref, - # metadata = s_small@meta.data$res.1, - # dr = "tsne" - # ) - # res <- clustify_lists( - # s_small, - # marker = cbmc_m, - # metadata = s_small@meta.data$res.1, - # dr = "tsne", - # metric = "posneg", - # seurat_out = FALSE - # ) + expect_true(is.matrix(res)) }) -test_that("clustify reinserts seurat3 metadata correctly", { - res <- clustify(s_small3, +test_that("clustify reinserts seurat metadata correctly", { + res <- clustify(so, cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = TRUE, per_cell = TRUE, - dr = "tsne" + dr = "umap" ) - res2 <- clustify(s_small3, - cbmc_ref, - cluster_col = "RNA_snn_res.1", - seurat_out = TRUE, - dr = "tsne" - ) - if ("SeuratObject" %in% loadedNamespaces()) { - expect_true(class(res) %in% c("Seurat")) - } else { - expect_true(is.matrix(res)) - } + expect_true(is(res, "Seurat")) + # all input data identical on return + expect_true(all(so@meta.data == res@meta.data[, colnames(so@meta.data)])) + # clustifyr results present + expect_true(all(c("umap_1", "umap_2", "type", "r") %in% + colnames(res@meta.data))) + }) test_that("get_similarity handles NA entries", { @@ -369,21 +303,22 @@ test_that("cor throws readable error when ref_mat has 0 cols", { test_that("sparse matrix is accepted as input", { res <- clustify( - input = s_small3@assays$RNA@counts, - metadata = s_small3@meta.data, + input = as(pbmc_matrix_small, "sparseMatrix"), + metadata = pbmc_meta, ref_mat = cbmc_ref, query_genes = pbmc_vargenes, - cluster_col = "letter.idents", + cluster_col = "seurat_clusters", verbose = TRUE ) - - expect_equal(2, nrow(res)) + ex <- c(length(unique(pbmc_meta$seurat_clusters)), + ncol(cbmc_ref)) + expect_equal(dim(res), ex) }) test_that("correct error message is displayed for nonexistent cluster_col", { expect_error(res <- clustify( - input = s_small3@assays$RNA@counts, - metadata = s_small3@meta.data, + input = so@assays$RNA@counts, + metadata = so@meta.data, ref_mat = cbmc_ref, query_genes = pbmc_vargenes, cluster_col = "a", @@ -392,8 +327,7 @@ test_that("correct error message is displayed for nonexistent cluster_col", { }) test_that("input Seurat metadata columns are not changed (type, r, rn, etc). #259", { - skip_if_not_installed("SeuratObject") - tmp <- s_small3 + tmp <- so tmp@meta.data$type <- 0L tmp@meta.data$rn <- 0L tmp@meta.data$r <- 0L @@ -401,8 +335,8 @@ test_that("input Seurat metadata columns are not changed (type, r, rn, etc). #25 res <- clustify( input = tmp, ref_mat = cbmc_ref, - cluster_col = "RNA_snn_res.1", - dr = "tsne" + cluster_col = "seurat_clusters", + dr = "umap" ) expect_true(all(c("type", "rn", "r") %in% colnames(res@meta.data))) @@ -411,63 +345,63 @@ test_that("input Seurat metadata columns are not changed (type, r, rn, etc). #25 expect_true(all(res@meta.data$r == 0L)) }) -test_that("clustify_lists works with pos_neg_select and Seurat3 object", { +test_that("clustify_lists works with pos_neg_select and Seurat object", { res <- clustify_lists( - s_small3, + so, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", metric = "posneg", seurat_out = FALSE ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) -test_that("clustify_lists works with pos_neg_select, Seurat3 object, and lists of genes", { +test_that("clustify_lists works with pos_neg_select, Seurat object, and lists of genes", { res <- clustify_lists( - s_small3, + so, marker = as.list(cbmc_m), marker_inmatrix = FALSE, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", metric = "posneg", seurat_out = FALSE ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) -test_that("clustify_lists works with pos_neg_select, Seurat3 object, and matrix preprocessed by pos_neg_marker", { +test_that("clustify_lists works with pos_neg_select, Seurat object, and matrix preprocessed by pos_neg_marker", { res <- clustify_lists( - s_small3, + so, marker = pos_neg_marker(as.list(cbmc_m)), marker_inmatrix = FALSE, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", metric = "posneg", seurat_out = FALSE ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) -test_that("clustify_lists works with pct and Seurat3 object", { +test_that("clustify_lists works with pct and Seurat object", { res <- clustify_lists( - s_small3, + so, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", metric = "pct", seurat_out = FALSE ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) test_that("clustify_lists gives correct error message upon unrecognized method", { expect_error( res <- clustify_lists( - s_small3, + so, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", metric = "ptc", seurat_out = FALSE ) @@ -495,13 +429,14 @@ test_that("clustify takes factor for metadata", { expect_true(res[1, 1] != res2[1, 1]) }) +sce <- sce_pbmc() test_that("sce object clustifying", { - res <- clustify(sce_small, + res <- clustify(sce, cbmc_ref, - cluster_col = "cell_type1", + cluster_col = "clusters", obj_out = FALSE ) - expect_true(nrow(res) == 13) + expect_true(nrow(res) == nlevels(sce$clusters)) }) test_that("sce object clustify_lists", { @@ -510,14 +445,14 @@ test_that("sce object clustify_lists", { panm <- data.frame(other, delta) res <- clustify_lists( - sce_small, + sce, marker = panm, - cluster_col = "cell_type1", + cluster_col = "clusters", obj_out = FALSE, n = 100, metric = "pct" ) - expect_true(nrow(res) == 13) + expect_true(nrow(res) == nlevels(sce$clusters)) }) test_that("clustify filters low cell number clusters", { @@ -528,7 +463,7 @@ test_that("clustify filters low cell number clusters", { metadata = pbmc_meta2$classified, ref_mat = cbmc_ref, query_genes = pbmc_vargenes, - dr = "tsne", + dr = "umap", low_threshold_cell = 2, seurat_out = FALSE ) @@ -542,7 +477,7 @@ test_that("clustify_lists filters low cell number clusters", { input = pbmc_matrix_small, metadata = pbmc_meta2$classified, marker = cbmc_m, - dr = "tsne", + dr = "umap", low_threshold_cell = 2, seurat_out = FALSE ) @@ -550,37 +485,38 @@ test_that("clustify_lists filters low cell number clusters", { }) test_that("clustify n_genes options limits number of variable genes", { - res <- clustify(s_small3, + res <- clustify(so, cbmc_ref, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", obj_out = FALSE ) - res2 <- clustify(s_small3, + res2 <- clustify(so, cbmc_ref, n_genes = 2, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", obj_out = FALSE ) expect_true(res[1,1] != res2[1,1]) }) test_that("clustify n_genes options ignored if too large", { - res <- clustify(s_small3, + res <- clustify(so, cbmc_ref, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + cluster_col = "seurat_clusters", + dr = "umap", + n_genes = 2e3, obj_out = FALSE ) - res2 <- clustify(s_small3, + res2 <- clustify(so, cbmc_ref, - n_genes = 20, - cluster_col = "RNA_snn_res.1", - dr = "tsne", + n_genes = 2e6, + cluster_col = "seurat_clusters", + dr = "umap", obj_out = FALSE ) - expect_true(res[1,1] == res2[1,1]) + expect_true(all.equal(res, res2)) }) test_that("pseudobulk using median", { diff --git a/tests/testthat/test_gsea.R b/tests/testthat/test_gsea.R index f20973df8..5cfe95059 100644 --- a/tests/testthat/test_gsea.R +++ b/tests/testthat/test_gsea.R @@ -4,6 +4,7 @@ context("run_gsea") shush <- function(...) { capture.output(..., file = nullfile()) } + test_that("output is correctly formatted", { data("pbmc_vargenes") diff --git a/tests/testthat/test_list.R b/tests/testthat/test_list.R index 9a2cc64e0..e4ec3d932 100644 --- a/tests/testthat/test_list.R +++ b/tests/testthat/test_list.R @@ -124,95 +124,34 @@ test_that("gsea outputs in cor matrix format", { expect_equal(9, nrow(res2)) }) -# test_that("seurat object clustify_lists-ing", { -# res <- clustify_lists( -# s_small, -# per_cell = FALSE, -# marker = pbmc_markers, -# marker_inmatrix = FALSE, -# metric = "jaccard", -# cluster_col = "res.1", -# seurat_out = FALSE, -# dr = "tsne" -# ) -# res <- clustify_lists( -# s_small, -# per_cell = FALSE, -# marker = pbmc_markers, -# marker_inmatrix = FALSE, -# metric = "jaccard", -# cluster_col = "res.1", -# seurat_out = FALSE, -# dr = "tsne" -# ) -# g <- plot_best_call( -# res, -# seurat_meta(s_small, -# dr = "tsne" -# ), -# cluster_col = "res.1", -# plot_r = TRUE, -# x = "tSNE_1", -# y = "tSNE_2" -# ) -# expect_true(ggplot2::is.ggplot(g[[1]])) -# }) - -# test_that("clustify_lists inserts seurat metadata correctly", { -# res <- clustify_lists( -# s_small, -# per_cell = FALSE, -# marker = pbmc_markers, -# marker_inmatrix = FALSE, -# metric = "jaccard", -# cluster_col = "res.1", -# seurat_out = TRUE, -# dr = "tsne" -# ) -# res2 <- clustify_lists( -# s_small, -# per_cell = TRUE, -# marker = pbmc_markers, -# marker_inmatrix = FALSE, -# metric = "jaccard", -# cluster_col = "res.1", -# seurat_out = TRUE, -# dr = "tsne" -# ) -# if ("Seurat" %in% loadedNamespaces()) { -# expect_true(class(res) %in% c("seurat")) -# } else { -# expect_true(is.matrix(res)) -# } -# }) - +so <- so_pbmc() test_that("seurat3 object clustify_lists-ing", { res <- clustify_lists( - s_small3, + so, per_cell = FALSE, marker = pbmc_markers, marker_inmatrix = FALSE, metric = "jaccard", - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = TRUE, dr = "tsne" ) res <- clustify_lists( - s_small3, + so, per_cell = FALSE, marker = pbmc_markers, marker_inmatrix = FALSE, metric = "jaccard", - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = FALSE, dr = "tsne" ) g <- plot_best_call( res, - seurat_meta(s_small3, + seurat_meta(so, dr = "tsne" ), - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", plot_r = TRUE, x = "tSNE_1", y = "tSNE_2" @@ -222,22 +161,22 @@ test_that("seurat3 object clustify_lists-ing", { test_that("clustify_lists inserts seurat3 metadata correctly", { res <- clustify_lists( - s_small3, + so, per_cell = FALSE, marker = pbmc_markers, marker_inmatrix = FALSE, metric = "jaccard", - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = TRUE, dr = "tsne" ) res2 <- clustify_lists( - s_small3, + so, per_cell = TRUE, marker = pbmc_markers, marker_inmatrix = FALSE, metric = "jaccard", - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", seurat_out = TRUE, dr = "tsne" ) @@ -286,28 +225,16 @@ test_that("run all gene list functions in clustify_lists", { test_that("run all gene list functions in clustify_lists and seurat object", { res <- clustify_lists( - s_small3, + so, marker = cbmc_m, dr = "tsne", - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", metric = "consensus", seurat_out = TRUE ) expect_true(is.data.frame(res) | "Seurat" %in% class(res)) }) -# test_that("run all gene list functions in clustify_lists and seurat object", { -# res <- clustify_lists( -# s_small, -# marker = cbmc_m, -# dr = "tsne", -# cluster_col = "res.1", -# metric = "consensus", -# seurat_out = TRUE -# ) -# expect_true(is.data.frame(res) | "seurat" %in% class(res)) -# }) - test_that("lists of genes will work with posneg", { lst_of_markers <- split(pbmc_markers$gene, pbmc_markers$cluster) res <- clustify_lists( @@ -354,3 +281,4 @@ test_that("clustify_lists input_markers mode with uneven number of marker per cl ) expect_equal(1, length(results)) }) + diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index f0062de85..8bcbe9b4c 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -363,158 +363,95 @@ test_that("gene_pct_markerm norm options work", { expect_true(nrow(res2) == 9) }) -# test_that("clustify_nudge works with options and seruat2", { -# res <- clustify_nudge( -# input = s_small, -# ref_mat = cbmc_ref, -# marker = cbmc_m, -# cluster_col = "res.1", -# threshold = 0.8, -# seurat_out = FALSE, -# mode = "pct", -# dr = "tsne" -# ) -# expect_true(nrow(res) == 4) -# }) - +so <- so_pbmc() +sce <- sce_pbmc() test_that("clustify_nudge works with seurat_out", { - # res <- clustify_nudge( - # input = s_small, - # ref_mat = cbmc_ref, - # marker = cbmc_m, - # cluster_col = "res.1", - # threshold = 0.8, - # seurat_out = TRUE, - # mode = "pct", - # dr = "tsne" - # ) res <- clustify_nudge( - input = s_small3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, threshold = 0.8, seurat_out = TRUE, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", mode = "pct", - dr = "tsne" + dr = "umap" ) - expect_true(3 == 3) + expect_true(is(res, "Seurat")) }) -# test_that("clustify_nudge works with rank/posneg option", { -# res <- clustify_nudge( -# input = s_small, -# ref_mat = cbmc_ref, -# marker = cbmc_m, -# cluster_col = "res.1", -# threshold = 0.8, -# seurat_out = FALSE, -# mode = "rank", -# dr = "tsne" -# ) -# expect_true(nrow(res) == 4) -# }) -test_that("clustify_nudge works with options and seruat3", { +test_that("clustify_nudge works with options and Seurat", { res <- clustify_nudge( - input = s_small3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, threshold = 0.8, - seurat_out = FALSE, - cluster_col = "RNA_snn_res.1", + obj_out = FALSE, + cluster_col = "seurat_clusters", mode = "pct", - dr = "tsne" + dr = "umap" ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) -# test_that("clustify_nudge works with seurat_out option", { -# res <- clustify_nudge( -# input = s_small, -# ref_mat = cbmc_ref, -# marker = cbmc_m, -# cluster_col = "res.1", -# threshold = 0.8, -# seurat_out = FALSE, -# marker_inmatrix = FALSE, -# mode = "pct", -# dr = "tsne" -# ) -# expect_true(nrow(res) == 4) -# }) test_that("clustify_nudge.Seurat works with seurat_out option", { res <- clustify_nudge( - input = s_small3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", threshold = 0.8, seurat_out = TRUE, marker_inmatrix = FALSE, mode = "pct", - dr = "tsne" + dr = "umap" ) - + expect_true(is(res, "Seurat")) + res <- clustify_nudge( - input = s_small3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", threshold = 0.8, seurat_out = FALSE, marker_inmatrix = FALSE, mode = "pct", - dr = "tsne" + dr = "umap" ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) test_that("clustify_nudge works with obj_out option", { - s3 <- s_small3 - setClass( - "ser3", - representation(meta.data = "data.frame") - ) - class(s3) <- "ser3" - object_loc_lookup2 <- data.frame( - ser3 = c( - expr = "input@assays$RNA@data", - meta = "input@meta.data", - var = "input@assays$RNA@var.features", - col = "RNA_snn_res.1" - ), - stringsAsFactors = FALSE - ) - + res <- clustify_nudge( - input = s3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, - lookuptable = object_loc_lookup2, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", threshold = 0.8, obj_out = TRUE, marker_inmatrix = FALSE, mode = "pct", - dr = "tsne" + dr = "umap" ) + + expect_true(is(res, "Seurat")) res2 <- clustify_nudge( - input = s3, + input = so, ref_mat = cbmc_ref, marker = cbmc_m, - lookuptable = object_loc_lookup2, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", threshold = 0.8, obj_out = FALSE, marker_inmatrix = FALSE, mode = "pct", - dr = "tsne" + dr = "umap" ) - expect_true(nrow(res2) == 3) + expect_true(nrow(res2) == length(unique(so$seurat_clusters))) }) test_that("clustify_nudge works with list of markers", { @@ -655,162 +592,71 @@ test_that("get_best_str finds correct values", { expect_equal(stringr::str_sub(a, 1, 2), stringr::str_sub(a2, 1, 2)) }) -# test_that("seurat_ref gets correct averages", { -# avg <- seurat_ref(s_small, -# cluster_col = "res.1", -# var_genes_only = TRUE -# ) -# avg3 <- seurat_ref(s_small, -# cluster_col = "res.1", -# var_genes_only = TRUE, -# if_log = FALSE -# ) -# avg2 <- seurat_ref(s_small, -# cluster_col = "res.1", -# var_genes_only = "PCA" -# ) -# expect_true(ncol(avg) == 4) -# }) - -test_that("object_ref with seurat3", { - s3 <- s_small3 - avg <- object_ref(s3, +test_that("object_ref with Seurat", { + avg <- object_ref(so, var_genes_only = TRUE, - cluster_col = "RNA_snn_res.1" + cluster_col = "seurat_clusters" ) - expect_true(ncol(avg) == 3) + expect_true(ncol(avg) == length(unique(so$seurat_clusters))) }) test_that("object_ref with SingleCellExperiment", { - sce <- sce_small avg <- object_ref(sce, - cluster_col = "cell_type1" + cluster_col = "clusters" ) - expect_equal(dim(avg), c(200, 13)) + expect_equal(dim(avg), + c(nrow(sce), length(unique(sce$clusters)))) }) test_that("object_ref gets correct averages", { - s3 <- s_small3 - class(s3) <- "ser3" - object_loc_lookup2 <- data.frame( - ser3 = c( - expr = "input@assays$RNA@data", - meta = "input@meta.data", - var = "input@assays$RNA@var.features", - col = "RNA_snn_res.1" - ), - stringsAsFactors = FALSE - ) - avg <- object_ref(s3, - lookuptable = object_loc_lookup2, + avg <- object_ref(so, + cluster_col = "seurat_clusters", var_genes_only = TRUE ) - expect_true(ncol(avg) == 3) + expect_true(ncol(avg) == length(unique(so$seurat_clusters))) }) -# test_that("seurat_ref gets other assay slots", { -# avg <- seurat_ref( -# s_small, -# cluster_col = "res.1", -# assay_name = "ADT", -# var_genes_only = TRUE -# ) -# avg2 <- seurat_ref( -# s_small, -# cluster_col = "res.1", -# assay_name = c("ADT", "ADT2"), -# var_genes_only = TRUE -# ) -# expect_true(nrow(avg2) - nrow(avg) == 2) -# }) -test_that("seurat_ref gets correct averages with seurat3 object", { +test_that("seurat_ref gets correct averages with Seurat object", { avg <- seurat_ref( - s_small3, - cluster_col = "RNA_snn_res.1", - assay_name = c("ADT", "ADT2"), - var_genes_only = TRUE - ) - avg <- seurat_ref( - s_small3, - cluster_col = "RNA_snn_res.1", - assay_name = c("ADT"), + so, + cluster_col = "seurat_clusters", var_genes_only = TRUE ) + tmp <- so + rna_assay <- tmp[["RNA"]] + Key(rna_assay) <- "rna2_" + tmp[["RNA2"]] <- rna_assay + avg2 <- seurat_ref( - s_small3, - cluster_col = "RNA_snn_res.1", - assay_name = c("ADT", "ADT2"), - var_genes_only = "PCA" + tmp, + cluster_col = "seurat_clusters", + assay_name = "RNA2", + var_genes_only = TRUE ) - expect_true(nrow(avg2) - nrow(avg) == 2) + expect_true(nrow(avg2) == nrow(avg) * 2) }) test_that("object parsing works for custom object", { - s3 <- s_small3 - class(s3) <- "ser3" - object_loc_lookup2 <- data.frame( - ser3 = c( - expr = "input@assays$RNA@data", - meta = "input@meta.data", - var = "input@assays$RNA@var.features", - col = "RNA_snn_res.1" - ), - stringsAsFactors = FALSE - ) - res2 <- clustify(s3, - cbmc_ref, - lookuptable = object_loc_lookup2, + res2 <- clustify(so, + cbmc_ref, + cluster_col = "seurat_clusters", obj_out = FALSE ) res <- clustify_lists( - s3, + so, + cluster_col = "seurat_clusters", marker = pbmc_markers, marker_inmatrix = FALSE, - lookuptable = object_loc_lookup2, obj_out = FALSE ) expect_true(nrow(res) == nrow(res2)) }) -test_that("object metadata assignment works for custom object", { - s3 <- s_small3 - setClass( - "ser3", - representation(meta.data = "data.frame") - ) - class(s3) <- "ser3" - object_loc_lookup2 <- data.frame( - ser3 = c( - expr = "input@assays$RNA@data", - meta = "input@meta.data", - var = "input@assays$RNA@var.features", - col = "RNA_snn_res.1" - ), - stringsAsFactors = FALSE - ) - - res2 <- clustify(s3, - cbmc_ref, - lookuptable = object_loc_lookup2, - obj_out = TRUE - ) - - res3 <- clustify_lists( - s3, - marker = pbmc_markers, - marker_inmatrix = FALSE, - lookuptable = object_loc_lookup2, - obj_out = TRUE, - rename_prefix = "A" - ) - - expect_true(is(res2, "ser3")) -}) test_that("cor_to_call renaming with suffix input works as intended, per_cell or otherwise", { res <- clustify( @@ -857,21 +703,15 @@ test_that("cor_to_call renaming with suffix input works as intended, per_cell or }) test_that("renaming with suffix input works as intended with clusify wrapper", { - # res <- clustify( - # input = s_small, - # ref_mat = cbmc_ref, - # cluster_col = "res.1", - # rename_suff = "a", - # dr = "tsne" - # ) - res2 <- clustify( - input = s_small3, + + res <- clustify( + input = so, ref_mat = cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", rename_suff = "a", - dr = "tsne" + dr = "umap" ) - expect_true(!is.null(res2)) + expect_true(!is.null(res)) }) test_that("ref_marker_select works with cutoffs", { @@ -945,7 +785,7 @@ test_that("clustify_nudge works with pos_neg_select", { cluster_col = "classified", norm = 0.5 ) - expect_true(all(dim(res) == c(9, 3))) + expect_true(all(dim(res) == c(length(unique(so$classified)), 3))) }) test_that("reverse_marker_matrix takes matrix of markers input", { @@ -1030,40 +870,22 @@ test_that("paring gmt files works on included example", { expect_true(is.list(gmt_list)) }) -# test_that("clustify_nudge works with pos_neg_select and seurat2 object", { -# pn_ref2 <- data.frame( -# "CD8 T" = c(0, 0, 1), -# row.names = c("CD4", "clustifyr0", "CD8B"), -# check.names = FALSE -# ) -# res <- clustify_nudge( -# s_small, -# cbmc_ref, -# pn_ref2, -# cluster_col = "res.1", -# norm = 0.5, -# dr = "tsne", -# seurat_out = FALSE -# ) -# expect_true(nrow(res) == 4) -# }) - -test_that("clustify_nudge works with pos_neg_select and Seurat3 object", { +test_that("clustify_nudge works with pos_neg_select and Seurat object", { pn_ref2 <- data.frame( "CD8 T" = c(0, 0, 1), row.names = c("CD4", "clustifyr0", "CD8B"), check.names = FALSE ) res <- clustify_nudge( - s_small3, + so, cbmc_ref, pn_ref2, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", norm = 0.5, - dr = "tsne", + dr = "umap", seurat_out = FALSE ) - expect_true(nrow(res) == 3) + expect_true(nrow(res) == length(unique(so$seurat_clusters))) }) test_that("pos_neg_marker takes list, matrix, and dataframe", { @@ -1212,7 +1034,7 @@ test_that("cor_to_call can collapse_to_cluster", { # ref_mat = cbmc_ref, # query_genes = pbmc_vargenes, # cluster_col = "res.1", -# dr = "tsne", +# dr = "umap", # per_cell = TRUE, # collapse_to_cluster = TRUE # ) @@ -1221,12 +1043,12 @@ test_that("cor_to_call can collapse_to_cluster", { # test_that("seurat_meta warns about not finding dr", { # m <- seurat_meta(s_small, -# dr = "tsne" +# dr = "umap" # ) # m2 <- seurat_meta(s_small, # dr = "s" # ) -# m3 <- seurat_meta(s_small3, +# m3 <- seurat_meta(so, # dr = "s" # ) # expect_true(all(rownames(m) == rownames(m2))) @@ -1252,32 +1074,6 @@ test_that("find_rank_bias and query_rank_bias run correctly", { expect_true(all(dim(qres) == c(599,2))) }) -# test_that("assess_rank_bias goes through all pairs except unassigned", { -# avg2 <- average_clusters( -# pbmc_matrix_small, -# pbmc_meta$seurat_clusters -# ) -# res <- clustify( -# input = pbmc_matrix_small, -# metadata = pbmc_meta, -# ref_mat = cbmc_ref, -# query_genes = pbmc_vargenes, -# cluster_col = "seurat_clusters" -# ) -# call1 <- cor_to_call( -# res, -# metadata = pbmc_meta, -# cluster_col = "seurat_clusters", -# collapse_to_cluster = FALSE, -# threshold = 0.8 -# ) -# res_rank <- assess_rank_bias( -# avg2, -# cbmc_ref, -# res = call1 -# ) -# expect_true(length(res_rank) == 8) -# }) test_that("repeated insertionn of types into metdadata renames correctly", { res <- clustify( @@ -1316,93 +1112,83 @@ test_that("repeated insertionn of types into metdadata renames correctly", { }) test_that("object_ref with sce", { - avg <- object_ref(sce_small, - cluster_col = "cell_type1" + avg <- object_ref(sce, + cluster_col = "clusters" ) - expect_true(ncol(avg) == 13) + expect_true(ncol(avg) == length(unique(sce$clusters))) }) test_that("object_data works with sce", { mat <- object_data( - object = sce_small, + object = sce, slot = "data" ) - expect_true(ncol(mat) == 200) + expect_true(ncol(mat) == ncol(sce)) }) -# test_that("object_data works with seuratv2", { -# mat <- object_data( -# object = s_small, -# slot = "data" -# ) -# expect_true(ncol(mat) == 80) -# }) - -test_that("object_data works with Seuratv3", { +test_that("object_data works with Seurat", { mat <- object_data( - object = s_small3, + object = so, slot = "data" ) - expect_true(ncol(mat) == 80) + expect_true(ncol(mat) == ncol(so)) }) -test_that("object_data works with Seuratv4 spatial", { - s_small4 <- s_small3 - s_small4@assays$Spatial <- s_small4@assays$RNA - s_small4@assays$RNA@var.features <- NA - mat <- object_data( - object = s_small4, +test_that("object_data respects DefaultAssay in seurat object", { + spat <- so + mock_spat_assay <- spat[["RNA"]] + Key(mock_spat_assay) <- "spatial_" + spat[["Spatial"]] <- mock_spat_assay + DefaultAssay(spat) <- "Spatial" + var_genes <- object_data( + object = spat, slot = "var.genes" ) - expect_true(length(mat) > 1) + expect_true(length(var_genes) > 1) }) -test_that("append_genes creates a union reference matrix", { +test_that("append_genes pads matrix with supplied genes", { mat <- append_genes( - gene_vector = human_genes_10x, - ref_matrix = cbmc_ref + gene_vector = human_genes_10x, + ref_matrix = cbmc_ref ) - expect_true(nrow(mat) == 33514) -}) - -test_that("append_genes creates a union reference matrix", { + + expect_equal(nrow(mat), length(human_genes_10x)) + mat <- append_genes( - gene_vector = human_genes_10x, - ref_matrix = pbmc_matrix_small + gene_vector = human_genes_10x, + ref_matrix = pbmc_matrix_small ) - expect_true(nrow(mat) == 33514) -}) - -test_that("append_genes creates a union reference matrix", { + expect_equal(nrow(mat), length(human_genes_10x)) + + og_mat <- get_seurat_matrix(so) mat <- append_genes( gene_vector = human_genes_10x, - ref_matrix = s_small3@assays$RNA@counts - ) - expect_true(nrow(mat) == 33514) -}) - -test_that("check raw counts of matrices", { - mat <- check_raw_counts( - counts_matrix = pbmc_matrix_small, - max_log_value = 50 - ) - expect_true(mat == "log-normalized") -}) - -test_that("check raw counts of matrices", { - mat <- check_raw_counts( - counts_matrix = s_small3@assays$RNA@counts, - max_log_value = 50 - ) - expect_true(mat == "raw counts") -}) - -test_that("check raw counts of matrices", { - mat <- check_raw_counts( - counts_matrix = s_small3@assays$RNA@data, - max_log_value = 50 - ) - expect_true(mat == "log-normalized") + ref_matrix = og_mat + ) + expect_equal(nrow(mat), length(human_genes_10x)) +}) + +test_that("check data type of matrices", { + mat_type <- check_raw_counts( + counts_matrix = pbmc_matrix_small, + max_log_value = 50 + ) + expect_equal(mat_type, "log-normalized") + + m <- matrix(sample(10:200, 100, replace = TRUE)) + mat_type <- check_raw_counts( + counts_matrix = m, + max_log_value = 50 + ) + expect_equal(mat_type, "raw counts") + + og_mat <- get_seurat_matrix(so) + mat_type <- check_raw_counts( + counts_matrix = og_mat, + max_log_value = 50 + ) + expect_true(mat_type == "log-normalized") }) test_that("check atlas successfully built", { @@ -1428,58 +1214,75 @@ test_that("make_comb_ref works as intended", { test_that("calc_distance works as intended", { res <- calc_distance( - s_small3@reductions$tsne@cell.embeddings, - s_small3@meta.data$letter.idents, + SeuratObject::Embeddings(so, "umap"), + so$seurat_clusters, collapse_to_cluster = T ) - - expect_true(nrow(res) == 2 && ncol(res) == 2) + n_grps <- length(unique(so$seurat_clusters)) + ex_dim <- c(n_grps, n_grps) + expect_equal(dim(res), ex_dim) }) test_that("vec_out option works for clustify", { - res <- clustify(s_small3@assays$RNA@counts, - metadata = s_small3@meta.data, + if(is_seurat_v5()) { + mat <- SeuratObject::LayerData(so, "data") + } else{ + mat <- SeuratObject::GetAssayData(so, "data") + } + + res <- clustify(mat, + metadata = so@meta.data, ref_mat = cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", vec_out = TRUE ) - res2 <- clustify(s_small3, + res2 <- clustify(so, ref_mat = cbmc_ref, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", rename_prefix = "abc", vec_out = TRUE ) - res3 <- clustify(sce_small, + res3 <- clustify(sce, cbmc_ref, - cluster_col = "cell_type1", + cluster_col = "clusters", vec_out = TRUE ) - expect_true(length(res) == 80 && length(res2) == 80 && length(res3) == 200) + expect_equal(length(res), ncol(mat)) + expect_equal(length(res2), ncol(so)) + expect_equal(length(res3), ncol(sce)) }) test_that("vec_out option works for clustify_lists", { - res <- clustify_lists(s_small3@assays$RNA@counts, - metadata = s_small3@meta.data, + if(is_seurat_v5()) { + mat <- SeuratObject::LayerData(so, "data") + } else{ + mat <- SeuratObject::GetAssayData(so, "data") + } + res <- clustify_lists(mat, + metadata = so@meta.data, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", vec_out = TRUE ) - res2 <- clustify_lists(s_small3, + res2 <- clustify_lists(so, marker = cbmc_m, - cluster_col = "RNA_snn_res.1", + cluster_col = "seurat_clusters", rename_prefix = "abc", vec_out = TRUE ) - res3 <- clustify_lists(sce_small, + res3 <- clustify_lists(sce, marker = cbmc_m, - cluster_col = "cell_type1", + cluster_col = "clusters", vec_out = TRUE ) - expect_true(length(res) == 80 && length(res2) == 80 && length(res3) == 200) -}) \ No newline at end of file + expect_equal(length(res), ncol(mat)) + expect_equal(length(res2), ncol(so)) + expect_equal(length(res3), ncol(sce)) + +}) diff --git a/vignettes/clustifyr.Rmd b/vignettes/clustifyr.Rmd index c6d643136..559b84380 100644 --- a/vignettes/clustifyr.Rmd +++ b/vignettes/clustifyr.Rmd @@ -151,6 +151,7 @@ clustifyr_types <- plot_best_call( metadata = pbmc_meta, # meta.data table containing UMAP or tSNE data do_label = TRUE, # should the feature label be shown on each cluster? do_legend = FALSE, # should the legend be shown? + do_repel = FALSE, # use ggrepel to avoid overlapping labels cluster_col = "seurat_clusters" ) + ggtitle("clustifyr cell types") @@ -160,7 +161,8 @@ known_types <- plot_dims( data = pbmc_meta, # meta.data table containing UMAP or tSNE data feature = "classified", # name of column in meta.data to color clusters by do_label = TRUE, # should the feature label be shown on each cluster? - do_legend = FALSE # should the legend be shown? + do_legend = FALSE, # should the legend be shown? + do_repel = FALSE ) + ggtitle("Known cell types") @@ -211,54 +213,33 @@ pbmc_meta3 <- call_to_metadata( # Direct handling of `SingleCellExperiment` objects `clustifyr` can also use a `SingleCellExperiment` object as input and return a new `SingleCellExperiment` object with the cell types added as a column in the colData. -``` r +```{r} +library(SingleCellExperiment) +sce <- sce_pbmc() res <- clustify( - input = sce_small, # an SCE object + input = sce, # an SCE object ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type - cluster_col = "cell_type1", # name of column in meta.data containing cell clusters + cluster_col = "clusters", # name of column in meta.data containing cell clusters obj_out = TRUE # output SCE object with cell type inserted as "type" column ) -SingleCellExperiment::colData(res)[1:10,c("type", "r")] -#> DataFrame with 10 rows and 2 columns -#> type r -#> -#> AZ_A1 CD34+ 0.557678024919381 -#> AZ_A10 CD34+ 0.624777701661225 -#> AZ_A11 CD4 T 0.695067885340303 -#> AZ_A12 CD34+ 0.624777701661225 -#> AZ_A2 CD4 T 0.602804908958642 -#> AZ_A3 CD34+ 0.557678024919381 -#> AZ_A4 CD34+ 0.557678024919381 -#> AZ_A5 CD34+ 0.645378073051508 -#> AZ_A6 CD4 T 0.695067885340303 -#> AZ_A7 CD34+ 0.671644883893203 +colData(res)[1:10, c("type", "r")] ``` -# Direct handling of `seurat` v2 and v3 objects +# Direct handling of `Seurat` objects `clustifyr` can also use a `Seurat` object as input and return a new `Seurat` object with the cell types added as a column in the meta.data. -``` r +```{r} +so <- so_pbmc() res <- clustify( - input = s_small3, # a Seurat object - ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type - cluster_col = "RNA_snn_res.1", # name of column in meta.data containing cell clusters - obj_out = TRUE # output Seurat object with cell type inserted as "type" column + input = so, # a Seurat object + ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type + cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters + obj_out = TRUE # output Seurat object with cell type inserted as "type" column ) -res@meta.data[1:10, ] -#> nGene nUMI orig.ident res.0.8 res.1 type r -#> ATGCCAGAACGACT 47 70 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> CATGGCCTGTGCAT 52 85 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> GAACCTGATGAACC 50 87 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> TGACTGGATTCTCA 56 127 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> AGTCAGACTGCACA 53 173 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> TCTGATACACGTGT 48 70 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> TGGTATCTAAACAG 36 64 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> GCAGCTCTGTTTCT 45 72 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> GATATAACACGCAT 36 52 SeuratProject 0 0 Memory CD4 T 0.7047302 -#> AATGTTGACAGTCA 41 100 SeuratProject 0 0 Memory CD4 T 0.7047302 +res@meta.data[1:10, c("type", "r")] ``` # Building reference matrix from single cell expression matrix @@ -276,22 +257,16 @@ head(new_ref_matrix) # For further convenience, a shortcut function for generating reference matrix from `SingleCellExperiment` or `seurat` object is used. new_ref_matrix_sce <- object_ref( - input = sce_small, # SCE object - cluster_col = "cell_type1" # name of column in colData containing cell identities + input = sce, # SCE object + cluster_col = "clusters" # name of column in colData containing cell identities ) -new_ref_matrix_v3 <- seurat_ref( - seurat_object = s_small3, # SeuratV3 object - cluster_col = "RNA_snn_res.1" # name of column in meta.data containing cell identities +new_ref_matrix_so <- seurat_ref( + seurat_object = so, # Seurat object + cluster_col = "seurat_clusters" # name of column in meta.data containing cell identities ) -tail(new_ref_matrix_v3) -``` - -```{r "ucsc", eval = FALSE} -# There's also the option to pull UCSC cell browser data. -get_ext_reference(cb_url = "http://cells.ucsc.edu/?ds=kidney-atlas%2FFetal_Immune", - cluster_col = "celltype") +tail(new_ref_matrix_so) ```