diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 6564ea80..7da85478 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -53,8 +53,8 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest" } - - { os: windows-latest, r: '4.3', bioc: '3.18'} + - { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest" } + - { os: windows-latest, r: '4.4', bioc: '3.20'} ## Check https://github.com/r-lib/actions/tree/master/examples ## for examples using the http-user-agent env: @@ -106,16 +106,16 @@ jobs: uses: actions/cache@v3 with: path: ${{ env.R_LIBS_USER }} - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4- - name: Cache R packages on Linux if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " uses: actions/cache@v3 with: path: /home/runner/work/_temp/Library - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4- - name: Install Linux system dependencies if: runner.os == 'Linux' @@ -289,7 +289,7 @@ jobs: if: failure() uses: actions/upload-artifact@master with: - name: ${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-results + name: ${{ runner.os }}-biocversion-devel-r-4.4-results path: check ## Note that DOCKER_PASSWORD is really a token for your dockerhub diff --git a/DESCRIPTION b/DESCRIPTION index a1dbaef0..34ba393f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Authors@R: person(given = "Kent", family = "Riemondy", role = c("cre", "aut"), - email = "kent.riemondy@cuanschutz.edu"), + email = "kent.riemondy@gmail.com"), person(given = "Austin", family = "Gillen", role = "ctb", @@ -88,7 +88,7 @@ VignetteBuilder: ByteCompile: true Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 LazyData: true Config/Needs/website: pkgdown, diff --git a/NAMESPACE b/NAMESPACE index ecc46619..e3cd8c76 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,7 +49,6 @@ export(make_comb_ref) export(marker_select) export(matrixize_markers) export(object_data) -export(object_loc_lookup) export(object_ref) export(overcluster) export(overcluster_test) diff --git a/R/cellbrowsers.R b/R/cellbrowsers.R index 4b5c8d69..44398ef2 100644 --- a/R/cellbrowsers.R +++ b/R/cellbrowsers.R @@ -1,4 +1,3 @@ - #' Build reference atlases from external UCSC cellbrowsers #' #' @param cb_url URL of cellbrowser dataset (e.g. http://cells.ucsc.edu/?ds=cortex-dev). @@ -11,72 +10,83 @@ #' @return reference matrix #' @examples #' \dontrun{ -#' +#' #' # many datasets hosted by UCSC have UMI counts in the expression matrix #' # set if_log = FALSE if the expression matrix has not been natural log transformed -#' -#' get_ucsc_reference(cb_url = "https://cells.ucsc.edu/?ds=evocell+mus-musculus+marrow", -#' cluster_col = "Clusters", if_log = FALSE) -#' -#' get_ucsc_reference(cb_url = "http://cells.ucsc.edu/?ds=muscle-cell-atlas", -#' cluster_col = "cell_annotation", -#' if_log = FALSE) +#' +#' get_ucsc_reference( +#' cb_url = "https://cells.ucsc.edu/?ds=evocell+mus-musculus+marrow", +#' cluster_col = "Clusters", if_log = FALSE +#' ) +#' +#' get_ucsc_reference( +#' cb_url = "http://cells.ucsc.edu/?ds=muscle-cell-atlas", +#' cluster_col = "cell_annotation", +#' if_log = FALSE +#' ) #' } #' @export get_ucsc_reference <- function(cb_url, cluster_col, - ...){ - if(!requireNamespace("R.utils", quietly = TRUE)) { - stop("This function requires the R.utils package, please install\n", - "install.packages('R.utils')") - } - - if(!requireNamespace("data.table", quietly = TRUE)) { - stop("This function requires the data.table package, please install\n", - "install.packages('data.table')") - } - - url <- httr::parse_url(cb_url) - base_url <- url - ds <- url$query$ds - - # ds can include sub-datasets with syntax, "dataset+subdataset+and-so-on" - # files are hosted at urls: dataset/subdataset/andsoon/..." - ds_split <- strsplit(ds, "+", fixed = TRUE)[[1]] - ds <- paste0(ds_split, collapse = "/") - base_url$query <- "" + ...) { + if (!requireNamespace("R.utils", quietly = TRUE)) { + stop( + "This function requires the R.utils package, please install\n", + "install.packages('R.utils')" + ) + } + + if (!requireNamespace("data.table", quietly = TRUE)) { + stop( + "This function requires the data.table package, please install\n", + "install.packages('data.table')" + ) + } + + url <- httr::parse_url(cb_url) + base_url <- url + ds <- url$query$ds - mdata_url <- httr::modify_url(base_url, - path = file.path(ds, "meta.tsv")) - if(!httr::http_error(mdata_url)){ - mdata <- data.table::fread(mdata_url, data.table = FALSE, sep = "\t") - } else { - stop("unable to find metadata at url: ", mdata_url) - } - - mat_url <- httr::modify_url(base_url, - path = file.path(ds, "exprMatrix.tsv.gz")) - if(!httr::http_error(mat_url)){ - mat <- data.table::fread(mat_url, data.table = FALSE, sep = "\t") - } else { - stop("unable to find matrix at url: ", mat_url) - } + # ds can include sub-datasets with syntax, "dataset+subdataset+and-so-on" + # files are hosted at urls: dataset/subdataset/andsoon/..." + ds_split <- strsplit(ds, "+", fixed = TRUE)[[1]] + ds <- paste0(ds_split, collapse = "/") + base_url$query <- "" - rownames(mat) <- mat[, 1] - mat[, 1] <- NULL - mat <- as.matrix(mat) - - mm <- max(mat) - - if(mm > 50) { - dots <- list(...) - if(!"if_log" %in% names(dots) || dots$if_log) { - warning("the data matrix has a maximum value of ", mm, "\n", - "the data are likely not log transformed,\n", - "please set the if_log argument for average clusters accordingly") + mdata_url <- httr::modify_url(base_url, + path = file.path(ds, "meta.tsv") + ) + if (!httr::http_error(mdata_url)) { + mdata <- data.table::fread(mdata_url, data.table = FALSE, sep = "\t") + } else { + stop("unable to find metadata at url: ", mdata_url) } - } - - average_clusters(mat, mdata, cluster_col = cluster_col, ...) -} + mat_url <- httr::modify_url(base_url, + path = file.path(ds, "exprMatrix.tsv.gz") + ) + if (!httr::http_error(mat_url)) { + mat <- data.table::fread(mat_url, data.table = FALSE, sep = "\t") + } else { + stop("unable to find matrix at url: ", mat_url) + } + + rownames(mat) <- mat[, 1] + mat[, 1] <- NULL + mat <- as.matrix(mat) + + mm <- max(mat) + + if (mm > 50) { + dots <- list(...) + if (!"if_log" %in% names(dots) || dots$if_log) { + warning( + "the data matrix has a maximum value of ", mm, "\n", + "the data are likely not log transformed,\n", + "please set the if_log argument for average clusters accordingly" + ) + } + } + + average_clusters(mat, mdata, cluster_col = cluster_col, ...) +} diff --git a/R/common_dplyr.R b/R/common_dplyr.R index 32411bb6..ecc8492c 100644 --- a/R/common_dplyr.R +++ b/R/common_dplyr.R @@ -19,7 +19,8 @@ #' #' cor_to_call(res) #' @export -cor_to_call <- function(cor_mat, +cor_to_call <- function( + cor_mat, metadata = NULL, cluster_col = "cluster", collapse_to_cluster = FALSE, @@ -131,9 +132,9 @@ cor_to_call <- function(cor_mat, #' cluster_col = "classified", #' ref_mat = cbmc_ref #' ) -#' +#' #' res2 <- cor_to_call(res, cluster_col = "classified") -#' +#' #' call_to_metadata( #' res = res2, #' metadata = pbmc_meta, @@ -141,7 +142,8 @@ cor_to_call <- function(cor_mat, #' rename_prefix = "assigned" #' ) #' @export -call_to_metadata <- function(res, +call_to_metadata <- function( + res, metadata, cluster_col, per_cell = FALSE, @@ -255,7 +257,8 @@ call_to_metadata <- function(res, #' threshold = 0 #' ) #' @export -collapse_to_cluster <- function(res, +collapse_to_cluster <- function( + res, metadata, cluster_col, threshold = 0) { @@ -331,7 +334,8 @@ collapse_to_cluster <- function(res, #' #' cor_to_call_rank(res, threshold = "auto") #' @export -cor_to_call_rank <- function(cor_mat, +cor_to_call_rank <- function( + cor_mat, metadata = NULL, cluster_col = "cluster", collapse_to_cluster = FALSE, diff --git a/R/compare_genelist.R b/R/compare_genelist.R index 01a3c946..350f7472 100644 --- a/R/compare_genelist.R +++ b/R/compare_genelist.R @@ -14,7 +14,8 @@ #' mat <- binarize_expr(pbmc_avg) #' mat[1:3, 1:3] #' @export -binarize_expr <- function(mat, +binarize_expr <- function( + mat, n = 1000, cut = 0) { expr_mat <- mat @@ -57,7 +58,8 @@ binarize_expr <- function(mat, #' @examples #' matrixize_markers(pbmc_markers) #' @export -matrixize_markers <- function(marker_df, +matrixize_markers <- function( + marker_df, ranked = FALSE, n = NULL, step_weight = 1, @@ -223,7 +225,8 @@ get_vargenes <- function(marker_mat) { #' metric = "spearman" #' ) #' @export -compare_lists <- function(bin_mat, +compare_lists <- function( + bin_mat, marker_mat, n = 30000, metric = "hyper", @@ -232,10 +235,10 @@ compare_lists <- function(bin_mat, # check if matrix is binarized if (is.list(marker_mat)) { message("list of markers instead of matrix, only supports jaccard") - } + } if ((length(unique(bin_mat[, 1])) > 2) & (metric != "gsea")) { - warning("non-binarized data, running spearman instead") - metric <- "spearman" + warning("non-binarized data, running spearman instead") + metric <- "spearman" } if (details_out) { @@ -246,11 +249,11 @@ compare_lists <- function(bin_mat, names(marker_mat), function(y) { marker_list <- unlist(marker_mat[[y]], - use.names = FALSE + use.names = FALSE ) bin_temp <- bin_mat[, x][bin_mat[, x] == 1] list_top <- names(bin_temp) - + genes <- paste(intersect(list_top, marker_list), collapse = ",") genes } @@ -259,7 +262,7 @@ compare_lists <- function(bin_mat, } ) } - + if (metric == "hyper") { out <- lapply( colnames(bin_mat), @@ -287,45 +290,45 @@ compare_lists <- function(bin_mat, } } else if (metric == "jaccard") { if (is.list(marker_mat)) { - out <- lapply( - colnames(bin_mat), - function(x) { - per_col <- lapply( - names(marker_mat), - function(y) { - marker_list <- unlist(marker_mat[[y]], - use.names = FALSE - ) - bin_temp <- bin_mat[, x][bin_mat[, x] == 1] - list_top <- names(bin_temp) - - I <- length(intersect(list_top, marker_list)) - I / (length(list_top) + length(marker_list) - I) + out <- lapply( + colnames(bin_mat), + function(x) { + per_col <- lapply( + names(marker_mat), + function(y) { + marker_list <- unlist(marker_mat[[y]], + use.names = FALSE + ) + bin_temp <- bin_mat[, x][bin_mat[, x] == 1] + list_top <- names(bin_temp) + + I <- length(intersect(list_top, marker_list)) + I / (length(list_top) + length(marker_list) - I) + } + ) + do.call(cbind, per_col) } - ) - do.call(cbind, per_col) - } - ) + ) } else { - out <- lapply( - colnames(bin_mat), - function(x) { - per_col <- lapply( - colnames(marker_mat), - function(y) { - marker_list <- unlist(marker_mat[, y], - use.names = FALSE - ) - bin_temp <- bin_mat[, x][bin_mat[, x] == 1] - list_top <- names(bin_temp) - - I <- length(intersect(list_top, marker_list)) - I / (length(list_top) + length(marker_list) - I) + out <- lapply( + colnames(bin_mat), + function(x) { + per_col <- lapply( + colnames(marker_mat), + function(y) { + marker_list <- unlist(marker_mat[, y], + use.names = FALSE + ) + bin_temp <- bin_mat[, x][bin_mat[, x] == 1] + list_top <- names(bin_temp) + + I <- length(intersect(list_top, marker_list)) + I / (length(list_top) + length(marker_list) - I) + } + ) + do.call(cbind, per_col) } - ) - do.call(cbind, per_col) - } - ) + ) } } else if (metric == "spearman") { out <- lapply( @@ -384,15 +387,14 @@ compare_lists <- function(bin_mat, if (metric != "gsea") { if (!is.list(marker_mat)) { - res <- do.call(rbind, out) - rownames(res) <- colnames(bin_mat) - colnames(res) <- colnames(marker_mat) + res <- do.call(rbind, out) + rownames(res) <- colnames(bin_mat) + colnames(res) <- colnames(marker_mat) } else { - res <- do.call(rbind, out) - rownames(res) <- colnames(bin_mat) - colnames(res) <- names(marker_mat) + res <- do.call(rbind, out) + rownames(res) <- colnames(bin_mat) + colnames(res) <- names(marker_mat) } - } if (output_high) { @@ -407,8 +409,10 @@ compare_lists <- function(bin_mat, spe <- do.call(rbind, spe) rownames(spe) <- colnames(bin_mat) colnames(spe) <- names(marker_mat) - list(res = res, - details = spe) + list( + res = res, + details = spe + ) } else { res } diff --git a/R/compute_similarity.R b/R/compute_similarity.R index 6fa9998a..73a415f0 100644 --- a/R/compute_similarity.R +++ b/R/compute_similarity.R @@ -13,7 +13,8 @@ #' @param ... additional parameters not used yet #' @return matrix of numeric values, clusters from expr_mat as row names, #' cell types from ref_mat as column names -get_similarity <- function(expr_mat, +get_similarity <- function( + expr_mat, ref_mat, cluster_ids, compute_method, @@ -116,7 +117,8 @@ get_similarity <- function(expr_mat, #' @param rm0 consider 0 as missing data, recommended for per_cell #' @param ... additional parameters #' @return matrix of numeric values -permute_similarity <- function(expr_mat, +permute_similarity <- function( + expr_mat, ref_mat, cluster_ids, n_perm, @@ -198,7 +200,8 @@ permute_similarity <- function(expr_mat, #' @param rm0 consider 0 as missing data, recommended for per_cell #' @param ... additional parameters #' @return matrix of numeric values -calc_similarity <- function(query_mat, +calc_similarity <- function( + query_mat, ref_mat, compute_method, rm0 = FALSE, @@ -228,8 +231,8 @@ calc_similarity <- function(query_mat, return(similarity_score) } if (compute_method == "cosine") { - res <- proxy::simil(as.matrix(query_mat),ref_mat, method = "cosine", by_rows = FALSE) - similarity_score <- matrix(res, nrow = nrow(res)) + res <- proxy::simil(as.matrix(query_mat), ref_mat, method = "cosine", by_rows = FALSE) + similarity_score <- matrix(res, nrow = nrow(res)) return(similarity_score) } } @@ -282,7 +285,7 @@ vector_similarity <- function(vec1, vec2, compute_method, ...) { } if (!(compute_method %in% c("cosine", "kl_divergence"))) { - stop(paste(compute_method, "not implemented"), call. = FALSE) + stop(compute_method, " not implemented", call. = FALSE) } if (compute_method == "kl_divergence") { @@ -321,7 +324,8 @@ cosine <- function(vec1, vec2) { #' @param max_KL Maximal allowed value of KL-divergence. #' @return numeric value, with additional attributes, of kl divergence #' between the vectors -kl_divergence <- function(vec1, +kl_divergence <- function( + vec1, vec2, if_log = FALSE, total_reads = 1000, diff --git a/R/main.R b/R/main.R index b468f026..381f5017 100644 --- a/R/main.R +++ b/R/main.R @@ -72,7 +72,7 @@ clustify <- function(input, ...) { #' cluster_col = "RNA_snn_res.0.5", #' compute_method = "cosine" #' ) -#' +#' #' # Annotate a SingleCellExperiment object #' sce <- sce_pbmc() #' clustify( @@ -83,7 +83,7 @@ clustify <- function(input, ...) { #' per_cell = FALSE, #' dr = "umap" #' ) -#' +#' #' # Annotate a Seurat object #' so <- so_pbmc() #' clustify( @@ -105,7 +105,8 @@ clustify <- function(input, ...) { #' dr = "umap" #' ) #' @export -clustify.default <- function(input, +clustify.default <- function( + input, ref_mat, metadata = NULL, cluster_col = NULL, @@ -128,11 +129,11 @@ clustify.default <- function(input, if_log = TRUE, organism = "hsapiens", plot_name = NULL, - rds_name = NULL, + rds_name = NULL, expand_unassigned = FALSE, ...) { if (!compute_method %in% clustifyr_methods) { - stop(paste(compute_method, "correlation method not implemented"), + stop(compute_method, " correlation method not implemented", call. = FALSE ) } @@ -174,8 +175,10 @@ clustify.default <- function(input, } if (is.null(query_genes) || length(query_genes) == 0) { - message("Variable features not available, using all genes instead\n", - "consider supplying variable features to `query_genes` argument.") + message( + "Variable features not available, using all genes instead\n", + "consider supplying variable features to `query_genes` argument." + ) query_genes <- NULL } @@ -256,18 +259,18 @@ clustify.default <- function(input, ... ) } - + if (verbose) { message("similarity computation completed, matrix of ", dim(res)[1], " x ", dim(res)[2], ", preparing output") } - + obj_out <- seurat_out if (obj_out && !inherits(input_original, c( "matrix", "Matrix", "data.frame" - )) || (vec_out && + )) || (vec_out && inherits(input_original, c( "matrix", "Matrix", @@ -295,7 +298,7 @@ clustify.default <- function(input, return(df_temp_full[[paste0(rename_prefix, "_type")]]) } } - + out <- insert_meta_object(input_original, df_temp_full, lookuptable = lookuptable @@ -326,7 +329,8 @@ clustify.default <- function(input, #' @rdname clustify #' @export -clustify.Seurat <- function(input, +clustify.Seurat <- function( + input, ref_mat, cluster_col = NULL, query_genes = NULL, @@ -348,7 +352,7 @@ clustify.Seurat <- function(input, metadata = NULL, organism = "hsapiens", plot_name = NULL, - rds_name = NULL, + rds_name = NULL, expand_unassigned = FALSE, ...) { s_object <- input @@ -373,7 +377,7 @@ clustify.Seurat <- function(input, if (verbose) { message("object data retrieval complete, moving to similarity computation") } - + res <- clustify( expr_mat, ref_mat, @@ -426,11 +430,11 @@ clustify.Seurat <- function(input, res = df_temp, organism = organism, plot_name = plot_name, - rds_name = rds_name, + rds_name = rds_name, expand_unassigned = expand_unassigned ) } - + if (vec_out) { if (is.null(rename_prefix)) { return(df_temp_full[["type"]]) @@ -438,7 +442,7 @@ clustify.Seurat <- function(input, return(df_temp_full[[paste0(rename_prefix, "_type")]]) } } - + if ("SeuratObject" %in% loadedNamespaces()) { s_object <- write_meta(s_object, df_temp_full) return(s_object) @@ -452,7 +456,8 @@ clustify.Seurat <- function(input, #' @rdname clustify #' @export -clustify.SingleCellExperiment <- function(input, +clustify.SingleCellExperiment <- function( + input, ref_mat, cluster_col = NULL, query_genes = NULL, @@ -473,7 +478,7 @@ clustify.SingleCellExperiment <- function(input, metadata = NULL, organism = "hsapiens", plot_name = NULL, - rds_name = NULL, + rds_name = NULL, expand_unassigned = FALSE, ...) { s_object <- input @@ -490,11 +495,11 @@ clustify.SingleCellExperiment <- function(input, metadata <- object_data(s_object, "meta.data") } - + if (verbose) { message("object data retrieval complete, moving to similarity computation") } - + res <- clustify( expr_mat, ref_mat, @@ -547,11 +552,11 @@ clustify.SingleCellExperiment <- function(input, res = df_temp, organism = organism, plot_name = plot_name, - rds_name = rds_name, + rds_name = rds_name, expand_unassigned = expand_unassigned ) } - + if (vec_out) { if (is.null(rename_prefix)) { return(df_temp_full[["type"]]) @@ -559,7 +564,7 @@ clustify.SingleCellExperiment <- function(input, return(df_temp_full[[paste0(rename_prefix, "_type")]]) } } - + if ("SingleCellExperiment" %in% loadedNamespaces()) { if (!(is.null(rename_prefix))) { col_type <- stringr::str_c(rename_prefix, "_type") @@ -657,7 +662,8 @@ clustify_lists <- function(input, ...) { #' cell types from marker_mat as column names #' @export -clustify_lists.default <- function(input, +clustify_lists.default <- function( + input, marker, marker_inmatrix = TRUE, metadata = NULL, @@ -793,12 +799,12 @@ clustify_lists.default <- function(input, } obj_out <- seurat_out if ((!inherits(input_original, c("matrix", "Matrix", "data.frame")) && - obj_out ) || (vec_out && - inherits(input_original, c( - "matrix", - "Matrix", - "data.frame" - )))) { + obj_out) || (vec_out && + inherits(input_original, c( + "matrix", + "Matrix", + "data.frame" + )))) { if (metric != "consensus") { df_temp <- cor_to_call( res, @@ -825,7 +831,7 @@ clustify_lists.default <- function(input, return(df_temp_full[[paste0(rename_prefix, "_type")]]) } } - + out <- insert_meta_object(input_original, df_temp_full, lookuptable = lookuptable @@ -839,7 +845,8 @@ clustify_lists.default <- function(input, #' @rdname clustify_lists #' @export -clustify_lists.Seurat <- function(input, +clustify_lists.Seurat <- function( + input, metadata = NULL, cluster_col = NULL, if_log = TRUE, @@ -875,11 +882,11 @@ clustify_lists.Seurat <- function(input, metadata <- object_data(s_object, "meta.data") } cluster_info <- metadata - + if (verbose) { message("object data retrieval complete, moving to similarity computation") } - + res <- clustify_lists( input, per_cell = per_cell, @@ -928,7 +935,7 @@ clustify_lists.Seurat <- function(input, return(df_temp_full[[paste0(rename_prefix, "_type")]]) } } - + if ("SeuratObject" %in% loadedNamespaces()) { s_object <- write_meta(s_object, df_temp_full) return(s_object) @@ -942,7 +949,8 @@ clustify_lists.Seurat <- function(input, #' @rdname clustify_lists #' @export -clustify_lists.SingleCellExperiment <- function(input, +clustify_lists.SingleCellExperiment <- function( + input, metadata = NULL, cluster_col = NULL, if_log = TRUE, @@ -976,7 +984,7 @@ clustify_lists.SingleCellExperiment <- function(input, } else { metadata <- object_data(s_object, "meta.data") } - + if (verbose) { message("object data retrieval complete, moving to similarity computation") } @@ -1015,7 +1023,7 @@ clustify_lists.SingleCellExperiment <- function(input, per_cell = per_cell, rename_prefix = rename_prefix ) - + if (vec_out) { if (is.null(rename_prefix)) { return(df_temp_full[["type"]]) diff --git a/R/object_access.R b/R/object_access.R index 5fb46a91..0f3e2c24 100644 --- a/R/object_access.R +++ b/R/object_access.R @@ -1,71 +1,94 @@ #' An example Seurat object -#' +#' #' @return a Seurat object populated with data #' from the [pbmc_matrix_small] scRNA-seq dataset, additionally #' annotated with cluster assignments. -#' +#' #' @importFrom SeuratObject CreateSeuratObject CreateDimReducObject VariableFeatures #' @export so_pbmc <- function() { - x <- pbmc_example_data() - so <- SeuratObject::CreateSeuratObject(x$mat, - meta.data = x$metadata) - umap_dr <- SeuratObject::CreateDimReducObject(embeddings = x$umap, - key = "umap_", - assay = "RNA") - if(is_seurat_v5()) { - so <- SeuratObject::SetAssayData(so, - "data", - SeuratObject::LayerData(so, layer = "counts") + x <- pbmc_example_data() + so <- SeuratObject::CreateSeuratObject(x$mat, + meta.data = x$metadata ) - } else { - so <- SeuratObject::SetAssayData(so, - "data", - SeuratObject::GetAssayData(so, slot = "counts") + umap_dr <- SeuratObject::CreateDimReducObject( + embeddings = x$umap, + key = "umap_", + assay = "RNA" ) - } - so[["umap"]] <- umap_dr - SeuratObject::VariableFeatures(so) <- x$vargenes - so + if (is_seurat_v5()) { + so <- SeuratObject::SetAssayData( + so, + "data", + SeuratObject::LayerData(so, layer = "counts") + ) + } else { + so <- SeuratObject::SetAssayData( + so, + "data", + SeuratObject::GetAssayData(so, slot = "counts") + ) + } + so[["umap"]] <- umap_dr + SeuratObject::VariableFeatures(so) <- x$vargenes + so } - + #' An example SingleCellExperiment object -#' +#' #' @return a SingleCellExperiment object populated with data #' from the [pbmc_matrix_small] scRNA-seq dataset, additionally #' annotated with cluster assignments. -#' +#' #' @export sce_pbmc <- function() { - x <- pbmc_example_data() - md <- x$metadata[, c(1:5, 7)] - # rename to more sce-like names - colnames(md) <- c("cell_source", - "sum", - "detected", - "subsets_Mito_percent", - "clusters", - "cell_type") - SingleCellExperiment::SingleCellExperiment(list(counts = x$mat, - logcounts = x$mat), - colData = md, - reducedDims = list( - UMAP = x$umap - )) + x <- pbmc_example_data() + + cols_to_keep <- c( + "orig.ident", + "nCount_RNA", + "nFeature_RNA", + "percent.mt", + "RNA_snn_res.0.5", + "classified" + ) + + md <- x$metadata[, cols_to_keep] + # rename to more sce-like names + colnames(md) <- c( + "cell_source", + "sum", + "detected", + "subsets_Mito_percent", + "clusters", + "cell_type" + ) + SingleCellExperiment::SingleCellExperiment( + list( + counts = x$mat, + logcounts = x$mat + ), + colData = md, + reducedDims = list( + UMAP = x$umap + ) + ) } pbmc_example_data <- function() { - mat <- clustifyr::pbmc_matrix_small - md <- clustifyr::pbmc_meta - umap_cols <- c("UMAP_1", "UMAP_2") - umap <- as.matrix(md[, umap_cols]) - md <- md[, setdiff(colnames(md), umap_cols)] - vargenes <- clustifyr::pbmc_vargenes - - list(mat = mat, - metadata = md, - umap = umap, - vargenes = vargenes) + mat <- clustifyr::pbmc_matrix_small + md <- clustifyr::pbmc_meta + umap_cols <- c("UMAP_1", "UMAP_2") + umap <- as.matrix(md[, umap_cols]) + md <- md[, setdiff(colnames(md), umap_cols)] + vargenes <- clustifyr::pbmc_vargenes + + list( + mat = mat, + metadata = md, + umap = umap, + vargenes = vargenes + ) } #' Function to access object data @@ -77,7 +100,7 @@ object_data <- function(object, ...) { } #' @rdname object_data -#' @param object SingleCellExperiment or Seurat object +#' @param object SingleCellExperiment or Seurat object #' @param slot data to access #' @param n_genes number of genes limit for Seurat variable genes, by default 1000, #' set to 0 to use all variable genes (generally not recommended) @@ -90,7 +113,8 @@ object_data <- function(object, ...) { #' ) #' mat[1:3, 1:3] #' @export -object_data.Seurat <- function(object, +object_data.Seurat <- function( + object, slot, n_genes = 1000, ...) { @@ -101,66 +125,69 @@ object_data.Seurat <- function(object, return(object@meta.data) } else if (slot == "var.genes") { vars <- SeuratObject::VariableFeatures(object) - + if (is.null(vars) || length(vars) <= 1) { message("variable genes not found, please manually specify with query_genes argument") } if ((length(vars) > n_genes) & (n_genes > 0)) { vars <- vars[seq_len(n_genes)] } - + return(vars) } else { - stop(slot, " access method not implemented") + stop(slot, " access method not implemented") } } #' @importFrom utils packageVersion is_seurat_v5 <- function() { - utils::packageVersion("SeuratObject") >= '5.0.0' + utils::packageVersion("SeuratObject") >= "5.0.0" } extract_v5_matrix <- function(x, ...) { - ob_layers <- SeuratObject::Layers(x) - if("data" %in% ob_layers) { - res <- SeuratObject::LayerData(x, layer = "data", ...) - } else if ("counts" %in% ob_layers) { - message("Unable to find 'data' layer, using 'count' layer instead") - res <- SeuratObject::LayerData(x, layer = "counts", ...) - } else { - da <- DefaultAssay(x) - stop("\nUnable to find data or count layer in ", da, " Assay of SeuratObject\n", - "Extracting data from V5 objects with multiple count\n", - "or data layers is not supported") - } - res + ob_layers <- SeuratObject::Layers(x) + if ("data" %in% ob_layers) { + res <- SeuratObject::LayerData(x, layer = "data", ...) + } else if ("counts" %in% ob_layers) { + message("Unable to find 'data' layer, using 'count' layer instead") + res <- SeuratObject::LayerData(x, layer = "counts", ...) + } else { + da <- DefaultAssay(x) + stop( + "\nUnable to find data or count layer in ", da, " Assay of SeuratObject\n", + "Extracting data from V5 objects with multiple count\n", + "or data layers is not supported" + ) + } + res } extract_v4_matrix <- function(x) { - res <- SeuratObject::GetAssayData(x, layer = "data") - - if(length(res) == 0) { - message("Unable to find 'data' slot, using 'count' slot instead") - res <- SeuratObject::GetAssayData(x, layer = "count") - } - - res + res <- SeuratObject::GetAssayData(x, layer = "data") + + if (length(res) == 0) { + message("Unable to find 'data' slot, using 'count' slot instead") + res <- SeuratObject::GetAssayData(x, layer = "count") + } + + res } get_seurat_matrix <- function(x, warn = TRUE) { - - ob_assay <- SeuratObject::DefaultAssay(x) - if(warn && ob_assay != "RNA") { - warning("Default assay of input Seurat object is ", ob_assay, "\n", - "Data will be used from this assay rather than RNA") - } - - if(is_seurat_v5()) { - res <- extract_v5_matrix(x) - } else { - res <- extract_v4_matrix(x) - } - res + ob_assay <- SeuratObject::DefaultAssay(x) + if (warn && ob_assay != "RNA") { + warning( + "Default assay of input Seurat object is ", ob_assay, "\n", + "Data will be used from this assay rather than RNA" + ) + } + + if (is_seurat_v5()) { + res <- extract_v5_matrix(x) + } else { + res <- extract_v4_matrix(x) + } + res } #' @rdname object_data @@ -177,7 +204,8 @@ get_seurat_matrix <- function(x, warn = TRUE) { #' ) #' mat[1:3, 1:3] #' @export -object_data.SingleCellExperiment <- function(object, +object_data.SingleCellExperiment <- function( + object, slot, ...) { if (slot == "data") { @@ -208,7 +236,8 @@ write_meta <- function(object, ...) { #' meta = seurat_meta(so) #' ) #' @export -write_meta.Seurat <- function(object, +write_meta.Seurat <- function( + object, meta, ...) { object_new <- object @@ -231,7 +260,8 @@ write_meta.Seurat <- function(object, #' meta = object_data(sce, "meta.data") #' ) #' @export -write_meta.SingleCellExperiment <- function(object, +write_meta.SingleCellExperiment <- function( + object, meta, ...) { colData(object) <- S4Vectors::DataFrame(meta) @@ -241,7 +271,7 @@ write_meta.SingleCellExperiment <- function(object, #' Function to convert labelled seurat object to avg expression matrix #' @return reference expression matrix, with genes as row names, #' and cell types as column names -#' @examples +#' @examples #' so <- so_pbmc() #' ref <- seurat_ref(so, cluster_col = "seurat_clusters") #' @export @@ -265,7 +295,8 @@ seurat_ref <- function(seurat_object, ...) { #' averaging will be done on unlogged data #' @param ... additional arguments #' @export -seurat_ref.Seurat <- function(seurat_object, +seurat_ref.Seurat <- function( + seurat_object, cluster_col = "classified", var_genes_only = FALSE, assay_name = NULL, @@ -295,7 +326,7 @@ seurat_ref.Seurat <- function(seurat_object, SeuratObject::DefaultAssay(seurat_object) <- og_assay } } else { - stop("warning, not seurat3 object") + stop("Input is not a compatible Seurat object") } temp_res <- average_clusters( @@ -326,7 +357,8 @@ seurat_meta <- function(seurat_object, ...) { #' @param dr dimension reduction method #' @param ... additional arguments #' @export -seurat_meta.Seurat <- function(seurat_object, +seurat_meta.Seurat <- function( + seurat_object, dr = "umap", ...) { dr2 <- dr @@ -384,7 +416,8 @@ object_ref <- function(input, ...) { #' cluster_col = "seurat_clusters" #' ) #' @export -object_ref.default <- function(input, +object_ref.default <- function( + input, cluster_col = NULL, var_genes_only = FALSE, assay_name = NULL, @@ -432,7 +465,8 @@ object_ref.default <- function(input, #' @rdname object_ref #' @export -object_ref.Seurat <- function(input, +object_ref.Seurat <- function( + input, cluster_col = NULL, var_genes_only = FALSE, assay_name = NULL, @@ -464,7 +498,8 @@ object_ref.Seurat <- function(input, #' @rdname object_ref #' @export -object_ref.SingleCellExperiment <- function(input, +object_ref.SingleCellExperiment <- function( + input, cluster_col = NULL, var_genes_only = FALSE, assay_name = NULL, diff --git a/R/plot.R b/R/plot.R index 1fe57018..3fbd0e79 100644 --- a/R/plot.R +++ b/R/plot.R @@ -25,7 +25,8 @@ #' feature = "classified" #' ) #' @export -plot_dims <- function(data, +plot_dims <- function( + data, x = "UMAP_1", y = "UMAP_2", feature = NULL, @@ -78,17 +79,18 @@ plot_dims <- function(data, if (!is.null(alpha_col)) { p <- ggplot(data, aes(.data[[x]], .data[[y]])) + - geom_point(aes( - color = .data[[feature]], - alpha = .data[[alpha_col]] - ), # backticks protect special character gene names - size = pt_size + geom_point( + aes( + color = .data[[feature]], + alpha = .data[[alpha_col]] + ), # backticks protect special character gene names + size = pt_size ) + scale_alpha_continuous(range = c(0, 1)) } else { p <- ggplot(data, aes(.data[[x]], .data[[y]])) + geom_point(aes(color = .data[[feature]]), - size = pt_size + size = pt_size ) } @@ -272,7 +274,8 @@ pretty_palette_ramp_d <- #' y = "UMAP_2" #' ) #' @export -plot_cor <- function(cor_mat, +plot_cor <- function( + cor_mat, metadata, data_to_plot = colnames(cor_mat), cluster_col = NULL, @@ -381,7 +384,8 @@ plot_cor <- function(cor_mat, #' cell_col = "rn" #' ) #' @export -plot_gene <- function(expr_mat, +plot_gene <- function( + expr_mat, metadata, genes, cell_col = NULL, @@ -441,7 +445,8 @@ plot_gene <- function(expr_mat, #' @param ... passed to plot_dims #' @return list of ggplot object, cells projected by dr, #' colored by cell type classification -plot_call <- function(cor_mat, +plot_call <- function( + cor_mat, metadata, data_to_plot = colnames(cor_mat), ...) { @@ -489,7 +494,8 @@ plot_call <- function(cor_mat, #' cluster_col = "classified" #' ) #' @export -plot_best_call <- function(cor_mat, +plot_best_call <- function( + cor_mat, metadata, cluster_col = "cluster", collapse_to_cluster = FALSE, @@ -573,7 +579,8 @@ plot_best_call <- function(cor_mat, #' #' plot_cor_heatmap(res) #' @export -plot_cor_heatmap <- function(cor_mat, +plot_cor_heatmap <- function( + cor_mat, metadata = NULL, cluster_col = NULL, col = not_pretty_palette, diff --git a/R/run_fgsea.R b/R/run_fgsea.R index b82f02b1..0a287af9 100644 --- a/R/run_fgsea.R +++ b/R/run_fgsea.R @@ -17,7 +17,8 @@ #' @param no_warnings suppress warnings from gsea ties #' @return dataframe of gsea scores (pval, NES), with clusters as rownames #' @export -run_gsea <- function(expr_mat, +run_gsea <- function( + expr_mat, query_genes, cluster_ids = NULL, n_perm = 1000, diff --git a/R/shiny.R b/R/shiny.R index ad26eb9b..307d67f6 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -1,4 +1,4 @@ -#' Launch Shiny app version of clustifyr, +#' Launch Shiny app version of clustifyr, #' may need to run install_clustifyr_app() at first time to install packages #' @return instance of shiny app #' @examples @@ -7,6 +7,6 @@ #' } #' @export run_clustifyr_app <- function() { - appDir <- system.file("shinyapp", package = "clustifyr") - shiny::runApp(appDir, display.mode = "normal") + appDir <- system.file("shinyapp", package = "clustifyr") + shiny::runApp(appDir, display.mode = "normal") } diff --git a/R/utils.R b/R/utils.R index d45bf8fe..4c8c4c58 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,28 +2,32 @@ #' @param pkg package to query #' @return logical(1) indicating if package is available. #' @noRd -is_pkg_available <- function(pkg, +is_pkg_available <- function(pkg, action = c("none", "message", "warn", "error"), msg = "") { - has_pkg <- requireNamespace(pkg, quietly = TRUE) - action <- match.arg(action) - - if(!has_pkg) { - switch(action, - message = message(pkg, - " not installed ", - msg), - warn = warning(pkg, - " not installed ", - msg, - call. = FALSE), - error = stop(pkg, - " not installed and is required for this function ", - msg, - call. = FALSE), - ) - } - has_pkg + has_pkg <- requireNamespace(pkg, quietly = TRUE) + action <- match.arg(action) + + if (!has_pkg) { + switch(action, + message = message( + pkg, + " not installed ", + msg + ), + warn = warning(pkg, + " not installed ", + msg, + call. = FALSE + ), + error = stop(pkg, + " not installed and is required for this function ", + msg, + call. = FALSE + ), + ) + } + has_pkg } @@ -40,7 +44,8 @@ is_pkg_available <- function(pkg, #' ) #' length(res) #' @export -overcluster <- function(mat, +overcluster <- function( + mat, cluster_id, power = 0.15) { mat <- as.matrix(mat) @@ -95,7 +100,8 @@ overcluster <- function(mat, #' mat[1:3, 1:3] #' @importFrom matrixStats rowMaxs rowMedians colRanks #' @export -average_clusters <- function(mat, +average_clusters <- function( + mat, metadata, cluster_col = "cluster", if_log = TRUE, @@ -111,10 +117,12 @@ average_clusters <- function(mat, mat <- mat[, cluster_info[[cell_col]]] } } - - if(is.null(colnames(mat))){ - stop("The input matrix does not have colnames.\n", - "Check colnames() of input object") + + if (is.null(colnames(mat))) { + stop( + "The input matrix does not have colnames.\n", + "Check colnames() of input object" + ) } if (is.vector(cluster_info)) { if (ncol(mat) != length(cluster_info)) { @@ -128,7 +136,7 @@ average_clusters <- function(mat, !(cluster_col %in% colnames(metadata))) { stop("given `cluster_col` is not a column in `metadata`", call. = FALSE) } - + cluster_info_temp <- cluster_info[[cluster_col]] if (is.factor(cluster_info_temp)) { cluster_info_temp <- droplevels(cluster_info_temp) @@ -200,20 +208,23 @@ average_clusters <- function(mat, function(cell_ids) { if (!all(cell_ids %in% colnames(mat))) { stop("cell ids not found in input matrix", - call. = FALSE + call. = FALSE ) } mat_data <- mat[, cell_ids, drop = FALSE] # mat_data[mat_data == 0] <- NA - res1 <- matrixStats::rowQuantiles(as.matrix(mat_data), - probs = 0.25, - na.rm = TRUE) - res2 <- matrixStats::rowQuantiles(as.matrix(mat_data), - probs = 0.5, - na.rm = TRUE) - res3 <- matrixStats::rowQuantiles(as.matrix(mat_data), - probs = 0.75, - na.rm = TRUE) + res1 <- matrixStats::rowQuantiles(as.matrix(mat_data), + probs = 0.25, + na.rm = TRUE + ) + res2 <- matrixStats::rowQuantiles(as.matrix(mat_data), + probs = 0.5, + na.rm = TRUE + ) + res3 <- matrixStats::rowQuantiles(as.matrix(mat_data), + probs = 0.75, + na.rm = TRUE + ) res <- 0.5 * res2 + 0.25 * res1 + 0.25 * res3 res[is.na(res)] <- 0 names(res) <- rownames(mat_data) @@ -226,7 +237,7 @@ average_clusters <- function(mat, function(cell_ids) { if (!all(cell_ids %in% colnames(mat))) { stop("cell ids not found in input matrix", - call. = FALSE + call. = FALSE ) } mat_data <- mat[, cell_ids, drop = FALSE] @@ -242,13 +253,13 @@ average_clusters <- function(mat, function(cell_ids) { if (!all(cell_ids %in% colnames(mat))) { stop("cell ids not found in input matrix", - call. = FALSE + call. = FALSE ) } mat_data <- mat[, cell_ids, drop = FALSE] # mat_data[mat_data == 0] <- NA res <- matrixStats::rowMins(as.matrix(mat_data), - na.rm = TRUE + na.rm = TRUE ) res[is.na(res)] <- 0 names(res) <- rownames(mat_data) @@ -261,13 +272,13 @@ average_clusters <- function(mat, function(cell_ids) { if (!all(cell_ids %in% colnames(mat))) { stop("cell ids not found in input matrix", - call. = FALSE + call. = FALSE ) } mat_data <- mat[, cell_ids, drop = FALSE] # mat_data[mat_data == 0] <- NA res <- matrixStats::rowMaxs(as.matrix(mat_data), - na.rm = TRUE + na.rm = TRUE ) res[is.na(res)] <- 0 names(res) <- rownames(mat_data) @@ -326,7 +337,8 @@ average_clusters <- function(mat, #' @param cut_num binary cutoff for detection #' @return matrix of numeric values, with genes for row names, #' and clusters for column names -percent_clusters <- function(mat, +percent_clusters <- function( + mat, metadata, cluster_col = "cluster", cut_num = 0.5) { @@ -361,7 +373,8 @@ get_best_match_matrix <- function(cor_mat) { #' @param cor_mat correlation matrix #' @param carry_cor whether the correlation score gets reported #' @return string with ident call and possibly cor value -get_best_str <- function(name, +get_best_str <- function( + name, best_mat, cor_mat, carry_cor = TRUE) { @@ -443,7 +456,8 @@ get_common_elements <- function(...) { #' pathway_list = gl #' ) #' @export -calculate_pathway_gsea <- function(mat, +calculate_pathway_gsea <- function( + mat, pathway_list, n_perm = 1000, scale = TRUE, @@ -483,7 +497,8 @@ calculate_pathway_gsea <- function(mat, #' @param idents new idents to assign, must be length of 1 or #' same as clusters #' @return new dataframe of metadata -assign_ident <- function(metadata, +assign_ident <- function( + metadata, cluster_col = "cluster", ident_col = "type", clusters, @@ -535,7 +550,8 @@ assign_ident <- function(metadata, #' threshold = 0.5 #' ) #' @export -cor_to_call_topn <- function(cor_mat, +cor_to_call_topn <- function( + cor_mat, metadata = NULL, col = "cluster", collapse_to_cluster = FALSE, @@ -638,7 +654,8 @@ cor_to_call_topn <- function(cor_mat, #' @param returning whether to return mean, min, #' or max of the gene pct in the gene list #' @return vector of numeric values -gene_pct <- function(matrix, +gene_pct <- function( + matrix, genelist, clusters, returning = "mean") { @@ -694,7 +711,8 @@ gene_pct <- function(matrix, #' cluster_col = "classified" #' ) #' @export -gene_pct_markerm <- function(matrix, +gene_pct_markerm <- function( + matrix, marker_m, metadata, cluster_col = NULL, @@ -816,7 +834,8 @@ clustify_nudge <- function(input, ...) { #' @return single cell object, or matrix of numeric values, #' clusters from input as row names, cell types from ref_mat as column names #' @export -clustify_nudge.default <- function(input, +clustify_nudge.default <- function( + input, ref_mat, marker, metadata = NULL, @@ -947,7 +966,8 @@ clustify_nudge.default <- function(input, #' @rdname clustify_nudge #' @export -clustify_nudge.Seurat <- function(input, +clustify_nudge.Seurat <- function( + input, ref_mat, marker, cluster_col = NULL, @@ -1042,94 +1062,111 @@ clustify_nudge.Seurat <- function(input, } #' lookup table for single cell object structures #' @importFrom SummarizedExperiment colData<- -#' @export +#' @returns A list populated with standardized functions to +#' access relevant data structures in multiple single cell +#' data formats. object_loc_lookup <- function() { - l <- list() - - l$SingleCellExperiment <- c( - expr = function(x) object_data(x, "data"), - meta = function(x) object_data(x, "meta.data"), - add_meta = function(x, md) { - colData(x) <- md - x}, - var = NULL, - col = "cell_type1" - ) - - l$Seurat <- c( - expr = function(x) object_data(x, "data"), - meta = function(x) object_data(x, "meta.data"), - add_meta = function(x, md) { - x@meta.data <- md - x}, - var = function(x) object_data(x, "var.genes"), - col = "RNA_snn_res.1" - ) - - l$URD <- c( - expr = function(x) x@logupx.data, - meta = function(x) x@meta, - add_meta = function(x, md) { - x@meta <- md - x}, - var = function(x) x@var.genes, - col = "cluster" - ) - - l$FunctionalSingleCellExperiment <- c( - expr = function(x) x@ExperimentList$rnaseq@assays$data$logcounts, - meta = function(x) x@ExperimentList$rnaseq@colData, - add_meta = function(x, md) { - x@ExperimentList$rnaseq@colData <- md - x}, - var = NULL, - col = "leiden_cluster" - ) - - l$CellDataSet <- c( - expr = function(x) do.call(function(x) {row.names(x) <- x@featureData@data$gene_short_name; return(x)}, list(x@assayData$exprs)), - meta = function(x) as.data.frame(x@phenoData@data), - add_meta = function(x, md) { - x@phenoData@data <- md - x}, - var = function(x) as.character(x@featureData@data$gene_short_name[x@featureData@data$use_for_ordering == T]), - col = "Main_Cluster" - ) - l + l <- list() + + l$SingleCellExperiment <- c( + expr = function(x) object_data(x, "data"), + meta = function(x) object_data(x, "meta.data"), + add_meta = function(x, md) { + colData(x) <- md + x + }, + var = NULL, + col = "cell_type1" + ) + + l$Seurat <- c( + expr = function(x) object_data(x, "data"), + meta = function(x) object_data(x, "meta.data"), + add_meta = function(x, md) { + x@meta.data <- md + x + }, + var = function(x) object_data(x, "var.genes"), + col = "RNA_snn_res.1" + ) + + l$URD <- c( + expr = function(x) x@logupx.data, + meta = function(x) x@meta, + add_meta = function(x, md) { + x@meta <- md + x + }, + var = function(x) x@var.genes, + col = "cluster" + ) + + l$FunctionalSingleCellExperiment <- c( + expr = function(x) x@ExperimentList$rnaseq@assays$data$logcounts, + meta = function(x) x@ExperimentList$rnaseq@colData, + add_meta = function(x, md) { + x@ExperimentList$rnaseq@colData <- md + x + }, + var = NULL, + col = "leiden_cluster" + ) + + l$CellDataSet <- c( + expr = function(x) { + do.call(function(x) { + row.names(x) <- x@featureData@data$gene_short_name + return(x) + }, list(x@assayData$exprs)) + }, + meta = function(x) as.data.frame(x@phenoData@data), + add_meta = function(x, md) { + x@phenoData@data <- md + x + }, + var = function(x) as.character(x@featureData@data$gene_short_name[x@featureData@data$use_for_ordering == TRUE]), + col = "Main_Cluster" + ) + l } #' more flexible parsing of single cell objects #' #' @param input input object #' @param type look up predefined slots/loc -#' @param expr_loc function that extracts expression matrix -#' @param meta_loc function that extracts metadata -#' @param var_loc function that extracts variable genes +#' @param expr_loc function that extracts expression matrix +#' @param meta_loc function that extracts metadata +#' @param var_loc function that extracts variable genes #' @param cluster_col column of clustering from metadata -#' @param lookuptable if not supplied, will use object_loc_lookup() for parsing. +#' @param lookuptable if not supplied, will use object_loc_lookup() for parsing. #' @return list of expression, metadata, vargenes, cluster_col info from object #' @examples -#' so <- so_pbmc() +#' so <- so_pbmc() #' obj <- parse_loc_object(so) #' length(obj) #' @export -parse_loc_object <- function(input, +parse_loc_object <- function( + input, type = class(input), expr_loc = NULL, meta_loc = NULL, var_loc = NULL, cluster_col = NULL, lookuptable = NULL) { - if(!type %in% c("SingleCellExperiment", "Seurat")) { - warning("Support for ", type, " objects is deprecated ", - "and will be removed from clustifyr in the next version") + if (!type %in% c("SingleCellExperiment", "Seurat")) { + warning( + "Support for ", type, " objects is deprecated ", + "and will be removed from clustifyr in the next version" + ) } - + if (is.null(lookuptable)) { lookup <- object_loc_lookup() } else { - warning("Support for supplying custom objects is deprecated ", - "and will be removed from clustifyr in the next version") + warning( + "Support for supplying custom objects is deprecated ", + "and will be removed from clustifyr in the next version" + ) lookup <- lookuptable } @@ -1178,7 +1215,8 @@ parse_loc_object <- function(input, #' so <- so_pbmc() #' insert_meta_object(so, seurat_meta(so, dr = "umap")) #' @export -insert_meta_object <- function(input, +insert_meta_object <- function( + input, new_meta, type = class(input), meta_loc = NULL, @@ -1229,7 +1267,8 @@ insert_meta_object <- function(input, #' y_col = "UMAP_2" #' ) #' @export -overcluster_test <- function(expr, +overcluster_test <- function( + expr, metadata, ref_mat, cluster_col, @@ -1356,7 +1395,8 @@ overcluster_test <- function(expr, #' n = 5 #' ) #' @export -ref_feature_select <- function(mat, +ref_feature_select <- function( + mat, n = 3000, mode = "var", rm.lowvar = TRUE) { @@ -1407,7 +1447,8 @@ ref_feature_select <- function(mat, #' if_log = FALSE #' ) #' @export -feature_select_PCA <- function(mat = NULL, +feature_select_PCA <- function( + mat = NULL, pcs = NULL, n_pcs = 10, percentile = 0.99, @@ -1450,11 +1491,12 @@ feature_select_PCA <- function(mat = NULL, #' length(gene.lists) #' @importFrom utils read.csv #' @export -gmt_to_list <- function(path, +gmt_to_list <- function( + path, cutoff = 0, sep = "\thttp://www.broadinstitute.org/gsea/msigdb/cards/.*?\t") { - - df <- read.csv(path, sep = ",", + df <- read.csv(path, + sep = ",", header = FALSE, col.names = "V1" ) @@ -1511,7 +1553,8 @@ gmt_to_list <- function(path, #' 5 #' ) #' @export -plot_pathway_gsea <- function(mat, +plot_pathway_gsea <- function( + mat, pathway_list, n_perm = 1000, scale = TRUE, @@ -1560,7 +1603,8 @@ plot_pathway_gsea <- function(mat, #' ) #' mat[1:3, 1:3] #' @export -downsample_matrix <- function(mat, +downsample_matrix <- function( + mat, n = 1, keep_cluster_proportions = TRUE, metadata = NULL, @@ -1613,10 +1657,11 @@ downsample_matrix <- function(mat, #' ) #' @export ref_marker_select <- - function(mat, - cut = 0.5, - arrange = TRUE, - compto = 1) { + function( + mat, + cut = 0.5, + arrange = TRUE, + compto = 1) { mat <- mat[!is.na(rownames(mat)), ] mat <- mat[Matrix::rowSums(mat) != 0, ] ref_cols <- colnames(mat) @@ -1672,7 +1717,8 @@ ref_marker_select <- #' cols = names(pbmc_avg["PPBP", ]) #' ) #' @export -marker_select <- function(row1, +marker_select <- function( + row1, cols, cut = 1, compto = 1) { @@ -1716,7 +1762,8 @@ marker_select <- function(row1, #' cutoff_score = 0.8 #' ) #' @export -pos_neg_select <- function(input, +pos_neg_select <- function( + input, ref_mat, metadata, cluster_col = "cluster", @@ -1889,7 +1936,7 @@ get_unique_column <- function(df, id = NULL) { #' cluster_col = "classified", #' if_log = FALSE #' ) -#' +#' #' rankdiff <- find_rank_bias( #' avg, #' cbmc_ref, @@ -1929,15 +1976,19 @@ find_rank_bias <- function( )) rownames(r1) <- query_genes colnames(r1) <- colnames(ref_mat) - + # actual diff calculations - rdiff <- lapply(rownames(r1), - function(x) {res <- outer(r2[x, ], r1[x, ], FUN = "-") - # rownames(res) <- colnames(r1) - # colnames(res) <- colnames(r2) - res}) + rdiff <- lapply( + rownames(r1), + function(x) { + res <- outer(r2[x, ], r1[x, ], FUN = "-") + # rownames(res) <- colnames(r1) + # colnames(res) <- colnames(r2) + res + } + ) names(rdiff) <- rownames(r1) - + rdiff } @@ -1953,13 +2004,13 @@ find_rank_bias <- function( #' cluster_col = "classified", #' if_log = FALSE #' ) -#' +#' #' rankdiff <- find_rank_bias( #' avg, #' cbmc_ref, #' query_genes = pbmc_vargenes #' ) -#' +#' #' qres <- query_rank_bias( #' rankdiff, #' "CD14+ Mono", @@ -1971,7 +2022,8 @@ query_rank_bias <- function( id_mat, id_ref) { res <- lapply(bias_list, function(x) { - x[id_mat, id_ref]}) + x[id_mat, id_ref] + }) resdf <- data.frame(unlist(res)) colnames(resdf) <- paste0(id_mat, "_vs_ ", id_ref) tibble::rownames_to_column(resdf, "gene") @@ -1989,19 +2041,19 @@ query_rank_bias <- function( #' cluster_col = "classified", #' if_log = FALSE #' ) -#' +#' #' rankdiff <- find_rank_bias( #' avg, #' cbmc_ref, #' query_genes = pbmc_vargenes #' ) -#' +#' #' qres <- query_rank_bias( #' rankdiff, #' "CD14+ Mono", #' "CD14+ Mono" #' ) -#' +#' #' g <- plot_rank_bias( #' qres #' ) @@ -2016,55 +2068,69 @@ plot_rank_bias <- function( if (nrow(genes_high) == 0) { go_high <- "" } else { - res_high <- suppressMessages(gprofiler2::gost(query = genes_high$gene, - organism = "hsapiens", - sources = "GO:BP", - correction_method = "fdr", - evcodes = TRUE)) + res_high <- suppressMessages(gprofiler2::gost( + query = genes_high$gene, + organism = "hsapiens", + sources = "GO:BP", + correction_method = "fdr", + evcodes = TRUE + )) if (is.null(res_high)) { go_high <- "" } else { - go_high <- paste0(dplyr::slice(dplyr::filter(res_high[[1]], intersection_size > 1), seq_len(10))$term_name, - collapse = "\n") + go_high <- paste0(dplyr::slice(dplyr::filter(res_high[[1]], intersection_size > 1), seq_len(10))$term_name, + collapse = "\n" + ) } } if (nrow(genes_low) == 0) { go_low <- "" } else { - res_low <- suppressMessages(gprofiler2::gost(query = genes_low$gene, - organism = "hsapiens", - sources = "GO:BP", - correction_method = "fdr", - evcodes = TRUE)) + res_low <- suppressMessages(gprofiler2::gost( + query = genes_low$gene, + organism = "hsapiens", + sources = "GO:BP", + correction_method = "fdr", + evcodes = TRUE + )) if (is.null(res_low)) { go_low <- "" } else { - go_low <- paste0(dplyr::slice(dplyr::filter(res_low[[1]], intersection_size > 1), seq_len(10))$term_name, - collapse = "\n") + go_low <- paste0(dplyr::slice(dplyr::filter(res_low[[1]], intersection_size > 1), seq_len(10))$term_name, + collapse = "\n" + ) } } - + col <- colnames(bias_df)[2] - g <- ggplot2::ggplot(bias_df, ggplot2::aes(!!dplyr::sym(col))) + + g <- ggplot2::ggplot(bias_df, ggplot2::aes(!!dplyr::sym(col))) + ggplot2::geom_bar() + ggplot2::geom_bar(data = genes_high, color = "red", fill = "red") + ggplot2::geom_bar(data = genes_low, color = "blue", fill = "blue") + - cowplot::theme_cowplot() + - ggplot2::theme(axis.line.y = ggplot2::element_blank(), - axis.title.y = ggplot2::element_blank(), - axis.text.y = ggplot2::element_blank(), - axis.ticks.y = ggplot2::element_blank()) + - ggplot2::annotate("text", - x = max(bias_df[[2]]) * 1.1, y = nrow(bias_df)/70, - label= go_high, color = "red", size = 2, - hjust = 0) + + cowplot::theme_cowplot() + + ggplot2::theme( + axis.line.y = ggplot2::element_blank(), + axis.title.y = ggplot2::element_blank(), + axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank() + ) + + ggplot2::annotate("text", + x = max(bias_df[[2]]) * 1.1, y = nrow(bias_df) / 70, + label = go_high, color = "red", size = 2, + hjust = 0 + ) + ggplot2::annotate("text", - x = min(bias_df[[2]]) * 1.1, y = nrow(bias_df)/70, - label= go_low, color = "blue", size = 2, - hjust = 1) + - ggplot2::coord_cartesian(clip = "off", xlim = c(-max(abs(bias_df[[2]])) * 3, - max(abs(bias_df[[2]])) * 3), - ylim = c(0, nrow(bias_df)/50)) + x = min(bias_df[[2]]) * 1.1, y = nrow(bias_df) / 70, + label = go_low, color = "blue", size = 2, + hjust = 1 + ) + + ggplot2::coord_cartesian( + clip = "off", xlim = c( + -max(abs(bias_df[[2]])) * 3, + max(abs(bias_df[[2]])) * 3 + ), + ylim = c(0, nrow(bias_df) / 50) + ) } @@ -2083,7 +2149,7 @@ plot_rank_bias <- function( #' @export append_genes <- function(gene_vector, ref_matrix) { missing_rows <- setdiff(gene_vector, rownames(ref_matrix)) - + zeroExpressionMatrix <- matrix( 0, nrow = length(missing_rows), @@ -2092,7 +2158,7 @@ append_genes <- function(gene_vector, ref_matrix) { rownames(zeroExpressionMatrix) <- missing_rows colnames(zeroExpressionMatrix) <- colnames(ref_matrix) - + full_matrix <- rbind(ref_matrix, zeroExpressionMatrix) full_matrix <- full_matrix[gene_vector, ] full_matrix @@ -2115,22 +2181,18 @@ check_raw_counts <- function(counts_matrix, max_log_value = 50) { } if (is.integer(counts_matrix)) { return("raw counts") - } - else if (is.double(counts_matrix)) { + } else if (is.double(counts_matrix)) { if (all(counts_matrix == floor(counts_matrix))) { return("raw counts") } if (max(counts_matrix) > max_log_value) { return("normalized") - } - else if (min(counts_matrix) < 0) { + } else if (min(counts_matrix) < 0) { stop("negative values detected, likely scaled data") - } - else { + } else { return("log-normalized") } - } - else { + } else { stop("unknown matrix format: ", typeof(counts_matrix)) } } @@ -2150,15 +2212,16 @@ check_raw_counts <- function(counts_matrix, max_log_value = 50) { #' @return Combined matrix with all genes given #' @examples #' pbmc_ref_matrix <- average_clusters( -#' mat = pbmc_matrix_small, -#' metadata = pbmc_meta, -#' cluster_col = "classified", -#' if_log = TRUE # whether the expression matrix is already log transformed +#' mat = pbmc_matrix_small, +#' metadata = pbmc_meta, +#' cluster_col = "classified", +#' if_log = TRUE # whether the expression matrix is already log transformed #' ) #' references_to_combine <- list(pbmc_ref_matrix, cbmc_ref) #' atlas <- build_atlas(NULL, human_genes_10x, references_to_combine, NULL) #' @export -build_atlas <- function(matrix_fns = NULL, +build_atlas <- function( + matrix_fns = NULL, genes_fn, matrix_objs = NULL, output_fn = NULL) { @@ -2167,12 +2230,10 @@ build_atlas <- function(matrix_fns = NULL, ref_mats <- lapply(matrix_fns, readRDS) if (is.null(names(matrix_fns))) { names(ref_mats) <- stringr::str_remove(basename(matrix_fns), ".rds$") - } - else { + } else { names(ref_mats) <- names(matrix_fns) } - } - else if (is.null(matrix_fns) && !is.null(matrix_objs)) { + } else if (is.null(matrix_fns) && !is.null(matrix_objs)) { ref_mats <- matrix_objs if (is.null(names(matrix_objs))) { names(ref_mats) <- seq_len(length(matrix_objs)) @@ -2207,8 +2268,7 @@ build_atlas <- function(matrix_fns = NULL, if (!is.null(output_fn)) { saveRDS(atlas, output_fn) - } - else { + } else { return(atlas) } } @@ -2272,7 +2332,7 @@ make_comb_ref <- function(ref_mat, #' @examples #' \dontrun{ #' avg <- average_clusters( -#' pbmc_matrix_small, +#' pbmc_matrix_small, #' pbmc_meta$seurat_clusters #' ) #' res <- clustify( @@ -2290,8 +2350,8 @@ make_comb_ref <- function(ref_mat, #' threshold = 0.8 #' ) #' res_rank <- assess_rank_bias( -#' avg, -#' cbmc_ref, +#' avg, +#' cbmc_ref, #' res = top_call #' ) #' } @@ -2311,57 +2371,58 @@ assess_rank_bias <- function( query_genes = query_genes ) rbiases <- list() - for(i in seq_len(nrow(res))){ - id <- res[[1]][i] - ct <- res[[2]][i] - if (ct == "unassigned") { - if (expand_unassigned) { - message("checking unassigned types against every ref type") - rb <- lapply(colnames(ref_mat), function(x) { - query_rank_bias( - rankdiff, - id, - x - ) - }) - rbiases[i] <- list(NULL) - rbiases <- append(rbiases, rb) + for (i in seq_len(nrow(res))) { + id <- res[[1]][i] + ct <- res[[2]][i] + if (ct == "unassigned") { + if (expand_unassigned) { + message("checking unassigned types against every ref type") + rb <- lapply(colnames(ref_mat), function(x) { + query_rank_bias( + rankdiff, + id, + x + ) + }) + rbiases[i] <- list(NULL) + rbiases <- append(rbiases, rb) + } else { + rbiases[i] <- list(NULL) + } } else { - rbiases[i] <- list(NULL) + rb <- query_rank_bias( + rankdiff, + id, + ct + ) + rbiases <- append(rbiases, list(rb)) } - } else { - rb <- query_rank_bias( - rankdiff, - id, - ct - ) - rbiases <- append(rbiases, list(rb)) - } } - + if (!(is.null(rds_name))) { saveRDS(rbiases, paste0(rds_name, ".rds")) } message("Using gprofiler2 for GO analyses (internet connection required)") plts <- lapply(rbiases, function(x) { - if (is.null(x)) { - return(NULL) - } else { - plot_rank_bias(x, organism = organism) - } + if (is.null(x)) { + return(NULL) + } else { + plot_rank_bias(x, organism = organism) + } }) plts <- plts[!unlist(lapply(plts, function(x) is.null(x)))] if (!(is.null(plot_name))) { - p <- cowplot::plot_grid(plotlist = plts, ncol = 1) + p <- cowplot::plot_grid(plotlist = plts, ncol = 1) ggplot2::ggsave(paste0(plot_name, ".pdf"), - p, - width = 6, - height = 4 * length(rbiases), - limitsize = FALSE) + p, + width = 6, + height = 4 * length(rbiases), + limitsize = FALSE + ) } plts } - + #' Distance calculations for spatial coord #' @param coord dataframe or matrix of spatial coordinates, cell barcode as rownames #' @param metadata data.frame or vector containing cluster assignments per cell. @@ -2372,13 +2433,15 @@ assess_rank_bias <- function( #' @return min distance matrix #' @examples #' cbs <- paste0("cb_", 1:100) -#' -#' spatial_coords <- data.frame(row.names = cbs, -#' X = runif(100), -#' Y = runif(100)) +#' +#' spatial_coords <- data.frame( +#' row.names = cbs, +#' X = runif(100), +#' Y = runif(100) +#' ) #' group_ids <- sample(c("A", "B"), 100, replace = TRUE) #' dist_res <- calc_distance( -#' spatial_coords, +#' spatial_coords, #' group_ids #' ) #' @export @@ -2389,20 +2452,22 @@ calc_distance <- function( collapse_to_cluster = FALSE) { distm <- as.matrix(stats::dist(coord)) res <- average_clusters(distm, - metadata, - cluster_col, - if_log = FALSE, - output_log = FALSE, - method = "min") + metadata, + cluster_col, + if_log = FALSE, + output_log = FALSE, + method = "min" + ) if (collapse_to_cluster) { res2 <- average_clusters(t(res), - metadata, - cluster_col, - if_log = FALSE, - output_log = FALSE, - method = "min") + metadata, + cluster_col, + if_log = FALSE, + output_log = FALSE, + method = "min" + ) res2 } else { res } -} \ No newline at end of file +} diff --git a/README.Rmd b/README.Rmd index b13f099c..86dde1fc 100644 --- a/README.Rmd +++ b/README.Rmd @@ -4,10 +4,10 @@ output: github_document ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/", - dpi = 300 + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/", + dpi = 300 ) ``` @@ -62,10 +62,10 @@ library(clustifyr) # calculate correlation res <- clustify( - input = pbmc_matrix_small, - metadata = pbmc_meta$classified, - ref_mat = cbmc_ref, - query_genes = pbmc_vargenes + input = pbmc_matrix_small, + metadata = pbmc_meta$classified, + ref_mat = cbmc_ref, + query_genes = pbmc_vargenes ) # print assignments @@ -73,9 +73,9 @@ cor_to_call(res) # plot assignments on a projection plot_best_call( - cor_mat = res, - metadata = pbmc_meta, - cluster_col = "classified" + cor_mat = res, + metadata = pbmc_meta, + cluster_col = "classified" ) ``` @@ -84,27 +84,27 @@ plot_best_call( ```{r example_seurat, warning=F, message=F} # for SingleCellExperiment clustify( - input = sce_small, # an SCE object - ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type - cluster_col = "cell_type1", # name of column in meta.data containing cell clusters - obj_out = TRUE # output SCE object with cell type inserted as "type" column -) + input = sce_small, # an SCE object + ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type + cluster_col = "cell_type1", # name of column in meta.data containing cell clusters + obj_out = TRUE # output SCE object with cell type inserted as "type" column +) library(Seurat) # for Seurat3/4 clustify( - input = s_small3, - cluster_col = "RNA_snn_res.1", - ref_mat = cbmc_ref, - seurat_out = TRUE + input = s_small3, + cluster_col = "RNA_snn_res.1", + ref_mat = cbmc_ref, + seurat_out = TRUE ) # New output option, directly as a vector (in the order of the metadata), which can then be inserted into metadata dataframes and other workflows clustify( - input = s_small3, - cluster_col = "RNA_snn_res.1", - ref_mat = cbmc_ref, - vec_out = TRUE + input = s_small3, + cluster_col = "RNA_snn_res.1", + ref_mat = cbmc_ref, + vec_out = TRUE ) ``` @@ -113,14 +113,14 @@ New reference matrix can be made directly from `SingleCellExperiment` and `Seura ```{r example_ref_matrix} # make reference from SingleCellExperiment objects sce_ref <- object_ref( - input = sce_small, # SCE object - cluster_col = "cell_type1" # name of column in colData containing cell identities + input = sce_small, # SCE object + cluster_col = "cell_type1" # name of column in colData containing cell identities ) # make reference from seurat objects s_ref <- seurat_ref( - seurat_object = s_small3, - cluster_col = "RNA_snn_res.1" + seurat_object = s_small3, + cluster_col = "RNA_snn_res.1" ) head(s_ref) @@ -130,19 +130,19 @@ head(s_ref) ```{r example_seurat3, warning=F, message=F} clustify_lists( - input = pbmc_matrix_small, - metadata = pbmc_meta, - cluster_col = "classified", - marker = pbmc_markers, - marker_inmatrix = FALSE + input = pbmc_matrix_small, + metadata = pbmc_meta, + cluster_col = "classified", + marker = pbmc_markers, + marker_inmatrix = FALSE ) clustify_lists( - input = s_small3, - marker = pbmc_markers, - marker_inmatrix = FALSE, - cluster_col = "RNA_snn_res.1", - seurat_out = TRUE + input = s_small3, + marker = pbmc_markers, + marker_inmatrix = FALSE, + cluster_col = "RNA_snn_res.1", + seurat_out = TRUE ) ``` diff --git a/man/clustifyr-package.Rd b/man/clustifyr-package.Rd index 7aac7efd..95d3964f 100644 --- a/man/clustifyr-package.Rd +++ b/man/clustifyr-package.Rd @@ -18,7 +18,7 @@ Useful links: } \author{ -\strong{Maintainer}: Kent Riemondy \email{kent.riemondy@cuanschutz.edu} +\strong{Maintainer}: Kent Riemondy \email{kent.riemondy@gmail.com} Authors: \itemize{ diff --git a/man/object_loc_lookup.Rd b/man/object_loc_lookup.Rd index bc1b2656..077db2ad 100644 --- a/man/object_loc_lookup.Rd +++ b/man/object_loc_lookup.Rd @@ -6,6 +6,11 @@ \usage{ object_loc_lookup() } +\value{ +A list populated with standardized functions to +access relevant data structures in multiple single cell +data formats. +} \description{ lookup table for single cell object structures } diff --git a/tests/testthat/test_cor.R b/tests/testthat/test_cor.R index f8fe0862..f8f988c7 100644 --- a/tests/testthat/test_cor.R +++ b/tests/testthat/test_cor.R @@ -118,7 +118,6 @@ test_that("test permutation", { so <- so_pbmc() test_that("seurat object clustifying", { - res <- clustify(so, cbmc_ref, cluster_col = "seurat_clusters", @@ -151,9 +150,9 @@ test_that("seurat object clustifying", { test_that("object with passing vector as metadata", { res <- clustify(so, - cbmc_ref, - metadata = so$seurat_clusters, - dr = "umap" + cbmc_ref, + metadata = so$seurat_clusters, + dr = "umap" ) res <- clustify_lists( so, @@ -180,9 +179,8 @@ test_that("clustify reinserts seurat metadata correctly", { # all input data identical on return expect_true(all(so@meta.data == res@meta.data[, colnames(so@meta.data)])) # clustifyr results present - expect_true(all(c("umap_1", "umap_2", "type", "r") %in% - colnames(res@meta.data))) - + expect_true(all(c("umap_1", "umap_2", "type", "r") %in% + colnames(res@meta.data))) }) test_that("get_similarity handles NA entries", { @@ -310,8 +308,10 @@ test_that("sparse matrix is accepted as input", { cluster_col = "seurat_clusters", verbose = TRUE ) - ex <- c(length(unique(pbmc_meta$seurat_clusters)), - ncol(cbmc_ref)) + ex <- c( + length(unique(pbmc_meta$seurat_clusters)), + ncol(cbmc_ref) + ) expect_equal(dim(res), ex) }) @@ -445,7 +445,7 @@ test_that("sce object clustify_lists", { panm <- data.frame(other, delta) res <- clustify_lists( - sce, + sce, marker = panm, cluster_col = "clusters", obj_out = FALSE, @@ -486,74 +486,74 @@ test_that("clustify_lists filters low cell number clusters", { test_that("clustify n_genes options limits number of variable genes", { res <- clustify(so, - cbmc_ref, - cluster_col = "seurat_clusters", - dr = "umap", - obj_out = FALSE + cbmc_ref, + cluster_col = "seurat_clusters", + dr = "umap", + obj_out = FALSE ) res2 <- clustify(so, - cbmc_ref, - n_genes = 2, - cluster_col = "seurat_clusters", - dr = "umap", - obj_out = FALSE + cbmc_ref, + n_genes = 2, + cluster_col = "seurat_clusters", + dr = "umap", + obj_out = FALSE ) - expect_true(res[1,1] != res2[1,1]) + expect_true(res[1, 1] != res2[1, 1]) }) test_that("clustify n_genes options ignored if too large", { res <- clustify(so, - cbmc_ref, - cluster_col = "seurat_clusters", - dr = "umap", - n_genes = 2e3, - obj_out = FALSE + cbmc_ref, + cluster_col = "seurat_clusters", + dr = "umap", + n_genes = 2e3, + obj_out = FALSE ) res2 <- clustify(so, - cbmc_ref, - n_genes = 2e6, - cluster_col = "seurat_clusters", - dr = "umap", - obj_out = FALSE + cbmc_ref, + n_genes = 2e6, + cluster_col = "seurat_clusters", + dr = "umap", + obj_out = FALSE ) expect_true(all.equal(res, res2)) }) test_that("pseudobulk using median", { - res1 <- clustify( - input = pbmc_matrix_small, - metadata = pbmc_meta, - ref_mat = cbmc_ref, - query_genes = pbmc_vargenes, - cluster_col = "classified", - pseudobulk_method = "median" - ) - - res_full <- clustify( - input = pbmc_matrix_small, - metadata = pbmc_meta, - ref_mat = cbmc_ref, - query_genes = pbmc_vargenes, - cluster_col = "classified", - pseudobulk_method = "median", - n_perm = 2, - return_full = TRUE - ) - - expect_equal(res1, res_full$score) - expect_equal(length(res_full), 2) - expect_true(all(res_full$p_val >= 0 | res_full$p_val <= 0)) + res1 <- clustify( + input = pbmc_matrix_small, + metadata = pbmc_meta, + ref_mat = cbmc_ref, + query_genes = pbmc_vargenes, + cluster_col = "classified", + pseudobulk_method = "median" + ) + + res_full <- clustify( + input = pbmc_matrix_small, + metadata = pbmc_meta, + ref_mat = cbmc_ref, + query_genes = pbmc_vargenes, + cluster_col = "classified", + pseudobulk_method = "median", + n_perm = 2, + return_full = TRUE + ) + + expect_equal(res1, res_full$score) + expect_equal(length(res_full), 2) + expect_true(all(res_full$p_val >= 0 | res_full$p_val <= 0)) }) test_that("an informative error if matrix lacks colnames", { - tmp_mat <- pbmc_matrix_small - colnames(tmp_mat) <- NULL - expect_error(clustify( - input = tmp_mat, - metadata = pbmc_meta, - ref_mat = cbmc_ref, - query_genes = pbmc_vargenes, - cluster_col = "classified", - verbose = FALSE - )) -}) \ No newline at end of file + tmp_mat <- pbmc_matrix_small + colnames(tmp_mat) <- NULL + expect_error(clustify( + input = tmp_mat, + metadata = pbmc_meta, + ref_mat = cbmc_ref, + query_genes = pbmc_vargenes, + cluster_col = "classified", + verbose = FALSE + )) +}) diff --git a/tests/testthat/test_gsea.R b/tests/testthat/test_gsea.R index 5cfe9505..6192056b 100644 --- a/tests/testthat/test_gsea.R +++ b/tests/testthat/test_gsea.R @@ -2,12 +2,12 @@ context("run_gsea") # use capture.output to quiet progress bar from fgsea shush <- function(...) { - capture.output(..., file = nullfile()) + capture.output(..., file = nullfile()) } test_that("output is correctly formatted", { data("pbmc_vargenes") - + shush(res <- run_gsea( pbmc_matrix_small, query_genes = pbmc_vargenes[1:100], @@ -37,29 +37,27 @@ test_that("run_gsea warns slow runs", { data("pbmc_vargenes") expect_warning( - shush(res <- run_gsea(pbmc_matrix_small[, 1:3], - query_genes = pbmc_vargenes[1:2], - n_perm = 10001, - per_cell = TRUE, - cluster_ids = pbmc_meta$classified, - no_warnings = TRUE - ) - ) + shush(res <- run_gsea(pbmc_matrix_small[, 1:3], + query_genes = pbmc_vargenes[1:2], + n_perm = 10001, + per_cell = TRUE, + cluster_ids = pbmc_meta$classified, + no_warnings = TRUE + )) ) }) test_that("run_gsea warning suppression", { data("pbmc_vargenes") expect_warning( - shush(res <- run_gsea( + shush(res <- run_gsea( pbmc_matrix_small[, 1:3], query_genes = pbmc_vargenes[1:2], n_perm = 1, per_cell = TRUE, cluster_ids = pbmc_meta$classified, no_warnings = FALSE - ) - ) + )) ) }) @@ -101,6 +99,6 @@ test_that("plot_pathway_gsea gives output depending on returning option", { ) shush(g <- plot_pathway_gsea(pbmc_avg, gl, 5, returning = "plot")) shush(g2 <- plot_pathway_gsea(pbmc_avg, gl, 5, returning = "res")) - + expect_true(is(g, "Heatmap") & is.data.frame(g2)) }) diff --git a/tests/testthat/test_list.R b/tests/testthat/test_list.R index e4ec3d93..2bdfae94 100644 --- a/tests/testthat/test_list.R +++ b/tests/testthat/test_list.R @@ -1,7 +1,7 @@ context("compare_list") # use capture.output to quiet progress bar from fgsea shush <- function(...) { - capture.output(..., file = nullfile()) + capture.output(..., file = nullfile()) } test_that("warning if matrix is not binarized", { @@ -40,8 +40,7 @@ test_that("run all gene list functions", { metric = x ) } - ) - ) + )) expect_equal(4, length(results)) }) @@ -49,8 +48,8 @@ test_that("run all gene list functions", { test_that("output intersected genes with details_out option with hyper/jaccard", { pbmc_mm <- matrixize_markers(pbmc_markers) pbmc_avg <- average_clusters(pbmc_matrix_small, - pbmc_meta, - cluster_col = "classified" + pbmc_meta, + cluster_col = "classified" ) pbmc_avgb <- binarize_expr(pbmc_avg) gene_list_methods <- c("hyper", "jaccard") @@ -58,14 +57,13 @@ test_that("output intersected genes with details_out option with hyper/jaccard", gene_list_methods, function(x) { compare_lists(pbmc_avgb, - pbmc_mm, - metric = x, - details_out = TRUE + pbmc_mm, + metric = x, + details_out = TRUE ) } - ) - ) - + )) + expect_equal(2, length(results)) }) @@ -102,8 +100,7 @@ test_that("run all gene list functions in clustify_lists", { metric = x ) } - ) - ) + )) expect_equal(4, length(results)) }) @@ -118,7 +115,7 @@ test_that("gsea outputs in cor matrix format", { marker_inmatrix = FALSE, metric = "gsea" )) - + res2 <- cor_to_call(res) expect_equal(9, nrow(res2)) @@ -252,14 +249,14 @@ test_that("lists of genes will work with posneg", { test_that("clustify_lists input_markers mode", { pbmc_mm <- matrixize_markers(pbmc_markers) - pbmc_input_mm <- pos_neg_marker(pbmc_mm[1:3,]) + pbmc_input_mm <- pos_neg_marker(pbmc_mm[1:3, ]) results <- lapply( c("hyper", "spearman"), function(x) { clustify_lists(pbmc_input_mm, - pbmc_mm, - metric = x, - input_markers = TRUE + pbmc_mm, + metric = x, + input_markers = TRUE ) } ) @@ -268,17 +265,16 @@ test_that("clustify_lists input_markers mode", { test_that("clustify_lists input_markers mode with uneven number of marker per cluster", { pbmc_mm <- matrixize_markers(pbmc_markers) - pbmc_input_mm <- pos_neg_marker(pbmc_mm[1:3,]) + pbmc_input_mm <- pos_neg_marker(pbmc_mm[1:3, ]) results <- lapply( c("jaccard"), function(x) { clustify_lists(pbmc_input_mm, - split(pbmc_markers$gene, pbmc_markers$cluster), - metric = x, - input_markers = TRUE + split(pbmc_markers$gene, pbmc_markers$cluster), + metric = x, + input_markers = TRUE ) } ) expect_equal(1, length(results)) }) - diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 8bcbe9b4..1bd54baa 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -111,32 +111,32 @@ test_that("average_clusters works with median option", { test_that("average_clusters works with trimean option", { pbmc_avg2 <- average_clusters(pbmc_matrix_small, - pbmc_meta, - method = "trimean", - cluster_col = "classified" + pbmc_meta, + method = "trimean", + cluster_col = "classified" ) expect_equal(nrow(pbmc_avg2), 2000) }) test_that("average_clusters works with truncate option", { pbmc_avg2 <- average_clusters(pbmc_matrix_small, - pbmc_meta, - method = "truncate", - cluster_col = "classified" + pbmc_meta, + method = "truncate", + cluster_col = "classified" ) expect_equal(nrow(pbmc_avg2), 2000) }) test_that("average_clusters works with max and min options", { pbmc_avg2 <- average_clusters(pbmc_matrix_small, - pbmc_meta, - method = "max", - cluster_col = "classified" + pbmc_meta, + method = "max", + cluster_col = "classified" ) pbmc_avg3 <- average_clusters(pbmc_matrix_small, - pbmc_meta, - method = "min", - cluster_col = "classified" + pbmc_meta, + method = "min", + cluster_col = "classified" ) expect_equal(nrow(pbmc_avg2), 2000) }) @@ -366,7 +366,6 @@ test_that("gene_pct_markerm norm options work", { so <- so_pbmc() sce <- sce_pbmc() test_that("clustify_nudge works with seurat_out", { - res <- clustify_nudge( input = so, ref_mat = cbmc_ref, @@ -409,7 +408,7 @@ test_that("clustify_nudge.Seurat works with seurat_out option", { dr = "umap" ) expect_true(is(res, "Seurat")) - + res <- clustify_nudge( input = so, ref_mat = cbmc_ref, @@ -425,7 +424,6 @@ test_that("clustify_nudge.Seurat works with seurat_out option", { }) test_that("clustify_nudge works with obj_out option", { - res <- clustify_nudge( input = so, ref_mat = cbmc_ref, @@ -437,7 +435,7 @@ test_that("clustify_nudge works with obj_out option", { mode = "pct", dr = "umap" ) - + expect_true(is(res, "Seurat")) res2 <- clustify_nudge( @@ -604,14 +602,16 @@ test_that("object_ref with SingleCellExperiment", { avg <- object_ref(sce, cluster_col = "clusters" ) - expect_equal(dim(avg), - c(nrow(sce), length(unique(sce$clusters)))) + expect_equal( + dim(avg), + c(nrow(sce), length(unique(sce$clusters))) + ) }) test_that("object_ref gets correct averages", { avg <- object_ref(so, - cluster_col = "seurat_clusters", + cluster_col = "seurat_clusters", var_genes_only = TRUE ) expect_true(ncol(avg) == length(unique(so$seurat_clusters))) @@ -628,7 +628,7 @@ test_that("seurat_ref gets correct averages with Seurat object", { rna_assay <- tmp[["RNA"]] Key(rna_assay) <- "rna2_" tmp[["RNA2"]] <- rna_assay - + avg2 <- seurat_ref( tmp, cluster_col = "seurat_clusters", @@ -639,9 +639,8 @@ test_that("seurat_ref gets correct averages with Seurat object", { }) test_that("object parsing works for custom object", { - res2 <- clustify(so, - cbmc_ref, + cbmc_ref, cluster_col = "seurat_clusters", obj_out = FALSE ) @@ -703,7 +702,6 @@ test_that("cor_to_call renaming with suffix input works as intended, per_cell or }) test_that("renaming with suffix input works as intended with clusify wrapper", { - res <- clustify( input = so, ref_mat = cbmc_ref, @@ -785,7 +783,7 @@ test_that("clustify_nudge works with pos_neg_select", { cluster_col = "classified", norm = 0.5 ) - expect_true(all(dim(res) == c(length(unique(so$classified)), 3))) + expect_true(all(dim(res) == c(length(unique(so$classified)), 3))) }) test_that("reverse_marker_matrix takes matrix of markers input", { @@ -1071,7 +1069,7 @@ test_that("find_rank_bias and query_rank_bias run correctly", { "CD14+ Mono", "CD14+ Mono" ) - expect_true(all(dim(qres) == c(599,2))) + expect_true(all(dim(qres) == c(599, 2))) }) @@ -1149,18 +1147,18 @@ test_that("object_data respects DefaultAssay in seurat object", { test_that("append_genes pads matrix with supplied genes", { mat <- append_genes( - gene_vector = human_genes_10x, - ref_matrix = cbmc_ref + gene_vector = human_genes_10x, + ref_matrix = cbmc_ref ) - + expect_equal(nrow(mat), length(human_genes_10x)) - + mat <- append_genes( - gene_vector = human_genes_10x, - ref_matrix = pbmc_matrix_small + gene_vector = human_genes_10x, + ref_matrix = pbmc_matrix_small ) expect_equal(nrow(mat), length(human_genes_10x)) - + og_mat <- get_seurat_matrix(so) mat <- append_genes( gene_vector = human_genes_10x, @@ -1170,25 +1168,25 @@ test_that("append_genes pads matrix with supplied genes", { }) test_that("check data type of matrices", { - mat_type <- check_raw_counts( - counts_matrix = pbmc_matrix_small, - max_log_value = 50 - ) - expect_equal(mat_type, "log-normalized") - - m <- matrix(sample(10:200, 100, replace = TRUE)) - mat_type <- check_raw_counts( - counts_matrix = m, - max_log_value = 50 - ) - expect_equal(mat_type, "raw counts") - - og_mat <- get_seurat_matrix(so) - mat_type <- check_raw_counts( - counts_matrix = og_mat, - max_log_value = 50 - ) - expect_true(mat_type == "log-normalized") + mat_type <- check_raw_counts( + counts_matrix = pbmc_matrix_small, + max_log_value = 50 + ) + expect_equal(mat_type, "log-normalized") + + m <- matrix(sample(10:200, 100, replace = TRUE)) + mat_type <- check_raw_counts( + counts_matrix = m, + max_log_value = 50 + ) + expect_equal(mat_type, "raw counts") + + og_mat <- get_seurat_matrix(so) + mat_type <- check_raw_counts( + counts_matrix = og_mat, + max_log_value = 50 + ) + expect_true(mat_type == "log-normalized") }) test_that("check atlas successfully built", { @@ -1207,82 +1205,81 @@ test_that("make_comb_ref works as intended", { ref <- make_comb_ref( cbmc_ref, sep = "_+_" - ) + ) expect_true(nrow(ref) == 2000 && ncol(ref) == 91) }) test_that("calc_distance works as intended", { res <- calc_distance( - SeuratObject::Embeddings(so, "umap"), + SeuratObject::Embeddings(so, "umap"), so$seurat_clusters, collapse_to_cluster = T - ) + ) n_grps <- length(unique(so$seurat_clusters)) ex_dim <- c(n_grps, n_grps) expect_equal(dim(res), ex_dim) }) test_that("vec_out option works for clustify", { - if(is_seurat_v5()) { - mat <- SeuratObject::LayerData(so, "data") - } else{ - mat <- SeuratObject::GetAssayData(so, "data") - } - + if (is_seurat_v5()) { + mat <- SeuratObject::LayerData(so, "data") + } else { + mat <- SeuratObject::GetAssayData(so, "data") + } + res <- clustify(mat, - metadata = so@meta.data, - ref_mat = cbmc_ref, - cluster_col = "seurat_clusters", - vec_out = TRUE + metadata = so@meta.data, + ref_mat = cbmc_ref, + cluster_col = "seurat_clusters", + vec_out = TRUE ) - + res2 <- clustify(so, - ref_mat = cbmc_ref, - cluster_col = "seurat_clusters", - rename_prefix = "abc", - vec_out = TRUE + ref_mat = cbmc_ref, + cluster_col = "seurat_clusters", + rename_prefix = "abc", + vec_out = TRUE ) - + res3 <- clustify(sce, - cbmc_ref, - cluster_col = "clusters", - vec_out = TRUE + cbmc_ref, + cluster_col = "clusters", + vec_out = TRUE ) - + expect_equal(length(res), ncol(mat)) expect_equal(length(res2), ncol(so)) expect_equal(length(res3), ncol(sce)) }) test_that("vec_out option works for clustify_lists", { - if(is_seurat_v5()) { - mat <- SeuratObject::LayerData(so, "data") - } else{ - mat <- SeuratObject::GetAssayData(so, "data") + if (is_seurat_v5()) { + mat <- SeuratObject::LayerData(so, "data") + } else { + mat <- SeuratObject::GetAssayData(so, "data") } res <- clustify_lists(mat, - metadata = so@meta.data, - marker = cbmc_m, - cluster_col = "seurat_clusters", - vec_out = TRUE + metadata = so@meta.data, + marker = cbmc_m, + cluster_col = "seurat_clusters", + vec_out = TRUE ) - + res2 <- clustify_lists(so, - marker = cbmc_m, - cluster_col = "seurat_clusters", - rename_prefix = "abc", - vec_out = TRUE + marker = cbmc_m, + cluster_col = "seurat_clusters", + rename_prefix = "abc", + vec_out = TRUE ) - + res3 <- clustify_lists(sce, - marker = cbmc_m, - cluster_col = "clusters", - vec_out = TRUE + marker = cbmc_m, + cluster_col = "clusters", + vec_out = TRUE ) - + expect_equal(length(res), ncol(mat)) expect_equal(length(res2), ncol(so)) expect_equal(length(res3), ncol(sce)) - }) diff --git a/vignettes/clustifyr.Rmd b/vignettes/clustifyr.Rmd index 559b8438..99188483 100644 --- a/vignettes/clustifyr.Rmd +++ b/vignettes/clustifyr.Rmd @@ -31,12 +31,12 @@ vignette: > ```{r "knitr options", echo = FALSE, message = FALSE, warning = FALSE} knitr::opts_chunk$set( - message = FALSE, - warning = FALSE, - collapse = TRUE, - fig.align = "center", - comment = "#>", - crop = NULL + message = FALSE, + warning = FALSE, + collapse = TRUE, + fig.align = "center", + comment = "#>", + crop = NULL ) ``` @@ -89,11 +89,11 @@ When using a matrix of scRNA-seq counts `clustifyr()` will return a matrix of co vargenes <- pbmc_vargenes[1:500] res <- clustify( - input = pbmc_matrix, # matrix of normalized scRNA-seq counts (or SCE/Seurat object) - metadata = pbmc_meta, # meta.data table containing cell clusters - cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters - ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type - query_genes = vargenes # list of highly varible genes identified with Seurat + input = pbmc_matrix, # matrix of normalized scRNA-seq counts (or SCE/Seurat object) + metadata = pbmc_meta, # meta.data table containing cell clusters + cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters + ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type + query_genes = vargenes # list of highly varible genes identified with Seurat ) # Peek at correlation matrix @@ -101,16 +101,16 @@ res[1:5, 1:5] # Call cell types res2 <- cor_to_call( - cor_mat = res, # matrix correlation coefficients - cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters + cor_mat = res, # matrix correlation coefficients + cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters ) res2[1:5, ] # Insert into original metadata as "type" column pbmc_meta2 <- call_to_metadata( - res = res2, # data.frame of called cell type for each cluster - metadata = pbmc_meta, # original meta.data table containing cell clusters - cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters + res = res2, # data.frame of called cell type for each cluster + metadata = pbmc_meta, # original meta.data table containing cell clusters + cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters ) ``` @@ -128,15 +128,15 @@ plot_cor_heatmap(cor_mat = res) ```{r "Overlay corr coefficients on UMAP", fig.height = 3.5, fig.width = 9} # Overlay correlation coefficients on UMAPs for the first two cell types corr_umaps <- plot_cor( - cor_mat = res, # matrix of correlation coefficients from clustifyr() - metadata = pbmc_meta, # meta.data table containing UMAP or tSNE data - data_to_plot = colnames(res)[1:2], # name of cell type(s) to plot correlation coefficients - cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters + cor_mat = res, # matrix of correlation coefficients from clustifyr() + metadata = pbmc_meta, # meta.data table containing UMAP or tSNE data + data_to_plot = colnames(res)[1:2], # name of cell type(s) to plot correlation coefficients + cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters ) plot_grid( - plotlist = corr_umaps, - rel_widths = c(0.47, 0.53) + plotlist = corr_umaps, + rel_widths = c(0.47, 0.53) ) ``` @@ -147,24 +147,24 @@ The `plot_best_call()` function can be used to label each cluster with the cell ```{r "Label clusters", fig.height = 5.5, fig.width = 12} # Label clusters with clustifyr cell identities clustifyr_types <- plot_best_call( - cor_mat = res, # matrix of correlation coefficients from clustifyr() - metadata = pbmc_meta, # meta.data table containing UMAP or tSNE data - do_label = TRUE, # should the feature label be shown on each cluster? - do_legend = FALSE, # should the legend be shown? - do_repel = FALSE, # use ggrepel to avoid overlapping labels - cluster_col = "seurat_clusters" + cor_mat = res, # matrix of correlation coefficients from clustifyr() + metadata = pbmc_meta, # meta.data table containing UMAP or tSNE data + do_label = TRUE, # should the feature label be shown on each cluster? + do_legend = FALSE, # should the legend be shown? + do_repel = FALSE, # use ggrepel to avoid overlapping labels + cluster_col = "seurat_clusters" ) + - ggtitle("clustifyr cell types") + ggtitle("clustifyr cell types") # Compare clustifyr results with known cell identities known_types <- plot_dims( - data = pbmc_meta, # meta.data table containing UMAP or tSNE data - feature = "classified", # name of column in meta.data to color clusters by - do_label = TRUE, # should the feature label be shown on each cluster? - do_legend = FALSE, # should the legend be shown? - do_repel = FALSE + data = pbmc_meta, # meta.data table containing UMAP or tSNE data + feature = "classified", # name of column in meta.data to color clusters by + do_label = TRUE, # should the feature label be shown on each cluster? + do_legend = FALSE, # should the legend be shown? + do_repel = FALSE ) + - ggtitle("Known cell types") + ggtitle("Known cell types") plot_grid(known_types, clustifyr_types) ``` @@ -179,34 +179,34 @@ cbmc_m # Available metrics include: "hyper", "jaccard", "spearman", "gsea" list_res <- clustify_lists( - input = pbmc_matrix, # matrix of normalized single-cell RNA-seq counts - metadata = pbmc_meta, # meta.data table containing cell clusters - cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters - marker = cbmc_m, # list of known marker genes - metric = "pct" # test to use for assigning cell types + input = pbmc_matrix, # matrix of normalized single-cell RNA-seq counts + metadata = pbmc_meta, # meta.data table containing cell clusters + cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters + marker = cbmc_m, # list of known marker genes + metric = "pct" # test to use for assigning cell types ) # View as heatmap, or plot_best_call plot_cor_heatmap( - cor_mat = list_res, # matrix of correlation coefficients from clustify_lists() - cluster_rows = FALSE, # cluster by row? - cluster_columns = FALSE, # cluster by column? - legend_title = "% expressed" # title of heatmap legend + cor_mat = list_res, # matrix of correlation coefficients from clustify_lists() + cluster_rows = FALSE, # cluster by row? + cluster_columns = FALSE, # cluster by column? + legend_title = "% expressed" # title of heatmap legend ) # Downstream functions same as clustify() # Call cell types list_res2 <- cor_to_call( - cor_mat = list_res, # matrix correlation coefficients - cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters + cor_mat = list_res, # matrix correlation coefficients + cluster_col = "seurat_clusters" # name of column in meta.data containing cell clusters ) # Insert into original metadata as "list_type" column pbmc_meta3 <- call_to_metadata( - res = list_res2, # data.frame of called cell type for each cluster - metadata = pbmc_meta, # original meta.data table containing cell clusters - cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters - rename_prefix = "list_" # set a prefix for the new column + res = list_res2, # data.frame of called cell type for each cluster + metadata = pbmc_meta, # original meta.data table containing cell clusters + cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters + rename_prefix = "list_" # set a prefix for the new column ) ``` @@ -217,10 +217,10 @@ pbmc_meta3 <- call_to_metadata( library(SingleCellExperiment) sce <- sce_pbmc() res <- clustify( - input = sce, # an SCE object - ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type - cluster_col = "clusters", # name of column in meta.data containing cell clusters - obj_out = TRUE # output SCE object with cell type inserted as "type" column + input = sce, # an SCE object + ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type + cluster_col = "clusters", # name of column in meta.data containing cell clusters + obj_out = TRUE # output SCE object with cell type inserted as "type" column ) colData(res)[1:10, c("type", "r")] @@ -233,13 +233,14 @@ colData(res)[1:10, c("type", "r")] ```{r} so <- so_pbmc() res <- clustify( - input = so, # a Seurat object - ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type - cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters - obj_out = TRUE # output Seurat object with cell type inserted as "type" column + input = so, # a Seurat object + ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type + cluster_col = "seurat_clusters", # name of column in meta.data containing cell clusters + obj_out = TRUE # output Seurat object with cell type inserted as "type" column ) -res@meta.data[1:10, c("type", "r")] +# type and r are stored in the meta.data +res[[c("type", "r")]][1:10, ] ``` # Building reference matrix from single cell expression matrix @@ -248,22 +249,22 @@ In its simplest form, a reference matrix is built by averaging expression (also ```{r "average"} new_ref_matrix <- average_clusters( - mat = pbmc_matrix, - metadata = pbmc_meta$classified, # or use metadata = pbmc_meta, cluster_col = "classified" - if_log = TRUE # whether the expression matrix is already log transformed + mat = pbmc_matrix, + metadata = pbmc_meta$classified, # or use metadata = pbmc_meta, cluster_col = "classified" + if_log = TRUE # whether the expression matrix is already log transformed ) head(new_ref_matrix) # For further convenience, a shortcut function for generating reference matrix from `SingleCellExperiment` or `seurat` object is used. new_ref_matrix_sce <- object_ref( - input = sce, # SCE object - cluster_col = "clusters" # name of column in colData containing cell identities + input = sce, # SCE object + cluster_col = "clusters" # name of column in colData containing cell identities ) new_ref_matrix_so <- seurat_ref( - seurat_object = so, # Seurat object - cluster_col = "seurat_clusters" # name of column in meta.data containing cell identities + seurat_object = so, # Seurat object + cluster_col = "seurat_clusters" # name of column in meta.data containing cell identities ) tail(new_ref_matrix_so) diff --git a/vignettes/geo-annotations.Rmd b/vignettes/geo-annotations.Rmd index b49e7a8d..4231b3a0 100644 --- a/vignettes/geo-annotations.Rmd +++ b/vignettes/geo-annotations.Rmd @@ -9,8 +9,8 @@ vignette: > ```{r, include = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" + collapse = TRUE, + comment = "#>" ) ``` @@ -42,3 +42,9 @@ Moving forward, we suggest two remedies to this issue: 2. For single-cell mRNA-seq data, in addition to standard count matrices (genes-by-cells), we expect users to deposit metadata annotations generated during the course of analysis. Encouraging previous depositors of single-cell mRNA-seq data to update their GEO records with metadata, if it was not included in the original submission. + +# Session info + +```{r} +sessionInfo() +```