diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index f43bb1c54d..2cea4d61dd 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,7 +29,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 10bc73b83a..f8660de8ee 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 2.0.1 -Date: 2023-11-17 15:28:31 UTC -SHA: 2bf5f1166ee097878adef4e10d1a3923db44557b +Version: 2.1.0 +Date: 2024-02-23 19:16:02 UTC +SHA: 0d1bf8181d07c1754128b7edd9ae698c2d6cb22e diff --git a/DESCRIPTION b/DESCRIPTION index a9f6ac5c5c..ab1d47602c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: scCustomize Type: Package Title: Custom Visualizations & Functions for Streamlined Analyses of Single Cell Sequencing Description: Collection of functions created and/or curated to aid in the visualization and analysis of single-cell data using 'R'. 'scCustomize' aims to provide 1) Customized visualizations for aid in ease of use and to create more aesthetic and functional visuals. 2) Improve speed/reproducibility of common tasks/pieces of code in scRNA-seq analysis with a single or group of functions. For citation please use: Marsh SE (2021) "Custom Visualizations & Functions for Streamlined Analyses of Single Cell Sequencing" RRID:SCR_024675. 
-Version: 2.0.1 -Date: 2023-11-17 +Version: 2.1.0 +Date: 2024-02-21 Authors@R: c( person(given = "Samuel", family = "Marsh", email = "samuel.marsh@childrens.harvard.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3012-6945")), person(given = "Ming", family = "Tang", role = c("ctb"), email = "tangming2005@gmail.com"), @@ -38,7 +38,7 @@ Imports: patchwork, pbapply, purrr, - rlang (>= 1.0.1), + rlang (>= 1.1.3), scales, scattermore (>= 1.2), SeuratObject (>= 5.0.0), @@ -48,6 +48,7 @@ Imports: tibble, tidyr Suggests: + BiocFileCache, ComplexHeatmap, dittoSeq, DropletUtils, @@ -56,6 +57,7 @@ Suggests: knitr, Nebulosa, remotes, + reticulate, rliger, rmarkdown, scuttle, @@ -66,4 +68,4 @@ License: GPL (>= 3) Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index 443a865739..7506780eb4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,23 @@ # Generated by roxygen2: do not edit by hand +S3method(Add_Cell_Complexity,Seurat) +S3method(Add_Cell_Complexity,liger) +S3method(Add_Mito_Ribo,Seurat) +S3method(Add_Mito_Ribo,liger) S3method(Fetch_Meta,Seurat) S3method(Fetch_Meta,liger) +S3method(as.LIGER,Seurat) +S3method(as.LIGER,list) +S3method(as.Seurat,liger) +S3method(as.anndata,Seurat) +S3method(as.anndata,liger) +export(Add_Alt_Feature_ID) export(Add_CellBender_Diff) +export(Add_Cell_Complexity) export(Add_Cell_Complexity_LIGER) export(Add_Cell_Complexity_Seurat) export(Add_Cell_QC_Metrics) +export(Add_Mito_Ribo) export(Add_Mito_Ribo_LIGER) export(Add_Mito_Ribo_Seurat) export(Add_Pct_Diff) @@ -25,6 +37,7 @@ export(Cluster_Highlight_Plot) export(Cluster_Stats_All_Samples) export(Clustered_DotPlot) export(ColorBlind_Pal) +export(Convert_Assay) export(Copy_From_GCP) export(Copy_To_GCP) export(Create_10X_H5) @@ -42,6 +55,7 @@ export(Extract_Top_Markers) export(FeaturePlot_DualAssay) export(FeaturePlot_scCustom) export(FeatureScatter_scCustom) +export(Feature_Present) 
export(Fetch_Meta) export(Gene_Present) export(Hue_Pal) @@ -55,6 +69,7 @@ export(Iterate_Plot_Density_Custom) export(Iterate_Plot_Density_Joint) export(Iterate_VlnPlot_scCustom) export(JCO_Four) +export(LIGER_Features) export(Liger_to_Seurat) export(MAD_Stats) export(Median_Stats) @@ -120,14 +135,20 @@ export(Seq_QC_Plot_UMIs) export(Setup_scRNAseq_Project) export(Single_Color_Palette) export(Split_FeatureScatter) +export(Split_Layers) +export(Split_Vector) export(Stacked_VlnPlot) export(Store_Misc_Info_Seurat) export(Store_Palette_Seurat) export(Top_Genes_Factor) export(UnRotate_X) +export(Updated_HGNC_Symbols) export(VariableFeaturePlot_scCustom) export(Variable_Features_ALL_LIGER) export(VlnPlot_scCustom) +export(as.LIGER) +export(as.Seurat) +export(as.anndata) export(plotFactors_scCustom) export(scCustomize_Palette) export(theme_ggprism_mod) @@ -147,7 +168,6 @@ import(ggrastr) import(parallel) import(patchwork) import(pbapply) -import(rlang) importFrom(Matrix,readMM) importFrom(Matrix,rowSums) importFrom(Seurat,AddMetaData) @@ -167,6 +187,7 @@ importFrom(SeuratObject,Features) importFrom(SeuratObject,JoinLayers) importFrom(SeuratObject,LayerData) importFrom(SeuratObject,Layers) +importFrom(SeuratObject,as.Seurat) importFrom(circlize,colorRamp2) importFrom(cowplot,theme_cowplot) importFrom(data.table,fread) @@ -175,11 +196,13 @@ importFrom(dplyr,all_of) importFrom(dplyr,any_of) importFrom(dplyr,arrange) importFrom(dplyr,bind_rows) +importFrom(dplyr,contains) importFrom(dplyr,desc) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,grouped_df) importFrom(dplyr,intersect) +importFrom(dplyr,join_by) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n) @@ -191,6 +214,7 @@ importFrom(dplyr,slice) importFrom(dplyr,slice_max) importFrom(dplyr,summarise) importFrom(dplyr,summarize) +importFrom(dplyr,union) importFrom(forcats,fct_relevel) importFrom(ggbeeswarm,geom_quasirandom) importFrom(ggplot2,theme) @@ -198,6 +222,7 @@ 
importFrom(ggprism,theme_prism) importFrom(ggrepel,geom_label_repel) importFrom(ggrepel,geom_text_repel) importFrom(glue,glue_collapse) +importFrom(grDevices,col2rgb) importFrom(grDevices,dev.off) importFrom(grDevices,pdf) importFrom(grDevices,rainbow) @@ -224,12 +249,16 @@ importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_dbl) importFrom(purrr,reduce) +importFrom(rlang,"!!") +importFrom(rlang,"%||%") +importFrom(rlang,":=") importFrom(rlang,is_installed) importFrom(rlang,sym) importFrom(scales,alpha) importFrom(scales,hue_pal) importFrom(scales,label_percent) importFrom(scattermore,geom_scattermore) +importFrom(stats,complete.cases) importFrom(stats,cor) importFrom(stats,kmeans) importFrom(stats,mad) @@ -239,8 +268,12 @@ importFrom(stats,setNames) importFrom(stats,var) importFrom(stringi,stri_replace_first_fixed) importFrom(stringi,stri_replace_last_fixed) +importFrom(stringr,str_c) importFrom(stringr,str_detect) importFrom(stringr,str_extract) +importFrom(stringr,str_replace) +importFrom(stringr,str_replace_na) +importFrom(stringr,str_to_lower) importFrom(stringr,str_to_sentence) importFrom(stringr,str_to_upper) importFrom(stringr,str_wrap) @@ -248,7 +281,10 @@ importFrom(tibble,column_to_rownames) importFrom(tibble,rownames_to_column) importFrom(tibble,tribble) importFrom(tidyr,drop_na) +importFrom(tidyr,everything) +importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) +importFrom(tidyr,separate_wider_delim) importFrom(utils,combn) importFrom(utils,packageVersion) importFrom(utils,read.csv) diff --git a/NEWS.md b/NEWS.md index b3e522a89b..adcbdb7e90 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,55 @@ +# scCustomize 2.1.0 (2024-02-21) +## Added +- Added `title_prefix` parameter to `Iterate_DimPlot_bySample` to unify with `Meta_Highlight_Plot`. +- Added function `Split_Vector` to split vector in chunks of predetermined sizes. +- Added new function `Updated_HGNC_Symbols` to update human gene symbols. 
After first use does not require internet connection. +- Added command logging to QC metric-related commands using `Seurat::LogSeuratCommand()`. +- Added parameter `plot_legend` to `plotFactors_scCustom` to allow for removal of the legend in factor loading plots. +- Added new functionality to `Iterate_FeaturePlot_scCustom` to allow for plotting multiple plots per page when saving to single PDF document (see new parameters: `features_per_page` and `landscape`). +- Added `LIGER_Features` utility function for LIGER objects (analogous to `Seurat::Features()`). +- Added new generic `as.LIGER()` as enhanced method for conversion of Seurat objects or lists of Seurat objects to single LIGER object. +- Added new generic `as.anndata()` to support conversion of Seurat and LIGER objects to anndata format. +- Added function `Convert_Assay()` to perform easy conversions of Seurat assays between V3/4 (Assay) and V5 (Assay5) formats. +- Added parameter `assay_type` to allow manual control of assay type when creating Seurat object from LIGER using `Liger_to_Seurat`. Now `as.Seurat`. +- Added param `grid_color` to `Clustered_DotPlot` to control the color of grid lines (default is no grid lines). +- Added ability to split identities in `Clustered_DotPlot` by additional variable and maintain expression information. +- Added `Split_Layers()` function for V5 objects. +- Added `Add_Alt_Feature_ID` to add alternative feature ids to an Assay5 meta.data slot. + + + +## Changed +- **BREAKING CHANGES** Several methods have been moved to S3 generics to function for both Seurat and LIGER objects using single function name and therefore some function names have changed. Old functions will give deprecation warning and direct users to new functions. + - `Add_Mito_Ribo()` replaces `Add_Mito_Ribo_Seurat` and `Add_Mito_Ribo_LIGER`. + - `Add_Cell_Complexity()` replaces `Add_Cell_Complexity_Seurat` and `Add_Cell_Complexity_LIGER`. 
+- **BREAKING CHANGES** `Meta_Present_LIGER` has been deprecated and wrapped inside of `Meta_Present`. +- **SOFT-DEPRECATION** The function `Liger_to_Seurat()` has been soft-deprecated. It is replaced by new extension of Seurat generic `as.Seurat` with added support for Liger objects, using all the same parameters as `Liger_to_Seurat`. Full deprecation will occur in v2.2.0. +- **SOFT-DEPRECATION** The function `Gene_Present` has been soft-deprecated. It is replaced by `Feature_Present` which functions identically but better reflects that features present may also be proteins. Full deprecation will occur in v2.2.0. +- Parameter `legend` in `Iterate_DimPlot_bySample` has been inverted to `no_legend` to match `Meta_Highlight_Plot` parameters. +- Updated `Liger_to_Seurat()` for compatibility with Seurat V5 structure ([#153](https://github.com/samuel-marsh/scCustomize/issues/153)). Now part of `as.Seurat`. +- Default color palette change from v2.0.0 when number of groups is between 3-8 has been reverted. Polychrome palette is default when number of groups is between 3-36. +- In preparation of upcoming overhaul of rliger package, added package version checks to current rliger functions in order to prevent breaking errors. Next update v2.2.0 will add cross-functionality between rliger package versions ([#161](https://github.com/samuel-marsh/scCustomize/issues/161)). + + +## Fixes +- General typo and style fixes. +- Fixed point size check in some QC functions to avoid unnecessary error message. +- Fixed redundant warning messages in `Stacked_VlnPlot` due to rasterization defaults. +- Fixed issue setting `alpha_na_exp` appropriately in `FeaturePlot_scCustom`. +- Fixed issue setting `alpha_exp` between Seurat versions 4 and 5 ([#144](https://github.com/samuel-marsh/scCustomize/issues/144)). +- Fix duplicate legends in `DimPlot_scCustom` when levels are missing from a split plot. 
+- Fixed bug in `FeaturePlot_scCustom` that could cause plots to be mislabeled when using `split.by` and depending on the order of features provided ([#150](https://github.com/samuel-marsh/scCustomize/issues/150)). +- Fixed issue with automatic point size calculation for Seurat Objects. +- Added check for presence of dimensionality reduction in `DimPlot_LIGER` ([#153](https://github.com/samuel-marsh/scCustomize/issues/153)). +- Fixed bug in `Add_Mito_Ribo_LIGER` that caused it to return value of 0 for all cells (Now part of renamed `Add_Mito_Ribo` S3 generic). +- Fixed legend display in `Clustered_DotPlot` to display percentage instead of proportion to match legend text. +- Fixed `Percent_Expressing` error when `group_by = "ident"`. +- Fixed error that caused features in non-default assays to be returned as not found when attempting to plot. +- Fixed error in `DotPlot_scCustom` that didn't correctly pass `group.by` when plotting ([#158](https://github.com/samuel-marsh/scCustomize/issues/158)). + + + + # scCustomize 2.0.1 (2023-11-17) ## Added - None. diff --git a/R/Color_Palettes.R b/R/Color_Palettes.R index 52d637933a..b301d986f7 100644 --- a/R/Color_Palettes.R +++ b/R/Color_Palettes.R @@ -484,7 +484,6 @@ DiscretePalette_scCustomize <- function( #' @param num_groups number of groups to be plotted. If `ggplot_default_colors = FALSE` then by default: #' \itemize{ #' \item If number of levels plotted equal to 2 then colors will be `NavyAndOrange()`. -#' \item If number of levels plotted greater than 2 but less than or equal to 8 it will use `ColorBlind_Pal()`. #' \item If number of levels plotted greater than 2 but less than or equal to 36 it will use "polychrome" from `DiscretePalette_scCustomize()`. #' \item If greater than 36 will use "varibow" with shuffle = TRUE from `DiscretePalette_scCustomize`. 
#' } @@ -517,10 +516,7 @@ scCustomize_Palette <- function( if (num_groups == 2) { colors_use <- NavyAndOrange() } - if (num_groups > 2 && num_groups <= 8) { - colors_use <- ColorBlind_Pal() - } - if (num_groups > 8 && num_groups <= 36) { + if (num_groups > 2 && num_groups <= 36) { colors_use <- DiscretePalette_scCustomize(num_colors = num_groups, palette = "polychrome") } if (num_groups > 36) { diff --git a/R/Deprecated.R b/R/Deprecated.R new file mode 100644 index 0000000000..47e881b8bd --- /dev/null +++ b/R/Deprecated.R @@ -0,0 +1,77 @@ +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### DEPRECATED FUNCTIONS #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' Deprecated functions `r lifecycle::badge("deprecated")` +#' +#' +#' @description +#' Use [FeatureScatter_scCustom()] instead of `Split_FeatureScatter()`. +#' +#' @export +#' @keywords internal +#' @rdname deprecated + +Split_FeatureScatter <- function(...) { + lifecycle::deprecate_stop(when = "2.0.0", what = "Split_FeatureScatter()", with = "FeatureScatter_scCustom()") +} + + +#' @description +#' Use [Add_Mito_Ribo()] instead of `Add_Mito_Ribo_Seurat()`. +#' +#' @export +#' @keywords internal +#' @name deprecated + +Add_Mito_Ribo_Seurat <- function(...) { + lifecycle::deprecate_stop(when = "2.1.0", what = "Add_Mito_Ribo_Seurat()", with = "Add_Mito_Ribo()") +} + + +#' @description +#' Use [Add_Mito_Ribo()] instead of `Add_Mito_Ribo_LIGER()`. +#' +#' @export +#' @keywords internal +#' @rdname deprecated + +Add_Mito_Ribo_LIGER <- function(...) { + lifecycle::deprecate_stop(when = "2.1.0", what = "Add_Mito_Ribo_LIGER()", with = "Add_Mito_Ribo()") +} + + +#' @description +#' Use [Add_Cell_Complexity()] instead of `Add_Cell_Complexity_Seurat()`. +#' +#' @export +#' @keywords internal +#' @rdname deprecated + +Add_Cell_Complexity_Seurat <- function(...) 
{ + lifecycle::deprecate_stop(when = "2.1.0", what = "Add_Cell_Complexity_Seurat()", with = "Add_Cell_Complexity()") +} + + +#' @description +#' Use [Add_Cell_Complexity()] instead of `Add_Cell_Complexity_LIGER()`. +#' +#' @export +#' @keywords internal +#' @rdname deprecated + +Add_Cell_Complexity_LIGER <- function(...) { + lifecycle::deprecate_stop(when = "2.1.0", what = "Add_Cell_Complexity_LIGER()", with = "Add_Cell_Complexity()") +} + + +#' @description +#' Use [Meta_Present()] instead of `Meta_Present_LIGER()`. +#' +#' @export +#' @keywords internal +#' @rdname deprecated + +Meta_Present_LIGER <- function(...) { + lifecycle::deprecate_stop(when = "2.1.0", what = "Meta_Present_LIGER()", with = "Meta_Present()") +} diff --git a/R/Generics.R b/R/Generics.R new file mode 100644 index 0000000000..c82fe753a4 --- /dev/null +++ b/R/Generics.R @@ -0,0 +1,75 @@ +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### GENERICS #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' Convert objects to LIGER objects +#' +#' Convert objects (Seurat & lists of Seurat Objects) to LIGER objects +#' +#' @param x An object to convert to class `liger` +#' @param ... Arguments passed to other methods +#' +#' @return a liger object generated from `x` +#' +#' @rdname as.LIGER +#' @export as.LIGER +#' + +as.LIGER <- function(x, ...) { + UseMethod(generic = 'as.LIGER', object = x) +} + + +#' Convert objects to anndata objects +#' +#' Convert objects (Seurat & LIGER) to anndata objects +#' +#' @param x Seurat or LIGER object +#' @param ... Arguments passed to other methods +#' +#' @return an anndata object generated from `x`, saved at path provided. +#' +#' @rdname as.anndata +#' @export as.anndata +#' + +as.anndata <- function(x, ...) 
{ + UseMethod(generic = 'as.anndata', object = x) +} + + +#' Add Mito and Ribo percentages +#' +#' Add Mito, Ribo, & Mito+Ribo percentages to meta.data slot of Seurat Object or +#' cell.data slot of Liger object +#' +#' @param object Seurat or LIGER object +#' @param ... Arguments passed to other methods +#' +#' @return An object of the same class as `object` with columns added to object meta data. +#' +#' @rdname Add_Mito_Ribo +#' @export Add_Mito_Ribo +#' + +Add_Mito_Ribo <- function(object, ...) { + UseMethod(generic = 'Add_Mito_Ribo', object = object) +} + + +#' Add Cell Complexity +#' +#' Add measure of cell complexity/novelty (log10GenesPerUMI) for data QC. +#' +#' @param object Seurat or LIGER object +#' @param ... Arguments passed to other methods +#' +#' @return An object of the same class as `object` with columns added to object meta data. +#' +#' @rdname Add_Cell_Complexity +#' @export Add_Cell_Complexity +#' + +Add_Cell_Complexity <- function(object, ...) { + UseMethod(generic = 'Add_Cell_Complexity', object = object) +} diff --git a/R/Internal_Utilities.R b/R/Internal_Utilities.R index e50e756d41..9d49cf9f5d 100644 --- a/R/Internal_Utilities.R +++ b/R/Internal_Utilities.R @@ -252,12 +252,12 @@ glue_collapse_scCustom <- function( #' Perform Feature and Meta Checks before plotting #' -#' Wraps the `Gene_Present`, `Meta_Present`, `Reduction_Loading_Present`, and `Case_Check` into +#' Wraps the `Feature_Present`, `Meta_Present`, `Reduction_Loading_Present`, and `Case_Check` into #' single function to perform feature checks before plotting. #' #' @param object Seurat object #' @param features vector of features and/or meta data variables to plot. -#' @param assay Assay to use (default is the current object default assay). +#' @param assay Assay to use (default all assays present). #' #' @return vector of features and/or meta data that were found in object. 
#' @@ -272,12 +272,12 @@ Feature_PreCheck <- function( assay = NULL ) { # set assay (if null set to active assay) - assay <- assay %||% DefaultAssay(object = object) + assay <- assay %||% Assays(object = object) # Check features and meta to determine which features present - features_list <- Gene_Present(data = object, gene_list = features, omit_warn = FALSE, print_msg = FALSE, case_check_msg = FALSE, return_none = TRUE, seurat_assay = assay) + features_list <- Feature_Present(data = object, features = features, omit_warn = FALSE, print_msg = FALSE, case_check_msg = FALSE, return_none = TRUE, seurat_assay = assay) - meta_list <- Meta_Present(seurat_object = object, meta_col_names = features_list[[2]], omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) + meta_list <- Meta_Present(object = object, meta_col_names = features_list[[2]], omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) reduction_list <- Reduction_Loading_Present(seurat_object = object, reduction_names = meta_list[[2]], omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) @@ -310,6 +310,33 @@ Feature_PreCheck <- function( } +#' Ask yes/no question to proceed +#' +#' Asks the user to answer yes/no question and returns logical value depending on +#' the answer. +#' +#' @return logical +#' +#' @references function modified from function in devtools R package (License: MIT) \url{https://github.com/r-lib/devtools}. +#' @details \url{https://github.com/r-lib/devtools/blob/9f27cc3e6335e74d6f51ed331509ebda56747901/R/release.R#L147-L156}. 
+#' +#' @import cli +#' +#' @noRd +#' + +yesno <- function(msg, .envir = parent.frame()) { + yeses <- c("Yes") + nos <- c("No") + + cli_inform(message = msg, .envir = .envir) + qs <- c("Yes", "No") + rand <- sample(length(qs)) + + utils::menu(qs[rand]) != which(rand == 1) +} + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #################### QC HELPERS #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -523,20 +550,20 @@ Retrieve_Ensembl_Ribo <- function( } - #' Retrieve IEG Gene Lists - #' - #' Retrieves species specific IEG gene lists - #' - #' @param species species to retrieve IDs. - #' - #' @return list of 2 sets of gene_symbols - #' - #' @import cli - #' - #' @keywords internal - #' - #' @noRd - #' +#' Retrieve IEG Gene Lists +#' +#' Retrieves species specific IEG gene lists +#' +#' @param species species to retrieve IDs. +#' +#' @return list of 2 sets of gene_symbols +#' +#' @import cli +#' +#' @keywords internal +#' +#' @noRd +#' Retrieve_IEG_Lists <- function( species @@ -585,33 +612,33 @@ Retrieve_Ensembl_Ribo <- function( } - #' Add MSigDB Gene Lists Percentages - #' - #' Adds percentage of counts from 3 hallmark MSigDB hallmark gene sets: "HALLMARK_OXIDATIVE_PHOSPHORYLATION", - #' "HALLMARK_APOPTOSIS", and "HALLMARK_DNA_REPAIR". - #' - #' @param seurat_object object name. - #' @param species Species of origin for given Seurat Object. Only accepted species are: mouse, human, - #' zebrafish, rat, drosophila, or rhesus macaque (name or abbreviation) - #' @param oxphos_name name to use for the new meta.data column containing percent MSigDB Hallmark oxidative - #' phosphorylation counts. Default is "percent_oxphos". - #' @param apop_name name to use for the new meta.data column containing percent MSigDB Hallmark apoptosis counts. - #' Default is "percent_apop". 
- #' @param dna_repair_name name to use for the new meta.data column containing percent MSigDB Hallmark DNA repair counts. - #' Default is "percent_oxphos". - #' @param assay Assay to use (default is the current object default assay). - #' @param overwrite Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that - #' function will abort if columns with any one of the names provided to `mito_name` `ribo_name` or - #' `mito_ribo_name` is present in meta.data slot. - #' - #' @return Seurat object - #' - #' @import cli - #' - #' @keywords internal - #' - #' @noRd - #' +#' Add MSigDB Gene Lists Percentages +#' +#' Adds percentage of counts from 3 MSigDB hallmark gene sets: "HALLMARK_OXIDATIVE_PHOSPHORYLATION", +#' "HALLMARK_APOPTOSIS", and "HALLMARK_DNA_REPAIR". +#' +#' @param seurat_object object name. +#' @param species Species of origin for given Seurat Object. Only accepted species are: mouse, human, +#' zebrafish, rat, drosophila, or rhesus macaque (name or abbreviation) +#' @param oxphos_name name to use for the new meta.data column containing percent MSigDB Hallmark oxidative +#' phosphorylation counts. Default is "percent_oxphos". +#' @param apop_name name to use for the new meta.data column containing percent MSigDB Hallmark apoptosis counts. +#' Default is "percent_apop". +#' @param dna_repair_name name to use for the new meta.data column containing percent MSigDB Hallmark DNA repair counts. +#' Default is "percent_oxphos". +#' @param assay Assay to use (default is the current object default assay). +#' @param overwrite Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that +#' function will abort if columns with any one of the names provided to `oxphos_name`, `apop_name` or +#' `dna_repair_name` is present in meta.data slot. 
+#' +#' @return Seurat object +#' +#' @import cli +#' +#' @keywords internal +#' +#' @noRd +#' Add_MSigDB_Seurat <- function( @@ -679,31 +706,34 @@ Retrieve_Ensembl_Ribo <- function( seurat_object[[dna_repair_name]] <- PercentageFeatureSet(object = seurat_object, features = dna_repair_found, assay = assay) } + # Log Command + seurat_object <- LogSeuratCommand(object = seurat_object) + # return final object return(seurat_object) } - #' Add IEG Gene List Percentages - #' - #' Adds percentage of counts from IEG genes from mouse and human. - #' - #' @param seurat_object object name. - #' @param species Species of origin for given Seurat Object. Only accepted species are: mouse, human (name or abbreviation). - #' @param ieg_name name to use for the new meta.data column containing percent IEG gene counts. Default is "percent_ieg". - #' @param assay Assay to use (default is the current object default assay). - #' @param overwrite Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that - #' function will abort if columns with the name provided to `ieg_name` is present in meta.data slot. - #' - #' @return Seurat object - #' - #' @import cli - #' - #' @keywords internal - #' - #' @noRd - #' +#' Add IEG Gene List Percentages +#' +#' Adds percentage of counts from IEG genes from mouse and human. +#' +#' @param seurat_object object name. +#' @param species Species of origin for given Seurat Object. Only accepted species are: mouse, human (name or abbreviation). +#' @param ieg_name name to use for the new meta.data column containing percent IEG gene counts. Default is "percent_ieg". +#' @param assay Assay to use (default is the current object default assay). +#' @param overwrite Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that +#' function will abort if columns with the name provided to `ieg_name` is present in meta.data slot. 
+#' +#' @return Seurat object +#' +#' @import cli +#' +#' @keywords internal +#' +#' @noRd +#' Add_IEG_Seurat <- function( @@ -756,27 +786,30 @@ Retrieve_Ensembl_Ribo <- function( seurat_object[[ieg_name]] <- PercentageFeatureSet(object = seurat_object, features = ieg_found, assay = assay) } + # Log Command + seurat_object <- LogSeuratCommand(object = seurat_object) + # return final object return(seurat_object) } - #' Return default QC features - #' - #' Returns default QC features full names when provided with shortcut name. - #' - #' @param seurat_object object name. - #' @param features vector of features to check against defaults. - #' @param print_defaults return the potential accepted default values. - #' - #' @return list of found and not found features - #' - #' @import cli - #' - #' @keywords internal - #' - #' @noRd - #' +#' Return default QC features +#' +#' Returns default QC features full names when provided with shortcut name. +#' +#' @param seurat_object object name. +#' @param features vector of features to check against defaults. +#' @param print_defaults return the potential accepted default values. +#' +#' @return list of found and not found features +#' +#' @import cli +#' +#' @keywords internal +#' +#' @noRd +#' Return_QC_Defaults <- function( seurat_object, @@ -983,6 +1016,109 @@ symdiff <- function( } +#' Whole number check +#' +#' Checks whether a number is whole +#' +#' @param x number to check +#' +#' @return logical; `TRUE` if `x` is identical to `round(x)`, otherwise `FALSE` +#' +#' @keywords internal +#' +#' @noRd +#' + +check_whole_num <- function( + x +) { + round_x <- round(x = x) + + res <- identical(x = x, y = round_x) + + return(res) +} + + +#' Remove single value columns of data.frame +#' +#' Checks all columns within data.frame and returns data.frame minus columns that have the same value in all rows. 
+#' +#' @param df data.frame to filter +#' +#' @references Code used in function has been slightly modified from `sceasy:::.regularise_df` function of +#' sceasy package \url{https://github.com/cellgeni/sceasy} (License: GPL-3). +#' Code modified to match scCustomize & tidyverse style, add error checks, and +#' add cli formatted messages. +#' +#' @import cli +#' @importFrom dplyr select all_of +#' @importFrom magrittr "%>%" +#' +#' @return data.frame +#' +#' @keywords internal +#' +#' @noRd +#' + +drop_single_value_cols <- function( + df +) { + if (!inherits(what = "data.frame", x = df)) { + cli_abort(message = "{.code df} must of be of class data.frame.") + } + + single_val_columns <- sapply(df, function(x) { + length(x = unique(x = x)) == 1 + }) + + col_names_single <- df %>% + select(which(single_val_columns)) %>% + colnames() + + if (length(x = col_names_single) > 0) { + cli_inform(message = c("The following columns were removed as they contain identical values for all rows:", + "i" = "{.field {col_names_single}}")) + } + + # filter df + df_filtered <- df %>% + select(-(all_of(col_names_single))) + + # return df + return(df_filtered) +} + + +#' Check valid color +#' +#' Checks if input values are valid color representations in R. +#' +#' @param colors vector of color(s) to check +#' +#' @references Code for function \url{https://stackoverflow.com/a/13290832/15568251}. +#' Renamed by Samuel Marsh. 
+#' +#' @importFrom grDevices col2rgb +#' +#' @return logical named vector +#' +#' @keywords internal +#' +#' @noRd +#' + +Is_Color <- function( + colors +) { + sapply(colors, function(X) { + tryCatch(is.matrix(x = col2rgb(col = X)), + error = function(e) FALSE) + }) +} + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #################### METRICS HELPERS #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1289,3 +1425,120 @@ Metrics_Multi_VDJT <- function( return(full_data) } + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### GENE NAME/FILE CACHE HELPERS #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' BiocFileCache Interface +#' +#' Internal function to manage/call BiocFileCache. +#' +#' @return cache +#' +#' @references \url{https://bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html#cache-to-manage-package-data} +#' +#' @noRd +#' + +.get_bioc_cache <- function( +) { + cache <- tools::R_user_dir(package = "scCustomize", which="cache") + BiocFileCache::BiocFileCache(cache) + } + + +#' Download HGNC Dataset +#' +#' Internal function to download and cache the latest version of HGNC dataset for use with renaming genes. +#' +#' @param update logical, whether to manually override update parameters and download new data. 
+#' +#' @import cli +#' +#' @return path to data cache +#' +#' @references \url{https://bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html} +#' +#' @noRd +#' + + +download_hgnc_data <- function( + update = NULL +) { + # Get cache + bfc <- .get_bioc_cache() + + # URL from https://www.genenames.org/download/statistics-and-files/ + hgnc_ftp_url <- "https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt" + + # bfc <- BiocFileCache::BiocFileCache(hgnc_ftp_url) + + rid <- BiocFileCache::bfcquery(bfc, hgnc_ftp_url, "fpath")$rid + if (!length(rid)) { # not in cache, add but do not download + rid <- names(BiocFileCache::bfcadd(bfc, hgnc_ftp_url, download = FALSE)) + } + + if (isTRUE(x = update)) { + update <- update + } else { + update <- BiocFileCache::bfcneedsupdate(bfc, rid) # TRUE if newly added or stale + } + + # download & process + if (!isFALSE(x = update)) { + cli_inform(message = "Downloading HGNC data from: {.field {hgnc_ftp_url}}") + BiocFileCache::bfcdownload(bfc, rid, ask = FALSE, FUN = process_hgnc_data) + } + + rpath <- BiocFileCache::bfcrpath(bfc, rids=rid) # path to processed result + + return(rpath) +} + + +#' Process HGNC Dataset +#' +#' Internal function to process/filter and save the HGNC dataset during the caching process +#' +#' @param from input (cache location). +#' @param to output (cached data). 
+#' +#' @importFrom dplyr mutate select filter any_of contains +#' @importFrom magrittr "%>%" +#' @importFrom tidyr separate_wider_delim pivot_longer +#' +#' @return `TRUE`, once the processed data has been saved to `to` +#' +#' @references \url{https://bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html} +#' +#' @noRd +#' + +process_hgnc_data <- function( + from, + to +) { + # read in data + hgnc_full_data <- data.table::fread(file = from, data.table = F) + + # filter data: Approved Genes > select relevant categories + hgnc_filtered_data <- hgnc_full_data %>% + filter(.data[["status"]] == "Approved") %>% + select(any_of(c("hgnc_id", "symbol", "status", "alias_symbol", "prev_symbol", "date_symbol_changed", "entrez_id", "ensembl_gene_id"))) + + # Select needed for renaming > split prev symbol column by number of additional columns needed > pivot longer without NAs > mutate + hgnc_long_data <- hgnc_filtered_data %>% + select(any_of(c("symbol", "prev_symbol"))) %>% + separate_wider_delim(cols = "prev_symbol", delim = "|", names_sep = "_", names = NULL, too_few = "align_start") %>% + pivot_longer(cols = contains("_symbol"), + names_to = "column", + values_to = "prev_symbol", + values_drop_na = TRUE) %>% + mutate("prev_symbol" = ifelse(.data[["prev_symbol"]] %in% "", .data[["symbol"]], .data[["prev_symbol"]])) + + # save processed data + saveRDS(hgnc_long_data, file = to) + TRUE +} diff --git a/R/LIGER_Plotting.R b/R/LIGER_Plotting.R index 8537f359f3..2b4fee1793 100644 --- a/R/LIGER_Plotting.R +++ b/R/LIGER_Plotting.R @@ -42,6 +42,7 @@ #' #' @import ggplot2 #' @importFrom patchwork wrap_plots +#' @importFrom utils packageVersion #' #' @export #' @@ -76,9 +77,20 @@ DimPlot_LIGER <- function( ggplot_default_colors = FALSE, color_seed = 123 ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently 
in development.")) + } + # Check LIGER Is_LIGER(liger_object = liger_object) + # Check dimreduc present + if (length(x = liger_object@tsne.coords) == 0) { + cli_abort(message = "No dimensionality reduction coordinates found.") + } + # Set group_by defaults if (isFALSE(x = combination) && is.null(x = group_by)) { group_by <- "cluster" @@ -96,10 +108,10 @@ DimPlot_LIGER <- function( # Check group_by parameter if (!group_by == "cluster") - group_by_var <- Meta_Present_LIGER(liger_object = liger_object, meta_col_names = group_by, print_msg = FALSE) + group_by_var <- Meta_Present(object = liger_object, meta_col_names = group_by, print_msg = FALSE, omit_warn = FALSE)[[1]] if (!is.null(x = split_by)) { - group_by_var <- Meta_Present_LIGER(liger_object = liger_object, meta_col_names = split_by, print_msg = FALSE) + group_by_var <- Meta_Present(object = liger_object, meta_col_names = split_by, print_msg = FALSE, omit_warn = FALSE)[[1]] } # Add one time dim label warning @@ -257,6 +269,8 @@ DimPlot_LIGER <- function( #' @param pt.size_dimreduc Adjust point size for plotting in dimensionality reduction plots. #' @param reduction_label What to label the x and y axes of resulting plots. LIGER does not store name of #' technique and therefore needs to be set manually. Default is "UMAP". +#' @param plot_legend logical, whether to plot the legend on factor loading plots, default is TRUE. +#' Helpful if number of datasets is large to avoid crowding the plot with legend. #' @param raster Convert points to raster format. Default is NULL which will rasterize by default if #' greater than 200,000 cells. #' @param raster.dpi Pixel resolution for rasterized plots, passed to geom_scattermore(). 
@@ -282,6 +296,7 @@ DimPlot_LIGER <- function( #' @importFrom grDevices dev.off pdf #' @importFrom patchwork wrap_plots #' @importFrom scattermore geom_scattermore +#' @importFrom utils packageVersion #' #' @export #' @@ -305,6 +320,7 @@ plotFactors_scCustom <- function( pt.size_factors = 1, pt.size_dimreduc = 1, reduction_label = "UMAP", + plot_legend = TRUE, raster = TRUE, raster.dpi = c(512, 512), order = FALSE, @@ -318,6 +334,12 @@ plotFactors_scCustom <- function( ggplot_default_colors = FALSE, color_seed = 123 ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + # Check LIGER Is_LIGER(liger_object = liger_object) @@ -431,6 +453,10 @@ plotFactors_scCustom <- function( theme(legend.position = 'none') + scale_color_manual(values = colors_use_factors) + if (isFALSE(x = plot_legend)) { + top <- top + NoLegend() + } + bottom <- ggplot(h_df, aes(x = .data[["x"]], y=.data[["h_norm"]], col = .data[["dataset"]])) + geom_scattermore(pointsize = pt.size_factors, pixels = raster.dpi) + labs(x = 'Cell', y = 'H_norm Score') + @@ -439,6 +465,9 @@ plotFactors_scCustom <- function( guides(colour = guide_legend(override.aes = list(size = 2))) + scale_color_manual(values = colors_use_factors) + if (isFALSE(x = plot_legend)) { + bottom <- bottom + NoLegend() + } } else { top <- ggplot(h_df, aes(x = .data[["x"]], y=.data[["h_raw"]], col = .data[["dataset"]])) + @@ -448,6 +477,10 @@ plotFactors_scCustom <- function( theme(legend.position = 'none') + scale_color_manual(values = colors_use_factors) + if (isFALSE(x = plot_legend)) { + top <- top + NoLegend() + } + bottom <- ggplot(h_df, aes(x = .data[["x"]], y=.data[["h_norm"]], col = .data[["dataset"]])) + geom_point(size = pt.size_factors) + labs(x = 'Cell', y = 'H_norm Score') + @@ -456,6 +489,10 @@ plotFactors_scCustom <- 
function( guides(colour = guide_legend(override.aes = list(size = 2))) + scale_color_manual(values = colors_use_factors) + if (isFALSE(x = plot_legend)) { + bottom <- bottom + NoLegend() + } + } if (!is.null(cells.highlight)) { diff --git a/R/LIGER_Utilities.R b/R/LIGER_Utilities.R index 927bebb55d..ae418923a3 100644 --- a/R/LIGER_Utilities.R +++ b/R/LIGER_Utilities.R @@ -1,8 +1,5 @@ -#' Add Mito and Ribo percentages to LIGER +#' Add Mito and Ribo percentages #' -#' Add Mito, Ribo, percentages to meta.data slot of LIGER Object -#' -#' @param liger_object LIGER object name. #' @param species Species of origin for given Seurat Object. If mouse, human, marmoset, zebrafish, rat, #' drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically #' generate mito_pattern and ribo_pattern values. @@ -32,22 +29,26 @@ #' @import cli #' @importFrom dplyr mutate select intersect #' @importFrom magrittr "%>%" +#' @importFrom rlang ":=" #' @importFrom tibble rownames_to_column column_to_rownames +#' @importFrom utils packageVersion #' -#' @return A LIGER Object +#' @method Add_Mito_Ribo liger #' #' @export +#' @rdname Add_Mito_Ribo #' #' @concept liger_object_util #' #' @examples #' \dontrun{ -#' object <- Add_Mito_Ribo_LIGER(liger_object = object, species = "mouse") +#' # Liger +#' liger_object <- Add_Mito_Ribo(object = liger_object, species = "human") #' } #' -Add_Mito_Ribo_LIGER <- function( - liger_object, +Add_Mito_Ribo.liger <- function( + object, species, mito_name = "percent_mito", ribo_name = "percent_ribo", @@ -58,8 +59,15 @@ Add_Mito_Ribo_LIGER <- function( ribo_features = NULL, ensembl_ids = FALSE, overwrite = FALSE, - list_species_names = FALSE + list_species_names = FALSE, + ... 
) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + # Accepted species names accepted_names <- data.frame( Mouse_Options = c("Mouse", "mouse", "Ms", "ms", "Mm", "mm"), @@ -78,7 +86,7 @@ Add_Mito_Ribo_LIGER <- function( } # LIGER object check - Is_LIGER(liger_object = liger_object) + Is_LIGER(liger_object = object) # Check name collision if (any(duplicated(x = c(mito_name, ribo_name, mito_ribo_name)))) { @@ -86,7 +94,7 @@ Add_Mito_Ribo_LIGER <- function( } # Overwrite check - if (mito_name %in% colnames(x = liger_object@cell.data) || ribo_name %in% colnames(x = liger_object@cell.data) || mito_ribo_name %in% colnames(x = liger_object@cell.data)) { + if (mito_name %in% colnames(x = object@cell.data) || ribo_name %in% colnames(x = object@cell.data) || mito_ribo_name %in% colnames(x = object@cell.data)) { if (isFALSE(x = overwrite)) { cli_abort(message = c("Columns with {.val {mito_name}} and/or {.val {ribo_name}} already present in cell.data slot.", "i" = "*To run function and overwrite columns set parameter {.code overwrite = TRUE} or change respective {.code mito_name}, {.code ribo_name}, and/or {.code mito_ribo_name}.*") @@ -166,15 +174,17 @@ Add_Mito_Ribo_LIGER <- function( ribo_features <- Retrieve_Ensembl_Ribo(species = species) } + all_features <- LIGER_Features(liger_object = object) + # get features from patterns - mito_features <- mito_features %||% grep(pattern = mito_pattern, x = rownames(x = liger_object@raw.data[[1]]), value = TRUE) + mito_features <- mito_features %||% grep(pattern = mito_pattern, x = all_features, value = TRUE) - ribo_features <- ribo_features %||% grep(pattern = ribo_pattern, x = rownames(x = liger_object@raw.data[[1]]), value = TRUE) + ribo_features <- ribo_features %||% grep(pattern = ribo_pattern, x = all_features, value = TRUE) # 
Check features are present in object - length_mito_features <- length(x = intersect(x = mito_features, y = rownames(x = liger_object@raw.data[[1]]))) + length_mito_features <- length(x = intersect(x = mito_features, y = all_features)) - length_ribo_features <- length(x = intersect(x = ribo_features, y = rownames(x = liger_object@raw.data[[1]]))) + length_ribo_features <- length(x = intersect(x = ribo_features, y = all_features)) # Check length of mito and ribo features found in object if (length_mito_features < 1 && length_ribo_features < 1) { @@ -195,68 +205,75 @@ Add_Mito_Ribo_LIGER <- function( # Add mito and ribo percent if (length_mito_features > 0) { - good_mito <- mito_features[mito_features %in% rownames(x = liger_object@raw.data)] - percent_mito <- unlist(lapply(liger_object@raw.data, function(x) { + good_mito <- mito_features[mito_features %in% all_features] + percent_mito <- unlist(lapply(object@raw.data, function(x) { (Matrix::colSums(x[good_mito, ])/Matrix::colSums(x))*100})) - liger_object@cell.data[ , mito_name] <- percent_mito + object@cell.data[ , mito_name] <- percent_mito } if (length_ribo_features > 0){ - good_ribo <- ribo_features[ribo_features %in% rownames(x = liger_object@raw.data)] - percent_ribo <- unlist(lapply(liger_object@raw.data, function(x) { + good_ribo <- ribo_features[ribo_features %in% all_features] + percent_ribo <- unlist(lapply(object@raw.data, function(x) { (Matrix::colSums(x[good_ribo, ])/Matrix::colSums(x))*100})) - liger_object@cell.data[ , ribo_name] <- percent_ribo + object@cell.data[ , ribo_name] <- percent_ribo } # Create combined mito ribo column if both present if (length_mito_features > 0 && length_ribo_features > 0) { - object_meta <- Fetch_Meta(object = liger_object) %>% + object_meta <- Fetch_Meta(object = object) %>% rownames_to_column("barcodes") object_meta <- object_meta %>% mutate({{mito_ribo_name}} := .data[[mito_name]] + .data[[ribo_name]]) - liger_object@cell.data[ , mito_ribo_name] <- 
object_meta[[mito_ribo_name]] + object@cell.data[ , mito_ribo_name] <- object_meta[[mito_ribo_name]] } # return object - return(liger_object) + return(object) } #' Add Cell Complexity Value #' -#' Add measure of cell complexity/novelty (log10PerUMI) for data QC. -#' -#' @param liger_object object name. #' @param meta_col_name name to use for new meta data column. Default is "log10GenesPerUMI". #' @param overwrite Logical. Whether to overwrite existing an meta.data column. Default is FALSE meaning that #' function will abort if column with name provided to `meta_col_name` is present in meta.data slot. #' #' @import cli +#' @importFrom utils packageVersion #' -#' @return A LIGER Object +#' @method Add_Cell_Complexity liger #' #' @export +#' @rdname Add_Cell_Complexity #' #' @concept liger_object_util #' #' @examples #' \dontrun{ -#' object <- Add_Cell_Complexity_LIGER(liger_object = object) +#' # Liger +#' liger_object <- Add_Cell_Complexity(object = liger_object) #' } #' -Add_Cell_Complexity_LIGER <- function( - liger_object, +Add_Cell_Complexity.liger <- function( + object, meta_col_name = "log10GenesPerUMI", - overwrite = FALSE + overwrite = FALSE, + ... 
) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + # Check Seurat - Is_LIGER(liger_object = liger_object) + Is_LIGER(liger_object = object) # Check columns for overwrite - if (meta_col_name %in% colnames(x = liger_object@cell.data)) { + if (meta_col_name %in% colnames(x = object@cell.data)) { if (isFALSE(x = overwrite)) { cli_abort(message = c("Column {.val {meta_col_name}} already present in cell.data slot.", "i" = "*To run function and overwrite column, set parameter {.code overwrite = TRUE} or change respective {.code meta_col_name}*.") @@ -268,23 +285,25 @@ Add_Cell_Complexity_LIGER <- function( } # Add score - liger_object@cell.data[ , meta_col_name] <- log10(liger_object@cell.data$nGene) / log10(liger_object@cell.data$nUMI) + object@cell.data[ , meta_col_name] <- log10(object@cell.data$nGene) / log10(object@cell.data$nUMI) #return object - return(liger_object) + return(object) } -#' Check if meta data are present +#' Extract Features from LIGER Object #' -#' Check if meta data columns are present in object and return vector of found columns Return warning -#' messages for meta data columns not found. +#' Extract all unique features from LIGER object #' -#' @param liger_object object name. -#' @param meta_col_names vector of column names to check. -#' @param print_msg logical. Whether message should be printed if all features are found. Default is TRUE. +#' @param liger_object LIGER object name. 
+#' @param by_dataset logical, whether to return list with vector of features for each dataset in +#' LIGER object or to return single vector of unique features across all datasets in object +#' (default is FALSE; return vector of unique features) #' -#' @return vector of meta data columns that are present +#' @return vector or list depending on `by_dataset` parameter +#' +#' @importFrom utils packageVersion #' #' @export #' @@ -292,51 +311,37 @@ Add_Cell_Complexity_LIGER <- function( #' #' @examples #' \dontrun{ -#' meta_variables <- Meta_Present_LIGER(liger_object = obj, gene_list = DEG_list, print_msg = TRUE) +#' # return single vector of all unique features +#' all_features <- LIGER_Features(liger_object = object, by_dataset = FALSE) +#' +#' # return list of vectors containing features from each individual dataset in object +#' dataset_features <- LIGER_Features(liger_object = object, by_dataset = TRUE) #' } #' -Meta_Present_LIGER <- function( - liger_object, - meta_col_names, - print_msg = TRUE +LIGER_Features <- function( + liger_object, + by_dataset = FALSE ) { - # Check Seurat - Is_LIGER(liger_object = liger_object) - - # get all features - possible_features <- colnames(x = liger_object@cell.data) - - # If any features not found - if (any(!meta_col_names %in% possible_features)) { - bad_meta <- meta_col_names[!meta_col_names %in% possible_features] - found_meta <- meta_col_names[meta_col_names %in% possible_features] - - # Return message of features not found - if (length(x = found_meta) < 1) { - cli_abort(message = c("No valid meta data column names found.", - "*" = "None of the provided @cell.data columns were found:", - "i" = "{.field {glue_collapse_scCustom(input_string = bad_meta, and = TRUE)}}") - ) - } + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } 
- if (length(x = bad_meta) > 0) { - cli_warn(message = c("The following @cell.data columns were omitted as they were not found:", - "i" = "{.field {glue_collapse_scCustom(input_string = bad_meta, and = TRUE)}}") - ) - } + Is_LIGER(liger_object = liger_object) - # Return the found features omitting the not found ones. - return(found_meta) - } + # Extract features + features_by_dataset <- lapply(1:length(x = liger_object@raw.data), function(x) { + rownames(x = liger_object@raw.data[[x]]) + }) - # Print all found message if TRUE - if (isTRUE(x = print_msg)) { - cli_inform(message = "All @cell.data columns present.") + if (isFALSE(x = by_dataset)) { + features <- unique(x = unlist(x = features_by_dataset)) + return(features) + } else { + return(features_by_dataset) } - - # Return full input gene list. - return(meta_col_names) } @@ -351,6 +356,7 @@ Meta_Present_LIGER <- function( #' @return A LIGER Object #' #' @import cli +#' @importFrom utils packageVersion #' #' @export #' @@ -367,6 +373,12 @@ Top_Genes_Factor <- function( liger_factor, num_genes = 10 ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + # LIGER object check Is_LIGER(liger_object = liger_object) @@ -402,6 +414,8 @@ Top_Genes_Factor <- function( #' #' @return A data.frame with information for plotting #' +#' @importFrom utils packageVersion +#' #' @references This function is encompasses the first part of the LIGER function plotByDatasetAndCluster. #' However, this function is modified to allow plotting other meta data variables. In this case the function #' just returns the data.frame needed for plotting rather than plots themselves. 
@@ -421,6 +435,12 @@ Generate_Plotting_df_LIGER <- function(object, group_by = "dataset", split_by = NULL ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + tsne_df <- data.frame(object@tsne.coords) colnames(x = tsne_df) <- c("tsne1", "tsne2") tsne_df[[group_by]] <- object@cell.data[[group_by]] @@ -500,6 +520,7 @@ Generate_Plotting_df_LIGER <- function(object, #' @importFrom patchwork wrap_plots #' @importFrom scattermore geom_scattermore #' @importFrom stats median +#' @importFrom utils packageVersion #' #' @references This function is encompasses part of the LIGER function plotByDatasetAndCluster. #' However, this function is modified to just return cluster plots based on `Generate_Plotting_df_LIGER`. @@ -534,6 +555,12 @@ Plot_By_Cluster_LIGER <- function( ggplot_default_colors = FALSE, color_seed = 123 ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + # Create plotting data.frame tsne_df <- Generate_Plotting_df_LIGER(object = liger_object, group_by = group_by, split_by = split_by, reorder.idents = reorder.idents, shuffle = shuffle, shuffle_seed = shuffle_seed) @@ -756,8 +783,9 @@ Plot_By_Cluster_LIGER <- function( #' @import ggplot2 #' @importFrom cowplot theme_cowplot #' @importFrom patchwork wrap_plots -#' @importFrom rlang sym +#' @importFrom rlang sym "!!" #' @importFrom scattermore geom_scattermore +#' @importFrom utils packageVersion #' #' @references This function is encompasses part of the LIGER function plotByDatasetAndCluster. #' However, this function is modified to just return cluster plots based on `Generate_Plotting_df_LIGER`. 
@@ -787,6 +815,11 @@ Plot_By_Meta_LIGER <- function( ggplot_default_colors = FALSE, color_seed = 123 ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } tsne_df <- Generate_Plotting_df_LIGER(object = liger_object, group_by = group_by, split_by = split_by, reorder.idents = reorder.idents, shuffle = shuffle, shuffle_seed = shuffle_seed) @@ -920,6 +953,7 @@ Plot_By_Meta_LIGER <- function( #' @return A LIGER Object with variable genes in correct slot. #' #' @import cli +#' @importFrom utils packageVersion #' #' @references Matching function parameter text descriptions are taken from `rliger::selectGenes` #' which is called by this function after creating new temporary object/dataset. @@ -945,6 +979,12 @@ Variable_Features_ALL_LIGER <- function( pt.size = 0.3, chunk=1000 ) { + # temp liger version check + if (packageVersion(pkg = 'rliger') > "1.0.1") { + cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.", + "i" = "Functionality with rliger v2+ is currently in development.")) + } + Is_LIGER(liger_object = liger_object) raw_data <- liger_object@raw.data @@ -968,194 +1008,3 @@ Variable_Features_ALL_LIGER <- function( liger_object@var.genes <- var_genes return(liger_object) } - - -#' Create a Seurat object containing the data from a liger object -#' -#' Merges raw.data and scale.data of object, and creates Seurat object with these values along with -#' tsne.coords, iNMF factorization, and cluster assignments. Supports Seurat V2 and V3. -#' -#' Stores original dataset identity by default in new object metadata if dataset names are passed -#' in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". -#' -#' @param liger_object \code{liger} object. 
-#' @param nms By default, labels cell names with dataset of origin (this is to account for cells in -#' different datasets which may have same name). Other names can be passed here as vector, must have -#' same length as the number of datasets. (default names(H)). -#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default TRUE). -#' @param use.liger.genes Whether to carry over variable genes (default TRUE). -#' @param by.dataset Include dataset of origin in cluster identity in Seurat object (default FALSE). -#' @param keep_meta logical. Whether to transfer additional metadata (nGene/nUMI/dataset already transferred) -#' to new Seurat Object. Default is TRUE. -#' @param reduction_label Name of dimensionality reduction technique used. Enables accurate transfer -#' or name to Seurat object instead of defaulting to "tSNE". -#' @param seurat_assay Name to set for assay in Seurat Object. Default is "RNA". -#' -#' @return Seurat object with raw.data, scale.data, reduction_label, iNMF, and ident slots set. -#' -#' @references Original function is part of LIGER package \url{https://github.com/welch-lab/liger} (Licence: GPL-3). -#' Function was slightly modified for use in scCustomize with keep.meta parameter. Also posted as -#' PR to liger GitHub. 
-#' -#' @import cli -#' @import Matrix -#' @importFrom dplyr any_of pull select -#' @importFrom methods as new -#' @importFrom utils packageVersion -#' -#' @export -#' -#' @concept object_util -#' -#' @examples -#' \dontrun{ -#' seurat_object <- Liger_to_Seurat(liger_object = LIGER_OBJ, reduction_label = "UMAP") -#' } - -Liger_to_Seurat <- function( - liger_object, - nms = names(liger_object@H), - renormalize = TRUE, - use.liger.genes = TRUE, - by.dataset = FALSE, - keep_meta = TRUE, - reduction_label = "UMAP", - seurat_assay = "RNA" -) { - if (is.null(x = reduction_label)) { - cli_abort(message = c("{.code reduction_label} parameter was not set.", - "*" = "LIGER objects do not store name of dimensionality reduction technique used.", - "i" = "In order to retain proper labels in Seurat object please set {.code reduction_label} to {.val tSNE}, {.val UMAP}, {.val etc}.")) - } - - # get Seurat version - maj_version <- packageVersion('Seurat')$major - if (class(liger_object@raw.data[[1]])[1] != 'dgCMatrix') { - # mat <- as(x, 'CsparseMatrix') - liger_object@raw.data <- lapply(liger_object@raw.data, function(x) { - as(x, 'CsparseMatrix') - }) - } - - key_name <- paste0(reduction_label, "_") - - raw.data <- Merge_Sparse_Data_All(liger_object@raw.data, nms) - scale.data <- do.call(rbind, liger_object@scale.data) - rownames(x = scale.data) <- colnames(x = raw.data) - if (maj_version < 3) { - var.genes <- liger_object@var.genes - inmf.obj <- new( - Class = "dim.reduction", gene.loadings = t(liger_object@W), - cell.embeddings = liger_object@H.norm, key = "iNMF_" - ) - rownames(x = inmf.obj@gene.loadings) <- var.genes - - tsne.obj <- new( - Class = "dim.reduction", cell.embeddings = liger_object@tsne.coords, - key = key_name - ) - } else { - var.genes <- liger_object@var.genes - if (any(grepl('_', var.genes))) { - print("Warning: Seurat v3 genes cannot have underscores, replacing with dashes ('-')") - var.genes <- gsub("_", replacement = "-", var.genes) - } - inmf.loadings <- 
t(x = liger_object@W) - rinmf.loadings <- t(x = liger_object@W) - - inmf.embeddings <- liger_object@H.norm - rinmf.embeddings <- do.call(what = 'rbind', args = liger_object@H) - - ncol_Hnorm <- ncol(x = liger_object@H.norm) - colnames(x = inmf.embeddings) <- paste0("iNMF_", 1:ncol_Hnorm) - colnames(x = rinmf.embeddings) <- paste0("rawiNMF_", 1:ncol_Hnorm) - - tsne.embeddings <- liger_object@tsne.coords - colnames(x = tsne.embeddings) <- paste0(key_name, 1:2) - rownames(x = inmf.loadings) <- var.genes - rownames(x = inmf.embeddings) <- - rownames(x = rinmf.embeddings) <- - rownames(x = tsne.embeddings) <- - rownames(x = scale.data) - - inmf.obj <- CreateDimReducObject( - embeddings = inmf.embeddings, - loadings = inmf.loadings, - key = "iNMF_", - global = TRUE, - assay = seurat_assay - ) - - rinmf.obj <- CreateDimReducObject( - embeddings = rinmf.embeddings, - loadings = rinmf.loadings, - key = "rawiNMF_", - global = TRUE, - assay = seurat_assay - ) - - tsne.obj <- CreateDimReducObject( - embeddings = tsne.embeddings, - key = key_name, - global = TRUE, - assay = seurat_assay - ) - } - new.seurat <- CreateSeuratObject(raw.data) - if (isTRUE(x = renormalize)) { - new.seurat <- NormalizeData(new.seurat) - } - if (isTRUE(x = by.dataset)) { - ident.use <- as.character(x = unlist(x = lapply(1:length(liger_object@raw.data), function(i) { - dataset.name <- names(x = liger_object@raw.data)[i] - paste0(dataset.name, as.character(x = liger_object@clusters[colnames(x = liger_object@raw.data[[i]])])) - }))) - } else { - if (maj_version < 3) { - ident.use <- as.character(x = liger_object@clusters) - } else { - ident.use <- liger_object@clusters - } - } - - if (maj_version < 3) { - if (use.liger.genes) { - new.seurat@var.genes <- var.genes - } - new.seurat@scale.data <- t(scale.data) - new.seurat@dr[[reduction_label]] <- tsne.obj - new.seurat@dr$iNMF <- inmf.obj - new.seurat@dr$iNMF <- rinmf.obj - new.seurat <- SetIdent(new.seurat, ident.use = ident.use) - - } else { - if 
(isTRUE(x = use.liger.genes)) { - VariableFeatures(new.seurat) <- var.genes - } - SetAssayData(new.seurat, slot = "scale.data", t(scale.data), assay = "RNA") - new.seurat[[reduction_label]] <- tsne.obj - new.seurat[['iNMF']] <- inmf.obj - new.seurat[['rawiNMF']] <- rinmf.obj - Idents(object = new.seurat) <- ident.use - } - if (isTRUE(x = keep_meta)) { - # extract meta data from liger object - liger_meta <- Fetch_Meta(object = liger_object) - # remove meta data values already transferred - liger_meta <- liger_meta %>% - select(-any_of(c("nUMI", "nGene", "dataset"))) - # extract meta data names - meta_names <- colnames(x = liger_meta) - # add meta data to new seurat object - for (meta_var in meta_names){ - meta_transfer <- liger_meta %>% - pull(meta_var) - names(x = meta_transfer) <- colnames(x = new.seurat) - new.seurat <- AddMetaData(object = new.seurat, - metadata = meta_transfer, - col.name = meta_var) - } - } - - return(new.seurat) -} diff --git a/R/Object_Conversion.R b/R/Object_Conversion.R new file mode 100644 index 0000000000..b8a4ae99dd --- /dev/null +++ b/R/Object_Conversion.R @@ -0,0 +1,1524 @@ +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### CONVERT TO LIGER #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +#' Create liger object from one Seurat Object +#' +#' @param group.by Variable in meta data which contains variable to split data by, (default is "orig.ident"). +#' To use split layers in Assay5 set `group.by = "layers"`. +#' @param layers_name name of meta.data column used to split layers if setting `group.by = "layers"`. +#' @param assay Assay containing raw data to use, (default is "RNA"). +#' @param remove_missing logical, whether to remove missing genes with no counts when converting to +#' LIGER object (default is FALSE). +#' @param renormalize logical, whether to perform normalization after LIGER object creation (default is TRUE). 
+#' @param use_seurat_var_genes logical, whether to transfer variable features from Seurat object to
+#' new LIGER object (default is FALSE).
+#' @param use_seurat_dimreduc logical, whether to transfer dimensionality reduction coordinates from
+#' Seurat to new LIGER object (default is FALSE).
+#' @param reduction Name of Seurat reduction to transfer if `use_seurat_dimreduc = TRUE`.
+#' If NULL the object's default reduction (`DefaultDimReduc`) is used.
+#' @param keep_meta logical, whether to transfer columns in Seurat meta.data slot to LIGER cell.data
+#' slot (default is TRUE).
+#' @param verbose logical, whether to print status messages during object conversion (default is TRUE).
+#'
+#'
+#' @references modified and enhanced version of `rliger::seuratToLiger`.
+#'
+#' @method as.LIGER Seurat
+#'
+#' @concept object_conversion
+#'
+#' @import cli
+#' @import Seurat
+#' @importFrom dplyr left_join join_by select any_of
+#' @importFrom magrittr "%>%"
+#' @importFrom tibble rownames_to_column column_to_rownames
+#' @importFrom utils packageVersion
+#'
+#' @export
+#' @rdname as.LIGER
+#'
+#' @examples
+#' \dontrun{
+#' liger_object <- as.LIGER(x = seurat_object)
+#' }
+#'
+
+as.LIGER.Seurat <- function(
+  x,
+  group.by = "orig.ident",
+  layers_name = NULL,
+  assay = "RNA",
+  remove_missing = FALSE,
+  renormalize = TRUE,
+  use_seurat_var_genes = FALSE,
+  use_seurat_dimreduc = FALSE,
+  reduction = NULL,
+  keep_meta = TRUE,
+  verbose = TRUE,
+  ...
+) {
+  # temp liger version check
+  if (packageVersion(pkg = 'rliger') > "1.0.1") {
+    cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.",
+                          "i" = "Functionality with rliger v2+ is currently in development."))
+  }
+
+  # Check Seurat
+  Is_Seurat(seurat_object = x)
+
+  # Run update to ensure functionality
+  if (isTRUE(x = verbose)) {
+    cli_inform(message = c("*" = "Checking Seurat object validity"))
+  }
+
+  x <- suppressMessages(UpdateSeuratObject(object = x))
+
+  # Check & Set Assay
+  if (!assay %in% Assays(object = x)) {
+    cli_abort(message = "Provided assay {.field {assay}} not found in Seurat object.")
+  }
+
+  if (assay != DefaultAssay(object = x)) {
+    cli_inform(c("*" = "Changing object DefaultAssay from ({.field {DefaultAssay(object = x)}}) to provided assay ({.field {assay}})."))
+    DefaultAssay(x) <- assay
+  }
+
+  # Check Assay5 for multiple layers
+  count_layers <- Layers(object = x, search = "counts", assay = assay)
+
+  # Fail early with a clear message instead of a later "object not found" error
+  if (length(x = count_layers) == 0) {
+    cli_abort(message = "No layers containing raw counts were found in assay {.field {assay}}.")
+  }
+
+  # check layers_name; it is only required (and only validated) when grouping by layers,
+  # so the default `layers_name = NULL` must not be tested against the meta.data columns
+  if (group.by == "layers") {
+    if (is.null(x = layers_name)) {
+      cli_abort(message = "When {.code group.by = 'layers'} please supply name of meta.data column used to split layers to {.code layers_name}.")
+    }
+
+    if (!all(layers_name %in% colnames(x@meta.data))) {
+      cli_abort(message = "The value provided to {.code layers_name} ({.field {layers_name}}) was not found in object meta.data.")
+    }
+  }
+
+  if (isTRUE(x = Assay5_Check(seurat_object = x, assay = assay))) {
+    if (length(x = count_layers) > 1 && group.by != "layers") {
+      cli_abort(message = c("Multiple layers containing raw counts present ({.field {count_layers[1]}}, {.field {count_layers[2]}}, {.field ...}) and value provided to {.code group.by} is not {.val layers}.",
+                            "i" = "To group LIGER object by assay layers please set {.code group.by = 'layers'}."
+      ))
+    }
+  }
+
+  # Check meta data
+  if (group.by != "layers") {
+    # keep the lookup result separate so the requested name is still available for the error message
+    found_group <- Meta_Present(object = x, meta_col_names = group.by, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE)[[1]]
+
+    # stop if none found
+    if (length(x = found_group) == 0) {
+      cli_abort(message = c("{.code group.by} was not found.",
+                            "i" = "No column found in object meta.data named: {.val {group.by}}.")
+      )
+    }
+
+    group.by <- found_group
+  }
+
+  if (isTRUE(x = verbose)) {
+    cli_inform(message = c("*" = "Creating LIGER object."))
+  }
+
+  # Set ident to grouping variable
+  # NOTE(review): when `group.by = "layers"` but only one counts layer exists this sets the
+  # identity to the literal value "layers"; confirm whether `layers_name` should be used instead.
+  if (length(x = count_layers) == 1) {
+    Idents(object = x) <- group.by
+  }
+
+  # Check & Pull other relevant data
+  if (isTRUE(x = use_seurat_dimreduc)) {
+    # Extract default reduction
+    reduction <- reduction %||% DefaultDimReduc(object = x)
+
+    if (!reduction %in% Reductions(object = x)) {
+      cli_abort(message = "Provided reduction: {.field {reduction}} was not found in Seurat Object.")
+    }
+
+    reduc_coords <- Embeddings(object = x, reduction = reduction)
+  }
+
+  if (isTRUE(x = use_seurat_var_genes)) {
+    var_genes <- VariableFeatures(object = x)
+
+    if (!length(x = var_genes) > 0) {
+      cli_abort(message ="{.code use_seurat_var_genes = TRUE}, but no variable features found in Seurat object.")
+    }
+  }
+
+  # Get raw data & cells
+  if (length(x = count_layers) == 1) {
+    raw_data_full <- LayerData(object = x, layer = count_layers)
+
+    cells_per_dataset <- CellsByIdentities(object = x)
+
+    # Split data by dataset
+    idents <- names(x = cells_per_dataset)
+
+    # drop = FALSE keeps a matrix even when a dataset contains a single cell
+    raw_data_list <- lapply(idents, function(ident_name) {
+      raw_data_full[, cells_per_dataset[[ident_name]], drop = FALSE]
+    })
+
+    names(raw_data_list) <- idents
+  }
+
+  # If multiple layers
+  if (length(x = count_layers) > 1) {
+    raw_data_list <- lapply(count_layers, function(layer_name) {
+      LayerData(object = x, layer = layer_name)
+    })
+
+    # strip only the literal leading "counts." prefix from the layer names
+    new_names <- sub(pattern = "^counts\\.", replacement = "", x = count_layers)
+
+    names(raw_data_list) <- new_names
+  }
+
+  # Create LIGER Object
+  liger_object <- rliger::createLiger(raw.data = raw_data_list, remove.missing = remove_missing)
+
+  if (isTRUE(x = renormalize)) {
+    if (isTRUE(x = verbose)) {
+      cli_inform(message = c("*" = "Normalizing data."))
+    }
+    liger_object <- rliger::normalize(object = liger_object, remove.missing = remove_missing)
+  }
+
+  # Add var genes
+  if (isTRUE(x = use_seurat_var_genes)) {
+    liger_object@var.genes <- var_genes
+  }
+
+  # Add dim reduc
+  if (isTRUE(x = use_seurat_dimreduc)) {
+    liger_object@tsne.coords <- reduc_coords
+
+    # Add new attribute to enable more accurate scCustomize plotting
+    attributes(liger_object)$reduction_key <- reduction
+  }
+
+  # transfer meta
+  if (isTRUE(x = keep_meta)) {
+    # extract meta data from Seurat object
+    seurat_meta <- Fetch_Meta(object = x)
+    # remove meta data values already transferred
+    seurat_meta <- seurat_meta %>%
+      select(-any_of(c("nFeature_RNA", "nCount_RNA"))) %>%
+      rownames_to_column("barcodes")
+
+    # pull current liger meta
+    liger_meta <- Fetch_Meta(object = liger_object) %>%
+      rownames_to_column("barcodes")
+
+    # join meta
+    new_liger_meta <- suppressMessages(left_join(x = liger_meta, y = seurat_meta, by = join_by("barcodes"))) %>%
+      column_to_rownames("barcodes")
+
+    # Add to LIGER object
+    liger_object@cell.data <- new_liger_meta
+  }
+
+  # return object
+  return(liger_object)
+}
+
+
+#' Create liger object from one Seurat Object
+#'
+#' @param group.by Variable in meta data which contains variable to split data by, (default is "orig.ident").
+#' @param dataset_names optional, vector of names to use for naming datasets.
+#' @param assay Assay containing raw data to use, (default is "RNA").
+#' @param remove_missing logical, whether to remove missing genes with no counts when converting to
+#' LIGER object (default is FALSE).
+#' @param renormalize logical, whether to perform normalization after LIGER object creation (default is TRUE).
+#' @param use_seurat_var_genes logical, whether to transfer variable features from Seurat object to
+#' new LIGER object (default is FALSE).
+#' @param var_genes_method how variable genes should be selected from Seurat objects if `use_seurat_var_genes = TRUE`. Can be either "intersect" or "union", (default is "intersect").
+#' @param keep_meta logical, whether to transfer columns in Seurat meta.data slot to LIGER cell.data
+#' slot (default is TRUE).
+#' @param verbose logical, whether to print status messages during object conversion (default is TRUE).
+#'
+#'
+#' @method as.LIGER list
+#'
+#' @concept object_conversion
+#'
+#' @import cli
+#' @import Seurat
+#' @importFrom dplyr left_join join_by select any_of bind_rows union intersect
+#' @importFrom magrittr "%>%"
+#' @importFrom stringr str_to_lower
+#' @importFrom tibble rownames_to_column column_to_rownames
+#' @importFrom utils packageVersion
+#'
+#' @export
+#' @rdname as.LIGER
+#'
+#' @examples
+#' \dontrun{
+#' liger_object <- as.LIGER(x = seurat_object_list)
+#' }
+#'
+
+as.LIGER.list <- function(
+  x,
+  group.by = "orig.ident",
+  dataset_names = NULL,
+  assay = "RNA",
+  remove_missing = FALSE,
+  renormalize = TRUE,
+  use_seurat_var_genes = FALSE,
+  var_genes_method = "intersect",
+  keep_meta = TRUE,
+  verbose = TRUE,
+  ...
+) {
+  # temp liger version check
+  if (packageVersion(pkg = 'rliger') > "1.0.1") {
+    cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.",
+                          "i" = "Functionality with rliger v2+ is currently in development."))
+  }
+
+  # Check Seurat: abort if ANY item in the list is not a Seurat object
+  seurat_check <- vapply(x, function(obj) {
+    inherits(x = obj, what = "Seurat")
+  }, FUN.VALUE = logical(1))
+
+  if (!all(seurat_check)) {
+    cli_abort(message = "One or more of items in list are not Seurat Objects.")
+  }
+
+  # Run update to ensure functionality
+  if (isTRUE(x = verbose)) {
+    cli_inform(message = c("*" = "Checking Seurat object validity"))
+  }
+
+  x <- lapply(x, function(y) {
+    suppressMessages(UpdateSeuratObject(object = y))
+  })
+
+  # Check & Set Assay (done before the layer checks, which rely on `assay` being present)
+  for (obj in x) {
+    if (!assay %in% Assays(object = obj)) {
+      cli_abort(message = "Provided assay {.field {assay}} not found in all Seurat objects in list.")
+    }
+  }
+
+  # The modified object must be returned and stored back in the list;
+  # changing the loop variable of a `for` loop would only alter a copy.
+  x <- lapply(x, function(obj) {
+    if (assay != DefaultAssay(object = obj)) {
+      cli_inform(c("*" = "Changing object DefaultAssay from ({.field {DefaultAssay(object = obj)}}) to provided assay ({.field {assay}})."))
+      DefaultAssay(object = obj) <- assay
+    }
+    obj
+  })
+
+  # Check Assay5 for multiple layers
+  for (obj in x) {
+    if (isTRUE(x = Assay5_Check(seurat_object = obj, assay = assay))) {
+      layers_check <- Layers(object = obj, search = "counts")
+      if (length(x = layers_check) > 1) {
+        cli_abort(message = c("Multiple layers containing raw counts present {.field {head(x = layers_check, n = 2)}}.",
+                              "i" = "Please run {.code JoinLayers} before converting to LIGER object."))
+      }
+    }
+  }
+
+  # Check meta data
+  if (is.null(x = dataset_names)) {
+    for (obj in x) {
+      # keep the lookup result separate so the requested name is still available for the error message
+      found_group <- Meta_Present(object = obj, meta_col_names = group.by, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE)[[1]]
+
+      # stop if none found
+      if (length(x = found_group) == 0) {
+        cli_abort(message = c("{.code group.by} was not found in all objects in list.",
+                              "i" = "All objects must contain column in meta.data named: {.val {group.by}}.")
+        )
+      }
+    }
+
+  } else {
+    if (length(x = dataset_names) != length(x = x)) {
+      cli_abort(message = "The number of {.code dataset_names} provided ({.field {length(x = dataset_names)}}) does not match number of Seurat objects in list ({.field {length(x = x)}}).")
+    }
+  }
+
+  if (isTRUE(x = use_seurat_var_genes)) {
+    var_genes <- lapply(x, function(obj) {
+      VariableFeatures(object = obj)
+    })
+
+    if (any(lengths(x = var_genes) == 0)) {
+      cli_abort(message ="{.code use_seurat_var_genes = TRUE}, but not all objects in list have variable features.")
+    }
+
+    var_genes_method <- str_to_lower(string = var_genes_method)
+    if (!var_genes_method %in% c("intersect", "union")) {
+      cli_abort(message = "{.code var_genes_method} must be either {.field intersect} or {.field union}.")
+    }
+
+    # fold the per-object feature vectors into a single vector
+    if (var_genes_method == "union") {
+      var_genes <- Reduce(f = function(a, b) {
+        union(x = a, y = b)
+      }, x = var_genes)
+    }
+    if (var_genes_method == "intersect") {
+      var_genes <- Reduce(f = function(a, b) {
+        intersect(x = a, y = b)
+      }, x = var_genes)
+    }
+  }
+
+  # Get raw data & cells
+  raw_data_list <- lapply(x, function(obj) {
+    counts_layer <- Layers(object = obj, search = "counts")
+    LayerData(object = obj, layer = counts_layer)
+  })
+
+  if (is.null(x = dataset_names)) {
+    # each object must carry exactly one value in `group.by`; that value names the dataset
+    group_names <- vapply(x, function(obj) {
+      obj_values <- unique(x = as.character(x = Fetch_Meta(object = obj)[[group.by]]))
+      if (length(x = obj_values) != 1) {
+        cli_abort(message = c("Some objects in list have multiple values within the {.field {group.by}} column.",
+                              "i" = "This column must only contain one value per object"))
+      }
+      obj_values
+    }, FUN.VALUE = character(1), USE.NAMES = FALSE)
+
+    if (length(x = unique(x = group_names)) != length(x = x)) {
+      cli_abort(message = c("Some objects in list have the same values within the {.field {group.by}} column.",
+                            "i" = "All objects must have unique value in this column."))
+    }
+
+    names(x = raw_data_list) <- group_names
+  } else {
+    names(x = raw_data_list) <- dataset_names
+  }
+
+
+  # Create LIGER Object
+  if (isTRUE(x = verbose)) {
+    cli_inform(message = c("*" = "Creating LIGER object."))
+  }
+
+  liger_object <- rliger::createLiger(raw.data = raw_data_list, remove.missing = remove_missing)
+
+  if (isTRUE(x = renormalize)) {
+    if (isTRUE(x = verbose)) {
+      cli_inform(message = c("*" = "Normalizing data."))
+    }
+    liger_object <- rliger::normalize(object = liger_object, remove.missing = remove_missing)
+  }
+
+  # Add var genes
+  if (isTRUE(x = use_seurat_var_genes)) {
+    liger_object@var.genes <- var_genes
+  }
+
+  # transfer meta
+  if (isTRUE(x = keep_meta)) {
+    # extract meta data from seurat objects; barcodes are moved into a column per object
+    # so they do not depend on how row names are handled when the tables are combined
+    seurat_meta <- lapply(x, function(obj) {
+      Fetch_Meta(object = obj) %>%
+        select(-any_of(c("nFeature_RNA", "nCount_RNA"))) %>%
+        rownames_to_column("barcodes")
+    })
+
+    seurat_meta <- bind_rows(seurat_meta)
+
+    # pull current liger meta
+    liger_meta <- Fetch_Meta(object = liger_object) %>%
+      rownames_to_column("barcodes")
+
+    # join meta
+    new_liger_meta <- suppressMessages(left_join(x = liger_meta, y = seurat_meta, by = join_by("barcodes"))) %>%
+      column_to_rownames("barcodes")
+
+    # Add to LIGER object
+    liger_object@cell.data <- new_liger_meta
+  }
+
+  # return object
+  return(liger_object)
+}
+
+
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+#################### CONVERT TO SEURAT ####################
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+#' Convert objects to \code{Seurat} objects
+#'
+#' Merges raw.data and scale.data of object, and creates Seurat object with these values along with slots
+#' containing dimensionality reduction coordinates, iNMF factorization, and cluster assignments.
+#' Supports Seurat V3/4 and V5.
+#'
+#' Stores original dataset identity by default in new object metadata if dataset names are passed
+#' in nms. iNMF factorization is stored in dim.reduction object with key "iNMF".
+#'
+#' @param x \code{liger} object.
+#' @param nms By default, labels cell names with dataset of origin (this is to account for cells in
+#' different datasets which may have same name). Other names can be passed here as vector, must have
+#' same length as the number of datasets. (default names(H)). Only used when `add_barcode_names = TRUE`.
+#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default TRUE).
+#' If FALSE, any normalized data stored in the liger object is transferred instead.
+#' @param use.liger.genes Whether to carry over variable genes (default TRUE).
+#' @param by.dataset If TRUE, dataset of origin is used as the identity class in the Seurat object
+#' instead of the cluster assignments (default FALSE). Dataset of origin is also used when the liger
+#' object contains no cluster assignments.
+#' @param keep_meta logical. Whether to transfer additional metadata (nGene/nUMI/dataset already transferred)
+#' to new Seurat Object. Default is TRUE.
+#' @param reduction_label Name of dimensionality reduction technique used. Enables accurate transfer
+#' or name to Seurat object instead of defaulting to "tSNE".
+#' @param seurat_assay Name to set for assay in Seurat Object. Default is "RNA".
+#' @param assay_type what type of Seurat assay to create in new object (Assay vs Assay5).
+#' Default is NULL which will default to the current user settings.
+#' See \code{\link{Convert_Assay}} parameter `convert_to` for acceptable values.
+#' @param add_barcode_names logical, whether to add dataset names to the cell barcodes when
+#' creating Seurat object, default is FALSE.
+#' @param barcode_prefix logical, if `add_barcode_names = TRUE` should the names be added as
+#' prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).
+#' @param barcode_cell_id_delimiter The delimiter to use when adding dataset id to barcode
+#' prefix/suffix. Default is "_".
+#' @param ... unused.
+#'
+#' @return Seurat object with raw.data, scale.data, reduction_label, iNMF, and ident slots set.
+#'
+#' @references Original function is part of LIGER package \url{https://github.com/welch-lab/liger} (Licence: GPL-3).
+#' Function was modified for use in scCustomize with additional parameters/functionality.
+#'
+#' @method as.Seurat liger
+#'
+#' @concept object_conversion
+#'
+#' @import cli
+#' @import Matrix
+#' @import Seurat
+#' @importFrom dplyr any_of pull select
+#' @importFrom magrittr "%>%"
+#' @importFrom methods as new
+#' @importFrom utils packageVersion
+#'
+#' @export
+#' @rdname as.Seurat
+#'
+#' @examples
+#' \dontrun{
+#' seurat_object <- as.Seurat(x = liger_object)
+#' }
+#'
+
+as.Seurat.liger <- function(
+  x,
+  nms = names(x@H),
+  renormalize = TRUE,
+  use.liger.genes = TRUE,
+  by.dataset = FALSE,
+  keep_meta = TRUE,
+  reduction_label = "UMAP",
+  seurat_assay = "RNA",
+  assay_type = NULL,
+  add_barcode_names = FALSE,
+  barcode_prefix = TRUE,
+  barcode_cell_id_delimiter = "_",
+  ...
+) {
+  # temp liger version check
+  if (packageVersion(pkg = 'rliger') > "1.0.1") {
+    cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.",
+                          "i" = "Functionality with rliger v2+ is currently in development."))
+  }
+
+  if (is.null(x = reduction_label)) {
+    cli_abort(message = c("{.code reduction_label} parameter was not set.",
+                          "*" = "LIGER objects do not store name of dimensionality reduction technique used.",
+                          "i" = "In order to retain proper labels in Seurat object please set {.code reduction_label} to {.val tSNE}, {.val UMAP}, {.val etc}."))
+  }
+
+  # Adjust name for dimreduc key
+  key_name <- paste0(reduction_label, "_")
+
+  # adjust raw data slot if needed
+  if (!inherits(x = x@raw.data[[1]], what = 'dgCMatrix')) {
+    x@raw.data <- lapply(x@raw.data, as, Class = "CsparseMatrix")
+  }
+
+  # check assay_type is ok and normalise it to the value passed on to Convert_Assay
+  if (!is.null(x = assay_type)) {
+    # Check accepted
+    accepted_V3 <- c("Assay", "assay", "V3", "v3")
+    accepted_V5 <- c("Assay5", "assay5", "V5", "v5")
+
+    if (length(x = assay_type) != 1 || !assay_type %in% c(accepted_V5, accepted_V3)) {
+      cli_abort(message = c("Value provided to {.code assay_type} ({.field {assay_type}}) was not accepted value.",
+                            "i" = "Accepted values to convert to V3/4 are: {.field {accepted_V3}}",
+                            "i" = "Accepted values to convert to V5 are: {.field {accepted_V5}}"))
+    }
+
+    # set assay value
+    if (assay_type %in% accepted_V5) {
+      if (packageVersion(pkg = 'Seurat') < "5") {
+        cli_abort(message = "Seurat version must be v5.0.0 or greater to create Assay5.")
+      }
+
+      convert_to <- "v5"
+    } else {
+      convert_to <- "v3"
+    }
+  }
+
+  # merge raw data
+  if (isTRUE(x = add_barcode_names)) {
+    raw.data <- Merge_Sparse_Data_All(matrix_list = x@raw.data, add_cell_ids = nms, prefix = barcode_prefix, cell_id_delimiter = barcode_cell_id_delimiter)
+  } else {
+    raw.data <- Merge_Sparse_Data_All(matrix_list = x@raw.data)
+  }
+
+  # create object
+  new.seurat <- CreateSeuratObject(counts = raw.data, assay = seurat_assay)
+
+  # normalize data
+  if (isTRUE(x = renormalize)) {
+    new.seurat <- Seurat::NormalizeData(new.seurat)
+  } else {
+    if (length(x = x@norm.data) > 0) {
+      if (isTRUE(x = add_barcode_names)) {
+        norm.data <- Merge_Sparse_Data_All(matrix_list = x@norm.data, add_cell_ids = nms, prefix = barcode_prefix, cell_id_delimiter = barcode_cell_id_delimiter)
+      } else {
+        norm.data <- Merge_Sparse_Data_All(matrix_list = x@norm.data)
+      }
+
+      new.seurat <- SetAssayData(object = new.seurat, layer = "data", slot = "data", new.data = norm.data)
+    }
+  }
+
+  if (length(x = x@var.genes) > 0 && isTRUE(x = use.liger.genes)) {
+    VariableFeatures(object = new.seurat) <- x@var.genes
+  }
+  if (length(x = x@scale.data) > 0) {
+    scale.data <- t(x = Reduce(rbind, x@scale.data))
+    colnames(x = scale.data) <- colnames(x = raw.data)
+    new.seurat <- SetAssayData(object = new.seurat, layer = "scale.data", slot = "scale.data", new.data = scale.data)
+  }
+
+
+  if (all(dim(x = x@W) > 0) && all(dim(x = x@H.norm) > 0)) {
+    inmf.loadings <- t(x = x@W)
+    rinmf.loadings <- inmf.loadings
+
+    dimnames(x = inmf.loadings) <- list(x@var.genes,
+                                        paste0("iNMF_", seq_len(ncol(inmf.loadings))))
+    dimnames(x = rinmf.loadings) <- list(x@var.genes,
+                                         paste0("rawiNMF_", seq_len(ncol(rinmf.loadings))))
+
+    inmf.embeddings <- x@H.norm
+    rinmf.embeddings <- do.call(what = 'rbind', args = x@H)
+
+    # NOTE(review): cell names are taken from the liger scale.data row names; confirm these match
+    # the Seurat cell names when `add_barcode_names = TRUE` changes the barcodes.
+    dimnames(x = inmf.embeddings) <- list(unlist(x = lapply(x@scale.data, rownames), use.names = FALSE),
+                                          paste0("iNMF_", seq_len(ncol(inmf.loadings))))
+    dimnames(x = rinmf.embeddings) <- list(unlist(x = lapply(x@scale.data, rownames), use.names = FALSE),
+                                           paste0("rawiNMF_", seq_len(ncol(x = inmf.loadings))))
+
+
+    # feature loadings (genes x factors) go in `loadings`, cell embeddings in `embeddings`
+    inmf.obj <- CreateDimReducObject(
+      embeddings = inmf.embeddings,
+      loadings = inmf.loadings,
+      assay = seurat_assay,
+      global = TRUE,
+      key = "iNMF_"
+    )
+    new.seurat[["iNMF"]] <- inmf.obj
+
+    rinmf.obj <- CreateDimReducObject(
+      embeddings = rinmf.embeddings,
+      loadings = rinmf.loadings,
+      key = "rawiNMF_",
+      global = TRUE,
+      assay = seurat_assay
+    )
+    # store the raw (un-normalized) factorization as well; without this the object is discarded
+    new.seurat[["rawiNMF"]] <- rinmf.obj
+  }
+
+
+  if (all(dim(x = x@tsne.coords) > 0)) {
+    dimreduc.embeddings <- x@tsne.coords
+    dimnames(x = dimreduc.embeddings) <- list(rownames(x@H.norm),
+                                              paste0(key_name, 1:2))
+
+    dimreduc.obj <- CreateDimReducObject(
+      embeddings = dimreduc.embeddings,
+      assay = seurat_assay,
+      global = TRUE,
+      key = key_name
+    )
+    new.seurat[[reduction_label]] <- dimreduc.obj
+  }
+
+  new.seurat$orig.ident <- x@cell.data$dataset
+
+  idents <- x@clusters
+
+  # fall back to dataset of origin when no clusters are stored or when requested
+  if (length(x = idents) == 0 || isTRUE(x = by.dataset)) idents <- x@cell.data$dataset
+  Idents(object = new.seurat) <- idents
+
+  # transfer meta
+  if (isTRUE(x = keep_meta)) {
+    # extract meta data from liger object
+    liger_meta <- Fetch_Meta(object = x)
+    # remove meta data values already transferred
+    liger_meta <- liger_meta %>%
+      select(-any_of(c("nUMI", "nGene", "dataset")))
+    # extract meta data names
+    meta_names <- colnames(x = liger_meta)
+    # add meta data to new seurat object
+    for (meta_var in meta_names){
+      meta_transfer <- liger_meta %>%
+        pull(meta_var)
+      names(x = meta_transfer) <- colnames(x = new.seurat)
+      new.seurat <- AddMetaData(object = new.seurat,
+                                metadata = meta_transfer,
+                                col.name = meta_var)
+    }
+  }
+
+
+  # convert assay only when the requested type differs from the version the object was created with
+  if (!is.null(x = assay_type)) {
+    current_version <- getOption(x = "Seurat.object.assay.version")
+    if (is.null(x = current_version) || current_version != convert_to) {
+      new.seurat <- Convert_Assay(seurat_object = new.seurat, convert_to = convert_to)
+    }
+  }
+
+  # return object
+  return(new.seurat)
+}
+
+
+#' Create a Seurat object containing the data from a liger object `r lifecycle::badge("soft-deprecated")`
+#'
+#' Merges raw.data and scale.data of object, and creates Seurat object with these values along with
+#' tsne.coords, iNMF factorization, and cluster assignments. Supports Seurat V2 and V3.
+#'
+#' Stores original dataset identity by default in new object metadata if dataset names are passed
+#' in nms. iNMF factorization is stored in dim.reduction object with key "iNMF".
+#'
+#' @param liger_object \code{liger} object.
+#' @param nms By default, labels cell names with dataset of origin (this is to account for cells in
+#' different datasets which may have same name). Other names can be passed here as vector, must have
+#' same length as the number of datasets. (default names(H)).
+#' @param renormalize Whether to log-normalize raw data using Seurat defaults (default TRUE).
+#' @param use.liger.genes Whether to carry over variable genes (default TRUE).
+#' @param by.dataset Include dataset of origin in cluster identity in Seurat object (default FALSE).
+#' @param keep_meta logical. Whether to transfer additional metadata (nGene/nUMI/dataset already transferred)
+#' to new Seurat Object. Default is TRUE.
+#' @param reduction_label Name of dimensionality reduction technique used. Enables accurate transfer
+#' or name to Seurat object instead of defaulting to "tSNE".
+#' @param seurat_assay Name to set for assay in Seurat Object. Default is "RNA".
+#' @param assay_type what type of Seurat assay to create in new object (Assay vs Assay5).
+#' Default is NULL which will default to the current user settings.
+#' See \code{\link{Convert_Assay}} parameter `convert_to` for acceptable values.
#' @param add_barcode_names logical, whether to add dataset names to the cell barcodes when
#' creating Seurat object, default is FALSE.
#' @param barcode_prefix logical, if `add_barcode_names = TRUE` should the names be added as
#' prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).
#' @param barcode_cell_id_delimiter The delimiter to use when adding dataset id to barcode
#' prefix/suffix.  Default is "_".
#'
#' @return Seurat object with raw.data, scale.data, reduction_label, iNMF, and ident slots set.
#'
#' @references Original function is part of LIGER package \url{https://github.com/welch-lab/liger} (Licence: GPL-3).
#' Function was slightly modified for use in scCustomize with keep.meta parameter.  Also posted as
#' PR to liger GitHub.
#'
#' @import cli
#' @import Matrix
#' @importFrom dplyr any_of pull select
#' @importFrom magrittr "%>%"
#' @importFrom methods as new
#' @importFrom utils packageVersion
#'
#' @export
#'
#' @concept object_conversion
#'
#' @examples
#' \dontrun{
#' seurat_object <- Liger_to_Seurat(liger_object = LIGER_OBJ, reduction_label = "UMAP")
#' }

Liger_to_Seurat <- function(
  liger_object,
  nms = names(liger_object@H),
  renormalize = TRUE,
  use.liger.genes = TRUE,
  by.dataset = FALSE,
  keep_meta = TRUE,
  reduction_label = "UMAP",
  seurat_assay = "RNA",
  assay_type = NULL,
  add_barcode_names = FALSE,
  barcode_prefix = TRUE,
  barcode_cell_id_delimiter = "_"
) {
  lifecycle::deprecate_soft(when = "2.1.0",
                            what = "Liger_to_Seurat()",
                            with = "as.Seurat()",
                            details = c("i" = "Please adjust code now to prepare for full deprecation.")
  )

  if (is.null(x = reduction_label)) {
    cli_abort(message = c("{.code reduction_label} parameter was not set.",
                          "*" = "LIGER objects do not store name of dimensionality reduction technique used.",
                          "i" = "In order to retain proper labels in Seurat object please set {.code reduction_label} to {.val tSNE}, {.val UMAP}, {.val etc}."))
  }

  # Adjust name for dimreduc key
  key_name <- paste0(reduction_label, "_")

  # adjust raw data slot if needed
  if (!inherits(x = liger_object@raw.data[[1]], what = 'dgCMatrix')) {
    liger_object@raw.data <- lapply(liger_object@raw.data, as, Class = "CsparseMatrix")
  }

  # check assay_type is ok
  if (!is.null(x = assay_type)) {
    # Check accepted
    accepted_V3 <- c("Assay", "assay", "V3", "v3")
    accepted_V5 <- c("Assay5", "assay5", "V5", "v5")

    # `convert_to` holds the normalized assay version ("v3"/"v5"); it must be seeded from
    # `assay_type` because this function has no `convert_to` argument of its own.
    convert_to <- assay_type

    if (!convert_to %in% c(accepted_V5, accepted_V3)) {
      cli_abort(message = c("Value provided to {.code assay_type} ({.field {convert_to}}) was not accepted value.",
                            "i" = "Accepted values to convert to V3/4 are: {.field {accepted_V3}}",
                            "i" = "Accepted values to convert to V5 are: {.field {accepted_V5}}"))
    }

    # set assay value
    if (convert_to %in% accepted_V5) {
      if (packageVersion(pkg = 'Seurat') < "5") {
        cli_abort(message = "Seurat version must be v5.0.0 or greater to create Assay5.")
      }

      convert_to <- "v5"
    } else {
      convert_to <- "v3"
    }
  }

  # merge raw data
  if (isTRUE(x = add_barcode_names)) {
    raw.data <- Merge_Sparse_Data_All(matrix_list = liger_object@raw.data, add_cell_ids = nms, prefix = barcode_prefix, cell_id_delimiter = barcode_cell_id_delimiter)
  } else {
    raw.data <- Merge_Sparse_Data_All(matrix_list = liger_object@raw.data)
  }

  # create object
  new.seurat <- CreateSeuratObject(counts = raw.data, assay = seurat_assay)

  # normalize data
  if (isTRUE(x = renormalize)) {
    new.seurat <- Seurat::NormalizeData(new.seurat)
  } else {
    if (length(x = liger_object@norm.data) > 0) {
      if (isTRUE(x = add_barcode_names)) {
        norm.data <- Merge_Sparse_Data_All(matrix_list = liger_object@norm.data, add_cell_ids = nms, prefix = barcode_prefix, cell_id_delimiter = barcode_cell_id_delimiter)
      } else {
        norm.data <- Merge_Sparse_Data_All(matrix_list = liger_object@norm.data)
      }

      new.seurat <- SetAssayData(object = new.seurat, layer = "data", slot = "data", new.data = norm.data)
    }
  }

  if (length(x = liger_object@var.genes) > 0 && isTRUE(x = use.liger.genes)) {
    VariableFeatures(object = new.seurat) <- liger_object@var.genes
  }
  if (length(x = liger_object@scale.data) > 0) {
    scale.data <- t(x = Reduce(rbind, liger_object@scale.data))
    colnames(x = scale.data) <- colnames(x = raw.data)
    new.seurat <- SetAssayData(object = new.seurat, layer = "scale.data", slot = "scale.data", new.data = scale.data)
  }

  # transfer iNMF factorization (only when both W and H.norm are populated)
  if (all(dim(x = liger_object@W) > 0) && all(dim(x = liger_object@H.norm) > 0)) {
    inmf.loadings <- t(x = liger_object@W)
    rinmf.loadings <- inmf.loadings

    dimnames(x = inmf.loadings) <- list(liger_object@var.genes,
                                        paste0("iNMF_", seq_len(ncol(inmf.loadings))))
    dimnames(x = rinmf.loadings) <- list(liger_object@var.genes,
                                         paste0("rawiNMF_", seq_len(ncol(rinmf.loadings))))

    inmf.embeddings <- liger_object@H.norm
    rinmf.embeddings <- do.call(what = 'rbind', args = liger_object@H)

    cell_names <- unlist(x = lapply(liger_object@scale.data, rownames), use.names = FALSE)
    dimnames(x = inmf.embeddings) <- list(cell_names,
                                          paste0("iNMF_", seq_len(ncol(inmf.loadings))))
    dimnames(x = rinmf.embeddings) <- list(cell_names,
                                           paste0("rawiNMF_", seq_len(ncol(x = inmf.loadings))))

    # loadings must be the feature loadings (genes x factors), not the cell embeddings
    inmf.obj <- CreateDimReducObject(
      embeddings = inmf.embeddings,
      loadings = inmf.loadings,
      assay = seurat_assay,
      global = TRUE,
      key = "iNMF_"
    )
    new.seurat[["iNMF"]] <- inmf.obj

    # NOTE(review): the raw (un-normalized) iNMF reduction is constructed but never stored in
    # the returned object; confirm whether it should be added as its own reduction.
    rinmf.obj <- CreateDimReducObject(
      embeddings = rinmf.embeddings,
      loadings = rinmf.loadings,
      key = "rawiNMF_",
      global = TRUE,
      assay = seurat_assay
    )
  }

  # transfer visualization reduction under the user supplied label
  if (all(dim(x = liger_object@tsne.coords) > 0)) {
    dimreduc.embeddings <- liger_object@tsne.coords
    dimnames(x = dimreduc.embeddings) <- list(rownames(liger_object@H.norm),
                                              paste0(key_name, 1:2))

    dimreduc.obj <- CreateDimReducObject(
      embeddings = dimreduc.embeddings,
      assay = seurat_assay,
      global = TRUE,
      key = key_name
    )
    new.seurat[[reduction_label]] <- dimreduc.obj
  }

  new.seurat$orig.ident <- liger_object@cell.data$dataset

  idents <- liger_object@clusters

  if (length(x = idents) == 0 || isTRUE(x = by.dataset)) idents <- liger_object@cell.data$dataset
  Idents(object = new.seurat) <- idents

  # transfer meta
  if (isTRUE(x = keep_meta)) {
    # extract meta data from liger object
    liger_meta <- Fetch_Meta(object = liger_object)
    # remove meta data values already transferred
    liger_meta <- liger_meta %>%
      select(-any_of(c("nUMI", "nGene", "dataset")))
    # extract meta data names
    meta_names <- colnames(x = liger_meta)
    # add meta data to new seurat object
    for (meta_var in meta_names){
      meta_transfer <- liger_meta %>%
        pull(meta_var)
      names(x = meta_transfer) <- colnames(x = new.seurat)
      new.seurat <- AddMetaData(object = new.seurat,
                                metadata = meta_transfer,
                                col.name = meta_var)
    }
  }

  # convert assay only when the requested type differs from the type the session creates by default
  if (!is.null(x = assay_type)) {
    current_version <- getOption(x = "Seurat.object.assay.version")
    # isTRUE() keeps this a scalar test even if the option is unset (NULL)
    if (isTRUE(x = current_version != convert_to)) {
      new.seurat <- Convert_Assay(seurat_object = new.seurat, convert_to = convert_to)
    }
  }

  # return object
  return(new.seurat)
}


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#################### CONVERT TO ANNDATA ####################
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


#' Create & Save Anndata Object
#'
#' @param file_path directory file path and/or file name prefix.  Defaults to current wd.
#' @param file_name file name.
#' @param assay Assay containing data to use, (default is "RNA").
#' @param main_layer the layer of data to become default layer in anndata object (default is "data").
#' @param other_layers other data layers to transfer to anndata object (default is "counts").
#' @param transer_dimreduc logical, whether to transfer dimensionality reduction coordinates from
#' Seurat to anndata object (default is TRUE).
#' @param verbose logical, whether to print status messages during object conversion (default is TRUE).
#'
#'
#' @references Seurat version modified and enhanced version of `sceasy::seurat2anndata` (sceasy package: \url{https://github.com/cellgeni/sceasy}; License: GPL-3).  Function has additional checks and supports Seurat V3 and V5 object structure.
#'
#' @method as.anndata Seurat
#'
#' @concept object_conversion
#'
#' @import cli
#' @import Seurat
#' @importFrom stringr str_to_lower
#'
#' @export
#' @rdname as.anndata
#'
#' @examples
#' \dontrun{
#' as.anndata(x = seurat_object, file_path = "/folder_name", file_name = "anndata_converted.h5ad")
#' }
#'

as.anndata.Seurat <- function(
  x,
  file_path,
  file_name,
  assay = "RNA",
  main_layer = "data",
  other_layers = "counts",
  transer_dimreduc = TRUE,
  verbose = TRUE,
  ...
) {
  # Check reticulate installed
  reticulate_check <- is_installed(pkg = "reticulate")
  if (isFALSE(x = reticulate_check)) {
    cli_abort(message = c(
      "Please install the {.val reticulate} package to use {.code as.anndata}.",
      "i" = "This can be accomplished with the following commands: ",
      "----------------------------------------",
      "{.field `install.packages({symbol$dquote_left}reticulate{symbol$dquote_right})`}",
      "----------------------------------------"
    ))
  }

  # Set file_path before path check if current dir specified as opposed to leaving set to NULL
  if (!is.null(x = file_path) && file_path == "") {
    file_path <- NULL
  }

  # Check file path is valid
  if (!is.null(x = file_path)) {
    if (!dir.exists(paths = file_path)) {
      cli_abort(message = "Provided {.code file_path}: {symbol$dquote_left}{.field {file_path}}{symbol$dquote_right} does not exist.")
    }
  }

  # Check if file name provided
  if (is.null(x = file_name)) {
    cli_abort(message = "No file name provided.  Please provide a file name using {.code file_name}.")
  }

  # Append extension if missing (dot is escaped so only a literal ".h5ad" suffix counts)
  if (!grepl(pattern = "\\.h5ad$", x = file_name)) {
    file_name <- paste0(file_name, ".h5ad")
  }

  if (!is.null(x = file_path)) {
    norm_path <- normalizePath(path = file_path)
    full_path_name <- file.path(norm_path, file_name)
  } else {
    full_path_name <- file.path(file_name)
  }

  if (isTRUE(x = verbose)) {
    cli_inform(message = c("*" = "Checking Seurat object validity & Extracting Data"))
  }

  # Check Seurat
  Is_Seurat(seurat_object = x)

  # Run update to ensure functionality
  x <- suppressMessages(UpdateSeuratObject(object = x))

  # Check Assay5 for multiple layers
  assay_is_v5 <- isTRUE(x = Assay5_Check(seurat_object = x, assay = assay))

  if (assay_is_v5) {
    layers_check <- Layers(object = x, search = main_layer, assay = assay)
    if (length(x = layers_check) > 1) {
      cli_abort(message = c("Multiple data layers present {.field {head(x = layers_check, n = 2)}}.",
                            "i" = "Please run {.code JoinLayers} before converting to anndata object."))
    }
  }

  # Validate requested layers against the requested assay (not the object's default assay)
  main_approved_slots <- Layers(object = x, search = c("counts", "data"), assay = assay)

  if (!main_layer %in% main_approved_slots) {
    cli_abort(message = "{.code main_layer} must be one of {.field {main_approved_slots}}")
  }

  if (main_layer %in% other_layers) {
    cli_abort(message = "{.code main_layer} and {.code other_layers} cannot overlap.")
  }

  if (isFALSE(x = all(other_layers %in% Layers(object = x, assay = assay)))) {
    cli_abort(message = "One or more of {.field {other_layers}} were not found in Seurat object.")
  }

  # Extract Data
  main_layer_data <- LayerData(object = x, assay = assay, layer = main_layer)

  meta_data <- Fetch_Meta(object = x)

  meta_data <- drop_single_value_cols(df = meta_data)

  # feature-level meta data lives in a different slot depending on assay class
  if (assay_is_v5) {
    seurat_var_info <- drop_single_value_cols(df = x[[assay]]@meta.data)
  } else {
    seurat_var_info <- drop_single_value_cols(df = x[[assay]]@meta.features)
  }

  # Collect reductions; stays NULL when transfer is disabled or no reductions are present
  dim_reducs_list <- NULL
  if (isTRUE(x = transer_dimreduc)) {
    dim_reducs_present <- Reductions(object = x)
    if (length(x = dim_reducs_present) > 0) {
      dim_reducs_list <- lapply(dim_reducs_present, function(z) {
        as.matrix(x = Embeddings(object = x, reduction = z))
      })
      names(x = dim_reducs_list) <- paste0("X_", str_to_lower(string = dim_reducs_present))
    }
  }

  # anndata stores cells as rows so every layer is transposed
  if (length(x = other_layers) > 0) {
    other_layers_list <- lapply(other_layers, function(i) {
      Matrix::t(LayerData(object = x, layer = i, assay = assay))
    })
    names(x = other_layers_list) <- other_layers
  } else {
    other_layers_list <- list()
  }

  # convert
  if (isTRUE(x = verbose)) {
    cli_inform(message = c("*" = "Creating anndata object."))
  }
  anndata <- reticulate::import("anndata", convert = FALSE)

  adata <- anndata$AnnData(
    X = Matrix::t(main_layer_data),
    obs = meta_data,
    var = seurat_var_info,
    obsm = dim_reducs_list,
    layers = other_layers_list
  )

  if (isTRUE(x = verbose)) {
    cli_inform(message = c("*" = "Writing anndata file: {.val {full_path_name}}"))
  }
  adata$write(full_path_name, compression = "gzip")

  adata
}


#' Create & Save Anndata Object
#'
#' @param file_path directory file path and/or file name prefix.  Defaults to current wd.
#' @param file_name file name.
#' @param transfer_norm.data logical, whether to transfer the norm.data in addition to
#' raw.data, default is FALSE.
#' @param reduction_label What to label the visualization dimensionality reduction.
#' LIGER does not store name of technique and therefore needs to be set manually.
#' @param add_barcode_names logical, whether to add dataset names to the cell barcodes when
#' merging object data, default is FALSE.
#' @param barcode_prefix logical, if `add_barcode_names = TRUE` should the names be added as
#' prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).
#' @param barcode_cell_id_delimiter The delimiter to use when adding dataset id to barcode
#' prefix/suffix.  Default is "_".
#' @param verbose logical, whether to print status messages during object conversion (default is TRUE).
#'
#' @references LIGER version inspired by `sceasy::seurat2anndata` modified and updated to apply to LIGER objects (sceasy package: \url{https://github.com/cellgeni/sceasy}; License: GPL-3).
#'
#' @method as.anndata liger
#'
#' @concept object_conversion
#'
#' @import cli
#' @importFrom dplyr mutate
#' @importFrom magrittr "%>%"
#' @importFrom stringr str_to_lower
#' @importFrom tibble column_to_rownames
#' @importFrom utils packageVersion
#'
#' @export
#' @rdname as.anndata
#'
#' @examples
#' \dontrun{
#' as.anndata(x = liger_object, file_path = "/folder_name", file_name = "anndata_converted.h5ad")
#' }
#'

as.anndata.liger <- function(
  x,
  file_path,
  file_name,
  transfer_norm.data = FALSE,
  reduction_label = NULL,
  add_barcode_names = FALSE,
  barcode_prefix = TRUE,
  barcode_cell_id_delimiter = "_",
  verbose = TRUE,
  ...
) {
  # temp liger version check
  if (packageVersion(pkg = 'rliger') > "1.0.1") {
    cli_abort(message = c("Liger functionality is currently restricted to rliger v1.0.1 or lower.",
                          "i" = "Functionality with rliger v2+ is currently in development."))
  }

  # Check reticulate installed
  reticulate_check <- is_installed(pkg = "reticulate")
  if (isFALSE(x = reticulate_check)) {
    cli_abort(message = c(
      "Please install the {.val reticulate} package to use {.code as.anndata}.",
      "i" = "This can be accomplished with the following commands: ",
      "----------------------------------------",
      "{.field `install.packages({symbol$dquote_left}reticulate{symbol$dquote_right})`}",
      "----------------------------------------"
    ))
  }

  # Check all barcodes are unique to begin with
  duplicated_barcodes <- x@raw.data %>%
    lapply(colnames) %>%
    unlist() %>%
    duplicated() %>%
    any()

  # `add_barcode_names` defaults to FALSE (never NULL), so test for "not TRUE" to make the guard reachable
  if (isTRUE(x = duplicated_barcodes) && !isTRUE(x = add_barcode_names)) {
    cli_abort(message = c("There are overlapping cell barcodes present in the input data",
                          "i" = "Please set {.code add_barcode_names = TRUE} to make all cell barcodes unique.")
    )
  }

  if (is.null(x = reduction_label)) {
    cli_abort(message = c("{.code reduction_label} parameter was not set.",
                          "*" = "LIGER objects do not store name of dimensionality reduction technique used.",
                          "i" = "In order to retain proper labels in anndata object please set {.code reduction_label} to {.val tSNE}, {.val UMAP}, {.val etc}."))
  }

  # Set file_path before path check if current dir specified as opposed to leaving set to NULL
  if (!is.null(x = file_path) && file_path == "") {
    file_path <- NULL
  }

  # Check file path is valid
  if (!is.null(x = file_path)) {
    if (!dir.exists(paths = file_path)) {
      cli_abort(message = "Provided {.code file_path}: {symbol$dquote_left}{.field {file_path}}{symbol$dquote_right} does not exist.")
    }
  }

  # Check if file name provided
  if (is.null(x = file_name)) {
    cli_abort(message = "No file name provided.  Please provide a file name using {.code file_name}.")
  }

  # Append extension if missing (dot is escaped so only a literal ".h5ad" suffix counts)
  if (!grepl(pattern = "\\.h5ad$", x = file_name)) {
    file_name <- paste0(file_name, ".h5ad")
  }

  if (!is.null(x = file_path)) {
    norm_path <- normalizePath(path = file_path)
    full_path_name <- file.path(norm_path, file_name)
  } else {
    full_path_name <- file.path(file_name)
  }

  # dataset names used to make barcodes unique when requested
  nms <- names(x = x@H)

  if (isTRUE(x = verbose)) {
    cli_inform(message = c("*" = "Creating main layer from {.field raw.data}"))
  }
  if (isTRUE(x = add_barcode_names)) {
    main_layer_data <- Merge_Sparse_Data_All(matrix_list = x@raw.data, add_cell_ids = nms, prefix = barcode_prefix, cell_id_delimiter = barcode_cell_id_delimiter)
  } else {
    main_layer_data <- Merge_Sparse_Data_All(matrix_list = x@raw.data)
  }

  # merge norm data
  if (isTRUE(x = transfer_norm.data)) {
    if (isTRUE(x = verbose)) {
      cli_inform(message = c("*" = "Creating other layer from {.field norm.data}"))
    }
    if (isTRUE(x = add_barcode_names)) {
      norm_data <- Merge_Sparse_Data_All(matrix_list = x@norm.data, add_cell_ids = nms, prefix = barcode_prefix, cell_id_delimiter = barcode_cell_id_delimiter)
    } else {
      norm_data <- Merge_Sparse_Data_All(matrix_list = x@norm.data)
    }

    other_layers <- list("norm.data" = Matrix::t(norm_data))
  } else {
    other_layers <- list()
  }

  # Build per-gene annotation: one row per feature, variable genes flagged by name (NA otherwise)
  # NOTE(review): assumes LIGER_Features() returns features in the same order as the rows of the
  # merged matrix -- confirm against Merge_Sparse_Data_All.
  liger_var_genes <- x@var.genes
  total_features <- data.frame("all_genes" = LIGER_Features(liger_object = x))

  liger_var_df <- total_features %>%
    mutate("variable_genes" = ifelse(.data[["all_genes"]] %in% liger_var_genes, .data[["all_genes"]], NA)) %>%
    column_to_rownames("all_genes")

  # Collect reductions that are actually present in the object
  reducs <- list()

  if (all(dim(x = x@W) > 0) && all(dim(x = x@H.norm) > 0)) {
    inmf.loadings <- Matrix::t(x = x@W)
    rinmf.loadings <- inmf.loadings

    dimnames(x = inmf.loadings) <- list(x@var.genes,
                                        paste0("iNMF_", seq_len(ncol(inmf.loadings))))
    dimnames(x = rinmf.loadings) <- list(x@var.genes,
                                         paste0("rawiNMF_", seq_len(ncol(rinmf.loadings))))

    inmf.embeddings <- x@H.norm
    rinmf.embeddings <- do.call(what = 'rbind', args = x@H)

    cell_names <- unlist(x = lapply(x@scale.data, rownames), use.names = FALSE)
    dimnames(x = inmf.embeddings) <- list(cell_names,
                                          paste0("iNMF_", seq_len(ncol(inmf.loadings))))
    dimnames(x = rinmf.embeddings) <- list(cell_names,
                                           paste0("rawiNMF_", seq_len(ncol(x = inmf.loadings))))

    # loadings must be the feature loadings (genes x factors), not the cell embeddings
    inmf.obj <- CreateDimReducObject(
      embeddings = inmf.embeddings,
      loadings = inmf.loadings,
      global = TRUE,
      key = "iNMF_"
    )

    rinmf.obj <- CreateDimReducObject(
      embeddings = rinmf.embeddings,
      loadings = rinmf.loadings,
      key = "rawiNMF_",
      global = TRUE
    )

    reducs[["X_inmf"]] <- as.matrix(x = Embeddings(object = inmf.obj))
    reducs[["X_rinmf"]] <- as.matrix(x = Embeddings(object = rinmf.obj))
  }

  # prep visualization reduction (skipped when no coordinates are stored)
  if (all(dim(x = x@tsne.coords) > 0)) {
    dimreduc.embeddings <- x@tsne.coords
    dimnames(x = dimreduc.embeddings) <- list(rownames(x@H.norm),
                                              paste0(reduction_label, 1:2))

    reducs[[paste0("X_", str_to_lower(reduction_label))]] <- dimreduc.embeddings
  }

  # pass NULL (Python None) rather than an empty list when nothing is available
  if (length(x = reducs) == 0) {
    reducs <- NULL
  }

  # get meta and drop single value columns
  liger_meta <- Fetch_Meta(object = x)

  liger_meta <- drop_single_value_cols(df = liger_meta)

  # Create anndata
  if (isTRUE(x = verbose)) {
    cli_inform(message = c("*" = "Creating anndata object."))
  }
  anndata <- reticulate::import("anndata", convert = FALSE)

  # `var` needs one row per feature, so the per-gene data.frame is used
  # (not the character vector of variable genes)
  adata <- anndata$AnnData(
    X = Matrix::t(main_layer_data),
    obs = liger_meta,
    var = liger_var_df,
    obsm = reducs,
    layers = other_layers
  )

  # write file
  if (isTRUE(x = verbose)) {
    cli_inform(message = c("*" = "Writing anndata file: {.val {full_path_name}}"))
  }
  adata$write(full_path_name, compression = "gzip")

  adata
}


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#################### CONVERT SEURAT ASSAYS ####################
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


#' Convert between Seurat Assay types
#'
#' Will convert assays within a Seurat object between "Assay" and "Assay5" types.
#'
#' @param seurat_object Seurat object name.
#' @param assay name(s) of assays to convert.  Default is NULL and will check with users
#' which assays they want to convert.
#' @param convert_to value of what assay type to convert current assays to.
#' \itemize{
#'       \item Accepted values for V3/4 are: "Assay", "assay", "V3", or "v3".
#'       \item Accepted values for V5 are: "Assay5", "assay5", "V5", or "v5".
#' }
#'
#' @return Seurat object with the selected assays converted.  If the user declines the
#' "convert all assays" prompt the object is returned (invisibly) unchanged.
#'
#' @concept object_conversion
#'
#' @import cli
#' @importFrom dplyr mutate
#' @importFrom magrittr "%>%"
#' @importFrom stringr str_to_lower
#' @importFrom tibble column_to_rownames
#' @importFrom utils packageVersion
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Convert to V3/4 assay
#' obj <- Convert_Assay(seurat_object = obj, convert_to = "V3")
#'
#' # Convert to 5 assay
#' obj <- Convert_Assay(seurat_object = obj, convert_to = "V5")
#' }
#'

Convert_Assay <- function(
  seurat_object,
  assay = NULL,
  convert_to
) {
  # Check accepted
  accepted_V3 <- c("Assay", "assay", "V3", "v3", "3")
  accepted_V5 <- c("Assay5", "assay5", "V5", "v5", "5")

  # convert to character in case numeric provided
  convert_to <- as.character(x = convert_to)

  if (!convert_to %in% c(accepted_V5, accepted_V3)) {
    cli_abort(message = c("Value provided to {.code convert_to} ({.field {convert_to}}) was not accepted value.",
                          "i" = "Accepted values to convert to V3/4 are: {.field {accepted_V3}}",
                          "i" = "Accepted values to convert to V5 are: {.field {accepted_V5}}"))
  }

  # normalize the requested value to the actual class name (and record the class converted from)
  if (convert_to %in% accepted_V5) {
    if (packageVersion(pkg = 'Seurat') < "5") {
      cli_abort(message = "Seurat version must be v5.0.0 or greater to create Assay5.")
    }

    convert_to <- "Assay5"
    convert_from <- "Assay"
  } else {
    convert_to <- "Assay"
    convert_from <- "Assay5"
  }

  # With no assay specified and several assays present, confirm that all should be converted
  if (is.null(x = assay)) {
    num_assays <- length(x = Assays(object = seurat_object))
    if (num_assays > 1) {
      if (yesno("Multiple assays ({.field {Assays(object = seurat_object)}}) are present.  Should all assays be converted to assay type: {.field {convert_to}}?")) {
        cli_inform(message = c("!" = "To only convert specific assays please specify assay names using {.code assay} parameter."))
        # hand back the untouched object so `obj <- Convert_Assay(obj, ...)` cannot overwrite it with NULL
        return(invisible(x = seurat_object))
      }
    }
  }

  # Check assays are present if provided
  if (!is.null(x = assay)) {
    assays_not_found <- Assay_Present(seurat_object = seurat_object, assay_list = assay, print_msg = FALSE, omit_warn = TRUE)[[2]]

    if (!is.null(x = assays_not_found)) {
      stop_quietly()
    }
  }

  # set assays to convert
  assays_convert <- assay %||% Assays(object = seurat_object)

  # Check against current assay class (first class element only, so result is always character)
  current_assay_classes <- vapply(assays_convert, function(assay_name) {
    class(x = seurat_object[[assay_name]])[[1]]
  }, FUN.VALUE = character(1))

  if (convert_to %in% current_assay_classes) {
    cli_abort(message = c("Attempting to convert assays to {.field {convert_to}}, however one or more of current assays is already of that type",
                          "i" = "Check assay type and/or whether {.code {convert_to}} value is correct."))
  }

  if ("SCTAssay" %in% current_assay_classes) {
    cli_abort(message = "Cannot convert assay of class {.field SCTAssay}.")
  }

  # convert assays
  for (i in assays_convert) {
    cli_inform(message = "Converting assay {.val {i}} from {.field {convert_from}} to {.field {convert_to}}.")
    suppressWarnings(seurat_object[[i]] <- as(seurat_object[[i]], convert_to))
  }

  # return object
  return(seurat_object)
}


#' Split Seurat object into layers
#'
#' Split Assay5 of Seurat object into layers by variable in meta.data
#'
#' @param seurat_object Seurat object name.
#' @param assay name(s) of assays to convert.
#' Default is "RNA".  Only a single assay can be split per call.
#' @param split.by Variable in meta.data to use for splitting layers.
#'
#' @return Seurat object with the layers of `assay` split by `split.by` (the assay is
#' converted to Assay5 first if needed).  The object is returned unchanged, with a warning,
#' if the assay already appears split.
#'
#' @concept object_conversion
#'
#' @import cli
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Split object by "treatment"
#' obj <- Split_Layers(seurat_object = obj, assay = "RNA", split.by = "treatment")
#' }
#'

Split_Layers <- function(
  seurat_object,
  assay = "RNA",
  split.by
) {
  # Make sure single assay
  if (length(x = assay) > 1) {
    cli_abort(message = c("Multiple assays specified ({.field {assay}}).",
                          "i" = "{.code Split_Layers} only supports splitting one assay at a time."))
  }

  # Check assay present
  assay_present <- Assay_Present(seurat_object = seurat_object, assay_list = assay, print_msg = FALSE, omit_warn = TRUE)[[1]]

  # check split is valid and length
  split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]]

  # `[[` below needs exactly one column name
  if (length(x = split.by) != 1) {
    cli_abort(message = "{.code split.by} must match exactly one column in object meta.data.")
  }

  split_values <- seurat_object@meta.data[[split.by]]
  length_split <- length(x = unique(x = split_values))

  # Check for already split layers (more than one "counts" layer means a previous split)
  check_split <- Layers(object = seurat_object, search = "counts", assay = assay_present)

  if (length(x = check_split) > 1) {
    cli_warn(message = "Layers in the assay: {.field {assay_present}} already appear split.  Skipping assay.")
  } else {
    cli_inform(message = c("*" = "Splitting layers within assay: {.field {assay_present}} into {.field {length_split} parts} by {.val {split.by}}"))
    # Check v3 vs. v5 and convert if needed
    if (isFALSE(x = Assay5_Check(seurat_object = seurat_object, assay = assay_present))) {
      cli_inform(message = c("i" = "{.field {assay_present}} is not Assay5, converting to Assay5 before splitting."))

      seurat_object <- suppressMessages(Convert_Assay(seurat_object = seurat_object, assay = assay_present, convert_to = "V5"))
    }

    # split layers
    seurat_object[[assay_present]] <- split(seurat_object[[assay_present]], f = split_values)
  }

  # return object
  return(seurat_object)
}
diff --git a/R/Object_Utilities.R b/R/Object_Utilities.R index a12e60e08a..915d0ead27 100644 --- a/R/Object_Utilities.R +++ b/R/Object_Utilities.R @@ -23,7 +23,7 @@ #' #' @export #' -#' @concept object_util +#' @concept misc_util #' #' @examples #' \dontrun{ @@ -85,11 +85,201 @@ Merge_Seurat_List <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' Add Mito and Ribo percentages +#' Add Multiple Cell Quality Control Values with Single Function #' -#' Add Mito, Ribo, & Mito+Ribo percentages to meta.data slot of Seurat Object +#' Add Mito/Ribo %, Cell Complexity (log10GenesPerUMI), Top Gene Percent with single function call #' #' @param seurat_object object name. +#' @param add_mito_ribo logical, whether to add percentage of counts belonging to mitochondrial/ribosomal +#' genes to object (Default is TRUE). +#' @param add_complexity logical, whether to add Cell Complexity to object (Default is TRUE). +#' @param add_top_pct logical, whether to add Top Gene Percentages to object (Default is TRUE). +#' @param add_MSigDB logical, whether to add percentages of counts belonging to genes from of mSigDB hallmark +#' gene lists: "HALLMARK_OXIDATIVE_PHOSPHORYLATION", "HALLMARK_APOPTOSIS", and "HALLMARK_DNA_REPAIR" to +#' object (Default is TRUE). +#' @param add_IEG logical, whether to add percentage of counts belonging to IEG genes to object (Default is TRUE). 
+#' @param add_cell_cycle logical, whether to add cell cycle scores and phase based on +#' \code{\link[Seurat]{CellCycleScoring}}. Only applicable if `species = "human"`. (Default is TRUE). +#' @param species Species of origin for given Seurat Object. If mouse, human, marmoset, zebrafish, rat, +#' drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically +#' generate mito_pattern and ribo_pattern values. +#' @param mito_name name to use for the new meta.data column containing percent mitochondrial counts. +#' Default is "percent_mito". +#' @param ribo_name name to use for the new meta.data column containing percent ribosomal counts. +#' Default is "percent_ribo". +#' @param mito_ribo_name name to use for the new meta.data column containing percent +#' mitochondrial+ribosomal counts. Default is "percent_mito_ribo". +#' @param complexity_name name to use for new meta data column for `Add_Cell_Complexity_Seurat`. +#' Default is "log10GenesPerUMI". +#' @param top_pct_name name to use for new meta data column for `Add_Top_Gene_Pct_Seurat`. +#' Default is "percent_topXX", where XX is equal to the value provided to `num_top_genes`. +#' @param oxphos_name name to use for new meta data column for percentage of MSigDB oxidative phosphorylation +#' counts. Default is "percent_oxphos". +#' @param apop_name name to use for new meta data column for percentage of MSigDB apoptosis counts. +#' Default is "percent_apop". +#' @param dna_repair_name name to use for new meta data column for percentage of MSigDB DNA repair +#' counts. Default is "percent_dna_repair". +#' @param ieg_name name to use for new meta data column for percentage of IEG counts. Default is "percent_ieg". +#' @param mito_pattern A regex pattern to match features against for mitochondrial genes (will set automatically if +#' species is mouse or human; marmoset features list saved separately). 
+#' @param ribo_pattern A regex pattern to match features against for ribosomal genes +#' (will set automatically if species is mouse, human, or marmoset). +#' @param mito_features A list of mitochondrial gene names to be used instead of using regex pattern. +#' Will override regex pattern if both are present (including default saved regex patterns). +#' @param ribo_features A list of ribosomal gene names to be used instead of using regex pattern. +#' Will override regex pattern if both are present (including default saved regex patterns). +#' @param ensembl_ids logical, whether feature names in the object are gene names or +#' ensembl IDs (default is FALSE; set TRUE if feature names are ensembl IDs). +#' @param num_top_genes An integer vector specifying the size(s) of the top set of high-abundance genes. +#' Used to compute the percentage of library size occupied by the most highly expressed genes in each cell. +#' @param assay assay to use in calculation. Default is "RNA". *Note* This should only be changed if +#' storing corrected and uncorrected assays in same object (e.g. outputs of both Cell Ranger and Cell Bender). +#' @param overwrite Logical. Whether to overwrite an existing meta.data column. Default is FALSE meaning that +#' function will abort if column with name provided to `meta_col_name` is present in meta.data slot. 
+#' +#' @import cli +#' @importFrom SeuratObject Layers +#' +#' @return A Seurat Object +#' +#' @export +#' +#' @concept qc_util +#' +#' @examples +#' \dontrun{ +#' obj <- Add_Cell_QC_Metrics(seurat_object = obj, species = "Human") +#'} +#' + +Add_Cell_QC_Metrics <- function( + seurat_object, + add_mito_ribo = TRUE, + add_complexity = TRUE, + add_top_pct = TRUE, + add_MSigDB = TRUE, + add_IEG = TRUE, + add_cell_cycle = TRUE, + species, + mito_name = "percent_mito", + ribo_name = "percent_ribo", + mito_ribo_name = "percent_mito_ribo", + complexity_name = "log10GenesPerUMI", + top_pct_name = NULL, + oxphos_name = "percent_oxphos", + apop_name = "percent_apop", + dna_repair_name = "percent_dna_repair", + ieg_name = "percent_ieg", + mito_pattern = NULL, + ribo_pattern = NULL, + mito_features = NULL, + ribo_features = NULL, + ensembl_ids = FALSE, + num_top_genes = 50, + assay = NULL, + overwrite = FALSE +) { + # Set assay + assay <- assay %||% DefaultAssay(object = seurat_object) + + # Accepted species names + accepted_names <- data.frame( + Mouse_Options = c("Mouse", "mouse", "Ms", "ms", "Mm", "mm"), + Human_Options = c("Human", "human", "Hu", "hu", "Hs", "hs"), + Marmoset_Options = c("Marmoset", "marmoset", "CJ", "Cj", "cj", NA), + Zebrafish_Options = c("Zebrafish", "zebrafish", "DR", "Dr", "dr", NA), + Rat_Options = c("Rat", "rat", "RN", "Rn", "rn", NA), + Drosophila_Options = c("Drosophila", "drosophila", "DM", "Dm", "dm", NA), + Macaque_Options = c("Macaque", "macaque", "Rhesus", "macaca", "mmulatta", NA) + ) + + # Species Spelling Options + mouse_options <- accepted_names$Mouse_Options + human_options <- accepted_names$Human_Options + marmoset_options <- accepted_names$Marmoset_Options + zebrafish_options <- accepted_names$Zebrafish_Options + rat_options <- accepted_names$Rat_Options + drosophila_options <- accepted_names$Drosophila_Options + macaque_options <- accepted_names$Macaque_Options + + # Add mito/ribo + if (isTRUE(x = add_mito_ribo)) { + 
cli_inform(message = "Adding {.field Mito/Ribo Percentages} to meta.data.") + seurat_object <- Add_Mito_Ribo(object = seurat_object, species = species, mito_name = mito_name, ribo_name = ribo_name, mito_ribo_name = mito_ribo_name, mito_pattern = mito_pattern, ribo_pattern = ribo_pattern, mito_features = mito_features, ribo_features = ribo_features, ensembl_ids = ensembl_ids, assay = assay, overwrite = overwrite) + } + + # Add complexity + if (isTRUE(x = add_complexity)) { + cli_inform(message = "Adding {.field Cell Complexity #1 (log10GenesPerUMI)} to meta.data.") + seurat_object <- Add_Cell_Complexity(object = seurat_object, meta_col_name = complexity_name, assay = assay, overwrite = overwrite) + } + + # Add top gene expression percent + if (isTRUE(x = add_top_pct)) { + cli_inform(message = "Adding {.field Cell Complexity #2 (Top {num_top_genes} Percentages)} to meta.data.") + seurat_object <- Add_Top_Gene_Pct_Seurat(seurat_object = seurat_object, num_top_genes = num_top_genes, meta_col_name = top_pct_name, assay = assay, overwrite = overwrite) + } + + # Add MSigDB + if (isTRUE(x = add_MSigDB)) { + if (species %in% marmoset_options) { + cli_warn(message = c("{.val Marmoset} is not currently a part of MSigDB gene list database.", + "i" = "No columns will be added to object meta.data")) + } else { + cli_inform(message = "Adding {.field MSigDB Oxidative Phosphorylation, Apoptosis, and DNA Repair Percentages} to meta.data.") + seurat_object <- Add_MSigDB_Seurat(seurat_object = seurat_object, species = species, oxphos_name = oxphos_name, apop_name = apop_name, dna_repair_name = dna_repair_name, assay = assay, overwrite = overwrite) + } + } + + # Add IEG + if (isTRUE(x = add_IEG)) { + if (species %in% c(marmoset_options, rat_options, zebrafish_options, macaque_options, drosophila_options)) { + cli_warn(message = c("{.val Rat, Marmoset, Macaque, Zebrafish, and Drosophila} are not currently supported.", + "i" = "No column will be added to object meta.data")) + } else { + 
cli_inform(message = "Adding {.field IEG Percentages} to meta.data.") + seurat_object <- Add_IEG_Seurat(seurat_object = seurat_object, species = species, ieg_name = ieg_name, assay = assay, overwrite = overwrite) + } + } + + if (isTRUE(x = add_cell_cycle)) { + if (!species %in% human_options) { + cli_abort(message = c("Cell Cycle Scoring is only supported for human in this function.", + "i" = "To add score for other species supply cell cycle gene list of `CellCycleScoring` function." + )) + } else { + if (length(grep(x = Layers(object = seurat_object), pattern = "data", value = TRUE)) == 0) { + cli_inform(message = c("Layer with normalized data not present.", + "i" = "Normalizing Data.")) + seurat_object <- NormalizeData(object = seurat_object) + } + + # Overwrite check + if ("S.Score" %in% colnames(x = seurat_object@meta.data) || "G2M.Score" %in% colnames(x = seurat_object@meta.data) || "Phase" %in% colnames(x = seurat_object@meta.data)) { + if (!overwrite) { + cli_abort(message = c("Columns with {.val S.Score}, {.val G2M.Score} and/or {.val Phase} already present in meta.data slot.", + "i" = "*To run function and overwrite columns set parameter {.code overwrite = TRUE}*") + ) + } + cli_inform(message = c("Columns with {.val S.Score}, {.val G2M.Score} and/or {.val Phase} already present in meta.data slot.", + "i" = "Overwriting those columns as {.code overwrite = TRUE}.") + ) + } + + # Add Cell Cycle Scoring + cli_inform(message = "Adding {.field Cell Cycle Scoring} to meta.data.") + seurat_object <- CellCycleScoring(object = seurat_object, s.features = Seurat::cc.genes.updated.2019$s.genes, g2m.features = Seurat::cc.genes.updated.2019$g2m.genes) + } + } + + # Log Command + seurat_object <- LogSeuratCommand(object = seurat_object) + + # return object + return(seurat_object) +} + + #' @param species Species of origin for given Seurat Object. 
If mouse, human, marmoset, zebrafish, rat, #' drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically #' generate mito_pattern and ribo_pattern values. @@ -120,23 +310,26 @@ Merge_Seurat_List <- function( #' @import cli #' @importFrom dplyr mutate select intersect all_of #' @importFrom magrittr "%>%" +#' @importFrom rlang ":=" #' @importFrom Seurat PercentageFeatureSet AddMetaData #' @importFrom tibble rownames_to_column column_to_rownames #' -#' @return A Seurat Object +#' @method Add_Mito_Ribo Seurat #' #' @export +#' @rdname Add_Mito_Ribo #' -#' @concept object_util +#' @concept qc_util #' #' @examples #' \dontrun{ -#' obj <- Add_Mito_Ribo_Seurat(seurat_object = obj, species = "human") +#' # Seurat +#' seurat_object <- Add_Mito_Ribo(object = seurat_object, species = "human") #'} #' -Add_Mito_Ribo_Seurat <- function( - seurat_object, +Add_Mito_Ribo.Seurat <- function( + object, species, mito_name = "percent_mito", ribo_name = "percent_ribo", @@ -148,7 +341,8 @@ Add_Mito_Ribo_Seurat <- function( ensembl_ids = FALSE, assay = NULL, overwrite = FALSE, - list_species_names = FALSE + list_species_names = FALSE, + ... 
) { # Accepted species names accepted_names <- data.frame( @@ -168,7 +362,7 @@ Add_Mito_Ribo_Seurat <- function( } # Check Seurat - Is_Seurat(seurat_object = seurat_object) + Is_Seurat(seurat_object = object) # Check name collision if (any(duplicated(x = c(mito_name, ribo_name, mito_ribo_name)))) { @@ -176,7 +370,7 @@ Add_Mito_Ribo_Seurat <- function( } # Overwrite check - if (mito_name %in% colnames(x = seurat_object@meta.data) || ribo_name %in% colnames(x = seurat_object@meta.data) || mito_ribo_name %in% colnames(x = seurat_object@meta.data)) { + if (mito_name %in% colnames(x = object@meta.data) || ribo_name %in% colnames(x = object@meta.data) || mito_ribo_name %in% colnames(x = object@meta.data)) { if (isFALSE(x = overwrite)) { cli_abort(message = c("Columns with {.val {mito_name}} and/or {.val {ribo_name}} already present in meta.data slot.", "i" = "*To run function and overwrite columns set parameter {.code overwrite = TRUE} or change respective {.code mito_name}, {.code ribo_name}, and/or {.code mito_ribo_name}*") @@ -195,7 +389,7 @@ Add_Mito_Ribo_Seurat <- function( } # Set default assay - assay <- assay %||% DefaultAssay(object = seurat_object) + assay <- assay %||% DefaultAssay(object = object) # Species Spelling Options mouse_options <- accepted_names$Mouse_Options @@ -258,14 +452,14 @@ Add_Mito_Ribo_Seurat <- function( ribo_features <- Retrieve_Ensembl_Ribo(species = species) } - mito_features <- mito_features %||% grep(pattern = mito_pattern, x = rownames(x = seurat_object[[assay]]), value = TRUE) + mito_features <- mito_features %||% grep(pattern = mito_pattern, x = rownames(x = object[[assay]]), value = TRUE) - ribo_features <- ribo_features %||% grep(pattern = ribo_pattern, x = rownames(x = seurat_object[[assay]]), value = TRUE) + ribo_features <- ribo_features %||% grep(pattern = ribo_pattern, x = rownames(x = object[[assay]]), value = TRUE) # Check features are present in object - length_mito_features <- length(x = intersect(x = mito_features, y = 
rownames(x = seurat_object[[assay]]))) + length_mito_features <- length(x = intersect(x = mito_features, y = rownames(x = object[[assay]]))) - length_ribo_features <- length(x = intersect(x = ribo_features, y = rownames(x = seurat_object[[assay]]))) + length_ribo_features <- length(x = intersect(x = ribo_features, y = rownames(x = object[[assay]]))) # Check length of mito and ribo features found in object if (length_mito_features < 1 && length_ribo_features < 1) { @@ -286,17 +480,17 @@ Add_Mito_Ribo_Seurat <- function( # Add mito and ribo columns if (length_mito_features > 0) { - good_mito <- mito_features[mito_features %in% rownames(x = seurat_object)] - seurat_object[[mito_name]] <- PercentageFeatureSet(object = seurat_object, features = good_mito, assay = assay) + good_mito <- mito_features[mito_features %in% rownames(x = object)] + object[[mito_name]] <- PercentageFeatureSet(object = object, features = good_mito, assay = assay) } if (length_ribo_features > 0) { - good_ribo <- ribo_features[ribo_features %in% rownames(x = seurat_object)] - seurat_object[[ribo_name]] <- PercentageFeatureSet(object = seurat_object, features = good_ribo, assay = assay) + good_ribo <- ribo_features[ribo_features %in% rownames(x = object)] + object[[ribo_name]] <- PercentageFeatureSet(object = object, features = good_ribo, assay = assay) } # Create combined mito ribo column if both present if (length_mito_features > 0 && length_ribo_features > 0) { - object_meta <- Fetch_Meta(object = seurat_object) %>% + object_meta <- Fetch_Meta(object = object) %>% rownames_to_column("barcodes") object_meta <- object_meta %>% @@ -306,19 +500,19 @@ Add_Mito_Ribo_Seurat <- function( select(all_of(c("barcodes", mito_ribo_name))) %>% column_to_rownames("barcodes") - seurat_object <- AddMetaData(object = seurat_object, metadata = object_meta) + object <- AddMetaData(object = object, metadata = object_meta) } + # Log Command + object <- LogSeuratCommand(object = object) + # return final object - 
return(seurat_object) + return(object) } #' Add Cell Complexity Value #' -#' Add measure of cell complexity/novelty (log10PerUMI) for data QC. -#' -#' @param seurat_object object name. #' @param meta_col_name name to use for new meta data column. Default is "log10GenesPerUMI". #' @param assay assay to use in calculation. Default is "RNA". *Note* This should only be changed if #' storing corrected and uncorrected assays in same object (e.g. outputs of both Cell Ranger and Cell Bender). @@ -327,25 +521,28 @@ Add_Mito_Ribo_Seurat <- function( #' #' @import cli #' -#' @return A Seurat Object +#' @method Add_Cell_Complexity Seurat #' #' @export +#' @rdname Add_Cell_Complexity #' -#' @concept object_util +#' @concept qc_util #' #' @examples +#' # Seurat #' library(Seurat) -#' pbmc_small <- Add_Cell_Complexity_Seurat(seurat_object = pbmc_small) +#' pbmc_small <- Add_Cell_Complexity(object = pbmc_small) #' -Add_Cell_Complexity_Seurat <- function( - seurat_object, +Add_Cell_Complexity.Seurat <- function( + object, meta_col_name = "log10GenesPerUMI", assay = "RNA", - overwrite = FALSE + overwrite = FALSE, + ... 
) { # Check Seurat - Is_Seurat(seurat_object = seurat_object) + Is_Seurat(seurat_object = object) # Add assay warning message if (assay != "RNA") { @@ -355,7 +552,7 @@ Add_Cell_Complexity_Seurat <- function( } # Check columns for overwrite - if (meta_col_name %in% colnames(x = seurat_object@meta.data)) { + if (meta_col_name %in% colnames(x = object@meta.data)) { if (isFALSE(x = overwrite)) { cli_abort(message = c("Column {.val {meta_col_name}} already present in meta.data slot.", "i" = "*To run function and overwrite column, set parameter {.code overwrite = TRUE} or change respective {.code meta_col_name}*.") @@ -371,10 +568,13 @@ Add_Cell_Complexity_Seurat <- function( count_name <- paste0("nCount_", assay) # Add score - seurat_object[[meta_col_name]] <- log10(seurat_object[[feature_name]]) / log10(seurat_object[[count_name]]) + object[[meta_col_name]] <- log10(object[[feature_name]]) / log10(object[[count_name]]) + + # Log Command + object <- LogSeuratCommand(object = object) #return object - return(seurat_object) + return(object) } @@ -403,8 +603,7 @@ Add_Cell_Complexity_Seurat <- function( #' #' @export #' -#' @concept object_util -#' +#' @concept qc_util #' #' @references This function uses scuttle package (license: GPL-3) to calculate the percent of expression #' coming from top XX genes in each cell. Parameter description for `num_top_genes` also from scuttle. @@ -515,198 +714,9 @@ Add_Top_Gene_Pct_Seurat <- function( # Add to object and return seurat_object <- AddMetaData(object = seurat_object, metadata = res, col.name = meta_col_name) - return(seurat_object) -} - - -#' Add Multiple Cell Quality Control Values with Single Function -#' -#' Add Mito/Ribo %, Cell Complexity (log10GenesPerUMI), Top Gene Percent with single function call -#' -#' @param seurat_object object name. -#' @param add_mito_ribo logical, whether to add percentage of counts belonging to mitochondrial/ribosomal -#' genes to object (Default is TRUE). 
-#' @param add_complexity logical, whether to add Cell Complexity to object (Default is TRUE). -#' @param add_top_pct logical, whether to add Top Gene Percentages to object (Default is TRUE). -#' @param add_MSigDB logical, whether to add percentages of counts belonging to genes from of mSigDB hallmark -#' gene lists: "HALLMARK_OXIDATIVE_PHOSPHORYLATION", "HALLMARK_APOPTOSIS", and "HALLMARK_DNA_REPAIR" to -#' object (Default is TRUE). -#' @param add_IEG logical, whether to add percentage of counts belonging to IEG genes to object (Default is TRUE). -#' @param add_cell_cycle logical, whether to addcell cycle scores and phase based on -#' \code{\link[Seurat]{CellCycleScoring}}. Only applicable if `species = "human"`. (Default is TRUE). -#' @param species Species of origin for given Seurat Object. If mouse, human, marmoset, zebrafish, rat, -#' drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically -#' generate mito_pattern and ribo_pattern values. -#' @param mito_name name to use for the new meta.data column containing percent mitochondrial counts. -#' Default is "percent_mito". -#' @param ribo_name name to use for the new meta.data column containing percent ribosomal counts. -#' Default is "percent_ribo". -#' @param mito_ribo_name name to use for the new meta.data column containing percent -#' mitochondrial+ribosomal counts. Default is "percent_mito_ribo". -#' @param complexity_name name to use for new meta data column for `Add_Cell_Complexity_Seurat`. -#' Default is "log10GenesPerUMI". -#' @param top_pct_name name to use for new meta data column for `Add_Top_Gene_Pct_Seurat`. -#' Default is "percent_topXX", where XX is equal to the value provided to `num_top_genes`. -#' @param oxphos_name name to use for new meta data column for percentage of MSigDB oxidative phosphorylation -#' counts. Default is "percent_oxphos". -#' @param apop_name name to use for new meta data column for percentage of MSigDB apoptosis counts. 
-#' Default is "percent_apop". -#' @param dna_repair_name name to use for new meta data column for percentage of MSigDB DNA repair -#' counts. Default is "percent_dna_repair".. -#' @param ieg_name name to use for new meta data column for percentage of IEG counts. Default is "percent_ieg". -#' @param mito_pattern A regex pattern to match features against for mitochondrial genes (will set automatically if -#' species is mouse or human; marmoset features list saved separately). -#' @param ribo_pattern A regex pattern to match features against for ribosomal genes -#' (will set automatically if species is mouse, human, or marmoset). -#' @param mito_features A list of mitochondrial gene names to be used instead of using regex pattern. -#' Will override regex pattern if both are present (including default saved regex patterns). -#' @param ribo_features A list of ribosomal gene names to be used instead of using regex pattern. -#' Will override regex pattern if both are present (including default saved regex patterns). -#' @param ensembl_ids logical, whether feature names in the object are gene names or -#' ensembl IDs (default is FALSE; set TRUE if feature names are ensembl IDs). -#' @param num_top_genes An integer vector specifying the size(s) of the top set of high-abundance genes. -#' Used to compute the percentage of library size occupied by the most highly expressed genes in each cell. -#' @param assay assay to use in calculation. Default is "RNA". *Note* This should only be changed if -#' storing corrected and uncorrected assays in same object (e.g. outputs of both Cell Ranger and Cell Bender). -#' @param overwrite Logical. Whether to overwrite existing an meta.data column. Default is FALSE meaning that -#' function will abort if column with name provided to `meta_col_name` is present in meta.data slot. 
-#' -#' @import cli -#' @importFrom SeuratObject Layers -#' -#' @return A Seurat Object -#' -#' @export -#' -#' @concept object_util -#' -#' @examples -#' \dontrun{ -#' obj <- Add_Cell_QC_Metrics(seurat_object = obj, species = "Human") -#'} -#' - -Add_Cell_QC_Metrics <- function( - seurat_object, - add_mito_ribo = TRUE, - add_complexity = TRUE, - add_top_pct = TRUE, - add_MSigDB = TRUE, - add_IEG = TRUE, - add_cell_cycle = TRUE, - species, - mito_name = "percent_mito", - ribo_name = "percent_ribo", - mito_ribo_name = "percent_mito_ribo", - complexity_name = "log10GenesPerUMI", - top_pct_name = NULL, - oxphos_name = "percent_oxphos", - apop_name = "percent_apop", - dna_repair_name = "percent_dna_repair", - ieg_name = "percent_ieg", - mito_pattern = NULL, - ribo_pattern = NULL, - mito_features = NULL, - ribo_features = NULL, - ensembl_ids = FALSE, - num_top_genes = 50, - assay = NULL, - overwrite = FALSE -) { - # Set assay - assay <- assay %||% DefaultAssay(object = seurat_object) - - # Accepted species names - accepted_names <- data.frame( - Mouse_Options = c("Mouse", "mouse", "Ms", "ms", "Mm", "mm"), - Human_Options = c("Human", "human", "Hu", "hu", "Hs", "hs"), - Marmoset_Options = c("Marmoset", "marmoset", "CJ", "Cj", "cj", NA), - Zebrafish_Options = c("Zebrafish", "zebrafish", "DR", "Dr", "dr", NA), - Rat_Options = c("Rat", "rat", "RN", "Rn", "rn", NA), - Drosophila_Options = c("Drosophila", "drosophila", "DM", "Dm", "dm", NA), - Macaque_Options = c("Macaque", "macaque", "Rhesus", "macaca", "mmulatta", NA) - ) - - # Species Spelling Options - mouse_options <- accepted_names$Mouse_Options - human_options <- accepted_names$Human_Options - marmoset_options <- accepted_names$Marmoset_Options - zebrafish_options <- accepted_names$Zebrafish_Options - rat_options <- accepted_names$Rat_Options - drosophila_options <- accepted_names$Drosophila_Options - macaque_options <- accepted_names$Macaque_Options - - # Add mito/ribo - if (isTRUE(x = add_mito_ribo)) { - 
cli_inform(message = "Adding {.field Mito/Ribo Percentages} to meta.data.") - seurat_object <- Add_Mito_Ribo_Seurat(seurat_object = seurat_object, species = species, mito_name = mito_name, ribo_name = ribo_name, mito_ribo_name = mito_ribo_name, mito_pattern = mito_pattern, ribo_pattern = ribo_pattern, mito_features = mito_features, ribo_features = ribo_features, ensembl_ids = ensembl_ids, assay = assay, overwrite = overwrite) - } - - # Add complexity - if (isTRUE(x = add_complexity)) { - cli_inform(message = "Adding {.field Cell Complexity #1 (log10GenesPerUMI)} to meta.data.") - seurat_object <- Add_Cell_Complexity_Seurat(seurat_object = seurat_object, meta_col_name = complexity_name, assay = assay, overwrite = overwrite) - } - - # Add top gene expression percent - if (isTRUE(x = add_top_pct)) { - cli_inform(message = "Adding {.field Cell Complexity #2 (Top {num_top_genes} Percentages)} to meta.data.") - seurat_object <- Add_Top_Gene_Pct_Seurat(seurat_object = seurat_object, num_top_genes = num_top_genes, meta_col_name = top_pct_name, assay = assay, overwrite = overwrite) - } - - # Add MSigDB - if (isTRUE(x = add_MSigDB)) { - if (species %in% marmoset_options) { - cli_warn(message = c("{.val Marmoset} is not currently a part of MSigDB gene list database.", - "i" = "No columns will be added to object meta.data")) - } else { - cli_inform(message = "Adding {.field MSigDB Oxidative Phosphorylation, Apoptosis, and DNA Repair Percentages} to meta.data.") - seurat_object <- Add_MSigDB_Seurat(seurat_object = seurat_object, species = species, oxphos_name = oxphos_name, apop_name = apop_name, dna_repair_name = dna_repair_name, assay = assay, overwrite = overwrite) - } - } + # Log Command + seurat_object <- LogSeuratCommand(object = seurat_object) - # Add IEG - if (isTRUE(x = add_IEG)) { - if (species %in% c(marmoset_options, rat_options, zebrafish_options, macaque_options, drosophila_options)) { - cli_warn(message = c("{.val Rat, Marmoset, Macaque, Zebrafish, and 
Drosophila} are not currently supported.", - "i" = "No column will be added to object meta.data")) - } else { - cli_inform(message = "Adding {.field IEG Percentages} to meta.data.") - seurat_object <- Add_IEG_Seurat(seurat_object = seurat_object, species = species, ieg_name = ieg_name, assay = assay, overwrite = overwrite) - } - } - - if (isTRUE(x = add_cell_cycle)) { - if (!species %in% human_options) { - cli_abort(message = c("Cell Cycle Scoring is only supported for human in this function.", - "i" = "To add score for other species supply cell cycle gene list of `CellCycleScoring` function." - )) - } else { - if (length(grep(x = Layers(object = seurat_object), pattern = "data", value = T)) == 0) { - cli_inform(message = c("Layer with normalized data not present.", - "i" = "Normalizing Data.")) - seurat_object <- NormalizeData(object = seurat_object) - } - - # Overwrite check - if ("S.Score" %in% colnames(x = seurat_object@meta.data) || "G2M.Score" %in% colnames(x = seurat_object@meta.data) || "Phase" %in% colnames(x = seurat_object@meta.data)) { - if (!overwrite) { - cli_abort(message = c("Columns with {.val S.Score}, {.val G2M.Score} and/or {.val Phase} already present in meta.data slot.", - "i" = "*To run function and overwrite columns set parameter {.code overwrite = TRUE}*") - ) - } - cli_inform(message = c("Columns with {.val S.Score}, {.val G2M.Score} and/or {.val Phase} already present in meta.data slot.", - "i" = "Overwriting those columns as .code {overwrite = TRUE.}") - ) - } - - # Add Cell Cycle Scoring - cli_inform(message = "Adding {.field Cell Cycle Scoring to meta.data.} to meta.data.") - seurat_object <- CellCycleScoring(object = seurat_object, s.features = Seurat::cc.genes.updated.2019$s.genes, g2m.features = Seurat::cc.genes.updated.2019$g2m.genes) - } - } - - # return object return(seurat_object) } @@ -726,7 +736,7 @@ Add_Cell_QC_Metrics <- function( #' #' @export #' -#' @concept object_util +#' @concept qc_util #' #' @examples #' \dontrun{ @@ 
-771,6 +781,9 @@ Add_CellBender_Diff <- function( # Add back to Seurat Object seurat_object <- AddMetaData(object = seurat_object, metadata = meta_modified) + # Log Command + seurat_object <- LogSeuratCommand(object = seurat_object) + return(seurat_object) } @@ -799,7 +812,7 @@ Add_CellBender_Diff <- function( #' #' @export #' -#' @concept object_util +#' @concept get_set_util #' #' @examples #' \dontrun{ @@ -870,7 +883,7 @@ Meta_Remove_Seurat <- function( #' #' @export #' -#' @concept object_util +#' @concept get_set_util #' #' @examples #' \dontrun{ @@ -1020,7 +1033,7 @@ Add_Sample_Meta <- function( #' #' @export #' -#' @concept object_util +#' @concept get_set_util #' #' @examples #' library(Seurat) @@ -1064,7 +1077,7 @@ Extract_Sample_Meta <- function( combined_exclude <- c(nFeature_cols, nCount_cols, "percent_mito", "percent_ribo", "percent_mito_ribo", "log10GenesPerUMI") - variables_exclude <- Meta_Present(seurat_object = object, meta_col_names = combined_exclude, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE)[[1]] + variables_exclude <- Meta_Present(object = object, meta_col_names = combined_exclude, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE)[[1]] } # Ensure include exclude are unique @@ -1079,13 +1092,13 @@ Extract_Sample_Meta <- function( # Check variables include/exclude are present if (!is.null(x = variables_include)) { - include_meta_list <- Meta_Present(seurat_object = object, meta_col_names = variables_include, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) + include_meta_list <- Meta_Present(object = object, meta_col_names = variables_include, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) } else { include_meta_list <- NULL } if (!is.null(x = variables_exclude)) { - exclude_meta_list <- Meta_Present(seurat_object = object, meta_col_names = variables_exclude, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) + exclude_meta_list <- Meta_Present(object = object, meta_col_names = variables_exclude, 
omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) } else { exclude_meta_list <- NULL } @@ -1144,7 +1157,70 @@ Extract_Sample_Meta <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#################### MISC SLOT UTILITIES #################### +#################### DATA ACCESS #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +#' Get meta data from object +#' +#' Quick function to properly pull meta.data from objects. +#' +#' @param object Object of class Seurat or liger. +#' +#' @importFrom methods slot +#' +#' @return A data.frame containing cell-level meta data +#' +#' @export +#' +#' @concept get_set_util +#' +#' @rdname Fetch_Meta +#' +#' @examples +#' library(Seurat) +#' meta_data <- Fetch_Meta(object = pbmc_small) +#' head(meta_data, 5) +#' + +Fetch_Meta <- function(object) { + UseMethod(generic = 'Fetch_Meta') +} + + +#' @rdname Fetch_Meta +#' @export +#' @concept get_set_util +#' @method Fetch_Meta Seurat + +Fetch_Meta.Seurat <- function( + object +) { + # Pull meta data + object_meta <- slot(object = object, name = "meta.data") + + return(object_meta) +} + + +#' @rdname Fetch_Meta +#' @export +#' @concept liger_object_util +#' @method Fetch_Meta liger + +Fetch_Meta.liger <- function( + object +) { + + # Pull meta data + object_meta <- slot(object = object, name = "cell.data") + + return(object_meta) +} + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### MISC OBJECT UTILITIES #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1168,7 +1244,7 @@ Extract_Sample_Meta <- function( #' #' @export #' -#' @concept object_util +#' @concept get_set_util #' #' @examples #' library(Seurat) @@ -1274,7 +1350,7 @@ Store_Misc_Info_Seurat <- function( #' #' @export #' -#' @concept object_util +#' @concept get_set_util #' #' @examples #' 
library(Seurat) @@ -1283,7 +1359,6 @@ Store_Misc_Info_Seurat <- function( #' pbmc_small <- Store_Misc_Info_Seurat(seurat_object = pbmc_small, data_to_store = clu_pal, #' data_name = "rd1_colors") #' -#' Store_Palette_Seurat <- function( seurat_object, @@ -1298,3 +1373,119 @@ Store_Palette_Seurat <- function( seurat_object <- Store_Misc_Info_Seurat(seurat_object = seurat_object, data_to_store = palette, data_name = palette_name, list_as_list = list_as_list, overwrite = overwrite) return(seurat_object) } + + +#' Add Alternative Feature IDs +#' +#' Add alternative feature ids to the assay level meta.data slot in Assay5 compatible object (Seurat V5.0.0 or greater) +#' +#' @param seurat_object object name. +#' @param features_tsv_file output file from Cell Ranger used for creation of Seurat object. +#' (Provide either this or `hdf5_file`) +#' @param hdf5_file output file from Cell Ranger used for creation of Seurat object. +#' (Provide either this or `features_tsv_file`) +#' @param assay name of assay(s) to add the alternative features to. Can specify "all" +#' to add to all assays. +#' +#' @import cli +#' @importFrom dplyr filter +#' +#' @return Seurat Object with new entries in the `obj@assays$ASSAY@meta.data` slot. +#' +#' @export +#' +#' @concept get_set_util +#' +#' @examples +#' \dontrun{ +#' # Using features.tsv.gz file +#' # Either file from filtered or raw outputs can be used as they are identical. 
+#' obj <- Add_Alt_Feature_ID(seurat_object = obj, +#' features_tsv_file = "sample01/outs/filtered_feature_bc_matrix/features.tsv.gz", assay = "RNA") +#' +#' # Using hdf5 file +#' # Either filtered_feature_bc or raw_feature_bc can be used as the features slot is identical +#' # Though it is faster to load filtered_feature_bc file due to droplet filtering +#' obj <- Add_Alt_Feature_ID(seurat_object = obj, +#' hdf5_file = "sample01/outs/filtered_feature_bc_matrix.h5", assay = "RNA") +#'} +#' + +Add_Alt_Feature_ID <- function( + seurat_object, + features_tsv_file = NULL, + hdf5_file = NULL, + assay = NULL +) { + if (packageVersion(pkg = 'Seurat') < "5") { + cli_abort(message = "Seurat version must be v5.0.0 or greater to add alternative features.") + } + + # check file + if (is.null(x = features_tsv_file) && is.null(x = hdf5_file)) { + cli_abort(message = "Either {.code features_tsv_file} or {.code hdf5_file} must be provided.") + } + + if (!is.null(x = features_tsv_file) && !is.null(x = hdf5_file)) { + cli_abort(message = "Both {.code features_tsv_file} and {.code hdf5_file} provided. 
Please only supply one or the other parameter.") + } + + # check assay + if (is.null(x = assay)) { + cli_abort(message = c("Must provide value to {.code assay} to add alternative features to assay meta.data", + "i" = "Value can either be name of assay or {.val all} to add to all compatible assays present.")) + } + + # set assays to use (length check keeps the `if` condition scalar when several assay names are supplied) + if (length(x = assay) == 1 && assay == "all") { + assays_use <- Assays(object = seurat_object) + } else { + assays_use <- assay + } + + # check they are Assay5 + assays_are_v5 <- vapply(X = assays_use, FUN = function(x) { + inherits(x = seurat_object[[x]], what = "Assay5") + }, FUN.VALUE = logical(length = 1)) + + if (!all(assays_are_v5)) { + cli_abort(message = "All assays to add features to must be {.field Assay5}.") + } + + # get features + object_features <- Features(x = seurat_object, assay = assays_use[1]) + + # if providing features_tsv + if (!is.null(x = features_tsv_file)) { + features_table <- data.table::fread(file = features_tsv_file, header = FALSE, data.table = FALSE) + colnames(features_table) <- c("Ensembl_ID", "Symbol", "Modality") + + features_table$Symbol <- make.unique(features_table$Symbol) + + features_present <- features_table %>% + filter(.data[["Symbol"]] %in% object_features) + } + + if (!is.null(x = hdf5_file)) { + h5 <- Read10X_h5(filename = hdf5_file) + symbols <- rownames(x = h5) + + h5 <- Read10X_h5(filename = hdf5_file, use.names = FALSE) + ensembl <- rownames(x = h5) + + features_table <- data.frame("Ensembl_ID" = ensembl, + "Symbol" = symbols) + + features_present <- features_table %>% + filter(.data[["Symbol"]] %in% object_features) + } + + # Add to object. 
NOTE(review): `features_present` is filtered against the first assay only and assigned as-is, so this assumes its rows match the feature rows (count and order) of every assay in `assays_use` -- TODO confirm + for (i in assays_use) { + seurat_object[[i]]@meta.data$Ensembl_ID <- features_present$Ensembl_ID + seurat_object[[i]]@meta.data$Symbol <- features_present$Symbol + } + + # return object + return(seurat_object) +} diff --git a/R/Plotting_Utilities.R b/R/Plotting_Utilities.R index fbd0df4a80..9bf60fac83 100644 --- a/R/Plotting_Utilities.R +++ b/R/Plotting_Utilities.R @@ -397,9 +397,980 @@ Figure_Plot <- 
function( } +#' Clustered DotPlot +#' +#' Clustered DotPlots using ComplexHeatmap +#' +#' @param seurat_object Seurat object name. +#' @param features Features to plot. +#' @param colors_use_exp Color palette to use for plotting expression scale. Default is `viridis::plasma(n = 20, direction = -1)`. +#' @param exp_color_min Minimum scaled average expression threshold (everything smaller will be set to this). +#' Default is -2. +#' @param exp_color_middle What scaled expression value to use for the middle of the provided `colors_use_exp`. +#' By default will be set to value in middle of `exp_color_min` and `exp_color_max`. +#' @param exp_color_max Maximum scaled average expression threshold (everything larger will be set to this). +#' Default is 2. +#' @param print_exp_quantiles Whether to print the quantiles of expression data in addition to plots. +#' Default is FALSE. NOTE: These values will be altered by choices of `exp_color_min` and `exp_color_max` +#' if there are values below or above those cutoffs, respectively. +#' @param colors_use_idents specify color palette to use for identity labels. By default if +#' number of levels plotted is less than or equal to 36 it will use "polychrome" and if greater than 36 +#' will use "varibow" with shuffle = TRUE both from `DiscretePalette_scCustomize`. +#' @param x_lab_rotate How to rotate column labels. By default set to `TRUE` which rotates labels 45 degrees. +#' If set `FALSE` rotation is set to 0 degrees. Users can also supply custom angle for text rotation. +#' @param plot_padding if plot needs extra white space padding so no plot or labels are cutoff. +#' The parameter accepts TRUE or numeric vector of length 4. If TRUE padding will be set to +#' c(2, 15, 0, 0) (bottom, left, top, right). Can also be customized further with numeric +#' vector of length 4 specifying the amount of padding in millimeters. +#' Default is NULL, no padding. +#' @param flip logical, whether to flip the axes of final plot. 
Default is FALSE; rows = features and +#' columns = idents. +#' @param k Value to use for k-means clustering on features Sets (km) parameter in `ComplexHeatmap::Heatmap()`. +#' From `ComplexHeatmap::Heatmap()`: Apply k-means clustering on rows. If the value is larger than 1, the +#' heatmap will be split by rows according to the k-means clustering. For each row slice, hierarchical +#' clustering is still applied with parameters above. +#' @param feature_km_repeats Number of k-means runs to get a consensus k-means clustering for features. +#' Note if `feature_km_repeats` is set to value greater than one, the final number of groups might be +#' smaller than row_km, but this might mean the original row_km is not a good choice. Default is 1000. +#' @param ident_km_repeats Number of k-means runs to get a consensus k-means clustering. Similar to +#' `feature_km_repeats`. Default is 1000. +#' @param row_label_size Size of the feature labels. Provided to `row_names_gp` in Heatmap call. +#' @param row_label_fontface Fontface to use for row labels. Provided to `row_names_gp` in Heatmap call. +#' @param grid_color color to use for heatmap grid. Default is NULL which "removes" grid by using NA color. +#' @param cluster_feature logical, whether to cluster and reorder feature axis. Default is TRUE. +#' @param cluster_ident logical, whether to cluster and reorder identity axis. Default is TRUE. +#' @param column_label_size Size of the feature labels. Provided to `column_names_gp` in Heatmap call. +#' @param legend_label_size Size of the legend text labels. Provided to `labels_gp` in Heatmap legend call. +#' @param legend_title_size Sise of the legend title text labels. Provided to `title_gp` in Heatmap legend call. +#' @param raster Logical, whether to render in raster format (faster plotting, smaller files). Default is FALSE. +#' @param plot_km_elbow Logical, whether or not to return the Sum Squared Error Elbow Plot for k-means clustering. 
+#' Estimating elbow of this plot is one way to determine "optimal" value for `k`. +#' Based on: \url{https://stackoverflow.com/a/15376462/15568251}. +#' @param elbow_kmax The maximum value of k to use for `plot_km_elbow`. Suggest setting larger value so the +#' true shape of plot can be observed. Value must be 1 less than number of features provided. If NULL parameter +#' will be set dependent on length of feature list up to `elbow_kmax = 20`. +#' @param assay Name of assay to use, defaults to the active assay. +#' @param group.by Group (color) cells in different ways (for example, orig.ident). +#' @param idents Which classes to include in the plot (default is all). +#' @param show_parent_dend_line Logical, Sets parameter of same name in `ComplexHeatmap::Heatmap()`. +#' From `ComplexHeatmap::Heatmap()`: When heatmap is split, whether to add a dashed line to mark parent +#' dendrogram and children dendrograms. Default is TRUE. +#' @param ggplot_default_colors logical. If `colors_use = NULL`, Whether or not to return plot using +#' default ggplot2 "hue" palette instead of default "polychrome" or "varibow" palettes. +#' @param color_seed random seed for the "varibow" palette shuffle if `colors_use = NULL` and number of +#' groups plotted is greater than 36. Default = 123. +#' @param seed Sets seed for reproducible plotting (ComplexHeatmap plot). +#' +#' @return A ComplexHeatmap or if plot_km_elbow = TRUE a list containing ggplot2 object and ComplexHeatmap. 
+#' +#' @import cli +#' @import ggplot2 +#' @importFrom circlize colorRamp2 +#' @importFrom dplyr any_of filter select +#' @importFrom grid grid.circle grid.rect gpar +#' @importFrom magrittr "%>%" +#' @importFrom rlang is_installed +#' @importFrom Seurat DotPlot +#' @importFrom stats quantile +#' @importFrom tidyr pivot_wider +#' +#' @noRd +#' +#' @concept seurat_plotting +#' +#' @author Ming Tang (Original Code), Sam Marsh (Wrap single function, added/modified functionality) +#' @references \url{https://divingintogeneticsandgenomics.rbind.io/post/clustered-dotplot-for-single-cell-rnaseq/} +#' @seealso \url{https://twitter.com/tangming2005} +#' +#' @examples +#' \donttest{ +#' library(Seurat) +#' Clustered_DotPlot(seurat_object = pbmc_small, features = c("CD3E", "CD8", "GZMB", "MS4A1")) +#'} +#' + +Clustered_DotPlot_Single_Group <- function( + seurat_object, + features, + colors_use_exp = viridis_plasma_dark_high, + exp_color_min = -2, + exp_color_middle = NULL, + exp_color_max = 2, + print_exp_quantiles = FALSE, + colors_use_idents = NULL, + x_lab_rotate = TRUE, + plot_padding = NULL, + flip = FALSE, + k = 1, + feature_km_repeats = 1000, + ident_km_repeats = 1000, + row_label_size = 8, + row_label_fontface = "plain", + grid_color = NULL, + cluster_feature = TRUE, + cluster_ident = TRUE, + column_label_size = 8, + legend_label_size = 10, + legend_title_size = 10, + raster = FALSE, + plot_km_elbow = TRUE, + elbow_kmax = NULL, + assay = NULL, + group.by = NULL, + idents = NULL, + show_parent_dend_line = TRUE, + ggplot_default_colors = FALSE, + color_seed = 123, + seed = 123 +) { + # Check for packages + ComplexHeatmap_check <- is_installed(pkg = "ComplexHeatmap") + if (isFALSE(x = ComplexHeatmap_check)) { + cli_abort(message = c( + "Please install the {.val ComplexHeatmap} package to use {.code Clustered_DotPlot}", + "i" = "This can be accomplished with the following commands: ", + "----------------------------------------", + "{.field 
`install.packages({symbol$dquote_left}BiocManager{symbol$dquote_right})`}", + "{.field `BiocManager::install({symbol$dquote_left}ComplexHeatmap{symbol$dquote_right})`}", + "----------------------------------------" + )) + } + + # Check Seurat + Is_Seurat(seurat_object = seurat_object) + + # set assay (if null set to active assay) + assay <- assay %||% DefaultAssay(object = seurat_object) + + # set padding + if (!is.null(x = plot_padding)) { + if (isTRUE(x = plot_padding)) { + # Default extra padding + # 2 bottom: typically mirrors unpadded plot + # 15 left: usually enough to make rotated labels fit in plot window + padding <- unit(c(2, 15, 0, 0), "mm") + } else { + if (length(x = plot_padding) != 4) { + cli_abort(message = c("{.code plot_padding} must be numeric vector of length 4 or TRUE", + "i" = "Numeric vector will correspond to amount of padding to be added to bottom, left, top, right).", + "i" = "Seeting {.field TRUE} will set padding to {.code c(2, 10, 0, 0)}", + "i" = "Default is {.val NULL} for no extra padding.")) + } + padding <- unit(plot_padding, "mm") + } + } + + # Check acceptable fontface + if (!row_label_fontface %in% c("plain", "bold", "italic", "oblique", "bold.italic")) { + cli_abort(message = c("{.code row_label_face} {.val {row_label_face}} not recognized.", + "i" = "Must be one of {.val plain}, {.val bold}, {.val italic}, {.val olique}, or {.val bold.italic}.")) + } + + # Check unique features + features_unique <- unique(x = features) + + if (length(x = features_unique) != length(x = features)) { + cli_warn("Feature list contains duplicates, making unique.") + } + + # Check features and meta to determine which features present + all_found_features <- Feature_PreCheck(object = seurat_object, features = features_unique, assay = assay) + + # Check exp min/max set correctly + if (!exp_color_min < exp_color_max) { + cli_abort(message = c("Expression color min/max values are not compatible.", + "i" = "The value for {.code exp_color_min}: {.field 
{exp_color_min}} must be less than the value for {.code exp_color_max}: {.field {exp_color_max}}.") + ) + } + + # Get DotPlot data + seurat_plot <- DotPlot(object = seurat_object, features = all_found_features, assay = assay, group.by = group.by, scale = TRUE, idents = idents, col.min = NULL, col.max = NULL) + + data <- seurat_plot$data + + # Get expression data + exp_mat <- data %>% + select(-any_of(c("pct.exp", "avg.exp"))) %>% + pivot_wider(names_from = any_of("id"), values_from = any_of("avg.exp.scaled")) %>% + as.data.frame() + + row.names(x = exp_mat) <- exp_mat$features.plot + + # Check NAs if idents + if (!is.null(x = idents)) { + # Find NA features and print warning + excluded_features <- exp_mat[rowSums(is.na(x = exp_mat)) > 0,] %>% + rownames() + cli_warn(message = c("Some scaled data missing.", + "*" = "The following features were removed as there is no scaled expression present in subset (`idents`) of object provided:", + "i" = "{.field {glue_collapse_scCustom(input_string = excluded_features, and = TRUE)}}.") + ) + + # Extract good features + good_features <- rownames(x = exp_mat) + + # Remove rows with NAs + exp_mat <- exp_mat %>% + filter(.data[["features.plot"]] %in% good_features) + } + + exp_mat <- exp_mat[,-1] %>% + as.matrix() + + # Get percent expressed data + percent_mat <- data %>% + select(-any_of(c("avg.exp", "avg.exp.scaled"))) %>% + pivot_wider(names_from = any_of("id"), values_from = any_of("pct.exp")) %>% + as.data.frame() + + row.names(x = percent_mat) <- percent_mat$features.plot + + # Subset dataframe for NAs if idents so that exp_mat and percent_mat match + if (!is.null(x = idents)) { + percent_mat <- percent_mat %>% + filter(.data[["features.plot"]] %in% good_features) + } + + percent_mat <- percent_mat[,-1] %>% + as.matrix() + + # print quantiles + if (isTRUE(x = print_exp_quantiles)) { + cli_inform(message = "Quantiles of gene expression data are:") + print(quantile(exp_mat, c(0.1, 0.5, 0.9, 0.99))) + } + + # Set default color 
palette based on number of levels being plotted + if (is.null(x = group.by)) { + group_by_length <- length(x = unique(x = seurat_object@active.ident)) + } else { + group_by_length <- length(x = unique(x = seurat_object@meta.data[[group.by]])) + } + + # Check colors use vs. ggplot2 color scale + if (!is.null(x = colors_use_idents) && isTRUE(x = ggplot_default_colors)) { + cli_abort(message = "Cannot provide both custom palette to {.code colors_use} and specify {.code ggplot_default_colors = TRUE}.") + } + if (is.null(x = colors_use_idents)) { + # set default plot colors + colors_use_idents <- scCustomize_Palette(num_groups = group_by_length, ggplot_default_colors = ggplot_default_colors, color_seed = color_seed) + } + + # Reduce color length list due to naming requirement + colors_use_idents <- colors_use_idents[1:group_by_length] + + # Modify if class = "colors" + if (inherits(x = colors_use_idents, what = "colors")) { + colors_use_idents <- as.vector(x = colors_use_idents) + } + + # Pull Annotation and change colors to ComplexHeatmap compatible format + Identity <- colnames(x = exp_mat) + + identity_colors <- colors_use_idents + names(x = identity_colors) <- Identity + identity_colors_list <- list(Identity = identity_colors) + + # check grid color + if (is.null(x = grid_color)) { + grid_color <- NA + } else { + if (length(x = grid_color) > 1) { + cli_abort(message = "{.code grid_color} can only be a single value.") + } + if (isTRUE(x = Is_Color(colors = grid_color))) { + grid_color <- grid_color + } else { + cli_abort(message = "Value provided to {.code grid_color} ({.field {grid_color}}) is not valid value for color in R.") + } + } + + # Create identity annotation + if (isTRUE(x = flip)) { + column_ha <- ComplexHeatmap::rowAnnotation(Identity = Identity, + col = identity_colors_list, + na_col = "grey", + name = "Identity", + show_legend = FALSE + ) + } else { + column_ha <- ComplexHeatmap::HeatmapAnnotation(Identity = Identity, + col = identity_colors_list, + 
na_col = "grey", + name = "Identity", + show_legend = FALSE + ) + } + + # Set middle of color scale if not specified + if (is.null(x = exp_color_middle)) { + exp_color_middle <- Middle_Number(min = exp_color_min, max = exp_color_max) + } + + palette_length <- length(x = colors_use_exp) + palette_middle <- Middle_Number(min = 0, max = palette_length) + + # Create palette + col_fun = colorRamp2(c(exp_color_min, exp_color_middle, exp_color_max), colors_use_exp[c(1,palette_middle, palette_length)]) + + # Calculate and plot Elbow + if (isTRUE(x = plot_km_elbow)) { + # if elbow_kmax not NULL check it is usable + if (!is.null(x = elbow_kmax) && elbow_kmax > (nrow(x = exp_mat) - 1)) { + elbow_kmax <- nrow(x = exp_mat) - 1 + cli_warn(message = c("The value provided for {.code elbow_kmax} is too large.", + "i" = "Changing to (length(x = features)-1): {.field {elbow_kmax}}.") + ) + } + + # if elbow_kmax is NULL set value based on input feature list + if (is.null(x = elbow_kmax)) { + # set to (length(x = features)-1) if less than 21 features OR to 20 if greater than 21 features + if (nrow(x = exp_mat) > 21) { + elbow_kmax <- 20 + } else { + elbow_kmax <- nrow(x = exp_mat) - 1 + } + } + + km_elbow_plot <- kMeans_Elbow(data = exp_mat, k_max = elbow_kmax) + } + + # prep heatmap + if (isTRUE(x = flip)) { + if (isTRUE(x = raster)) { + layer_fun_flip = function(i, j, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(ComplexHeatmap::pindex(percent_mat, i, j)/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(ComplexHeatmap::pindex(exp_mat, i, j)), col = NA)) + } + } else { + cell_fun_flip = function(i, j, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(percent_mat[i, j]/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA)) + } + } + } else { + if (isTRUE(x = raster)) { + layer_fun = 
function(j, i, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(ComplexHeatmap::pindex(percent_mat, i, j)/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(ComplexHeatmap::pindex(exp_mat, i, j)), col = NA)) + } + } else { + cell_fun = function(j, i, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(percent_mat[i, j]/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA)) + } + } + } + + # Create legend for point size + lgd_list = list( + ComplexHeatmap::Legend(at = Identity, title = "Identity", legend_gp = gpar(fill = identity_colors_list[[1]]), labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + ComplexHeatmap::Legend(labels = c(10,25,50,75,100), title = "Percent Expressing", + graphics = list( + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.1) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.25) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.50) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.75) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"), + gp = gpar(fill = "black"))), + labels_gp = gpar(fontsize = legend_label_size), + title_gp = gpar(fontsize = legend_title_size, fontface = "bold") + ) + ) + + # Set x label roration + if (is.numeric(x = x_lab_rotate)) { + x_lab_rotate <- x_lab_rotate + } else if (isTRUE(x = x_lab_rotate)) { + x_lab_rotate <- 45 + } else { + x_lab_rotate <- 0 + } + + # Create Plot + set.seed(seed = seed) + if (isTRUE(x = raster)) { + if (isTRUE(x = flip)) { + cluster_dot_plot <- ComplexHeatmap::Heatmap(t(exp_mat), + 
heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + layer_fun = layer_fun, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + column_km = k, + row_km_repeats = ident_km_repeats, + border = "black", + left_annotation = column_ha, + column_km_repeats = feature_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_ident, + cluster_columns = cluster_feature) + } else { + cluster_dot_plot <- ComplexHeatmap::Heatmap(exp_mat, + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + layer_fun = layer_fun, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + row_km = k, + row_km_repeats = feature_km_repeats, + border = "black", + top_annotation = column_ha, + column_km_repeats = ident_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_feature, + cluster_columns = cluster_ident) + } + } else { + if (isTRUE(x = flip)) { + cluster_dot_plot <- ComplexHeatmap::Heatmap(t(exp_mat), + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + cell_fun = cell_fun_flip, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + column_km = k, + row_km_repeats = ident_km_repeats, + border = "black", + left_annotation = column_ha, + 
column_km_repeats = feature_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_ident, + cluster_columns = cluster_feature) + } else { + cluster_dot_plot <- ComplexHeatmap::Heatmap(exp_mat, + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + cell_fun = cell_fun, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + row_km = k, + row_km_repeats = feature_km_repeats, + border = "black", + top_annotation = column_ha, + column_km_repeats = ident_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_feature, + cluster_columns = cluster_ident) + } + } + + # Add pt.size legend & return plots + if (isTRUE(x = plot_km_elbow)) { + if (!is.null(x = plot_padding)) { + return(list(km_elbow_plot, ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list, padding = padding))) + } else { + return(list(km_elbow_plot, ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list))) + } + + } + if (!is.null(x = plot_padding)) { + return(ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list, padding = padding)) + } else { + return(ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list)) + } +} + + + +#' Clustered DotPlot +#' +#' Clustered DotPlots using ComplexHeatmap +#' +#' @param seurat_object Seurat object name. +#' @param features Features to plot. +#' @param split.by Variable in `@meta.data` to split the identities plotted by. +#' @param colors_use_exp Color palette to use for plotting expression scale. Default is `viridis::plasma(n = 20, direction = -1)`. 
+#' @param exp_color_min Minimum scaled average expression threshold (everything smaller will be set to this). +#' Default is -2. +#' @param exp_color_middle What scaled expression value to use for the middle of the provided `colors_use_exp`. +#' By default will be set to value in middle of `exp_color_min` and `exp_color_max`. +#' @param exp_color_max Minimum scaled average expression threshold (everything smaller will be set to this). +#' Default is 2. +#' @param exp_value_type Whether to plot average normalized expression or +#' scaled average normalized expression. Only valid when `split.by` is provided. +#' @param print_exp_quantiles Whether to print the quantiles of expression data in addition to plots. +#' Default is FALSE. NOTE: These values will be altered by choices of `exp_color_min` and `exp_color_min` +#' if there are values below or above those cutoffs, respectively. +#' @param x_lab_rotate How to rotate column labels. By default set to `TRUE` which rotates labels 45 degrees. +#' If set `FALSE` rotation is set to 0 degrees. Users can also supply custom angle for text rotation. +#' @param plot_padding if plot needs extra white space padding so no plot or labels are cutoff. +#' The parameter accepts TRUE or numeric vector of length 4. If TRUE padding will be set to +#' c(2, 15, 0 0) (bottom, left, top, right). Can also be customized further with numeric +#' vector of length 4 specifying the amount of padding in millimeters. +#' Default is NULL, no padding. +#' @param flip logical, whether to flip the axes of final plot. Default is FALSE; rows = features and +#' columns = idents. +#' @param k Value to use for k-means clustering on features Sets (km) parameter in `ComplexHeatmap::Heatmap()`. +#' From `ComplexHeatmap::Heatmap()`: Apply k-means clustering on rows. If the value is larger than 1, the +#' heatmap will be split by rows according to the k-means clustering. For each row slice, hierarchical +#' clustering is still applied with parameters above. 
+#' @param feature_km_repeats Number of k-means runs to get a consensus k-means clustering for features. +#' Note if `feature_km_repeats` is set to value greater than one, the final number of groups might be +#' smaller than row_km, but this might mean the original row_km is not a good choice. Default is 1000. +#' @param ident_km_repeats Number of k-means runs to get a consensus k-means clustering. Similar to +#' `feature_km_repeats`. Default is 1000. +#' @param row_label_size Size of the feature labels. Provided to `row_names_gp` in Heatmap call. +#' @param row_label_fontface Fontface to use for row labels. Provided to `row_names_gp` in Heatmap call. +#' @param grid_color color to use for heatmap grid. Default is NULL which "removes" grid by using NA color. +#' @param cluster_feature logical, whether to cluster and reorder feature axis. Default is TRUE. +#' @param cluster_ident logical, whether to cluster and reorder identity axis. Default is TRUE. +#' @param column_label_size Size of the feature labels. Provided to `column_names_gp` in Heatmap call. +#' @param legend_label_size Size of the legend text labels. Provided to `labels_gp` in Heatmap legend call. +#' @param legend_title_size Sise of the legend title text labels. Provided to `title_gp` in Heatmap legend call. +#' @param raster Logical, whether to render in raster format (faster plotting, smaller files). Default is FALSE. +#' @param plot_km_elbow Logical, whether or not to return the Sum Squared Error Elbow Plot for k-means clustering. +#' Estimating elbow of this plot is one way to determine "optimal" value for `k`. +#' Based on: \url{https://stackoverflow.com/a/15376462/15568251}. +#' @param elbow_kmax The maximum value of k to use for `plot_km_elbow`. Suggest setting larger value so the +#' true shape of plot can be observed. Value must be 1 less than number of features provided. If NULL parameter +#' will be set dependent on length of feature list up to `elbow_kmax = 20`. 
+#' @param assay Name of assay to use, defaults to the active assay. +#' @param group.by Group (color) cells in different ways (for example, orig.ident). +#' @param idents Which classes to include in the plot (default is all). +#' @param show_parent_dend_line Logical, Sets parameter of same name in `ComplexHeatmap::Heatmap()`. +#' From `ComplexHeatmap::Heatmap()`: When heatmap is split, whether to add a dashed line to mark parent +#' dendrogram and children dendrograms. Default is TRUE. +#' @param seed Sets seed for reproducible plotting (ComplexHeatmap plot). +#' +#' @return A ComplexHeatmap or if plot_km_elbow = TRUE a list containing ggplot2 object and ComplexHeatmap. +#' +#' @import cli +#' @import ggplot2 +#' @importFrom circlize colorRamp2 +#' @importFrom dplyr any_of filter select pull +#' @importFrom grid grid.circle grid.rect gpar +#' @importFrom magrittr "%>%" +#' @importFrom rlang is_installed +#' @importFrom Seurat DotPlot +#' @importFrom stats quantile +#' @importFrom stringr str_to_lower +#' @importFrom tidyr pivot_wider +#' +#' @noRd +#' +#' @concept seurat_plotting +#' +#' @author Ming Tang (Original Code), Sam Marsh (Wrap single function, added/modified functionality) +#' @references \url{https://divingintogeneticsandgenomics.com/post/how-to-make-a-multi-group-dotplot-for-single-cell-rnaseq-data/} +#' @seealso \url{https://twitter.com/tangming2005} +#' +#' @examples +#' \donttest{ +#' library(Seurat) +#' Clustered_DotPlot(seurat_object = pbmc_small, features = c("CD3E", "CD8", "GZMB", "MS4A1")) +#'} +#' + +Clustered_DotPlot_Multi_Group <- function( + seurat_object, + features, + split.by, + colors_use_exp = viridis_plasma_dark_high, + exp_color_min = -2, + exp_color_middle = NULL, + exp_color_max = 2, + exp_value_type = "scaled", + print_exp_quantiles = FALSE, + x_lab_rotate = TRUE, + plot_padding = NULL, + flip = FALSE, + k = 1, + feature_km_repeats = 1000, + ident_km_repeats = 1000, + row_label_size = 8, + row_label_fontface = "plain", + 
grid_color = NULL, + cluster_feature = TRUE, + cluster_ident = TRUE, + column_label_size = 8, + legend_label_size = 10, + legend_title_size = 10, + raster = FALSE, + plot_km_elbow = TRUE, + elbow_kmax = NULL, + assay = NULL, + group.by = NULL, + idents = NULL, + show_parent_dend_line = TRUE, + seed = 123 +) { + # Check for packages + ComplexHeatmap_check <- is_installed(pkg = "ComplexHeatmap") + if (isFALSE(x = ComplexHeatmap_check)) { + cli_abort(message = c( + "Please install the {.val ComplexHeatmap} package to use {.code Clustered_DotPlot}", + "i" = "This can be accomplished with the following commands: ", + "----------------------------------------", + "{.field `install.packages({symbol$dquote_left}BiocManager{symbol$dquote_right})`}", + "{.field `BiocManager::install({symbol$dquote_left}ComplexHeatmap{symbol$dquote_right})`}", + "----------------------------------------" + )) + } + + # Check Seurat + Is_Seurat(seurat_object = seurat_object) + + # Check split valid + if (!is.null(x = split.by)) { + split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] + } + + # Add check for group.by before getting to colors + if (!is.null(x = group.by) && group.by != "ident") { + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + } + + # set assay (if null set to active assay) + assay <- assay %||% DefaultAssay(object = seurat_object) + + # set padding + if (!is.null(x = plot_padding)) { + if (isTRUE(x = plot_padding)) { + # Default extra padding + # 2 bottom: typically mirrors unpadded plot + # 15 left: usually enough to make rotated labels fit in plot window + padding <- unit(c(2, 15, 0, 0), "mm") + } else { + if (length(x = plot_padding) != 4) { + cli_abort(message = c("{.code plot_padding} must be numeric vector of length 4 or TRUE", + "i" = "Numeric vector will correspond to amount of padding to be added to bottom, left, top, right).", + "i" = "Seeting {.field TRUE} will 
set padding to {.code c(2, 10, 0, 0)}", + "i" = "Default is {.val NULL} for no extra padding.")) + } + padding <- unit(plot_padding, "mm") + } + } + + # Check expression value type + accepted_exp_types <- c("scaled", "average") + + exp_value_type <- str_to_lower(string = exp_value_type) + + if (!exp_value_type %in% accepted_exp_types) { + cli_abort(message = "{.code exp_value_type}, must be one of {.field {accepted_exp_types}}") + } + + # Ignore exp_min and exp_max colors + if (exp_value_type == "average") { + if (exp_color_min != -2 || exp_color_max != 2 || !is.null(x = exp_color_middle)) { + ignored_params <- c("exp_color_min", "exp_color_max", "exp_color_middle") + cli_warn(message = c("One or more of the following parameters were set to a non-default value but are ignored when {.code exp_value_type = 'avergae'}", + "i" = "{.field {glue_collapse_scCustom(input_string = ignored_params, and = TRUE)}}.")) + } + } + + # Check acceptable fontface + if (!row_label_fontface %in% c("plain", "bold", "italic", "oblique", "bold.italic")) { + cli_abort(message = c("{.code row_label_face} {.val {row_label_face}} not recognized.", + "i" = "Must be one of {.val plain}, {.val bold}, {.val italic}, {.val olique}, or {.val bold.italic}.")) + } + + # Check unique features + features_unique <- unique(x = features) + + if (length(x = features_unique) != length(x = features)) { + cli_warn("Feature list contains duplicates, making unique.") + } + + # Check features and meta to determine which features present + all_found_features <- Feature_PreCheck(object = seurat_object, features = features_unique, assay = assay) + + # Check exp min/max set correctly + if (!exp_color_min < exp_color_max) { + cli_abort(message = c("Expression color min/max values are not compatible.", + "i" = "The value for {.code exp_color_min}: {.field {exp_color_min}} must be less than the value for {.code exp_color_max}: {.field {exp_color_max}}.") + ) + } + + # set group.by value + group.by <- group.by %||% 
"ident" + + # Get data + exp_mat_df <- suppressMessages(data.frame(AverageExpression(object = seurat_object, features = all_found_features, group.by = c(group.by, split.by), assays = assay, layer = "data")[[assay]])) + + # Data is returned in non-log space after averaging, return to log space for plotting + exp_mat <- data.frame(lapply(exp_mat_df, function(x){ + log1p(x) + })) + + exp_mat <- as.matrix(exp_mat) + rownames(exp_mat) <- rownames(exp_mat_df) + + # scale data + if (exp_value_type == "scaled") { + exp_mat <- FastRowScale(mat = exp_mat) + rownames(exp_mat) <- rownames(exp_mat_df) + } + + # check underscore present in split.by and replace if so + split_by_names <- Fetch_Meta(object = seurat_object) %>% + select(any_of(split.by)) %>% + pull() + + under_score <- grep(pattern = "_", x = split_by_names, value = TRUE) + + if (length(x = under_score) > 0) { + split_by_names <- gsub(pattern = "_", replacement = ".", x = split_by_names) + seurat_object[[split.by]] <- split_by_names + } + + percent_mat <- Percent_Expressing(seurat_object = seurat_object, features = all_found_features, split_by = split.by, group_by = group.by, assay = assay) + + # reorder columns to match + idx <- match(colnames(x = exp_mat), colnames(x = percent_mat)) + idx + + percent_mat <- percent_mat[, idx] + percent_mat <- as.matrix(percent_mat) + + # print quantiles + if (isTRUE(x = print_exp_quantiles)) { + cli_inform(message = "Quantiles of gene expression data are:") + print(quantile(exp_mat, c(0.1, 0.5, 0.9, 0.99))) + } + + # check grid color + if (is.null(x = grid_color)) { + grid_color <- NA + } else { + if (length(x = grid_color) > 1) { + cli_abort(message = "{.code grid_color} can only be a single value.") + } + if (isTRUE(x = Is_Color(colors = grid_color))) { + grid_color <- grid_color + } else { + cli_abort(message = "Value provided to {.code grid_color} ({.field {grid_color}}) is not valid value for color in R.") + } + } + + # Set middle of color scale if not specified + if 
(exp_value_type == "scaled") { + if (is.null(x = exp_color_middle)) { + exp_color_middle <- Middle_Number(min = exp_color_min, max = exp_color_max) + } + + palette_length <- length(x = colors_use_exp) + palette_middle <- Middle_Number(min = 0, max = palette_length) + + # Create palette + col_fun <- colorRamp2(c(exp_color_min, exp_color_middle, exp_color_max), colors_use_exp[c(1,palette_middle, palette_length)]) + } + + if (exp_value_type == "average") { + if (is.null(x = exp_color_middle)) { + avg_color_max <- max(apply(exp_mat, 2, function(x) max(x, na.rm = TRUE))) + avg_color_min <- 0 + avg_color_middle <- Middle_Number(min = 0, max = avg_color_max) + + palette_length <- length(x = colors_use_exp) + palette_middle <- Middle_Number(min = 0, max = palette_length) + + # Create palette + col_fun <- colorRamp2(c(avg_color_min, avg_color_middle, avg_color_max), colors_use_exp[c(1,palette_middle, palette_length)]) + + } + } + + # Calculate and plot Elbow + if (isTRUE(x = plot_km_elbow)) { + # if elbow_kmax not NULL check it is usable + if (!is.null(x = elbow_kmax) && elbow_kmax > (nrow(x = exp_mat) - 1)) { + elbow_kmax <- nrow(x = exp_mat) - 1 + cli_warn(message = c("The value provided for {.code elbow_kmax} is too large.", + "i" = "Changing to (length(x = features)-1): {.field {elbow_kmax}}.") + ) + } + + # if elbow_kmax is NULL set value based on input feature list + if (is.null(x = elbow_kmax)) { + # set to (length(x = features)-1) if less than 21 features OR to 20 if greater than 21 features + if (nrow(x = exp_mat) > 21) { + elbow_kmax <- 20 + } else { + elbow_kmax <- nrow(x = exp_mat) - 1 + } + } + + km_elbow_plot <- kMeans_Elbow(data = exp_mat, k_max = elbow_kmax) + } + + # prep heatmap + if (isTRUE(x = flip)) { + if (isTRUE(x = raster)) { + layer_fun_flip = function(i, j, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(ComplexHeatmap::pindex(percent_mat, i, j)/100) * 
unit(2, "mm"), + gp = gpar(fill = col_fun(ComplexHeatmap::pindex(exp_mat, i, j)), col = NA)) + } + } else { + cell_fun_flip = function(i, j, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(percent_mat[i, j]/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA)) + } + } + } else { + if (isTRUE(x = raster)) { + layer_fun = function(j, i, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(ComplexHeatmap::pindex(percent_mat, i, j)/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(ComplexHeatmap::pindex(exp_mat, i, j)), col = NA)) + } + } else { + cell_fun = function(j, i, x, y, w, h, fill) { + grid.rect(x = x, y = y, width = w, height = h, + gp = gpar(col = grid_color, fill = NA)) + grid.circle(x=x,y=y,r= sqrt(percent_mat[i, j]/100) * unit(2, "mm"), + gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA)) + } + } + } + + # Create legend for point size + lgd_list = list( + ComplexHeatmap::Legend(labels = c(10,25,50,75,100), title = "Percent Expressing", + graphics = list( + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.1) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.25) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.50) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.75) * unit(2, "mm"), + gp = gpar(fill = "black")), + function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"), + gp = gpar(fill = "black"))), + labels_gp = gpar(fontsize = legend_label_size), + title_gp = gpar(fontsize = legend_title_size, fontface = "bold") + ) + ) + + # Set x label roration + if (is.numeric(x = x_lab_rotate)) { + x_lab_rotate <- x_lab_rotate + } else if (isTRUE(x = x_lab_rotate)) { + 
x_lab_rotate <- 45 + } else { + x_lab_rotate <- 0 + } + + # Create Plot + set.seed(seed = seed) + if (isTRUE(x = raster)) { + if (isTRUE(x = flip)) { + cluster_dot_plot <- ComplexHeatmap::Heatmap(t(exp_mat), + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + layer_fun = layer_fun, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + column_km = k, + row_km_repeats = ident_km_repeats, + border = "black", + column_km_repeats = feature_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_ident, + cluster_columns = cluster_feature) + } else { + cluster_dot_plot <- ComplexHeatmap::Heatmap(exp_mat, + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + layer_fun = layer_fun, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + row_km = k, + row_km_repeats = feature_km_repeats, + border = "black", + column_km_repeats = ident_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_feature, + cluster_columns = cluster_ident) + } + } else { + if (isTRUE(x = flip)) { + cluster_dot_plot <- ComplexHeatmap::Heatmap(t(exp_mat), + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + cell_fun = cell_fun_flip, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = 
gpar(fontsize = column_label_size), + column_km = k, + row_km_repeats = ident_km_repeats, + border = "black", + column_km_repeats = feature_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_ident, + cluster_columns = cluster_feature) + } else { + cluster_dot_plot <- ComplexHeatmap::Heatmap(exp_mat, + heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), + col=col_fun, + rect_gp = gpar(type = "none"), + cell_fun = cell_fun, + row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), + column_names_gp = gpar(fontsize = column_label_size), + row_km = k, + row_km_repeats = feature_km_repeats, + border = "black", + column_km_repeats = ident_km_repeats, + show_parent_dend_line = show_parent_dend_line, + column_names_rot = x_lab_rotate, + cluster_rows = cluster_feature, + cluster_columns = cluster_ident) + } + } + + # Add pt.size legend & return plots + if (isTRUE(x = plot_km_elbow)) { + if (!is.null(x = plot_padding)) { + return(list(km_elbow_plot, ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list, merge_legend = TRUE, padding = padding))) + } else { + return(list(km_elbow_plot, ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list, merge_legend = TRUE))) + } + + } + if (!is.null(x = plot_padding)) { + return(ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list, merge_legend = TRUE, padding = padding)) + } else { + return(ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list, merge_legend = TRUE)) + } +} + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #################### TEST/HELPERS #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -429,6 +1400,14 @@ AutoPointSize_scCustom <- function(data, raster = 
NULL) { yes = 1, no = min(1583 / data, 1) )) + } + if (inherits(what = "Seurat", x = data)) { + + return(ifelse( + test = isTRUE(x = raster), + yes = 1, + no = min(1583 / length(x = Cells(x = data)), 1) + )) } else { # for data frame/object based values (from Seurat, see documentation) return(ifelse( @@ -656,7 +1635,7 @@ theme_ggprism_mod <- function( axis_text_angle = 0, border = FALSE ) { - theme_prism(palette = palette, + mod_theme <- theme_prism(palette = palette, base_size = base_size, base_family = base_family, base_fontface = base_fontface, @@ -667,6 +1646,10 @@ theme_ggprism_mod <- function( theme(legend.title = element_text(hjust = 0), axis.text = element_text(size = rel(0.95), face = "plain") ) + + mod_theme[c("legend.text.align", "legend.title.align")] <- NULL + + return(mod_theme) } diff --git a/R/QC_Plotting_Seurat.R b/R/QC_Plotting_Seurat.R index 4eebbbdaf0..45ca1758bb 100644 --- a/R/QC_Plotting_Seurat.R +++ b/R/QC_Plotting_Seurat.R @@ -67,9 +67,6 @@ QC_Plots_Genes <- function( # Check Seurat Is_Seurat(seurat_object = seurat_object) - # Add pt.size check - pt.size <- pt.size %||% AutoPointSize_scCustom(data = seurat_object) - plot <- VlnPlot_scCustom(seurat_object = seurat_object, features = "nFeature_RNA", group.by = group.by, colors_use = colors_use, pt.size = pt.size, raster = raster, ggplot_default_colors = ggplot_default_colors, color_seed = color_seed, plot_median = plot_median, plot_boxplot = plot_boxplot, median_size = median_size, ...) 
+ geom_hline(yintercept = c(low_cutoff, high_cutoff), linetype = "dashed", color = "red") + xlab(x_axis_label) + @@ -156,9 +153,6 @@ QC_Plots_UMIs <- function( # Check Seurat Is_Seurat(seurat_object = seurat_object) - # Add pt.size check - pt.size <- pt.size %||% AutoPointSize_scCustom(data = seurat_object) - plot <- VlnPlot_scCustom(seurat_object = seurat_object, features = "nCount_RNA", group.by = group.by, colors_use = colors_use, pt.size = pt.size, raster = raster, ggplot_default_colors = ggplot_default_colors, color_seed = color_seed, plot_median = plot_median, plot_boxplot = plot_boxplot, median_size = median_size, ...) + geom_hline(yintercept = c(low_cutoff, high_cutoff), linetype = "dashed", color = "red") + xlab(x_axis_label) + @@ -248,9 +242,6 @@ QC_Plots_Mito <- function( # Check Seurat Is_Seurat(seurat_object = seurat_object) - # Add pt.size check - pt.size <- pt.size %||% AutoPointSize_scCustom(data = seurat_object) - plot <- VlnPlot_scCustom(seurat_object = seurat_object, features = mito_name, group.by = group.by, colors_use = colors_use, pt.size = pt.size, raster = raster, ggplot_default_colors = ggplot_default_colors, color_seed = color_seed, plot_median = plot_median, plot_boxplot = plot_boxplot, median_size = median_size, ...) 
+ geom_hline(yintercept = c(low_cutoff, high_cutoff), linetype = "dashed", color = "red") + xlab(x_axis_label) + @@ -341,9 +332,6 @@ QC_Plots_Feature <- function( # Check Seurat Is_Seurat(seurat_object = seurat_object) - # Add pt.size check - pt.size <- pt.size %||% AutoPointSize_scCustom(data = seurat_object) - if (is.null(x = plot_title)) { plot_title <- paste0(feature, " per Cell/Nucleus") } @@ -407,7 +395,7 @@ QC_Plots_Feature <- function( #' #' @examples #' library(Seurat) -#' pbmc_small <- Add_Cell_Complexity_Seurat(pbmc_small) +#' pbmc_small <- Add_Cell_Complexity(pbmc_small) #' #' QC_Plots_Complexity(seurat_object = pbmc_small) #' @@ -505,9 +493,6 @@ QC_Plots_Combined_Vln <- function( # Check Seurat Is_Seurat(seurat_object = seurat_object) - # Add pt.size check - pt.size <- pt.size %||% AutoPointSize_scCustom(data = seurat_object) - # Setup cutoff values if (length(x = feature_cutoffs) > 2 || length(x = UMI_cutoffs) > 2 || length(x = mito_cutoffs) > 2) { cli_abort(message = "Length of each cutoff vector cannot be greater than {.field 2 (two)}.") @@ -605,7 +590,7 @@ QC_Histogram <- function( # Check split valid if (!is.null(x = split.by)) { - split.by <- Meta_Present(seurat_object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] + split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] } # Check feature length if split.by provided @@ -616,9 +601,9 @@ QC_Histogram <- function( } # Check against object - found_features <- Gene_Present(data = seurat_object, gene_list = found_defaults[[2]], omit_warn = FALSE, print_msg = FALSE, case_check_msg = FALSE, return_none = TRUE, seurat_assay = assay) + found_features <- Feature_Present(data = seurat_object, features = found_defaults[[2]], omit_warn = FALSE, print_msg = FALSE, case_check_msg = FALSE, return_none = TRUE, seurat_assay = assay) - found_meta <- Meta_Present(seurat_object = seurat_object, meta_col_names = 
found_features[[2]], omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) + found_meta <- Meta_Present(object = seurat_object, meta_col_names = found_features[[2]], omit_warn = FALSE, print_msg = FALSE, return_none = TRUE) # Combine lists all_not_found_features <- found_meta[[2]] diff --git a/R/Reexports.R b/R/Reexports.R new file mode 100644 index 0000000000..14ad51f459 --- /dev/null +++ b/R/Reexports.R @@ -0,0 +1,6 @@ +#' @importFrom SeuratObject as.Seurat +#' @export +#' @note See \code{\link{as.Seurat.liger}} for scCustomize extension of this generic to converting Liger objects. +#' +#' +SeuratObject::as.Seurat diff --git a/R/Seurat_Iterative_Plotting.R b/R/Seurat_Iterative_Plotting.R index e96c139b0f..47cce6caba 100644 --- a/R/Seurat_Iterative_Plotting.R +++ b/R/Seurat_Iterative_Plotting.R @@ -104,11 +104,17 @@ Iterate_PC_Loading_Plots <- function( #' @param file_type File type to save output as. Must be one of following: ".pdf", ".png", ".tiff", ".jpeg", or ".svg". #' @param single_pdf saves all plots to single PDF file (default = FALSE). `file_type`` must be .pdf #' @param color color scheme to use. -#' @param legend logical, whether or not to include plot legend, default is TRUE. +#' @param no_legend logical, whether or not to include plot legend, default is TRUE. +#' @param title_prefix Value that should be used for plot title prefix if `no_legend = TRUE`. +#' If NULL the value of `meta_data_column` will be used. Default is NULL. +#' @param title_prefix Value that should be used for plot title prefix if `no_legend = TRUE`. +#' If NULL the value of `meta_data_column` will be used. Default is NULL. #' @param dpi dpi for image saving. #' @param reduction Dimensionality Reduction to use (default is object default). #' @param dims Dimensions to plot. #' @param pt.size Adjust point size for plotting. +#' @param raster Convert points to raster format. Default is NULL which will rasterize by default if +#' greater than 200,000 cells. #' @param ... 
Extra parameters passed to \code{\link[Seurat]{DimPlot}}. #' #' @return A ggplot object @@ -142,19 +148,24 @@ Iterate_DimPlot_bySample <- function( single_pdf = FALSE, dpi = 600, color = "black", - legend = TRUE, + no_legend = TRUE, + title_prefix = NULL, reduction = NULL, dims = c(1, 2), pt.size = NULL, + raster = NULL, ... ) { # Check Seurat Is_Seurat(seurat_object = seurat_object) + # Harmonize pt.size across all plots + pt.size <- pt.size %||% AutoPointSize_scCustom(data = seurat_object) + # Check meta.data column if not orig.ident if (sample_column != "orig.ident") { # Check meta data - sample_column <- Meta_Present(seurat_object = seurat_object, meta_col_names = sample_column, omit_warn = FALSE, print_msg = FALSE)[[1]] + sample_column <- Meta_Present(object = seurat_object, meta_col_names = sample_column, omit_warn = FALSE, print_msg = FALSE)[[1]] # stop if none found if (length(x = sample_column) == 0) { @@ -210,28 +221,54 @@ Iterate_DimPlot_bySample <- function( y_axis <- c(min(reduc_coordinates[, 2]), max(reduc_coordinates[, 2])) - # Extract orig.ident + # Extract sample id column column_list <- as.character(x = unique(x = seurat_object@meta.data[[sample_column]])) + num_idents <- length(x = column_list) + + # Create plot titles if needed. 
+ if (!is.null(x = title_prefix) && isFALSE(x = no_legend)) { + cli_warn(message = "{.code title_prefix} was omitted as {.code no_legend = FALSE}.") + } + + if (is.null(x = title_prefix) && isTRUE(x = no_legend)) { + plot_title <- lapply(1:num_idents, function(z) { + paste0(sample_column, ": ", column_list[z]) + }) + } else { + plot_title <- lapply(1:num_idents, function(z) { + paste0(title_prefix, ": ", column_list[z]) + }) + } + + if (!is.null(x = title_prefix) && length(x = title_prefix) != 1 && isTRUE(x = no_legend)) { + cli_abort(message = "{.field `title_prefix`} must be vector of length 1.") + } # Create list of cells per sample cells_per_sample <- lapply(column_list, function(sample) { row.names(x = seurat_object@meta.data)[which(x = seurat_object@meta.data[[sample_column]] == sample)] }) + # Add raster check for scCustomize + raster <- raster %||% (length(x = Cells(x = seurat_object)) > 2e5) + + # Single PDF option if (isTRUE(x = single_pdf)) { cli_inform(message = "{.field Generating plots}") pboptions(char = "=") - all_plots <- pblapply(cells_per_sample,function(cells) { - if (isTRUE(x = legend)) { - DimPlot(object = seurat_object, cells = cells, group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, ...) + + all_plots <- pblapply(1:num_idents,function(x) { + if (isTRUE(x = no_legend)) { + DimPlot(object = seurat_object, cells = cells_per_sample[[x]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, raster = raster, ...) + xlim(x_axis) + - ylim(y_axis) + ylim(y_axis) + + NoLegend() + + ggtitle(plot_title[x]) + + CenterTitle() } else { - DimPlot(object = seurat_object, cells = cells, group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, ...) + + DimPlot(object = seurat_object, cells = cells_per_sample[[x]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, raster = raster, ...) 
+ xlim(x_axis) + - ylim(y_axis) + - NoLegend() + ylim(y_axis) } }) cli_inform(message = "{.field Saving plots to file}") @@ -250,15 +287,17 @@ Iterate_DimPlot_bySample <- function( cli_inform(message = "{.field Generating plots and saving plots to file}") pb <- txtProgressBar(min = 0, max = length(cells_per_sample), style = 3, file = stderr()) for (i in 1:length(cells_per_sample)) { - if (isTRUE(x = legend)) { - DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, ...) + + if (isTRUE(x = no_legend)) { + DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, raster = raster, ...) + xlim(x_axis) + - ylim(y_axis) + ylim(y_axis) + + NoLegend() + + ggtitle(plot_title[i]) + + CenterTitle() } else { - DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, ...) + + DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, raster = raster, ...) + xlim(x_axis) + - ylim(y_axis) + - NoLegend() + ylim(y_axis) } suppressMessages(ggsave(filename = paste(file_path, column_list[[i]], file_name, file_type, sep=""), dpi = dpi)) setTxtProgressBar(pb = pb, value = i) @@ -270,15 +309,17 @@ Iterate_DimPlot_bySample <- function( cli_inform(message = "{.field Generating plots and saving plots to file}") pb <- txtProgressBar(min = 0, max = length(cells_per_sample), style = 3, file = stderr()) for (i in 1:length(cells_per_sample)) { - if (isTRUE(x = legend)) { - DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, ...) 
+ + if (isTRUE(x = no_legend)) { + DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, raster = raster, ...) + xlim(x_axis) + - ylim(y_axis) + ylim(y_axis) + + NoLegend() + + ggtitle(plot_title[i]) + + CenterTitle() } else { - DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, ...) + + DimPlot(object = seurat_object, cells = cells_per_sample[[i]], group.by = sample_column, cols = color, reduction = reduction, pt.size = pt.size, raster = raster, ...) + xlim(x_axis) + - ylim(y_axis) + - NoLegend() + ylim(y_axis) } suppressMessages(ggsave(filename = paste(file_path, column_list[[i]], file_name, file_type, sep=""), useDingbats = FALSE)) setTxtProgressBar(pb = pb, value = i) @@ -550,7 +591,7 @@ Iterate_Meta_Highlight_Plot <- function( Is_Seurat(seurat_object = seurat_object) # Check meta data - meta_data_column <- Meta_Present(seurat_object = seurat_object, meta_col_names = meta_data_column, omit_warn = FALSE, print_msg = FALSE)[[1]] + meta_data_column <- Meta_Present(object = seurat_object, meta_col_names = meta_data_column, omit_warn = FALSE, print_msg = FALSE)[[1]] # stop if none found if (length(x = meta_data_column) == 0) { @@ -642,6 +683,7 @@ Iterate_Meta_Highlight_Plot <- function( highlight_color <- highlight_color } } + # Create plot titles if needed. if (!is.null(x = title_prefix) && isFALSE(x = no_legend)) { cli_warn(message = "{.code title_prefix} was omitted as {.code no_legend = FALSE}.") @@ -794,7 +836,12 @@ Iterate_Meta_Highlight_Plot <- function( #' @param file_path directory file path and/or file name prefix. Defaults to current wd. #' @param file_name name suffix and file extension. #' @param file_type File type to save output as. Must be one of following: ".pdf", ".png", ".tiff", ".jpeg", or ".svg". 
-#' @param single_pdf saves all plots to single PDF file (default = FALSE). `file_type`` must be .pdf. +#' @param single_pdf saves all plots to single PDF file (default = FALSE). +#' @param features_per_page numeric, number of features to plot on single page if `single_pdf = TRUE`. Default is 1. +#' @param num_columns Number of columns in plot layout (only applicable if `single_pdf = TRUE` AND +#' `features_per_page` > 1). +#' @param landscape logical, when plotting multiple features per page in single PDF whether to use landscape or portrait +#' page dimensions (default is TRUE). #' @param dpi dpi for image saving. #' @param pt.size Adjust point size for plotting. #' @param reduction Dimensionality Reduction to use (if NULL then defaults to Object default). @@ -843,6 +890,9 @@ Iterate_FeaturePlot_scCustom <- function( file_name = NULL, file_type = NULL, single_pdf = FALSE, + features_per_page = 1, + num_columns = NULL, + landscape = TRUE, dpi = 600, pt.size = NULL, reduction = NULL, @@ -853,7 +903,7 @@ Iterate_FeaturePlot_scCustom <- function( ) { # Deprecation warning if (lifecycle::is_present(gene_list)) { - lifecycle::deprecate_warn(when = "1.2.0", + lifecycle::deprecate_warn(when = "2.0.0", what = "Iterate_FeaturePlot_scCustom(gene_list)", with = "Iterate_FeaturePlot_scCustom(features)", details = c("v" = "The parameter will remain functional until next major update.", @@ -862,7 +912,6 @@ Iterate_FeaturePlot_scCustom <- function( features <- gene_list } - # temp turn off message call from FeaturePlot_scCustomize op <- options(scCustomize_warn_na_cutoff = FALSE) on.exit(options(op)) @@ -882,6 +931,13 @@ Iterate_FeaturePlot_scCustom <- function( } } + # Check num_columns validity + if (!is.null(x = num_columns) && !isTRUE(x = single_pdf)) { + cli_warn(message = c("{.code num_columns} is only valid when {.code single_pdf = TRUE}", + "i" = "Setting {.num_columns = NULL}")) + num_columns <- NULL + } + # Set file_path before path check if current dir specified as 
opposed to leaving set to NULL if (!is.null(x = file_path) && file_path == "") { file_path <- NULL @@ -899,6 +955,19 @@ Iterate_FeaturePlot_scCustom <- function( cli_abort(message = "No file name provided. Please provide a file name using {.code file_name}.") } + # multi-plot checks + if (isFALSE(x = single_pdf) && features_per_page != 1) { + cli_warn(message = "{.code features_per_page} only applicable when {.code single_pdf = TRUE}.") + } + + if (isFALSE(x = is.numeric(x = features_per_page))) { + cli_abort(message = "{.code features_per_page} must be numeric value.") + } + + if (isTRUE(x = is.numeric(x = features_per_page)) && isFALSE(x = check_whole_num(x = features_per_page))) { + cli_abort(message = "{.code features_per_page} must be whole numeric value.") + } + # Extract default reduction reduction <- reduction %||% DefaultDimReduc(object = seurat_object) @@ -935,8 +1004,6 @@ Iterate_FeaturePlot_scCustom <- function( "i" = "Suggest splitting {.code features} and running {.field Iterate_FeaturePlot_scCustom} once for each feature list.")) } - # gene_list <- Gene_Present(data = seurat_object, gene_list = gene_list, print_msg = FALSE, case_check = TRUE)[[1]] - # Modify Cluster Labels names if needed for saving plots if (!is.null(x = names(x = all_found_features)) && isFALSE(x = single_pdf)) { names_vec_mod <- gsub(pattern = "/", replacement = "-", x = names(x = all_found_features)) @@ -953,30 +1020,120 @@ Iterate_FeaturePlot_scCustom <- function( # Single PDF option if (isTRUE(x = single_pdf)) { - cli_inform(message = "{.field Generating plots}") - pboptions(char = "=") - all_plots <- pblapply(all_found_features,function(gene) {FeaturePlot_scCustom(seurat_object = seurat_object, features = gene, colors_use = colors_use, na_color = na_color, na_cutoff = na_cutoff, split.by = split.by, order = order, pt.size = pt.size, reduction = reduction, raster = raster, alpha_exp = alpha_exp, alpha_na_exp = alpha_na_exp,...)}) - cli_inform(message = "{.field Saving plots to 
file}") - # save plots with cluster annotation - if (!is.null(x = names(x = all_found_features)) && is.null(x = split.by)) { - pdf(paste(file_path, file_name, file_type, sep="")) - pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) - for (i in 1:length(all_plots)) { - print(all_plots[[i]] + ggtitle((paste0(all_found_features[i], "_", names(x = all_found_features)[i])))) - setTxtProgressBar(pb = pb, value = i) + # plot if one fearture per page + if (features_per_page == 1) { + cli_inform(message = "{.field Generating plots}") + pboptions(char = "=") + all_plots <- pblapply(all_found_features,function(gene) {FeaturePlot_scCustom(seurat_object = seurat_object, features = gene, colors_use = colors_use, na_color = na_color, na_cutoff = na_cutoff, split.by = split.by, order = order, pt.size = pt.size, reduction = reduction, raster = raster, alpha_exp = alpha_exp, alpha_na_exp = alpha_na_exp,...)}) + cli_inform(message = "{.field Saving plots to file}") + # save plots with cluster annotation + if (!is.null(x = names(x = all_found_features)) && is.null(x = split.by)) { + pdf(paste(file_path, file_name, file_type, sep="")) + pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) + for (i in 1:length(all_plots)) { + print(all_plots[[i]] + ggtitle((paste0(all_found_features[i], "_", names(x = all_found_features)[i])))) + setTxtProgressBar(pb = pb, value = i) + } + close(con = pb) + dev.off() + } else { + # Save plots without cluster annotation + pdf(paste(file_path, file_name, file_type, sep="")) + pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) + for (i in 1:length(all_plots)) { + print(all_plots[[i]]) + setTxtProgressBar(pb = pb, value = i) + } + close(con = pb) + dev.off() } - close(con = pb) - dev.off() } else { - # Save plots without cluster annotation - pdf(paste(file_path, file_name, file_type, sep="")) - pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file 
= stderr()) - for (i in 1:length(all_plots)) { - print(all_plots[[i]]) - setTxtProgressBar(pb = pb, value = i) + # for plotting multiple features per page + + # split features by + features_split <- Split_Vector(x = all_found_features, chunk_size = features_per_page, verbose = FALSE) + + cli_inform(message = "{.field Generating plots}") + pboptions(char = "=") + all_plots <- pblapply(features_split, function(z) {FeaturePlot_scCustom(seurat_object = seurat_object, features = z, colors_use = colors_use, na_color = na_color, na_cutoff = na_cutoff, split.by = split.by, order = order, pt.size = pt.size, reduction = reduction, raster = raster, alpha_exp = alpha_exp, alpha_na_exp = alpha_na_exp, num_columns = num_columns, ...)}) + + + + cli_inform(message = "{.field Saving plots to file}") + if (isTRUE(x = landscape)) { + # save plots with cluster annotation + if (!is.null(x = names(x = all_found_features)) && is.null(x = split.by)) { + pdf(paste(file_path, file_name, file_type, sep=""), width = 22, height = 17) + pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) + + list_names <- lapply(1:length(x = features_split), function(k){ + feat_name <- features_split[[k]] + clu_name <- names(x = features_split[[k]]) + new_names <- paste0(feat_name, "_", clu_name) + }) + + all_plots <- lapply(1:length(x = all_plots), function (j){ + plot_split <- all_plots[[j]] + for (k in 1:length(x = list_names[[j]])) { + plot_split[[k]][["labels"]][["title"]] <- list_names[[j]][k] + } + return(plot_split) + }) + + for (i in 1:length(x = all_plots)) { + print(all_plots[[i]]) + setTxtProgressBar(pb = pb, value = i) + } + close(con = pb) + dev.off() + } else { + # Save plots without cluster annotation + pdf(paste(file_path, file_name, file_type, sep=""), width = 22, height = 17) + pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) + for (i in 1:length(all_plots)) { + print(all_plots[[i]]) + setTxtProgressBar(pb = pb, value = i) + } + 
close(con = pb) + dev.off() + } + } else { + if (!is.null(x = names(x = all_found_features)) && is.null(x = split.by)) { + pdf(paste(file_path, file_name, file_type, sep=""), width = 17, height = 22) + pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) + + list_names <- lapply(1:length(x = features_split), function(k){ + feat_name <- features_split[[k]] + clu_name <- names(x = features_split[[k]]) + new_names <- paste0(feat_name, "_", clu_name) + }) + + all_plots <- lapply(1:length(x = all_plots), function (j){ + plot_split <- all_plots[[j]] + for (k in 1:length(x = list_names[[j]])) { + plot_split[[k]][["labels"]][["title"]] <- list_names[[j]][k] + } + return(plot_split) + }) + + for (i in 1:length(x = all_plots)) { + print(all_plots[[i]]) + setTxtProgressBar(pb = pb, value = i) + } + close(con = pb) + dev.off() + } else { + # Save plots without cluster annotation + pdf(paste(file_path, file_name, file_type, sep=""), width = 17, height = 22) + pb <- txtProgressBar(min = 0, max = length(all_plots), style = 3, file = stderr()) + for (i in 1:length(all_plots)) { + print(all_plots[[i]]) + setTxtProgressBar(pb = pb, value = i) + } + close(con = pb) + dev.off() + } } - close(con = pb) - dev.off() } } else { @@ -1152,9 +1309,6 @@ Iterate_VlnPlot_scCustom <- function( all_found_features <- features } - # # Check whether features are present in object - # gene_list <- Gene_Present(data = seurat_object, gene_list = gene_list, print_msg = FALSE, case_check = TRUE)[[1]] - # Set default color palette based on number of levels being plotted if (is.null(x = group.by)) { group_by_length <- length(x = unique(x = seurat_object@active.ident)) @@ -1348,7 +1502,7 @@ Iterate_Plot_Density_Custom <- function( } # Check whether features are present in object - gene_list <- Gene_Present(data = seurat_object, gene_list = gene_list, print_msg = FALSE, case_check = TRUE)[[1]] + gene_list <- Feature_Present(data = seurat_object, features = gene_list, print_msg = 
FALSE, case_check = TRUE)[[1]] # check palettes if (!is.null(x = custom_palette) && viridis_palette != "magma") { @@ -1557,7 +1711,7 @@ Iterate_Plot_Density_Joint <- function( # Check whether features are present in object checked_gene_list <- lapply(1:length(gene_list), function(x){ - genes <- Gene_Present(data = seurat_object, gene_list = gene_list[[x]], print_msg = FALSE, case_check = TRUE, return_none = TRUE)[[1]] + genes <- Feature_Present(data = seurat_object, features = gene_list[[x]], print_msg = FALSE, case_check = TRUE, return_none = TRUE)[[1]] }) if (!is.null(x = names(x = gene_list))) { diff --git a/R/Seurat_Plotting.R b/R/Seurat_Plotting.R index 0453f07305..3d64ef4684 100644 --- a/R/Seurat_Plotting.R +++ b/R/Seurat_Plotting.R @@ -94,8 +94,8 @@ FeaturePlot_scCustom <- function( # Check is slot is supplied if (lifecycle::is_present(slot)) { lifecycle::deprecate_warn(when = "2.0.0", - what = "slot", - with = "layer", + what = "FeaturePlot_scCustom(slot)", + with = "FeaturePlot_scCustom(layer)", details = c("v" = "As of Seurat 5.0.0 the {.code slot} parameter is deprecated and replaced with {.code layer}.", "i" = "Please adjust code now to prepare for full deprecation.") ) @@ -104,7 +104,7 @@ FeaturePlot_scCustom <- function( # Check meta if (!is.null(x = split.by)) { - split.by <- Meta_Present(seurat_object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] + split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] } # Set or check split_collect values @@ -191,27 +191,52 @@ FeaturePlot_scCustom <- function( # Extract default reduction reduction <- reduction %||% DefaultDimReduc(object = seurat_object) + # Get Seurat version + seurat_version <- packageVersion("Seurat") + # Add alpha to color scales - if (!is.null(x = alpha_exp)) { + if (!is.null(x = alpha_exp) && seurat_version < "5") { colors_use <- alpha(colors_use, alpha_exp) } - if (!is.null(x = 
alpha_na_exp)) { - na_color <- alpha(na_color, alpha_exp) + if (!is.null(x = alpha_na_exp) && seurat_version < "5") { + na_color <- alpha(na_color, alpha_na_exp) + } + + if (!is.null(x = alpha_na_exp) && seurat_version >= "5") { + cli_warn(message = "{.code alpha_na_exp} is not currently supported for Seurat v5+") + } + + # Set alpha if NULL + if (is.null(x = alpha_exp) && seurat_version >= "5") { + alpha_exp <- 1 } # plot no split & combined if (is.null(x = split.by) && isTRUE(x = combine)) { - plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, ncol = num_columns, combine = combine, raster.dpi = raster.dpi, label = label, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, NA), na.value = na_color)) + # Keep until Seurat version required is > 5 + if (seurat_version >= "5") { + plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, ncol = num_columns, combine = combine, raster.dpi = raster.dpi, label = label, alpha = alpha_exp, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, NA), na.value = na_color)) + } else { + plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, ncol = num_columns, combine = combine, raster.dpi = raster.dpi, label = label, ...) 
& scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, NA), na.value = na_color)) + } } # plot no split & combined if (is.null(x = split.by) && isFALSE(x = combine)) { - plot_list <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, ncol = num_columns, combine = combine, raster.dpi = raster.dpi, label = label, ...)) + if (seurat_version >= "5") { + plot_list <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, ncol = num_columns, combine = combine, raster.dpi = raster.dpi, label = label, alpha = alpha_exp, ...)) - plot <- lapply(1:length(x = plot_list), function(i) { - p[[i]] <- suppressMessages(p[[i]] + scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, NA), na.value = na_color)) - }) + plot <- lapply(1:length(x = plot_list), function(i) { + p[[i]] <- suppressMessages(p[[i]] + scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, NA), na.value = na_color)) + }) + } else { + plot_list <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, ncol = num_columns, combine = combine, raster.dpi = raster.dpi, label = label, ...)) + + plot <- lapply(1:length(x = plot_list), function(i) { + p[[i]] <- suppressMessages(p[[i]] + scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, NA), na.value = na_color)) + }) + } } @@ -226,7 +251,11 @@ FeaturePlot_scCustom <- function( max_exp_value <- max(feature_data) min_exp_value <- min(feature_data) - plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, raster.dpi = raster.dpi, 
label = label, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, max_exp_value), na.value = na_color, name = all_found_features)) & RestoreLegend() & theme(axis.title.y.right = element_blank()) + if (seurat_version >= "5") { + plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, raster.dpi = raster.dpi, label = label, alpha = alpha_exp, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, max_exp_value), na.value = na_color, name = all_found_features)) & RestoreLegend() & theme(axis.title.y.right = element_blank()) + } else { + plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features, order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, raster.dpi = raster.dpi, label = label, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, max_exp_value), na.value = na_color, name = all_found_features)) & RestoreLegend() & theme(axis.title.y.right = element_blank()) + } if (isTRUE(x = label_feature_yaxis)) { plot <- plot + plot_layout(nrow = num_rows, ncol = num_columns) @@ -247,7 +276,7 @@ FeaturePlot_scCustom <- function( # plotting split multiple features if (!is.null(x = split.by) && length(x = all_found_features) > 1) { - plot_list <- lapply(1:length(x = features), function(i){ + plot_list <- lapply(1:length(x = all_found_features), function(i){ feature_data <- FetchData( object = seurat_object, vars = all_found_features[i], @@ -256,7 +285,12 @@ FeaturePlot_scCustom <- function( max_exp_value <- max(feature_data) min_exp_value <- min(feature_data) - single_plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features[i], order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, raster.dpi = raster.dpi, label = label, ...) 
& scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, max_exp_value), na.value = na_color, name = features[i])) & RestoreLegend() & theme(axis.title.y.right = element_blank()) + + if (seurat_version >= "5") { + single_plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features[i], order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, raster.dpi = raster.dpi, label = label, alpha = alpha_exp, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, max_exp_value), na.value = na_color, name = all_found_features[i])) & RestoreLegend() & theme(axis.title.y.right = element_blank()) + } else { + single_plot <- suppressMessages(FeaturePlot(object = seurat_object, features = all_found_features[i], order = order, pt.size = pt.size, reduction = reduction, raster = raster, split.by = split.by, raster.dpi = raster.dpi, label = label, ...) & scale_color_gradientn(colors = colors_use, limits = c(na_cutoff, max_exp_value), na.value = na_color, name = features[i])) & RestoreLegend() & theme(axis.title.y.right = element_blank()) + } if (isTRUE(x = label_feature_yaxis)) { single_plot <- single_plot + plot_layout(nrow = num_rows, ncol = num_columns) @@ -397,8 +431,8 @@ FeaturePlot_DualAssay <- function( # Check is slot is supplied if (lifecycle::is_present(slot)) { lifecycle::deprecate_warn(when = "2.0.0", - what = "slot", - with = "layer", + what = "FeaturePlot_DualAssay(slot)", + with = "FeaturePlot_DualAssay(layer)", details = c("v" = "As of Seurat 5.0.0 the {.code slot} parameter is deprecated and replaced with {.code layer}.", "i" = "Please adjust code now to prepare for full deprecation.") ) @@ -467,223 +501,6 @@ FeaturePlot_DualAssay <- function( } -#' Split FeatureScatter -#' -#' `r lifecycle::badge("deprecated")` -#' Create FeatureScatter using split.by -#' -#' @param seurat_object Seurat object name. -#' @param feature1 First feature to plot. 
-#' @param feature2 Second feature to plot. -#' @param split.by Feature to split plots by (i.e. "orig.ident"). -#' @param group.by Name of one or more metadata columns to group (color) cells by (for example, orig.ident). -#' Use 'ident' to group.by active.ident class. -#' @param colors_use color for the points on plot. -#' @param pt.size Adjust point size for plotting. -#' @param aspect_ratio Control the aspect ratio (y:x axes ratio length). Must be numeric value; -#' Default is NULL. -#' @param title_size size for plot title labels. -#' @param num_columns number of columns in final layout plot. -#' @param raster Convert points to raster format. Default is NULL which will rasterize by default if -#' greater than 100,000 cells. -#' @param raster.dpi Pixel resolution for rasterized plots, passed to geom_scattermore(). -#' Default is c(512, 512). -#' @param ggplot_default_colors logical. If `colors_use = NULL`, Whether or not to return plot using -#' default ggplot2 "hue" palette instead of default "polychrome" or "varibow" palettes. -#' @param color_seed random seed for the "varibow" palette shuffle if `colors_use = NULL` and number of -#' groups plotted is greater than 36. Default = 123. -#' @param ... Extra parameters passed to \code{\link[Seurat]{FeatureScatter}}. -#' -#' @return A ggplot object -#' -#' @import cli -#' @import ggplot2 -#' @import patchwork -#' @importFrom magrittr "%>%" -#' @importFrom Seurat FeatureScatter -#' @importFrom stats cor -#' -#' @export -#' -#' @concept seurat_plotting -#' -#' @examples -#' \dontrun{ -#' # Function now DEPRECATED. 
-#' library(Seurat) -#' pbmc_small$sample_id <- sample(c("sample1", "sample2"), size = ncol(pbmc_small), replace = TRUE) -#' -#' # OLD Code -#' Split_FeatureScatter(seurat_object = pbmc_small, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", -#' split.by = "sample_id") -#' -#' # NEW Code -#' FeatureScatter_scCustom(seurat_object = pbmc_small, feature1 = "nCount_RNA", -#' feature2 = "nFeature_RNA", split.by = "sample_id") -#'} -#' - -Split_FeatureScatter <- function( - seurat_object, - feature1 = NULL, - feature2 = NULL, - split.by = NULL, - group.by = NULL, - colors_use = NULL, - pt.size = NULL, - aspect_ratio = NULL, - title_size = 15, - num_columns = NULL, - raster = NULL, - raster.dpi = c(512, 512), - ggplot_default_colors = FALSE, - color_seed = 123, - ... -) { - lifecycle::deprecate_stop(when = "2.0.0", - what = "Split_FeatureScatter()", - with = "FeatureScatter_scCustom()", - details = c("i" = "The functionality is now contained within `FeatureScatter_scCustom`") - ) - - # Check Seurat - Is_Seurat(seurat_object = seurat_object) - - # split.by present - if (is.null(x = split.by)) { - cli_abort(message = "No value supplied to {.code split.by}.") - } - - # Check split.by is valid - if (split.by %in% colnames(seurat_object@meta.data) == FALSE) { - cli_abort(message = c("The meta data variable: {.val {split.by}} could not be found in object@meta.data.", - "i" = "Please check the spelling and column names of meta.data slot.") - ) - } - - # Set column and row lengths - split.by_length <- length(x = unique(x = seurat_object@meta.data[[split.by]])) - - if (is.null(x = num_columns)) { - num_columns <- split.by_length - } - # Calculate number of rows for selected number of columns - num_rows <- ceiling(x = split.by_length/num_columns) - - # Check column and row compatibility - if (num_columns > split.by_length) { - cli_abort(message = c("The number of columns specified is greater than the number of meta data variables.", - "*" = "{.val {split.by}} only contains 
{.field {split.by_length}} variables.", - "i" = "Please adjust {.code num_columns} to be less than or equal to {.field {split.by_length}}.") - ) - } - - # Check features are present - possible_features <- c(rownames(x = seurat_object), colnames(x = seurat_object@meta.data)) - check_features <- setdiff(x = c(feature1, feature2), y = possible_features) - if (length(x = check_features) > 0) { - cli_abort(message = "The following feature(s) were not present in Seurat object: '{.field {check_features}}'") - } - - # Extract min/maxes of features - data_to_plot <- FetchData(object = seurat_object, vars = c(feature1, feature2)) - cor_data_features <- c("nCount_RNA", "nFeature_RNA") - if (feature1 %in% cor_data_features && feature2 %in% cor_data_features) { - min_feature1 <- min(data_to_plot[, feature1])-1 - max_feature1 <- max(data_to_plot[, feature1])+1 - min_feature2 <- min(data_to_plot[, feature2])-1 - max_feature2 <- max(data_to_plot[, feature2])+1 - } else { - min_feature1 <- min(data_to_plot[, feature1])-0.05 - max_feature1 <- max(data_to_plot[, feature1])+0.05 - min_feature2 <- min(data_to_plot[, feature2])-0.05 - max_feature2 <- max(data_to_plot[, feature2])+0.05 - } - - # Extract split.by list of values - if (inherits(x = seurat_object@meta.data[, split.by], what = "factor")) { - meta_sample_list <- as.character(x = levels(x = seurat_object@meta.data[, split.by])) - } else { - meta_sample_list <- as.character(x = unique(x = seurat_object@meta.data[, split.by])) - } - - # Extract cell names per meta data list of values - cell_names <- lapply(meta_sample_list, function(x) { - row.names(x = seurat_object@meta.data)[which(x = seurat_object@meta.data[, split.by] == x)]}) - - # raster check - raster <- raster %||% (length(x = Cells(x = seurat_object)) > 2e5) - - # Set uniform point size is pt.size = NULL (based on plot with most cells) - if (is.null(x = pt.size)) { - # cells per meta data - cells_by_meta <- data.frame(table(seurat_object@meta.data[, split.by])) - # 
Identity with greatest number of cells - max_cells <- max(cells_by_meta$Freq) - # modified version of the autopointsize function from Seurat - pt.size <- AutoPointSize_scCustom(data = max_cells, raster = raster) - } - - # Add correlations if applicable - cor_data_features <- c("nCount_RNA", "nFeature_RNA") - if (feature1 %in% cor_data_features && feature2 %in% cor_data_features) { - plot_cor <- TRUE - cor_data <- FetchData(object = seurat_object, vars = c("nCount_RNA", "nFeature_RNA", split.by)) - - cor_values <- lapply(1:length(x = meta_sample_list), function(i) { - cor_data_filtered <- cor_data %>% - filter(.data[[split.by]] == meta_sample_list[[i]]) - round(x = cor(x = cor_data_filtered[, "nCount_RNA"], y = cor_data_filtered[, "nFeature_RNA"]), digits = 2) - }) - } else { - plot_cor <- FALSE - } - - # Set colors - group.by <- group.by %||% 'ident' - - if (group.by == "ident") { - group_by_length <- length(x = unique(x = seurat_object@active.ident)) - } else { - group_by_length <- length(x = unique(x = seurat_object@meta.data[[group.by]])) - } - - if (is.null(x = colors_use)) { - # set default plot colors - if (is.null(x = colors_use)) { - colors_use <- scCustomize_Palette(num_groups = group_by_length, ggplot_default_colors = ggplot_default_colors, color_seed = color_seed) - } - } - - # Plots - plots <- lapply(1:length(x = meta_sample_list), function(j) { - plot <- FeatureScatter(seurat_object, feature1 = feature1, feature2 = feature2, cells = cell_names[[j]], group.by = group.by, cols = colors_use, pt.size = pt.size, raster = raster, raster.dpi = raster.dpi, ...) 
+ - theme(plot.title = element_text(hjust = 0.5, size = title_size), - legend.position = "right") + - xlim(min_feature1, max_feature1) + - ylim(min_feature2, max_feature2) - if (isTRUE(x = plot_cor)) { - plot + ggtitle(paste(meta_sample_list[[j]]), subtitle = paste0("Correlation: ", cor_values[j])) - } else { - plot + ggtitle(paste(meta_sample_list[[j]])) - } - }) - - # Wrap Plots into single output - plot_comb <- wrap_plots(plots, ncol = num_columns, nrow = num_rows) + plot_layout(guides = 'collect') - - # Aspect ratio changes - if (!is.null(x = aspect_ratio)) { - if (!is.numeric(x = aspect_ratio)) { - cli_abort(message = "{.code aspect_ratio} must be a {.field numeric} value.") - } - plot_comb <- plot_comb & theme(aspect.ratio = aspect_ratio) - } - - return(plot_comb) -} - - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #################### GENE EXPRESSION PLOTTING (NON-2D) #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -758,12 +575,12 @@ VlnPlot_scCustom <- function( # Check split valid if (!is.null(x = split.by)) { - split.by <- Meta_Present(seurat_object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] + split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] } # Add check for group.by before getting to colors if (!is.null(x = group.by) && group.by != "ident") { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } # Check features and meta to determine which features present @@ -778,7 +595,7 @@ VlnPlot_scCustom <- function( # set size if NULL if (isTRUE(x = plot_boxplot)) { if (!is.null(x = pt.size)) { - cli::cli_warn(message = c("Provided value for {.code pt.size} ({.field {pt.size}}) will be ignored.", + cli_warn(message = c("Provided value for 
{.code pt.size} ({.field {pt.size}}) will be ignored.", "When setting {.field plot_boxplot = TRUE}, {.code pt.size} is automatically set to 0.")) } pt.size <- 0 @@ -931,7 +748,7 @@ Stacked_VlnPlot <- function( # Add check for group.by before getting to colors if (!is.null(x = group.by) && group.by != "ident") { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } # Check features and meta to determine which features present @@ -940,12 +757,16 @@ Stacked_VlnPlot <- function( # set pt.size (default is no points) if (is.null(x = pt.size)) { pt.size <- 0 + if (isTRUE(x = raster)) { + cli_inform(message = "Default pt.size is 0, setting {.code raster = FALSE}.") + } + raster <- FALSE } # Set rasterization num_cells <- length(x = Cells(x = seurat_object)) - if (length(x = num_cells) * length(x = all_found_features) > 100000 && is.null(x = raster) && pt.size != 0) { + if (num_cells * length(x = all_found_features) > 100000 && is.null(x = raster) && pt.size != 0) { raster <- TRUE cli_inform(message = c("NOTE: Rasterizing points since total number of points across all plots exceeds 100,000.", "i" = "To plot in vector form set {.code raster=FALSE}") @@ -1078,14 +899,14 @@ DotPlot_scCustom <- function( # Add check for group.by before getting to colors if (!is.null(x = group.by) && group.by != "ident") { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } # Check features and meta to determine which features present all_found_features <- Feature_PreCheck(object = seurat_object, features = features) # Plot - plot <- suppressMessages(DotPlot(object = seurat_object, features = all_found_features, ...) + + plot <- suppressMessages(DotPlot(object = seurat_object, features = all_found_features, group.by = group.by, ...) 
+ scale_color_gradientn(colors = colors_use) ) # Modify plot @@ -1122,6 +943,7 @@ DotPlot_scCustom <- function( #' #' @param seurat_object Seurat object name. #' @param features Features to plot. +#' @param split.by Variable in `@meta.data` to split the identities plotted by. #' @param colors_use_exp Color palette to use for plotting expression scale. Default is `viridis::plasma(n = 20, direction = -1)`. #' @param exp_color_min Minimum scaled average expression threshold (everything smaller will be set to this). #' Default is -2. @@ -1129,6 +951,8 @@ DotPlot_scCustom <- function( #' By default will be set to value in middle of `exp_color_min` and `exp_color_max`. #' @param exp_color_max Minimum scaled average expression threshold (everything smaller will be set to this). #' Default is 2. +#' @param exp_value_type Whether to plot average normalized expression or +#' scaled average normalized expression. Only valid when `split.by` is provided. #' @param print_exp_quantiles Whether to print the quantiles of expression data in addition to plots. #' Default is FALSE. NOTE: These values will be altered by choices of `exp_color_min` and `exp_color_min` #' if there are values below or above those cutoffs, respectively. @@ -1137,6 +961,11 @@ DotPlot_scCustom <- function( #' will use "varibow" with shuffle = TRUE both from `DiscretePalette_scCustomize`. #' @param x_lab_rotate How to rotate column labels. By default set to `TRUE` which rotates labels 45 degrees. #' If set `FALSE` rotation is set to 0 degrees. Users can also supply custom angle for text rotation. +#' @param plot_padding if plot needs extra white space padding so no plot or labels are cutoff. +#' The parameter accepts TRUE or numeric vector of length 4. If TRUE padding will be set to +#' c(2, 10, 0 0) (bottom, left, top, right). Can also be customized further with numeric +#' vector of length 4 specifying the amount of padding in millimeters. +#' Default is NULL, no padding. 
#' @param flip logical, whether to flip the axes of final plot. Default is FALSE; rows = features and #' columns = idents. #' @param k Value to use for k-means clustering on features Sets (km) parameter in `ComplexHeatmap::Heatmap()`. @@ -1146,12 +975,11 @@ DotPlot_scCustom <- function( #' @param feature_km_repeats Number of k-means runs to get a consensus k-means clustering for features. #' Note if `feature_km_repeats` is set to value greater than one, the final number of groups might be #' smaller than row_km, but this might mean the original row_km is not a good choice. Default is 1000. -#' @param row_km_repeats `r lifecycle::badge("deprecated")` soft-deprecated. See `feature_km_repeats` #' @param ident_km_repeats Number of k-means runs to get a consensus k-means clustering. Similar to #' `feature_km_repeats`. Default is 1000. -#' @param column_km_repeats `r lifecycle::badge("deprecated")` soft-deprecated. See `ident_km_repeats` #' @param row_label_size Size of the feature labels. Provided to `row_names_gp` in Heatmap call. #' @param row_label_fontface Fontface to use for row labels. Provided to `row_names_gp` in Heatmap call. +#' @param grid_color color to use for heatmap grid. Default is NULL which "removes" grid by using NA color. #' @param cluster_feature logical, whether to cluster and reorder feature axis. Default is TRUE. #' @param cluster_ident logical, whether to cluster and reorder identity axis. Default is TRUE. #' @param column_label_size Size of the feature labels. Provided to `column_names_gp` in Heatmap call. 
@@ -1207,21 +1035,23 @@ DotPlot_scCustom <- function( Clustered_DotPlot <- function( seurat_object, features, + split.by = NULL, colors_use_exp = viridis_plasma_dark_high, exp_color_min = -2, exp_color_middle = NULL, exp_color_max = 2, + exp_value_type = "scaled", print_exp_quantiles = FALSE, colors_use_idents = NULL, x_lab_rotate = TRUE, + plot_padding = NULL, flip = FALSE, k = 1, feature_km_repeats = 1000, ident_km_repeats = 1000, - row_km_repeats = deprecated(), - column_km_repeats = deprecated(), row_label_size = 8, row_label_fontface = "plain", + grid_color = NULL, cluster_feature = TRUE, cluster_ident = TRUE, column_label_size = 8, @@ -1238,347 +1068,73 @@ Clustered_DotPlot <- function( color_seed = 123, seed = 123 ) { - # Check for packages - ComplexHeatmap_check <- is_installed(pkg = "ComplexHeatmap") - if (isFALSE(x = ComplexHeatmap_check)) { - cli_abort(message = c( - "Please install the {.val ComplexHeatmap} package to use {.code Clustered_DotPlot}", - "i" = "This can be accomplished with the following commands: ", - "----------------------------------------", - "{.field `install.packages({symbol$dquote_left}BiocManager{symbol$dquote_right})`}", - "{.field `BiocManager::install({symbol$dquote_left}ComplexHeatmap{symbol$dquote_right})`}", - "----------------------------------------" - )) - } - - if (lifecycle::is_present(row_km_repeats)) { - lifecycle::deprecate_stop(when = "2.0.0", - what = "Clustered_DotPlot(row_km_repeats)", - with = "Clustered_DotPlot(feature_km_repeats)" - ) - feature_km_repeats <- row_km_repeats - } - - if (lifecycle::is_present(column_km_repeats)) { - lifecycle::deprecate_stop(when = "2.0.0", - what = "Clustered_DotPlot(column_km_repeats)", - with = "Clustered_DotPlot(ident_km_repeats)" - ) - ident_km_repeats <- column_km_repeats - } - - - # Check Seurat - Is_Seurat(seurat_object = seurat_object) - - # Check acceptable fontface - if (!row_label_fontface %in% c("plain", "bold", "italic", "oblique", "bold.italic")) { - 
cli_abort(message = c("{.code row_label_face} {.val {row_label_face}} not recognized.", - "i" = "Must be one of {.val plain}, {.val bold}, {.val italic}, {.val olique}, or {.val bold.italic}.")) - } - - # Check unique features - features_unique <- unique(x = features) - - if (length(x = features_unique) != length(x = features)) { - cli_warn("Feature list contains duplicates, making unique.") - } - - # Check features and meta to determine which features present - all_found_features <- Feature_PreCheck(object = seurat_object, features = features_unique, assay = assay) - - # Check exp min/max set correctly - if (!exp_color_min < exp_color_max) { - cli_abort(message = c("Expression color min/max values are not compatible.", - "i" = "The value for {.code exp_color_min}: {.field {exp_color_min}} must be less than the value for {.code exp_color_max}: {.field {exp_color_max}}.") - ) - } - - # Get DotPlot data - seurat_plot <- DotPlot(object = seurat_object, features = all_found_features, assay = assay, group.by = group.by, scale = TRUE, idents = idents, col.min = NULL, col.max = NULL) - - data <- seurat_plot$data - - # Get expression data - exp_mat <- data %>% - select(-any_of(c("pct.exp", "avg.exp"))) %>% - pivot_wider(names_from = any_of("id"), values_from = any_of("avg.exp.scaled")) %>% - as.data.frame() - - row.names(x = exp_mat) <- exp_mat$features.plot - - # Check NAs if idents - if (!is.null(x = idents)) { - # Find NA features and print warning - excluded_features <- exp_mat[rowSums(is.na(x = exp_mat)) > 0,] %>% - rownames() - cli_warn(message = c("Some scaled data missing.", - "*" = "The following features were removed as there is no scaled expression present in subset (`idents`) of object provided:", - "i" = "{.field {glue_collapse_scCustom(input_string = excluded_features, and = TRUE)}}.") - ) - - # Extract good features - good_features <- rownames(x = exp_mat) - - # Remove rows with NAs - exp_mat <- exp_mat %>% - filter(.data[["features.plot"]] %in% 
good_features) - } - - exp_mat <- exp_mat[,-1] %>% - as.matrix() - - # Get percent expressed data - percent_mat <- data %>% - select(-any_of(c("avg.exp", "avg.exp.scaled"))) %>% - pivot_wider(names_from = any_of("id"), values_from = any_of("pct.exp")) %>% - as.data.frame() - - row.names(x = percent_mat) <- percent_mat$features.plot - - # Subset dataframe for NAs if idents so that exp_mat and percent_mat match - if (!is.null(x = idents)) { - percent_mat <- percent_mat %>% - filter(.data[["features.plot"]] %in% good_features) - } - - percent_mat <- percent_mat[,-1] %>% - as.matrix() - - # print quantiles - if (isTRUE(x = print_exp_quantiles)) { - cli_inform(message = "Quantiles of gene expression data are:") - print(quantile(exp_mat, c(0.1, 0.5, 0.9, 0.99))) - } - - # set assay (if null set to active assay) - assay <- assay %||% DefaultAssay(object = seurat_object) - - # Set default color palette based on number of levels being plotted - if (is.null(x = group.by)) { - group_by_length <- length(x = unique(x = seurat_object@active.ident)) - } else { - group_by_length <- length(x = unique(x = seurat_object@meta.data[[group.by]])) - } - - # Check colors use vs. 
ggplot2 color scale - if (!is.null(x = colors_use_idents) && isTRUE(x = ggplot_default_colors)) { - cli_abort(message = "Cannot provide both custom palette to {.code colors_use} and specify {.code ggplot_default_colors = TRUE}.") - } - if (is.null(x = colors_use_idents)) { - # set default plot colors - colors_use_idents <- scCustomize_Palette(num_groups = group_by_length, ggplot_default_colors = ggplot_default_colors, color_seed = color_seed) - } - - # Reduce color length list due to naming requirement - colors_use_idents <- colors_use_idents[1:group_by_length] - - # Modify if class = "colors" - if (inherits(x = colors_use_idents, what = "colors")) { - colors_use_idents <- as.vector(x = colors_use_idents) - } - - # Pull Annotation and change colors to ComplexHeatmap compatible format - Identity <- colnames(x = exp_mat) - - identity_colors <- colors_use_idents - names(x = identity_colors) <- Identity - identity_colors_list <- list(Identity = identity_colors) - - # Create identity annotation - if (isTRUE(x = flip)) { - column_ha <- ComplexHeatmap::rowAnnotation(Identity = Identity, - col = identity_colors_list, - na_col = "grey", - name = "Identity", - show_legend = FALSE - ) - } else { - column_ha <- ComplexHeatmap::HeatmapAnnotation(Identity = Identity, - col = identity_colors_list, - na_col = "grey", - name = "Identity", - show_legend = FALSE - ) - } - - # Set middle of color scale if not specified - if (is.null(x = exp_color_middle)) { - exp_color_middle <- Middle_Number(min = exp_color_min, max = exp_color_max) - } - - palette_length <- length(x = colors_use_exp) - palette_middle <- Middle_Number(min = 0, max = palette_length) - - # Create palette - col_fun = colorRamp2(c(exp_color_min, exp_color_middle, exp_color_max), colors_use_exp[c(1,palette_middle, palette_length)]) - - # Calculate and plot Elbow - if (isTRUE(x = plot_km_elbow)) { - # if elbow_kmax not NULL check it is usable - if (!is.null(x = elbow_kmax) && elbow_kmax > (nrow(x = exp_mat) - 1)) { - 
elbow_kmax <- nrow(x = exp_mat) - 1 - cli_warn(message = c("The value provided for {.code elbow_kmax} is too large.", - "i" = "Changing to (length(x = features)-1): {.field {elbow_kmax}}.") - ) - } - - # if elbow_kmax is NULL set value based on input feature list - if (is.null(x = elbow_kmax)) { - # set to (length(x = features)-1) if less than 21 features OR to 20 if greater than 21 features - if (nrow(x = exp_mat) > 21) { - elbow_kmax <- 20 - } else { - elbow_kmax <- nrow(x = exp_mat) - 1 - } - } - - km_elbow_plot <- kMeans_Elbow(data = exp_mat, k_max = elbow_kmax) - } - - # prep heatmap - if (isTRUE(x = flip)) { - if (isTRUE(x = raster)) { - layer_fun_flip = function(i, j, x, y, w, h, fill) { - grid.rect(x = x, y = y, width = w, height = h, - gp = gpar(col = NA, fill = NA)) - grid.circle(x=x,y=y,r= sqrt(ComplexHeatmap::pindex(percent_mat, i, j)/100) * unit(2, "mm"), - gp = gpar(fill = col_fun(ComplexHeatmap::pindex(exp_mat, i, j)), col = NA)) - } - } else { - cell_fun_flip = function(i, j, x, y, w, h, fill) { - grid.rect(x = x, y = y, width = w, height = h, - gp = gpar(col = NA, fill = NA)) - grid.circle(x=x,y=y,r= sqrt(percent_mat[i, j]/100) * unit(2, "mm"), - gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA)) - } - } - } else { - if (isTRUE(x = raster)) { - layer_fun = function(j, i, x, y, w, h, fill) { - grid.rect(x = x, y = y, width = w, height = h, - gp = gpar(col = NA, fill = NA)) - grid.circle(x=x,y=y,r= sqrt(ComplexHeatmap::pindex(percent_mat, i, j)/100) * unit(2, "mm"), - gp = gpar(fill = col_fun(ComplexHeatmap::pindex(exp_mat, i, j)), col = NA)) - } - } else { - cell_fun = function(j, i, x, y, w, h, fill) { - grid.rect(x = x, y = y, width = w, height = h, - gp = gpar(col = NA, fill = NA)) - grid.circle(x=x,y=y,r= sqrt(percent_mat[i, j]/100) * unit(2, "mm"), - gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA)) - } - } - } - - # Create legend for point size - lgd_list = list( - ComplexHeatmap::Legend(at = Identity, title = "Identity", legend_gp = 
gpar(fill = identity_colors_list[[1]]), labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), - ComplexHeatmap::Legend(labels = c(0.25,0.5,0.75,1), title = "Percent Expressing", - graphics = list( - function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.25) * unit(2, "mm"), - gp = gpar(fill = "black")), - function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.5) * unit(2, "mm"), - gp = gpar(fill = "black")), - function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.75) * unit(2, "mm"), - gp = gpar(fill = "black")), - function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"), - gp = gpar(fill = "black"))), - labels_gp = gpar(fontsize = legend_label_size), - title_gp = gpar(fontsize = legend_title_size, fontface = "bold") - ) - ) - - # Set x label roration - if (is.numeric(x = x_lab_rotate)) { - x_lab_rotate <- x_lab_rotate - } else if (isTRUE(x = x_lab_rotate)) { - x_lab_rotate <- 45 - } else { - x_lab_rotate <- 0 - } - - # Create Plot - set.seed(seed = seed) - if (isTRUE(x = raster)) { - if (isTRUE(x = flip)) { - cluster_dot_plot <- ComplexHeatmap::Heatmap(t(exp_mat), - heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), - col=col_fun, - rect_gp = gpar(type = "none"), - layer_fun = layer_fun, - row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), - column_names_gp = gpar(fontsize = column_label_size), - column_km = k, - row_km_repeats = ident_km_repeats, - border = "black", - left_annotation = column_ha, - column_km_repeats = feature_km_repeats, - show_parent_dend_line = show_parent_dend_line, - column_names_rot = x_lab_rotate, - cluster_rows = cluster_ident, - cluster_columns = cluster_feature) - } else { - cluster_dot_plot <- ComplexHeatmap::Heatmap(exp_mat, - heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), 
title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), - col=col_fun, - rect_gp = gpar(type = "none"), - layer_fun = layer_fun, - row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), - column_names_gp = gpar(fontsize = column_label_size), - row_km = k, - row_km_repeats = feature_km_repeats, - border = "black", - top_annotation = column_ha, - column_km_repeats = ident_km_repeats, - show_parent_dend_line = show_parent_dend_line, - column_names_rot = x_lab_rotate, - cluster_rows = cluster_feature, - cluster_columns = cluster_ident) - } + # check split + if (is.null(x = split.by)) { + Clustered_DotPlot_Single_Group(seurat_object = seurat_object, + features = features, + colors_use_exp = colors_use_exp, + exp_color_min = exp_color_min, + exp_color_middle = exp_color_middle, + exp_color_max = exp_color_max, + print_exp_quantiles = print_exp_quantiles, + colors_use_idents = colors_use_idents, + x_lab_rotate = x_lab_rotate, + plot_padding = plot_padding, + flip = flip, + k = k, + feature_km_repeats = feature_km_repeats, + ident_km_repeats = ident_km_repeats, + row_label_size = row_label_size, + row_label_fontface = row_label_fontface, + grid_color = grid_color, + cluster_feature = cluster_feature, + cluster_ident = cluster_ident, + column_label_size = column_label_size, + legend_label_size = legend_label_size, + legend_title_size = legend_title_size, + raster = raster, + plot_km_elbow = plot_km_elbow, + elbow_kmax = elbow_kmax, + assay = assay, + group.by = group.by, + idents = idents, + show_parent_dend_line = show_parent_dend_line, + ggplot_default_colors = ggplot_default_colors, + color_seed = color_seed, + seed = seed) } else { - if (isTRUE(x = flip)) { - cluster_dot_plot <- ComplexHeatmap::Heatmap(t(exp_mat), - heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), - col=col_fun, - rect_gp = gpar(type = "none"), - cell_fun = 
cell_fun_flip, - row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), - column_names_gp = gpar(fontsize = column_label_size), - column_km = k, - row_km_repeats = ident_km_repeats, - border = "black", - left_annotation = column_ha, - column_km_repeats = feature_km_repeats, - show_parent_dend_line = show_parent_dend_line, - column_names_rot = x_lab_rotate, - cluster_rows = cluster_ident, - cluster_columns = cluster_feature) - } else { - cluster_dot_plot <- ComplexHeatmap::Heatmap(exp_mat, - heatmap_legend_param=list(title="Expression", labels_gp = gpar(fontsize = legend_label_size), title_gp = gpar(fontsize = legend_title_size, fontface = "bold")), - col=col_fun, - rect_gp = gpar(type = "none"), - cell_fun = cell_fun, - row_names_gp = gpar(fontsize = row_label_size, fontface = row_label_fontface), - column_names_gp = gpar(fontsize = column_label_size), - row_km = k, - row_km_repeats = feature_km_repeats, - border = "black", - top_annotation = column_ha, - column_km_repeats = ident_km_repeats, - show_parent_dend_line = show_parent_dend_line, - column_names_rot = x_lab_rotate, - cluster_rows = cluster_feature, - cluster_columns = cluster_ident) - } - } - - # Add pt.size legend & return plots - if (isTRUE(x = plot_km_elbow)) { - return(list(km_elbow_plot, ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list))) + Clustered_DotPlot_Multi_Group(seurat_object = seurat_object, + features = features, + split.by = split.by, + colors_use_exp = colors_use_exp, + exp_color_min = exp_color_min, + exp_color_middle = exp_color_middle, + exp_color_max = exp_color_max, + exp_value_type = exp_value_type, + print_exp_quantiles = print_exp_quantiles, + x_lab_rotate = x_lab_rotate, + plot_padding = plot_padding, + flip = flip, + k = k, + feature_km_repeats = feature_km_repeats, + ident_km_repeats = ident_km_repeats, + row_label_size = row_label_size, + row_label_fontface = row_label_fontface, + grid_color = grid_color, + cluster_feature = 
cluster_feature, + cluster_ident = cluster_ident, + column_label_size = column_label_size, + legend_label_size = legend_label_size, + legend_title_size = legend_title_size, + raster = raster, + plot_km_elbow = plot_km_elbow, + elbow_kmax = elbow_kmax, + assay = assay, + group.by = group.by, + idents = idents, + show_parent_dend_line = show_parent_dend_line, + seed = seed) } - return(ComplexHeatmap::draw(cluster_dot_plot, annotation_legend_list = lgd_list)) } @@ -1784,7 +1340,7 @@ Meta_Highlight_Plot <- function( Is_Seurat(seurat_object = seurat_object) # Check meta data - good_meta_data_column <- Meta_Present(seurat_object = seurat_object, meta_col_names = meta_data_column, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE)[[1]] + good_meta_data_column <- Meta_Present(object = seurat_object, meta_col_names = meta_data_column, omit_warn = FALSE, print_msg = FALSE, return_none = TRUE)[[1]] # stop if none found if (length(x = good_meta_data_column) == 0) { @@ -2134,15 +1690,15 @@ DimPlot_scCustom <- function( } if (!is.null(x = split.by)) { - split.by <- Meta_Present(seurat_object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] + split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] } # Add check for group.by before getting to colors if (length(x = group.by) > 1) { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } else { if (!is.null(x = group.by) && group.by != "ident") { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } } @@ -2365,7 +1921,7 @@ DimPlot_scCustom <- function( ylim(y_axis) # Normalize the colors across all plots - plot <- suppressMessages(plot + scale_color_manual(values = colors_overall)) + 
plot <- suppressMessages(plot + scale_color_manual(values = colors_overall, drop = FALSE)) if (!is.null(x = group.by)) { plot <- plot + labs(color=group.by) @@ -2691,15 +2247,15 @@ FeatureScatter_scCustom <- function( Is_Seurat(seurat_object = seurat_object) if (!is.null(x = split.by)) { - split.by <- Meta_Present(seurat_object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] + split.by <- Meta_Present(object = seurat_object, meta_col_names = split.by, print_msg = FALSE, omit_warn = FALSE)[[1]] } # Add check for group.by before getting to colors if (length(x = group.by) > 1) { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } else { if (!is.null(x = group.by) && group.by != "ident") { - Meta_Present(seurat_object = seurat_object, meta_col_names = group.by, print_msg = FALSE) + Meta_Present(object = seurat_object, meta_col_names = group.by, print_msg = FALSE) } } diff --git a/R/Statistics.R b/R/Statistics.R index 262a65e58c..d45240b002 100644 --- a/R/Statistics.R +++ b/R/Statistics.R @@ -129,8 +129,8 @@ Percent_Expressing <- function( # Check is slot is supplied if (lifecycle::is_present(slot)) { lifecycle::deprecate_warn(when = "2.0.0", - what = "slot", - with = "layer", + what = "Percent_Expressing(slot)", + with = "Percent_Expressing(layer)", details = c("v" = "As of Seurat 5.0.0 the {.code slot} parameter is deprecated and replaced with {.code layer}.", "i" = "Please adjust code now to prepare for full deprecation.") ) @@ -141,9 +141,13 @@ Percent_Expressing <- function( assay <- assay %||% DefaultAssay(object = seurat_object) # Check features exist in object - features_list <- Gene_Present(data = seurat_object, gene_list = features, print_msg = FALSE, case_check = TRUE, seurat_assay = assay)[[1]] + features_list <- Feature_Present(data = seurat_object, features = features, print_msg = FALSE, 
case_check = TRUE, seurat_assay = assay)[[1]] # Check group_by is in object + if (!is.null(x = group_by) && group_by == "ident") { + group_by <- NULL + } + if (!is.null(x = group_by)) { possible_groups <- colnames(x = seurat_object@meta.data) if (!group_by %in% possible_groups) { @@ -251,12 +255,12 @@ Median_Stats <- function( } # Check group variable present - group_by_var <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by_var, print_msg = FALSE)[[1]] + group_by_var <- Meta_Present(object = seurat_object, meta_col_names = group_by_var, print_msg = FALSE)[[1]] # Check stats variables present all_variables <- c(default_var, median_var) - all_variables <- Meta_Present(seurat_object = seurat_object, meta_col_names = all_variables, print_msg = FALSE)[[1]] + all_variables <- Meta_Present(object = seurat_object, meta_col_names = all_variables, print_msg = FALSE)[[1]] # Filter meta data for columns of interest meta_numeric_check <- Fetch_Meta(object = seurat_object) %>% @@ -341,12 +345,12 @@ MAD_Stats <- function( } # Check group variable present - group_by_var <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by_var, print_msg = FALSE)[[1]] + group_by_var <- Meta_Present(object = seurat_object, meta_col_names = group_by_var, print_msg = FALSE)[[1]] # Check stats variables present all_variables <- c(default_var, mad_var) - all_variables <- Meta_Present(seurat_object = seurat_object, meta_col_names = all_variables, print_msg = FALSE)[[1]] + all_variables <- Meta_Present(object = seurat_object, meta_col_names = all_variables, print_msg = FALSE)[[1]] # Filter meta data for columns of interest meta_numeric_check <- Fetch_Meta(object = seurat_object) %>% diff --git a/R/Statistics_Plotting.R b/R/Statistics_Plotting.R index bb88781bb5..a5e1accb3e 100644 --- a/R/Statistics_Plotting.R +++ b/R/Statistics_Plotting.R @@ -52,10 +52,10 @@ Plot_Median_Genes <- function( Is_Seurat(seurat_object = seurat_object) # Check group by is valid - 
group_by <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] + group_by <- Meta_Present(object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] # Check sample_col is valid - sample_col <- Meta_Present(seurat_object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] + sample_col <- Meta_Present(object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] # Calculate medians and merge with meta.data medians <- Median_Stats(seurat_object = seurat_object, group_by_var = sample_col, median_var = "nFeature_RNA", default_var = FALSE) %>% @@ -190,10 +190,10 @@ Plot_Median_UMIs <- function( Is_Seurat(seurat_object = seurat_object) # Check group by is valid - group_by <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] + group_by <- Meta_Present(object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] # Check sample_col is valid - sample_col <- Meta_Present(seurat_object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] + sample_col <- Meta_Present(object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] # Calculate medians and merge with meta.data medians <- Median_Stats(seurat_object = seurat_object, group_by_var = sample_col, median_var = "nCount_RNA", default_var = FALSE) %>% @@ -329,10 +329,10 @@ Plot_Median_Mito <- function( Is_Seurat(seurat_object = seurat_object) # Check group by is valid - group_by <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] + group_by <- Meta_Present(object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] # Check sample_col is valid - sample_col <- Meta_Present(seurat_object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] + sample_col <- Meta_Present(object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] # Calculate medians and merge 
with meta.data medians <- Median_Stats(seurat_object = seurat_object, group_by_var = sample_col, median_var = "percent_mito", default_var = FALSE) %>% @@ -483,10 +483,10 @@ Plot_Median_Other <- function( } # Check group by is valid - group_by <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] + group_by <- Meta_Present(object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] # Check sample_col is valid - sample_col <- Meta_Present(seurat_object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] + sample_col <- Meta_Present(object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] # Calculate medians and merge with meta.data medians <- Median_Stats(seurat_object = seurat_object, group_by_var = sample_col, median_var = median_var, default_var = FALSE) %>% @@ -588,7 +588,7 @@ Plot_Median_Other <- function( #' #' @import cli #' @import ggplot2 -#' @import rlang +#' @importFrom rlang "%||%" ":=" #' @importFrom dplyr select slice left_join rename all_of #' @importFrom magrittr "%>%" #' @@ -624,10 +624,10 @@ Plot_Cells_per_Sample <- function( } # Check group by is valid - group_by <- Meta_Present(seurat_object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] + group_by <- Meta_Present(object = seurat_object, meta_col_names = group_by, print_msg = FALSE)[[1]] # Check sample_col is valid - sample_col <- Meta_Present(seurat_object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] + sample_col <- Meta_Present(object = seurat_object, meta_col_names = sample_col, print_msg = FALSE)[[1]] # Calculate total cells and merge with meta.data total_cells <- table(seurat_object@meta.data[[sample_col]]) %>% @@ -835,7 +835,7 @@ CellBender_Diff_Plot <- function( } } else { # check for features - features_list <- Gene_Present(data = feature_diff_df_filtered, gene_list = custom_labels, omit_warn = FALSE, print_msg = FALSE, case_check_msg = FALSE, 
return_none = TRUE) + features_list <- Feature_Present(data = feature_diff_df_filtered, features = custom_labels, omit_warn = FALSE, print_msg = FALSE, case_check_msg = FALSE, return_none = TRUE) all_not_found_features <- features_list[[2]] diff --git a/R/Utilities.R b/R/Utilities.R index d2a92905ed..0ef59c827f 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -2,6 +2,7 @@ #################### OBJECT HELPERS #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + #' Check if genes/features are present #' #' Check if genes are present in object and return vector of found genes. Return warning messages for @@ -9,6 +10,165 @@ #' #' @param data Name of input data. Currently only data of classes: Seurat, liger, data.frame, #' dgCMatrix, dgTMatrix, tibble are accepted. Gene_IDs must be present in rownames of the data. +#' @param features vector of features to check. +#' @param case_check logical. Whether or not to check if features are found if the case is changed from the +#' input list (Sentence case to Upper and vice versa). Default is TRUE. +#' @param case_check_msg logical. Whether to print message to console if alternate case features are found +#' in addition to inclusion in returned list. Default is TRUE. +#' @param print_msg logical. Whether message should be printed if all features are found. Default is TRUE. +#' @param omit_warn logical. Whether to print message about features that are not found in current object. +#' Default is TRUE. +#' @param return_none logical. Whether list of found vs. bad features should still be returned if no +#' features are found. Default is FALSE. +#' @param seurat_assay Name of assay to pull feature names from if `data` is Seurat Object. +#' Default is NULL which will check against features from all assays present. 
+#' +#' @import cli +#' @importFrom purrr reduce +#' @importFrom SeuratObject Features +#' @importFrom stringr str_to_upper str_to_sentence +#' +#' @return A list of length 3 containing 1) found features, 2) not found features, 3) features found if +#' case was modified. +#' +#' @export +#' +#' @concept check_util +#' +#' @examples +#' \dontrun{ +#' features <- Feature_Present(data = obj_name, features = DEG_list, print_msg = TRUE, +#' case_check = TRUE) +#' found_features <- features[[1]] +#' } +#' + +Feature_Present <- function( + data, + features, + case_check = TRUE, + case_check_msg = TRUE, + print_msg = TRUE, + omit_warn = TRUE, + return_none = FALSE, + seurat_assay = NULL +) { + # Check object type + # Seurat + accepted_types <- c("data.frame", "dgCMatrix", "dgTMatrix", "tibble") + if (inherits(x = data, what = "Seurat")) { + # set assays to check (if NULL, check features from all assays present) + assays_present <- seurat_assay %||% Assays(object = data) + + possible_features <- lapply(assays_present, function(j) { + Features(x = data, assay = j) + }) + + possible_features <- unlist(possible_features) + } else if ((class(x = data)[[1]] == "liger")) { + # get complete gene list + length_liger <- length(x = data@raw.data) + + list_genes <- lapply(1:length_liger, function(x){ + rownames(x = data@raw.data[[x]]) + }) + + possible_features <- reduce(list_genes, function(x, y) { + union(x = x, y = y)}) + } else if ((class(x = data) %in% accepted_types)) { + possible_features <- rownames(x = data) + } else { + all_accepted <- c(accepted_types, "Seurat", "liger") + cli_abort(message = c("Input data is currently accepted only in the following formats:", + "i" = "{.field {glue_collapse_scCustom(input_string = all_accepted, and = FALSE)}}.") + ) + } + + # If any features not found + if (any(!features %in% possible_features)) { + bad_features <- features[!features %in% possible_features] + found_features <- features[features %in% possible_features] + if (length(x = found_features) == 0) { + if 
(isTRUE(x = return_none)) { + # Combine into list and return + feature_list <- list( + found_features = NULL, + bad_features = bad_features, + wrong_case_found_features = NULL + ) + return(feature_list) + } else { + cli_abort(message ="No requested features found.") + } + } + + # Return message of features not found + if (length(x = bad_features) > 0 && isTRUE(x = omit_warn)) { + cli_warn(message = c("The following features were omitted as they were not found:", + "i" = "{.field {glue_collapse_scCustom(input_string = bad_features, and = TRUE)}}") + ) + } + + # Check if features found if case is changed. + if (isTRUE(x = case_check)) { + upper_bad_features <- str_to_upper(string = bad_features) + upper_found_features <- upper_bad_features[upper_bad_features %in% possible_features] + + sentence_bad_features <- str_to_sentence(string = bad_features) + sentence_found_features <- sentence_bad_features[sentence_bad_features %in% possible_features] + + # Combine case check + wrong_case_found_features <- c(upper_found_features, sentence_found_features) + + # Additional messages if found. + if (length(x = wrong_case_found_features) > 0) { + if (isTRUE(x = case_check_msg)) { + cli_warn(message = c("NOTE: However, the following features were found: {.field {glue_collapse_scCustom(input_string = wrong_case_found_features, and = TRUE)}}", + "i" = "Please check intended case of features provided.") + ) + } + # Combine into list and return + feature_list <- list( + found_features = found_features, + bad_features = bad_features, + wrong_case_found_features = wrong_case_found_features + ) + return(feature_list) + } + } + # Combine into list and return + feature_list <- list( + found_features = found_features, + bad_features = bad_features, + wrong_case_found_features = "NA (check not performed. Set 'case_check = TRUE' to perform check." 
+ ) + return(feature_list) + } + + # Print all found message if TRUE + if (isTRUE(x = print_msg)) { + cli_inform(message = "All features present.") + } + + # Return full input gene list. + # Combine into list and return + feature_list <- list( + found_features = features, + bad_features = NULL, + wrong_case_found_features = NULL + ) + return(feature_list) +} + + + +#' Check if genes/features are present `r lifecycle::badge("soft-deprecated")` +#' +#' Check if genes are present in object and return vector of found genes. Return warning messages for +#' genes not found. +#' +#' @param data Name of input data. Currently only data of classes: Seurat, liger, data.frame, +#' dgCMatrix, dgTMatrix, tibble are accepted. Gene_IDs must be present in rownames of the data. #' @param gene_list vector of genes to check. #' @param case_check logical. Whether or not to check if features are found if the case is changed from the #' input list (Sentence case to Upper and vice versa). Default is TRUE. @@ -20,7 +180,7 @@ #' @param return_none logical. Whether list of found vs. bad features should still be returned if no #' features are found. Default is FALSE. #' @param seurat_assay Name of assay to pull feature names from if `data` is Seurat Object. -#' Defaults to `DefaultAssay(OBJ)` if NULL. +#' Default is NULL which will check against features from all assays present. 
#' #' @import cli #' @importFrom purrr reduce @@ -32,7 +192,7 @@ #' #' @export #' -#' @concept helper_util +#' @concept check_util #' #' @examples #' \dontrun{ @@ -51,14 +211,24 @@ Gene_Present <- function( return_none = FALSE, seurat_assay = NULL ) { + lifecycle::deprecate_soft(when = "2.1.0", + what = "Gene_Present()", + with = "Feature_Present()", + details = c("i" = "Please adjust code now to prepare for full deprecation.") + ) + # Check object type # Seurat accepted_types <- c("data.frame", "dgCMatrix", "dgTMatrix", "tibble") if (inherits(x = data, what = "Seurat")) { # set assay (if null set to active assay) - assay <- seurat_assay %||% DefaultAssay(object = data) + assays_present <- seurat_assay %||% Assays(object = data) - possible_features <- Features(x = data, assay = seurat_assay) + possible_features <- lapply(assays_present, function(j) { + Features(x = data, assay = j) + }) + + possible_features <- unlist(possible_features) } else if ((class(x = data)[[1]] == "liger")) { # get complete gene list length_liger <- length(x = data@raw.data) @@ -175,7 +345,7 @@ Gene_Present <- function( #' #' @export #' -#' @concept helper_util +#' @concept check_util #' #' @examples #' \dontrun{ @@ -224,7 +394,8 @@ Case_Check <- function( #' Check if meta data columns are present in object and return vector of found columns #' Return warning messages for meta data columns not found. #' -#' @param seurat_object object name. +#' @param object Seurat or Liger object name. +#' @param seurat_object `r lifecycle::badge("deprecated")` deprecated. Please use `object` instead. #' @param meta_col_names vector of column names to check. #' @param print_msg logical. Whether message should be printed if all features are found. Default is TRUE. #' @param omit_warn logical. Whether to print message about features that are not found in current object. Default is TRUE. 
@@ -237,26 +408,40 @@ Case_Check <- function( #' #' @export #' -#' @concept helper_util +#' @concept check_util #' #' @examples #' \dontrun{ -#' meta_variables <- Meta_Present(seurat_object = obj_name, gene_list = DEG_list, print_msg = TRUE) +#' meta_variables <- Meta_Present(object = obj_name, meta_col_names = "percent_mito", print_msg = TRUE) #' } #' Meta_Present <- function( - seurat_object, + object, + seurat_object = deprecated(), meta_col_names, print_msg = TRUE, omit_warn = TRUE, return_none = FALSE ) { - # Check Seurat - Is_Seurat(seurat_object = seurat_object) + # Check is slot is supplied + if (lifecycle::is_present(seurat_object)) { + lifecycle::deprecate_warn(when = "2.1.0", + what = "Meta_Present(seurat_object)", + with = "Meta_Present(object)", + details = c("!" = "Please adjust code now to prepare for full deprecation in v2.2.0.") + ) - # get all features - possible_features <- colnames(x = seurat_object@meta.data) + } + + # Set possible variables based on object type + if (inherits(x = object, what = "Seurat")) { + possible_features <- colnames(x = object@meta.data) + } + + if (inherits(x = object, what = "liger")) { + possible_features <- colnames(x = object@cell.data) + } # If any features not found if (any(!meta_col_names %in% possible_features)) { @@ -316,7 +501,7 @@ Meta_Present <- function( #' #' @export #' -#' @concept helper_util +#' @concept check_util #' #' @examples #' \dontrun{ @@ -376,7 +561,7 @@ Meta_Numeric <- function( #' #' @export #' -#' @concept helper_util +#' @concept check_util #' #' @examples #' \dontrun{ @@ -459,69 +644,6 @@ Reduction_Loading_Present <- function( } -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#################### DATA ACCESS #################### -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -#' Get meta data from object -#' -#' Quick function to properly pull meta.data from objects. 
-#' -#' @param object Object of class Seurat or liger. -#' -#' @importFrom methods slot -#' -#' @return A data.frame containing cell-level meta data -#' -#' @export -#' -#' @concept helper_util -#' -#' @rdname Fetch_Meta -#' -#' @examples -#' library(Seurat) -#' meta_data <- Fetch_Meta(object = pbmc_small) -#' head(meta_data, 5) -#' - -Fetch_Meta <- function(object) { - UseMethod(generic = 'Fetch_Meta') -} - - -#' @rdname Fetch_Meta -#' @export -#' @concept helper_util -#' @method Fetch_Meta Seurat - -Fetch_Meta.Seurat <- function( - object -) { - # Pull meta data - object_meta <- object_meta <- slot(object = object, name = "meta.data") - - return(object_meta) -} - - -#' @rdname Fetch_Meta -#' @export -#' @concept helper_util -#' @method Fetch_Meta liger - -Fetch_Meta.liger <- function( - object -) { - - # Pull meta data - object_meta <- object_meta <- slot(object = object, name = "cell.data") - - return(object_meta) -} - - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #################### MATRIX HELPERS #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -550,7 +672,7 @@ Fetch_Meta.liger <- function( #' #' @export #' -#' @concept helper_util +#' @concept read_merge_util #' #' @examples #' \dontrun{ @@ -679,7 +801,7 @@ Merge_Sparse_Data_All <- function( #' #' @export #' -#' @concept helper_util +#' @concept read_merge_util #' #' @examples #' \dontrun{ @@ -727,7 +849,7 @@ Extract_Modality <- function( #' #' @export #' -#' @concept helper_util +#' @concept read_merge_util #' #' @examples #' \dontrun{ @@ -785,11 +907,11 @@ Merge_Sparse_Multimodal_All <- function( #' @import cli #' @importFrom methods slot #' -#' @references Re-implementing `CheckMatrix` only for sparse matrices with modified warning messages. Original function from SeuratObject \url{https://github.com/mojaveazure/seurat-object/blob/9c0eda946e162d8595696e5280a6ecda6284db39/R/utils.R#L625-L650} (License: MIT). 
+#' @references Re-implementing `CheckMatrix` only for sparse matrices with modified warning messages. Original function from SeuratObject \url{https://github.com/satijalab/seurat-object/blob/9c0eda946e162d8595696e5280a6ecda6284db39/R/utils.R#L625-L650} (License: MIT). #' #' @export #' -#' @concept helper_util +#' @concept check_util #' #' @examples #' \dontrun{ @@ -848,7 +970,7 @@ CheckMatrix_scCustom <- function( #' #' @return matrix or data.frame with new column names. #' -#' @concept helper_util +#' @concept barcode_util #' #' @examples #' \dontrun{ @@ -950,7 +1072,7 @@ Replace_Suffix <- function( #' #' @return matrix or data.frame with new column names. #' -#' @concept helper_util +#' @concept barcode_util #' #' @examples #' \dontrun{ @@ -1028,7 +1150,7 @@ Change_Delim_Suffix <- function( #' #' @return matrix or data.frame with new column names. #' -#' @concept helper_util +#' @concept barcode_util #' #' @examples #' \dontrun{ @@ -1104,7 +1226,7 @@ Change_Delim_Prefix <- function( #' #' @return matrix or data.frame with new column names. #' -#' @concept helper_util +#' @concept barcode_util #' #' @examples #' \dontrun{ @@ -1561,7 +1683,7 @@ Pull_Cluster_Annotation <- function( #' #' @export #' -#' @concept object_util +#' @concept marker_annotation_util #' #' @examples #' \dontrun{ @@ -1608,6 +1730,56 @@ Rename_Clusters <- function( } +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### GENERAL HELPERS #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +#' Split vector into list +#' +#' Splits vector into chunks of x sizes +#' +#' @param x vector to split +#' @param chunk_size size of chunks for vector to be split into, default is 100. +#' @param verbose logical, print details of vector and split, default is FALSE. 
+#' +#' @return list with vector of X length +#' +#' @import cli +#' +#' @export +#' +#' @references Base code from stackoverflow post: +#' \url{https://stackoverflow.com/a/3321659/15568251} +#' +#' @concept misc_util +#' +#' @examples +#' vector <- c("gene1", "gene2", "gene3", "gene4", "gene5", "gene6") +#' +#' vector_list <- Split_Vector(x = vector, chunk_size = 3) +#' + +Split_Vector <- function( + x, + chunk_size = 100, + verbose = FALSE +) { + vector_list <- split(x, ceiling(x = seq_along(x)/chunk_size)) + + # Report info + if (isTRUE(x = verbose)) { + cli_inform(message = c("Original vector length: ({.field {length(x = x)}}).", + "Split into {.field {length(x = vector_list)}} vectors of {.field {chunk_size}} items." )) + } + + # return list + return(vector_list) + +} + + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #################### PROJECT ORGANIZATION #################### #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1766,3 +1938,159 @@ Copy_From_GCP <- function( # Copy files system(paste0("gsutil -m cp -r ", gcp_bucket_path, " ", folder_file_path)) } + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#################### GENE NAME HELPERS #################### +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +#' Update HGNC Gene Symbols +#' +#' Update human gene symbols using data from HGNC. This function will store cached data in package directory using (BiocFileCache). Use of this function requires internet connection on first use (or if setting `update_symbol_data = TRUE`). Subsequent use does not require connection and will pull from cached data. +#' +#' @param input_data Data source containing gene names. 
Accepted formats are: +#' \itemize{ +#' \item \code{charcter vector} +#' \item \code{Seurat Objects} +#' \item \code{data.frame}: genes as rownames +#' \item \code{dgCMatrix/dgTMatrix}: genes as rownames +#' \item \code{tibble}: genes in first column +#' } +#' @param update_symbol_data logical, whether to update cached HGNC data, default is NULL. +#' If `NULL` BiocFileCache will check and prompt for update if cache is stale. +#' If `FALSE` the BiocFileCache stale check will be skipped and current cache will be used. +#' If `TRUE` the BiocFileCache stale check will be skipped and HGNC data will be downloaded. +#' @param case_check_as_warn logical, whether case checking of features should cause abort or +#' only warn, default is FALSE (abort). Set to TRUE if atypical names (i.e. old LOC naming) are +#' present in input_data. +#' @param verbose logical, whether to print results detailing numbers of symbols, found, updated, +#' and not found; default is TRUE. +#' +#' @return data.frame containing columns: input_features, Approved_Symbol (already approved; output unchanged), Not_Found_Symbol (symbol not in HGNC; output unchanged), Updated_Symbol (new symbol from HGNC; output updated). 
+#' +#' @import cli +#' @importFrom dplyr mutate filter select across left_join join_by +#' @importFrom magrittr "%>%" +#' @importFrom stats complete.cases +#' @importFrom stringr str_to_upper str_replace_na str_c str_replace +#' @importFrom tidyr drop_na everything +#' +#' @export +#' +#' @concept misc_util +#' +#' @examples +#' \dontrun{ +#' new_names <- Updated_HGNC_Symbols(input_data = Seurat_Object) +#' } +#' + +Updated_HGNC_Symbols <- function( + input_data, + update_symbol_data = NULL, + case_check_as_warn = FALSE, + verbose = TRUE +) { + # Check BiocFileCache installed + BiocFileCache_check <- is_installed(pkg = "BiocFileCache") + if (isFALSE(x = BiocFileCache_check)) { + cli_abort(message = c( + "Please install the {.val BiocFileCache} package to use {.code Updated_HGNC_Symbols}", + "i" = "This can be accomplished with the following commands: ", + "----------------------------------------", + "{.field `install.packages({symbol$dquote_left}BiocManager{symbol$dquote_right})`}", + "{.field `BiocManager::install({symbol$dquote_left}BiocFileCache{symbol$dquote_right})`}", + "----------------------------------------" + )) + } + + # Check input data type + accepted_types <- c("data.frame", "dgCMatrix", "dgTMatrix") + + if (inherits(x = input_data, what = "Seurat")) { + input_symbols <- Features(input_data) + } + if ((class(x = input_data) %in% accepted_types)) { + input_symbols <- rownames(x = input_data) + } + if (inherits(x = input_data, what = "tibble")) { + input_symbols <- input_data[, 1] + } + if (inherits(x = input_data, what = "character")) { + input_symbols <- input_data + } + + # Check for duplicates + num_duplicated <- length(x = unique(x = input_symbols[duplicated(x = input_symbols)])) + + if (num_duplicated > 0) { + cli_abort(message = c("Input data contains duplicate gene symbols.", + "i" = "Check input data and/or make unique.")) + } + + # Check input symbols have correct case + case_check <- str_to_upper(input_symbols) + case_check <- gsub(pattern 
= "(.*C[0-9XY]+)ORF(.+)", replacement = "\\1orf\\2", x = case_check) + # Currently two genes that are case anomalies so correcting them here + case_check <- gsub(pattern = "HSA-MIR-", replacement = "hsa-mir-", x = case_check) + + if (isFALSE(x = identical(x = input_symbols, y = case_check)) && isFALSE(x = case_check_as_warn)) { + cli_abort("Uncovered potential errors in case/capitalization of input symbols. Please check case is correct.") + } + if (isFALSE(x = identical(x = input_symbols, y = case_check)) && isTRUE(x = case_check_as_warn)) { + cli_warn(c("Uncovered potential errors in case/capitalization of input symbols. This may cause errors in updating gene symbols.", + "i" = "Please check case is correct and re-run if errors are found.")) + } + + # Download and process HGNC dataset if not already cached + hgnc_data_path <- download_hgnc_data(update = update_symbol_data) + + hgnc_long_data <- readRDS(hgnc_data_path) + + input_features_df <- data.frame("input_features" = input_symbols) + + symbols_not_approved <- input_symbols[!input_symbols %in% hgnc_long_data$symbol] + symbols_approved <- input_symbols[input_symbols %in% hgnc_long_data$symbol] + + input_features_df_approved <- input_features_df %>% + mutate("Approved_Symbol" = ifelse(.data[["input_features"]] %in% symbols_approved, .data[["input_features"]], NA)) %>% + drop_na() + + + input_features_updated_df <- hgnc_long_data %>% + filter(.data[["prev_symbol"]] %in% symbols_not_approved) %>% + mutate("Updated_Symbol" = symbol) %>% + select(any_of(c("prev_symbol", "Updated_Symbol"))) %>% + rename("input_features" = any_of("prev_symbol")) %>% + drop_na() + + symbols_not_found <- data.frame("input_features" = symbols_not_approved[!symbols_not_approved %in% input_features_updated_df$input_features]) %>% + mutate("Not_Found_Symbol" = .data[["input_features"]]) + + merged_df <- left_join(input_features_df, y = input_features_df_approved, by = join_by("input_features")) %>% + left_join(symbols_not_found, by = 
join_by("input_features")) %>% + left_join(input_features_updated_df, by = join_by("input_features")) %>% + mutate(across(everything(), ~str_replace_na(string = .x, replacement = ""))) %>% + mutate(Output_Features = str_c(.data[["Approved_Symbol"]], .data[["Not_Found_Symbol"]], .data[["Updated_Symbol"]])) %>% + mutate(across(everything(), ~str_replace(string = .x, pattern = "^$", replacement = NA_character_))) %>% + filter(!(.data[["input_features"]] == "QARS" & .data[["Updated_Symbol"]] == "EPRS1")) + + # Report the results + if (isTRUE(x = verbose)) { + num_features <- length(input_symbols) + + num_updated <- sum(complete.cases(merged_df$Updated_Symbol)) + num_not_found <- sum(complete.cases(merged_df$Not_Found_Symbol)) + num_approved <- sum(complete.cases(merged_df$Approved_Symbol)) + + cli_inform(message = c("Input features contained {.field {format(x = num_features, big.mark = ',')}} gene symbols", + "{col_green({symbol$tick})} {.field {format(x = num_approved, big.mark = ',')}} were already approved symbols.", + "{col_blue({symbol$arrow_right})} {.field {format(x = num_updated, big.mark = ',')}} were updated to approved symbol.", + "{col_red({symbol$cross})} {.field {format(x = num_not_found, big.mark = ',')}} were not found in HGNC dataset and remain unchanged.")) + } + + # Return results + return(merged_df) + +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 650fbf3a83..674c9dc6f4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -37,10 +37,14 @@ navbar: href: articles/Helpers_and_Utilities.html - text: "Read & Write Data Functions" href: articles/Read_and_Write_Functions.html + - text: "Object/Assay Format Conversion" + href: articles/Object_Conversion.html - text: "Marker Identification & Cluster Annotation Helpers" href: articles/Markers_and_Cluster_Annotation.html - text: "Statistics Functions" href: articles/Statistics.html + - text: "Updating Gene Symbols" + href: articles/Update_Gene_Symbols.html - text: "Misc Functions" href: 
articles/Misc_Functions.html - text: ------- @@ -61,6 +65,14 @@ reference: desc: "Functions for reading and writing single cell sequencing data from raw data files." - contents: - has_concept("read_&_write") +- subtitle: "Merge Data Utilities" + desc: "Utilities to merge raw data." +- contents: + - has_concept("read_merge_util") +- subtitle: "Edit Barcode Utilities" + desc: "Utilities to edit cell barcodes" +- contents: + - has_concept("barcode_util") - title: "Sequencing Metrics QC Plots" desc: "Functions plotting various QC metrics provides by 10X Genomics Cell Ranger Outputs" @@ -102,15 +114,33 @@ reference: - title: "Helper Utilities (Seurat)" desc: "Functions to provide ease of use for frequently used code from Seurat Objects." +- subtitle: "QC Utilities" + desc: "Functions to add common QC variables to meta.data." +- contents: + - has_concept("qc_util") +- subtitle: "Getters/Setters" + desc: "Functions to extract data from or add additional data to Seurat objects" +- contents: + - has_concept("get_set_util") +- subtitle: "Check Utilities" + desc: "Functions to check validity of different aspects of object or object contents." - contents: - - has_concept("helper_util") - - has_concept("object_util") + - has_concept("check_util") +- subtitle: "Misc Utilities" + desc: "Miscellaneous Utilities" +- contents: + - has_concept("misc_util") - title: "Helper Utilities (LIGER)" desc: "Functions to provide ease of use for frequently used code from LIGER Objects." - contents: - has_concept("liger_object_util") +- title: "Object Conversion Functions" + desc: "Functions to convert between different single cell object formats (R & Python)." +- contents: + - has_concept("object_conversion") + - title: "Cluster/Marker Annotation Utilities" desc: "Functions to provide ease of use or add functionality to DEG and cluster annotation." 
- contents: diff --git a/cran-comments.md b/cran-comments.md index 0114bff69f..bda58c881a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,11 @@ -## Minor Version Update -This is a hotfix update with bug fixes now v2.0.1; apologies for not catching these in v2.0.0 submission last week. In this version I have: +## Major Version Update +This is major version update to v2.1.0. See News.md for full changelog. + +- Number of new functions, changes, and bug fixes. +- A few breaking changes owing to function/parameter deprecation (all are documented (see new Deprecated.R file) and have warnings/error messages using lifecycle package). + +This is attempted re-submission after a check error in second v2.1.0 submission. I was able to diagnose error as related to upcoming ggplot2 v3.5.0 update which caused error in another package scCustomize depends on. I have updated affected functions with a fix until other package is updated. -- Fixed 5 minor bugs presented in v2.0.0 release. ## R CMD check results @@ -12,6 +16,7 @@ This is a hotfix update with bug fixes now v2.0.1; apologies for not catching th - Run locally, R4.3.2, Platform: x86_64-apple-darwin20 (64-bit) with `devtools:check()`. - Also run via GitHub Actions via `usethis::use_github_action_check_standard` - macos-latest (release), windows-latest (release), ubuntu-latest (devel), ubuntu-latest (release), ubuntu-latest (oldrel-1). +- third submission ## NOTES 1. Imports includes 28 non-default packages. @@ -23,9 +28,9 @@ This is a hotfix update with bug fixes now v2.0.1; apologies for not catching th to ensure package functionality. ## Other Notes -1. GitHub Actions check returning strange errors only on macos (release). +1. GitHub Actions check returning strange errors only on macos (release) and only sometimes (This is NOT the error that caused the v2.1.0 CRAN check failure on first submission). - The errors are from failures running package examples. 
This includes functions that have been part of prior CRAN releases. NO errors are found when checking locally on macos platform using R 4.3.2 and none are found in GitHub Actions check on linux or windows platforms. I believe to be error in GitHub Actions workflow and I have therefore refrained from adding `dontrun` - to examples that run fine on other platforms. This was also the case with v2.0.0 which passed macos checks on CRAN, + to examples that run fine on other platforms. This was also the case with v2.0.0 and v2.0.1 which passed macos checks on CRAN, furthering it is likely a GitHub Actions issue. diff --git a/docs/404.html b/docs/404.html index 8afc15ac16..6b2b9db772 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -76,12 +76,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/LICENSE.html b/docs/LICENSE.html index b3dad0a714..7d73a23975 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/articles/Cell_Bender_Functions.html b/docs/articles/Cell_Bender_Functions.html index 8ba8daf6ae..82968071fd 100644 --- a/docs/articles/Cell_Bender_Functions.html +++ b/docs/articles/Cell_Bender_Functions.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -130,8 +136,8 @@

    Creating Dual Assay Objects @@ -324,7 +330,7 @@

    Create Dual Assay Seurat ObjectOptional Parameters

    Users can specify any additional parameters normally passed to -Seurat::CreateSeuratObject() when using this function.

    +Seurat::CreateSeuratObject() when using this function.

     dual_seurat <- Create_CellBender_Merged_Seurat(raw_cell_bender_matrix = cell_bender_merged, raw_counts_matrix = cell_ranger_merged,
         raw_assay_name = "RAW", min_cells = 5, min_features = 200)
    diff --git a/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-17-1.png index 4ac9433c3b..84439800d7 100644 Binary files a/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-17-1.png and b/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-19-1.png b/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-19-1.png index 908d68d1c0..015f64a506 100644 Binary files a/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-19-1.png and b/docs/articles/Cell_Bender_Functions_files/figure-html/unnamed-chunk-19-1.png differ diff --git a/docs/articles/Color_Palettes.html b/docs/articles/Color_Palettes.html index 850c2b2d2e..07ba466626 100644 --- a/docs/articles/Color_Palettes.html +++ b/docs/articles/Color_Palettes.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@ @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@ @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@
    +
    Clustered_DotPlot() split by additional grouping variable +
    +

    Clustered_DotPlot can now plot with additional grouping +variable provided to split.by parameter.

    +
    +Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1",
    +    "P2ry12", "Tmem119"), split.by = "Transcription_Method")
    +

    +

    However, you’ll notice that the labels on the bottom get cutoff on +the left-hand side of the plot. There are two solutions to this.

    +

    Keep bottom labels rotated but add extra white-space padding on +left

    +
    +Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1",
    +    "P2ry12", "Tmem119"), split.by = "Transcription_Method", plot_padding = TRUE)
    +

    +

    Or simply remove the bottom label text rotation

    +
    +Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1",
    +    "P2ry12", "Tmem119"), split.by = "Transcription_Method", x_lab_rotate = 90)
    +

    +
    +
    Clustered_DotPlot() k-means Clustering Optional Parameters

    Determining Optimal k Value
    @@ -728,7 +757,7 @@

    Clustered_DotP Clustered_DotPlot will return this plot when using the function. However, it can be turned off by setting plot_km_elbow = FALSE. -

    +

    The number of k values plotted must be 1 less than number of features. Default is to plot 20 values but users can customize number of k values plotted using elbow_kmax parameter.

    @@ -760,18 +789,18 @@
    Clustered_DotPlo of exp_color_min/exp_color_max but can be modified if a skewed visualization is desired. -
    +
     Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, print_exp_quantiles = T)
    -
    Quantiles of gene expression data are:
    -       10%        50%        90%        99% 
    --0.6555988 -0.3595223  1.7742718  2.6666597
    +
    Quantiles of gene expression data are:
    +       10%        50%        90%        99% 
    +-0.6555988 -0.3595223  1.7742718  2.6666597

    Here we can adjust the expression clipping based on the range of the data in this specific dataset and list of features and change the color scale to use Seurat::PurpleAndYellow()

    -
    +
     Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, exp_color_min = -1, exp_color_max = 2,
         colors_use_exp = PurpleAndYellow())
    -

    +

    Clustered_DotPlot() Other Optional Parameters @@ -806,11 +835,11 @@

    FeatureScater Plots
    +
     # Create Plots
     FeatureScatter_scCustom(seurat_object = marsh_mouse_micro, feature1 = "exAM_Score1", feature2 = "Microglia_Score1",
         colors_use = mouse_colors, group.by = "ident", num_columns = 2, pt.size = 1)
    -

    +

    Split FeatureScatter Plots
    @@ -823,12 +852,12 @@
    Split FeatureScatter PlotsDimPlot_scCustom. The default is to return each plot with their own x and y axes, which has a number of advantages (see DimPlot_scCustom section).

    -
    +
     # Create Plots
     FeatureScatter_scCustom(seurat_object = marsh_mouse_micro, feature1 = "exAM_Score1", feature2 = "Microglia_Score1",
         colors_use = mouse_colors, split.by = "Transcription_Method", group.by = "ident", num_columns = 2,
         pt.size = 1)
    -

    +

    @@ -860,11 +889,11 @@
    New default color palettes

    To best demonstrate rationale for this I’m going to use over-clustered version of the marsh_mouse_micro object.

    -
    +
     DimPlot(object = marsh_mouse_over)
     DimPlot_scCustom(seurat_object = marsh_mouse_over)
    -*`DimPlot_scCustom` also sets `label = TRUE` if `group.by = NULL` by default.*

    +*`DimPlot_scCustom` also sets `label = TRUE` if `group.by = NULL` by default.*

    DimPlot_scCustom also sets label = TRUE if group.by = NULL by default.

    @@ -880,11 +909,11 @@
    Shuffle Points

    Here is example when plotting by donor in the human dataset to determine how well the dataset integration worked.

    -
    +
     DimPlot(object = marsh_human_pm, group.by = "sample_id")
     DimPlot_scCustom(seurat_object = marsh_human_pm, group.by = "sample_id")
    -***A.** Cannot tell how well integrated the samples are due to plotting one on top of the other.  **B.** Default plot using scCustomize `DimPlot_scCustom`.*

    +***A.** Cannot tell how well integrated the samples are due to plotting one on top of the other.  **B.** Default plot using scCustomize `DimPlot_scCustom`.*

    A. Cannot tell how well integrated the samples are due to plotting one on top of the other. B. Default plot using scCustomize DimPlot_scCustom. @@ -897,11 +926,11 @@

    Split DimPlotsWhen plotting a split plot Seurat::DimPlot() simplifies the axes by implementing shared axes depending on the number of columns specified.

    -
    +
     DimPlot(object = pbmc, split.by = "treatment")
     DimPlot(object = pbmc, split.by = "sample_id", ncol = 4)
    -***A.** The default Seurat split.by looks ok when plots are all present on single row.  **B.** However, the visualization isn't so good when you starting wrapping plots into multiple rows.*

    +***A.** The default Seurat split.by looks ok when plots are all present on single row.  **B.** However, the visualization isn't so good when you starting wrapping plots into multiple rows.*

    A. The default Seurat split.by looks ok when plots are all present on single row. B. However, the visualization isn’t so good when you starting wrapping plots into @@ -911,10 +940,10 @@

    Split DimPlotsBy default when using split.by with DimPlot_scCustom the layout is returned with an axes for each plot to make visualization of large numbers of splits easier.

    -
    +
     DimPlot_scCustom(seurat_object = pbmc, split.by = "treatment", num_columns = 4, repel = TRUE)
    -*Simplified visualization without having to think about the number of variables that are being plotted.*

    +*Simplified visualization without having to think about the number of variables that are being plotted.*

    Simplified visualization without having to think about the number of variables that are being plotted.

    @@ -923,9 +952,9 @@
    Split DimPlotsDimPlot_scCustom by supplying split_seurat = TRUE

    -
    +
     DimPlot_scCustom(seurat_object = pbmc, split.by = "treatment", num_columns = 4, repel = TRUE, split_seurat = TRUE)
    -

    +

    Figure Plotting @@ -934,9 +963,9 @@
    Figure PlottingDimPlot_scCustom simply set figure_plot = TRUE.

    -
    +
     DimPlot_scCustom(seurat_object = pbmc, figure_plot = TRUE)
    -

    +

    @@ -953,14 +982,14 @@

    Highlight Cluster(s)
    +
     Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = "7", highlight_color = "navy",
         background_color = "lightgray")
     
     Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = "8", highlight_color = "forestgreen",
         background_color = "lightgray")
    -*`Cluster_Highlight_Plot` takes identity or vector of identities and plots them in front of remaining unselected cells.*

    +*`Cluster_Highlight_Plot` takes identity or vector of identities and plots them in front of remaining unselected cells.*

    Cluster_Highlight_Plot takes identity or vector of identities and plots them in front of remaining unselected cells.

    @@ -970,10 +999,10 @@
    Highlight 2+ clusters in the same

    Cluster_Highlight_Plot() also supports the ability to plot multiple identities in the same plot.

    -
    +
     Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = c("7", "8"), highlight_color = c("navy",
         "forestgreen"))
    -

    +

    NOTE: If no value is provided to highlight_color then all clusters provided to cluster_name will be plotted using single default color (navy).

    @@ -988,10 +1017,10 @@

    Highlight Meta Data
    +
     Meta_Highlight_Plot(seurat_object = marsh_mouse_micro, meta_data_column = "Transcription_Method",
         meta_data_highlight = "ENZYMATIC_NONE", highlight_color = "firebrick", background_color = "lightgray")
    -

    +

    Highlight 2+ factor levels in the same plot
    @@ -999,11 +1028,11 @@
    Highlight 2+ factor levels i levels from the same meta data column in the same plot, similar to plotting multiple identities with Cluster_Highlight_Plot()

    -
    +
     Meta_Highlight_Plot(seurat_object = marsh_mouse_micro, meta_data_column = "Transcription_Method",
         meta_data_highlight = c("ENZYMATIC_NONE", "DOUNCE_NONE"), highlight_color = c("firebrick", "dodgerblue"),
         background_color = "lightgray")
    -

    +

    @@ -1016,16 +1045,16 @@

    Highlight Cellscells_highlight parameter must be a named list.

    Let’s say we want to highlight cells with expression of MS4A1 above certain threshold.

    -
    +
     # Get cell names
    -MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3)
    +MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3)
     
     # Make into list
     cells <- list(MS4A1 = MS4A1)
     
     # Plot
     Cell_Highlight_Plot(seurat_object = pbmc, cells_highlight = cells)
    -

    +

    Highlight 2+ sets of cells in the same plot
    @@ -1033,15 +1062,15 @@
    Highlight 2+ sets of cells i sets of cells in the same plot, similar to plotting multiple identities with Cluster_Highlight_Plot()/Meta_Highlight_Plot().

    -
    +
     # Get cell names and make list
    -MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3)
    -GZMB <- WhichCells(object = pbmc, expression = GZMB > 3)
    +MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3)
    +GZMB <- WhichCells(object = pbmc, expression = GZMB > 3)
     
     cells <- list(MS4A1 = MS4A1, GZMB = GZMB)
     # Plot
     Cell_Highlight_Plot(seurat_object = pbmc, cells_highlight = cells)
    -

    +

    @@ -1050,19 +1079,19 @@

    DimPlot Layout Plots
    +
     DimPlot_All_Samples(seurat_object = pbmc, meta_data_column = "sample_id", num_col = 3, pt.size = 0.5)
    -*Visualize all samples in simple plot layout.*

    +*Visualize all samples in simple plot layout.*

    Visualize all samples in simple plot layout.

    Can unique color each plot by providing a vector of colors instead of single value

    -
    +
     DimPlot_All_Samples(seurat_object = marsh_mouse_micro, meta_data_column = "Transcription", num_col = 2,
         pt.size = 0.5, color = c("firebrick3", "dodgerblue3"))
    -

    +

    diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-20-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-20-1.png index fc37820017..6ce1d9cac6 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-20-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-20-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-33-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-33-1.png index 005883c48a..0279264a74 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-33-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-33-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-35-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-35-1.png index 141c1bb300..09581b5bc9 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-35-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-35-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-37-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-37-1.png index 49ebf4d69a..46bb866aef 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-37-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-37-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-43-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-43-1.png index dd89ed7c90..efdc981308 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-43-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-43-1.png differ 
diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-44-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-44-1.png index dd89ed7c90..efdc981308 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-44-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-44-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-46-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-46-1.png index 54e1b71d33..b864465087 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-46-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-46-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-47-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-47-1.png index 54e1b71d33..b864465087 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-47-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-47-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-49-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-49-1.png index 7cf46a1a50..dca051e1d2 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-49-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-49-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-50-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-50-1.png index 7cf46a1a50..dca051e1d2 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-50-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-50-1.png differ diff 
--git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-52-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-52-1.png index 2f1685f937..114a9da0cd 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-52-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-52-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-53-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-53-1.png index 2f1685f937..114a9da0cd 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-53-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-53-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-55-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-55-1.png index 40f3479384..056d78ecd0 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-55-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-55-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-56-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-56-1.png index f7ea1db5e1..056d78ecd0 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-56-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-56-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-57-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-57-1.png new file mode 100644 index 0000000000..42ae682b08 Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-57-1.png differ diff --git 
a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-58-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-58-1.png index 0a702cc982..8990c587ed 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-58-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-58-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-60-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-60-1.png index d43c63bef0..376b44c3ab 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-60-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-60-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-61-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-61-1.png index 32b7959971..c734bd6fcd 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-61-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-61-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-62-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-62-1.png index 7f3e7d73ca..c734bd6fcd 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-62-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-62-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-63-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-63-1.png index 9349c1f0ed..dda6aff359 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-63-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-63-1.png differ diff --git 
a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-64-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-64-1.png index d5e64f8b79..40f3479384 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-64-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-64-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-67-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-67-1.png index 9349c1f0ed..8990c587ed 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-67-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-67-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-69-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-69-1.png index a61b24eea0..376b44c3ab 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-69-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-69-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-71-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-71-1.png index 6ff6d09964..7f3e7d73ca 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-71-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-71-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-73-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-73-1.png index 236a9e9ded..d5e64f8b79 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-73-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-73-1.png differ diff --git 
a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-74-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-74-1.png index 0651fa673d..f01148dcfc 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-74-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-74-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-76-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-76-1.png new file mode 100644 index 0000000000..5c6c18fd7f Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-76-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-78-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-78-1.png index 0651fa673d..a61b24eea0 100644 Binary files a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-78-1.png and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-78-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-80-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-80-1.png new file mode 100644 index 0000000000..6ff6d09964 Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-80-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-81-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-81-1.png new file mode 100644 index 0000000000..4f589ee43f Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-81-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-82-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-82-1.png new file mode 
100644 index 0000000000..236a9e9ded Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-82-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-84-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-84-1.png new file mode 100644 index 0000000000..ff4d12e1e9 Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-84-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-86-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-86-1.png new file mode 100644 index 0000000000..3ec97ac4f7 Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-86-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-87-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-87-1.png new file mode 100644 index 0000000000..0651fa673d Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-87-1.png differ diff --git a/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-88-1.png b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-88-1.png new file mode 100644 index 0000000000..9db85a5c13 Binary files /dev/null and b/docs/articles/Gene_Expression_Plotting_files/figure-html/unnamed-chunk-88-1.png differ diff --git a/docs/articles/Helpers_and_Utilities.html b/docs/articles/Helpers_and_Utilities.html index f7719a4583..72c718ed2c 100644 --- a/docs/articles/Helpers_and_Utilities.html +++ b/docs/articles/Helpers_and_Utilities.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0

    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -130,8 +136,8 @@

    To view list of accepted values for default species names simply set list_species_names = TRUE.

    -Add_Mito_Ribo_Seurat(list_species_names = TRUE)
    +Add_Mito_Ribo(list_species_names = TRUE)

    @@ -573,10 +575,10 @@

    Extract all meta data (cel B

    @@ -596,10 +598,10 @@

    Extract all meta data (cel Memory CD4 T

    @@ -619,10 +621,10 @@

    Extract all meta data (cel CD14+ Mono

    @@ -642,10 +644,10 @@

    Extract all meta data (cel NK

    @@ -665,7 +667,7 @@

    Extract all meta data (cel Memory CD4 T

    @@ -711,7 +713,7 @@

    Extract all meta data (cel CD8 T

    @@ -757,10 +759,10 @@

    Extract all meta data (cel FCGR3A+ Mono

    @@ -800,13 +802,13 @@

    Extract sample-level meta da pbmc3k

    @@ -817,7 +819,7 @@

    Extract sample-level meta da pbmc3k

    @@ -868,7 +870,7 @@

    Extract sample-level meta da pbmc3k

    @@ -928,7 +930,7 @@
    Remove columns tha sample1
    @@ -961,7 +963,7 @@
    Remove columns tha sample4
    @@ -983,7 +985,7 @@
    Remove columns tha sample6
    @@ -1029,13 +1031,13 @@
    Merge with sample-level sample1
    @@ -1049,10 +1051,10 @@
    Merge with sample-level Batch1
    @@ -1066,10 +1068,10 @@
    Merge with sample-level Batch2
    @@ -1080,10 +1082,10 @@
    Merge with sample-level sample4
    @@ -1114,13 +1116,13 @@
    Merge with sample-level sample6
    @@ -1167,6 +1169,33 @@

    Add sample-level meta data to obje obj <- Add_Sample_Meta(seurat_object = obj, meta_data = sample_meta, join_by_seurat = "orig.ident", join_by_meta = "sample_id") +
    +

    Add feature meta data +

    +

    Starting in Seurat V5 each assay now possesses its own meta.data slot +which is feature-level meta data. During the course of normal analysis this +is where information on variable features is stored. However, we can +also use it to store alternate feature names, in most cases this is +Ensembl IDs matching the symbols used in object creation/analysis.

    +

    scCustomize provides the function Add_Alt_Feature_ID() +to automatically match and add these features using the same files used +in object creation. Users only need to supply either path to the +features.tsv.gz file or the hdf5 file produced from Cell Ranger +output.

    +
    # Using features.tsv.gz file
    +obj <- Add_Alt_Feature_ID(seurat_object = obj,
    +features_tsv = "sample01/outs/filtered_feature_bc_matrix/features.tsv.gz", assay = "RNA")
    +
    +# Using hdf5 file
    +obj <- Add_Alt_Feature_ID(seurat_object = obj,
    +hdf5_file = "sample01/outs/outs/filtered_feature_bc_matrix.h5", assay = "RNA")
    +

    NOTE: If using features.tsv.gz file the file from either +filtered or raw outputs can be used as they are identical.

    +

    NOTE: If using hdf5 file the file from either +filtered_feature_bc or raw_feature_bc can be used as the features slot +is identical. Though it is faster to load filtered_feature_bc file due +to droplet filtering.

    +

    Check for Features/Genes @@ -1198,18 +1227,24 @@

    -
    +
     # Example gene list with all examples (found genes, wrong case (lower) and misspelled (CD8A
     # forgetting to un-shift when typing 8))
     gene_input_list <- c("CD14", "CD3E", "Cd4", "CD*A")
     
     genes_present <- Gene_Present(data = pbmc, gene_list = gene_input_list)
    +
    ## Warning: `Gene_Present()` was deprecated in scCustomize 2.1.0.
    +##  Please use `Feature_Present()` instead.
    +##  Please adjust code now to prepare for full deprecation.
    +## This warning is displayed once every 8 hours.
    +## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
    +## generated.
    ## Warning: The following features were omitted as they were not found:
     ##  Cd4 and CD*A
    ## Warning: NOTE: However, the following features were found: CD4
     ##  Please check intended case of features provided.

    Now let’s look at the output:

    -
    +
     genes_present
    ## $found_features
     ## [1] "CD14" "CD3E"
    @@ -1249,7 +1284,7 @@ 

    Check for updated gene symbols
    +
     gene_input_list <- c("CD14", "CD3E", "Cd4", "CD*A", "SEPT1")
     
     genes_present <- Gene_Present(data = pbmc, gene_list = gene_input_list)
    @@ -1257,13 +1292,13 @@

    Check for updated gene symbols## Cd4 and CD*A

    ## Warning: NOTE: However, the following features were found: CD4
     ##  Please check intended case of features provided.
    -
    +
     check_symbols <- UpdateSymbolList(symbols = genes_present[[2]], verbose = TRUE)
    ## Warning: No updated symbols found

    -

    Merging Data/Objects +

    Merging & Splitting Data/Objects

    Merging raw data @@ -1282,7 +1317,7 @@

    Merging raw dataorig.ident when creating Seurat objects. See Read & Write Vignette for more info on the data import functions.

    -
    +
     # Read in data
     GEO_10X <- Read10X_GEO(data_dir = "assets/GSE152183_RAW_Marsh/")
     
    @@ -1290,7 +1325,7 @@ 

    Merging raw dataGEO_10X_merged <- Merge_Sparse_Data_All(matrix_list = GEO_10X, add_cell_ids = names(GEO_10X)) # Create Seurat Object and specify orig.ident location -GEO_10X_Seurat <- Seurat::CreateSeuratObject(counts = GEO_10X_merged, names.field = 1, names.delim = "_", +GEO_10X_Seurat <- Seurat::CreateSeuratObject(counts = GEO_10X_merged, names.field = 1, names.delim = "_", min.features = 200, min.cells = 5)

    @@ -1302,7 +1337,7 @@

    Merging List of Seurat Objectspurrr::reduce() to merge all objects in list into single combined object

    -
    +
     list_of_objects <- list(obj1, obj2, obj2, ..., obj10)
     
     merged_seurat <- Merge_Seurat_List(list_seurat = list_of_objects)
    @@ -1312,6 +1347,21 @@ 

    Merging List of Seurat Objectsmerged_seurat <- Merge_Seurat_List(list_seurat = list_of_objects, add.cell.ids = cell_ids)

    +
    +

    Splitting Seurat V5 Layers +

    +

    Seurat V5 objects now have the ability to split within the object +into layers. However, I find that the syntax to do this is not the most +intuitive and can be simplified with a new simple wrapper function: +Split_Layers()

    +
    +pbmc <- Split_Layers(seurat_object = pbmc, split.by = "sample_id")
    +
    ##  Splitting layers within assay: RNA into 6 parts by "sample_id"
    +##  RNA is not Assay5, converting to Assay5 before splitting.
    +

    Split_Layers() defaults to “RNA” assay but can be used +for any assay present in object (users should check whether splitting +assay other than “RNA” is valid before proceeding).

    +

    Storing Misc Information in Seurat Objects @@ -1321,7 +1371,7 @@

    Storing Misc Information in scCustomize contains two functions Store_Misc_Info_Seurat and a wrapper around that function Store_Palette_Seurat to make this process easy.

    -
    +
     # Data can be vectors or data.frames
     misc_info <- "misc_vector_dataframe_list_etc"
     
    @@ -1333,7 +1383,7 @@ 

    Storing Lists
    +
     # Create list
     misc_info <- list("misc_item1", "misc_item2", etc)
     
    @@ -1353,7 +1403,7 @@ 

    Storing Color PalettesStore_Palette_Seurat.

    -
    +
     # Data can be vectors or data.frames
     annotated_color_palette <- c("color1", "color2", "color3", "etc")
     
    @@ -1377,7 +1427,7 @@ 

    Replace Suffixes

    Replace_Suffix can be used on single matrix/data.frame or list of matrices/data.frames to modify to remove suffixes

    -
    +
     # For single object
     data_mod <- Replace_Suffix(data = raw_data, current_suffix = "-1", new_suffix = "-2")
     
    @@ -1402,7 +1452,7 @@ 

    Strip Suffixes

    Replace_Suffix can also be used to strip suffixes from data

    -
    +
     # For single object
     data_mod <- Replace_Suffix(data = raw_data, current_suffix = "-1", new_suffix = "")
    @@ -1423,7 +1473,7 @@

    Change Prefix/Suffix Delimiters

    These functions all take identical inputs and can be applied to either single matrix/data.frames or lists of matrices/data.frames.

    -
    +
     data_mod <- Change_Delim_Prefix(data = raw_data, current_delim = ".", new_delim = "_")
     
     data_mod <- Change_Delim_Suffix(data = raw_data, current_delim = ".", new_delim = "_")
    diff --git a/docs/articles/Installation.html b/docs/articles/Installation.html
    index 5bd7ec5c08..6e15e2ec5b 100644
    --- a/docs/articles/Installation.html
    +++ b/docs/articles/Installation.html
    @@ -33,7 +33,7 @@
           
           
             scCustomize
    -        2.0.1
    +        2.1.0
           
         
    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@ @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@ @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@

    Add Mitochondrial & Ribosomal Percentages @@ -443,8 +452,7 @@

    Add Mitochondrial & Ribosom

    For more information on adding mitochondrial and ribosomal count percentages to LIGER objects see General Utilities and Helpers Vignette for information on -Add_Mito_Ribo_LIGER which is analogous to -Add_Mito_Ribo_Seurat.

    +Add_Mito_Ribo.

    Variable Gene Selection diff --git a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-10-1.png index cfaa099550..c158ad38c3 100644 Binary files a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-10-1.png and b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-12-1.png index 0f5c3932b6..4fa478c6bb 100644 Binary files a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-12-1.png and b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-3-1.png index c8c76d06a2..2a64e97d24 100644 Binary files a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-3-1.png and b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-4-1.png index 1bcc384865..e2bc05cc89 100644 Binary files a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-5-1.png index 6346999bca..85fce5cc1b 100644 Binary files a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-8-1.png index 
183ba35026..19c3f86cff 100644 Binary files a/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-8-1.png and b/docs/articles/LIGER_Functions_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/articles/Markers_and_Cluster_Annotation.html b/docs/articles/Markers_and_Cluster_Annotation.html index f4f62cf02d..16031dc3cd 100644 --- a/docs/articles/Markers_and_Cluster_Annotation.html +++ b/docs/articles/Markers_and_Cluster_Annotation.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0

    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -131,8 +137,8 @@

    Marker Identification & Cluster Annotation Helpers

    -

    Compiled: November 20, -2023

    +

    Compiled: February 16, +2024

    Source: vignettes/articles/Markers_and_Cluster_Annotation.Rmd diff --git a/docs/articles/Misc_Functions.html b/docs/articles/Misc_Functions.html index 6b922bb82d..bb59fcd581 100644 --- a/docs/articles/Misc_Functions.html +++ b/docs/articles/Misc_Functions.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@

    @@ -388,59 +395,54 @@
    Non-default species
     # Using gene name patterns
    -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "other", mito_pattern = "regexp_pattern",
    -    ribo_pattern = "regexp_pattern")
    +pbmc <- Add_Mito_Ribo(object = pbmc, species = "other", mito_pattern = "regexp_pattern", ribo_pattern = "regexp_pattern")
     
     # Using feature name lists
     mito_gene_list <- c("gene1", "gene2", "etc")
     ribo_gene_list <- c("gene1", "gene2", "etc")
    -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "other", mito_features = mito_gene_list,
    -    ribo_features = ribo_gene_list)
    +pbmc <- Add_Mito_Ribo(object = pbmc, species = "other", mito_features = mito_gene_list, ribo_features = ribo_gene_list)
     
     # Using combination of gene lists and gene name patterns
    -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "Human", mito_features = mito_gene_list,
    -    ribo_pattern = "regexp_pattern")
    +pbmc <- Add_Mito_Ribo(object = pbmc, species = "Human", mito_features = mito_gene_list, ribo_pattern = "regexp_pattern")

    Warning Messages

    -

    The added benefit of Add_Mito_Ribo_Seurat & -Add_Mito_Ribo_LIGER is that they will return informative -warnings if no Mitochondrial or Ribosomal features are found using the -current species, features, or pattern specification.

    +

    The added benefit of Add_Mito_Ribo is that it will +return informative warnings if no Mitochondrial or Ribosomal features +are found using the current species, features, or pattern +specification.

     # For demonstration purposes we can set `species = mouse` for this object of human cells
    -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "mouse")
    -
    ## Error in `Add_Mito_Ribo_Seurat()`:
    +pbmc <- Add_Mito_Ribo(object = pbmc, species = "mouse")
    +
    ## Error in `Add_Mito_Ribo()`:
     ## ! No Mito or Ribo features found in object using patterns/feature list
     ##   provided.
     ##  Please check pattern/feature list and/or gene names in object.
     # Or if providing custom patterns/lists and features not found
    -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "other", mito_pattern = "^MT-", ribo_pattern = "BAD_PATTERN")
    +pbmc <- Add_Mito_Ribo(object = pbmc, species = "other", mito_pattern = "^MT-", ribo_pattern = "BAD_PATTERN")
    ## Warning: No Ribo features found in object using pattern/feature list provided.
     ##  No column will be added to meta.data.
    -

    Add_Mito_Ribo_Seurat and -Add_Mito_Ribo_LIGER() will also return warnings if columns -are already present in @meta.data slot and prompt you to +

    Add_Mito_Ribo will also return warnings if columns are +already present in @meta.data slot and prompt you to provide override if you want to run the function.

    -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "human")
    -
    ## Error in `Add_Mito_Ribo_Seurat()`:
    +pbmc <- Add_Mito_Ribo(object = pbmc, species = "human")
    +
    ## Error in `Add_Mito_Ribo()`:
     ## ! Columns with "percent_mito" and/or "percent_ribo" already present in
     ##   meta.data slot.
     ##  *To run function and overwrite columns set parameter `overwrite = TRUE` or
     ##   change respective `mito_name`, `ribo_name`, and/or `mito_ribo_name`*
    -

    LIGER Objects: Add_Mito_Ribo_LIGER() +

    LIGER Objects: Add_Mito_Ribo()

    -

    scCustomize also contains identical function for use with LIGER -objects. Add_Mito_Ribo_LIGER contains equivalent parameters -and capabilities as Add_Mito_Ribo_Seurat

    +

    scCustomize Add_Mito_Ribo also works seamlessly with +LIGER objects.

    -liger_obj <- Add_Mito_Ribo_Seurat(seurat_object = liger_obj, species = "human")
    +liger_obj <- Add_Mito_Ribo(object = liger_obj, species = "human")
    @@ -459,9 +461,9 @@

    Cell Complexity (log10(nFeatu log10(nFeature) / log10(nCount).

     # These defaults can be run just by providing accepted species name
    -pbmc <- Add_Cell_Complexity_Seurat(seurat_object = pbmc)
    -

    NOTE: There is analogous function for LIGER objects (see: -Add_Cell_Complexity_LIGER()).

    +pbmc <- Add_Cell_Complexity(object = pbmc)

    +

    NOTE: The function also works seamlessly with LIGER +objects.

    Add Top Percent Expression QC Metric @@ -550,10 +552,10 @@

    Extract all meta data (cel Memory CD4 T

    -sample4 +sample6 -Batch2 +Batch1
    -sample2 +sample6 -Batch1 +Batch2
    -sample4 +sample6 -Batch1 +Batch2
    -sample6 +sample2 -Batch2 +Batch1
    -sample3 +sample1 -Batch2 +Batch1
    -sample6 +sample5 Batch1 @@ -691,7 +693,7 @@

    Extract all meta data (cel sample4

    -Batch2 +Batch1
    -sample2 +sample3 Batch2 @@ -734,10 +736,10 @@

    Extract all meta data (cel Naive CD4 T

    -sample4 +sample5 -Batch1 +Batch2
    -sample2 +sample5 -Batch2 +Batch1
    -B +NK sample1 -Batch2 +Batch1
    -B +CD14+ Mono sample2 @@ -834,7 +836,7 @@

    Extract sample-level meta da pbmc3k

    -NK +CD8 T sample3 @@ -851,13 +853,13 @@

    Extract sample-level meta da pbmc3k

    -Memory CD4 T +CD8 T sample4 -Batch2 +Batch1
    -Naive CD4 T +Memory CD4 T sample5 @@ -885,13 +887,13 @@

    Extract sample-level meta da pbmc3k

    -CD14+ Mono +Memory CD4 T sample6 -Batch2 +Batch1
    -Batch2 +Batch1
    -Batch2 +Batch1
    -Batch2 +Batch1
    -Batch2 +Batch1 -2173.0 +2208.0 -809.0 +819.5
    -2163.5 +2250.5 -803.0 +815.0
    -2190.0 +2172.0 -823.5 +825.0
    -Batch2 +Batch1 -2209.0 +2173.5 815.0 @@ -1100,10 +1102,10 @@
    Merge with sample-level Batch1
    -2161.0 +2178.0 -820.0 +815.0
    -Batch2 +Batch1 -2286.0 +2198.0 -827.0 +809.0
    + + + + + + + + + + + + + + + + + + + + + + +
    +V3_Assay_Options + +V5_Assay5_Options +
    +Assay + +Assay5 +
    +assay + +assay5 +
    +V3 + +V5 +
    +v3 + +v5 +
    +
    +
    +
    +
    + + + +
    + + + +
    + +
    +

    +

    Site built with pkgdown 2.0.7.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/articles/Object_Conversion_files/kePrint-0.0.1/kePrint.js b/docs/articles/Object_Conversion_files/kePrint-0.0.1/kePrint.js new file mode 100644 index 0000000000..e6fbbfc44d --- /dev/null +++ b/docs/articles/Object_Conversion_files/kePrint-0.0.1/kePrint.js @@ -0,0 +1,8 @@ +$(document).ready(function(){ + if (typeof $('[data-toggle="tooltip"]').tooltip === 'function') { + $('[data-toggle="tooltip"]').tooltip(); + } + if ($('[data-toggle="popover"]').popover === 'function') { + $('[data-toggle="popover"]').popover(); + } +}); diff --git a/docs/articles/Object_Conversion_files/lightable-0.0.1/lightable.css b/docs/articles/Object_Conversion_files/lightable-0.0.1/lightable.css new file mode 100644 index 0000000000..3be3be9046 --- /dev/null +++ b/docs/articles/Object_Conversion_files/lightable-0.0.1/lightable.css @@ -0,0 +1,272 @@ +/*! + * lightable v0.0.1 + * Copyright 2020 Hao Zhu + * Licensed under MIT (https://github.com/haozhu233/kableExtra/blob/master/LICENSE) + */ + +.lightable-minimal { + border-collapse: separate; + border-spacing: 16px 1px; + width: 100%; + margin-bottom: 10px; +} + +.lightable-minimal td { + margin-left: 5px; + margin-right: 5px; +} + +.lightable-minimal th { + margin-left: 5px; + margin-right: 5px; +} + +.lightable-minimal thead tr:last-child th { + border-bottom: 2px solid #00000050; + empty-cells: hide; + +} + +.lightable-minimal tbody tr:first-child td { + padding-top: 0.5em; +} + +.lightable-minimal.lightable-hover tbody tr:hover { + background-color: #f5f5f5; +} + +.lightable-minimal.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-classic { + border-top: 0.16em solid #111111; + border-bottom: 0.16em solid #111111; + width: 100%; + margin-bottom: 10px; + margin: 10px 5px; +} + +.lightable-classic tfoot tr td { + border: 0; +} + +.lightable-classic tfoot tr:first-child td { + border-top: 0.14em solid #111111; +} + +.lightable-classic caption { + color: #222222; 
+} + +.lightable-classic td { + padding-left: 5px; + padding-right: 5px; + color: #222222; +} + +.lightable-classic th { + padding-left: 5px; + padding-right: 5px; + font-weight: normal; + color: #222222; +} + +.lightable-classic thead tr:last-child th { + border-bottom: 0.10em solid #111111; +} + +.lightable-classic.lightable-hover tbody tr:hover { + background-color: #F9EEC1; +} + +.lightable-classic.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-classic-2 { + border-top: 3px double #111111; + border-bottom: 3px double #111111; + width: 100%; + margin-bottom: 10px; +} + +.lightable-classic-2 tfoot tr td { + border: 0; +} + +.lightable-classic-2 tfoot tr:first-child td { + border-top: 3px double #111111; +} + +.lightable-classic-2 caption { + color: #222222; +} + +.lightable-classic-2 td { + padding-left: 5px; + padding-right: 5px; + color: #222222; +} + +.lightable-classic-2 th { + padding-left: 5px; + padding-right: 5px; + font-weight: normal; + color: #222222; +} + +.lightable-classic-2 tbody tr:last-child td { + border-bottom: 3px double #111111; +} + +.lightable-classic-2 thead tr:last-child th { + border-bottom: 1px solid #111111; +} + +.lightable-classic-2.lightable-hover tbody tr:hover { + background-color: #F9EEC1; +} + +.lightable-classic-2.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-material { + min-width: 100%; + white-space: nowrap; + table-layout: fixed; + font-family: Roboto, sans-serif; + border: 1px solid #EEE; + border-collapse: collapse; + margin-bottom: 10px; +} + +.lightable-material tfoot tr td { + border: 0; +} + +.lightable-material tfoot tr:first-child td { + border-top: 1px solid #EEE; +} + +.lightable-material th { + height: 56px; + padding-left: 16px; + padding-right: 16px; +} + +.lightable-material td { + height: 52px; + padding-left: 16px; + padding-right: 16px; + border-top: 1px solid #eeeeee; +} + +.lightable-material.lightable-hover tbody 
tr:hover { + background-color: #f5f5f5; +} + +.lightable-material.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-material.lightable-striped tbody td { + border: 0; +} + +.lightable-material.lightable-striped thead tr:last-child th { + border-bottom: 1px solid #ddd; +} + +.lightable-material-dark { + min-width: 100%; + white-space: nowrap; + table-layout: fixed; + font-family: Roboto, sans-serif; + border: 1px solid #FFFFFF12; + border-collapse: collapse; + margin-bottom: 10px; + background-color: #363640; +} + +.lightable-material-dark tfoot tr td { + border: 0; +} + +.lightable-material-dark tfoot tr:first-child td { + border-top: 1px solid #FFFFFF12; +} + +.lightable-material-dark th { + height: 56px; + padding-left: 16px; + padding-right: 16px; + color: #FFFFFF60; +} + +.lightable-material-dark td { + height: 52px; + padding-left: 16px; + padding-right: 16px; + color: #FFFFFF; + border-top: 1px solid #FFFFFF12; +} + +.lightable-material-dark.lightable-hover tbody tr:hover { + background-color: #FFFFFF12; +} + +.lightable-material-dark.lightable-striped tbody tr:nth-child(even) { + background-color: #FFFFFF12; +} + +.lightable-material-dark.lightable-striped tbody td { + border: 0; +} + +.lightable-material-dark.lightable-striped thead tr:last-child th { + border-bottom: 1px solid #FFFFFF12; +} + +.lightable-paper { + width: 100%; + margin-bottom: 10px; + color: #444; +} + +.lightable-paper tfoot tr td { + border: 0; +} + +.lightable-paper tfoot tr:first-child td { + border-top: 1px solid #00000020; +} + +.lightable-paper thead tr:last-child th { + color: #666; + vertical-align: bottom; + border-bottom: 1px solid #00000020; + line-height: 1.15em; + padding: 10px 5px; +} + +.lightable-paper td { + vertical-align: middle; + border-bottom: 1px solid #00000010; + line-height: 1.15em; + padding: 7px 5px; +} + +.lightable-paper.lightable-hover tbody tr:hover { + background-color: #F9EEC1; +} + 
+.lightable-paper.lightable-striped tbody tr:nth-child(even) { + background-color: #00000008; +} + +.lightable-paper.lightable-striped tbody td { + border: 0; +} + diff --git a/docs/articles/QC_Plots.html b/docs/articles/QC_Plots.html index d89b7fb142..588db366f2 100644 --- a/docs/articles/QC_Plots.html +++ b/docs/articles/QC_Plots.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -130,8 +136,8 @@
    +

    NOTE: This function works seamlessly for both Seurat and LIGER +objects.

    To view list of accepted values for default species names simply set list_species_names = TRUE. @@ -370,18 +375,15 @@

    Add Mitochondrial and defaults to the function.

     # Using gene name patterns
    -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "other", mito_pattern = "regexp_pattern",
    -    ribo_pattern = "regexp_pattern")
    +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "other", mito_pattern = "regexp_pattern", ribo_pattern = "regexp_pattern")
     
     # Using feature name lists
     mito_gene_list <- c("gene1", "gene2", "etc")
     ribo_gene_list <- c("gene1", "gene2", "etc")
    -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "other", mito_features = mito_gene_list,
    -    ribo_features = ribo_gene_list)
    +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "other", mito_features = mito_gene_list, ribo_features = ribo_gene_list)
     
     # Using combination of gene lists and gene name patterns
    -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "Human", mito_features = mito_gene_list,
    -    ribo_pattern = "regexp_pattern")
    +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "Human", mito_features = mito_gene_list, ribo_pattern = "regexp_pattern")
    Use of Ensembl IDs
    @@ -391,7 +393,7 @@
    Use of Ensembl IDs
     # Using gene name patterns
    -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "Human", ensembl_ids = TRUE)
    +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "Human", ensembl_ids = TRUE)
    @@ -410,10 +412,9 @@

    Cell Complexity (log10(nFeatu log10(nFeature) / log10(nCount).

     # These defaults can be run just by providing accepted species name
    -hca_bm <- Add_Cell_Complexity_Seurat(seurat_object = hca_bm)
    -

    NOTE: There is analogous function for LIGER objects (see: -Add_Cell_Complexity_LIGER) but QC plotting functions -currently only support Seurat objects.

    +hca_bm <- Add_Cell_Complexity(object = hca_bm)

    +

    NOTE: This function works seamlessly for both Seurat and LIGER +objects.

    Add Top Percent Expression QC Metric diff --git a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-13-1.png index 45e58acc56..7acd42b317 100644 Binary files a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-13-1.png and b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-14-1.png index 50dbfe7c60..bdf3a94a1b 100644 Binary files a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-15-1.png index f27142b50e..bacfe7cf69 100644 Binary files a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-15-1.png and b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-15-1.png differ diff --git a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-16-1.png index cc17f29529..5f56f5d55a 100644 Binary files a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-16-1.png and b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-17-1.png index 5d0e3b17b0..e7c6ba984b 100644 Binary files a/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-17-1.png and b/docs/articles/QC_Plots_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/articles/Read_and_Write_Functions.html b/docs/articles/Read_and_Write_Functions.html index 0d65899582..bbc179dd47 100644 --- a/docs/articles/Read_and_Write_Functions.html +++ b/docs/articles/Read_and_Write_Functions.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0

    @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -129,8 +135,8 @@ @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -130,8 +136,8 @@ @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -130,8 +136,8 @@

    @@ -270,28 +276,28 @@

    Cells Per Identity -116 +128

    @@ -308,25 +314,25 @@

    Cells Per Identity -115 +127

    @@ -340,28 +346,28 @@

    Cells Per Identity -86 +88

    @@ -375,28 +381,28 @@

    Cells Per Identity -71 +75

    @@ -410,28 +416,28 @@

    Cells Per Identity -46 +44

    @@ -445,28 +451,28 @@

    Cells Per Identity -42 +39

    @@ -480,28 +486,28 @@

    Cells Per Identity -10 +7

    @@ -515,28 +521,28 @@

    Cells Per Identity -6 +2

    @@ -550,22 +556,22 @@

    Cells Per Identity -683 +680

    @@ -642,16 +648,16 @@

    Cells Per Identity -228 +242

    @@ -665,16 +671,16 @@

    Cells Per Identity -249 +246

    @@ -688,16 +694,16 @@

    Cells Per Identity -171 +176

    @@ -711,16 +717,16 @@

    Cells Per Identity -144 +143

    @@ -734,16 +740,16 @@

    Cells Per Identity -85 +81

    @@ -757,16 +763,16 @@

    Cells Per Identity -82 +72

    @@ -780,16 +786,16 @@

    Cells Per Identity -17 +16

    @@ -803,16 +809,16 @@

    Cells Per Identity -11 +5

    @@ -826,13 +832,13 @@

    Cells Per Identity -1333 +1339

    @@ -980,16 +986,16 @@

    Change grouping variable

    @@ -997,16 +1003,16 @@

    Change grouping variable

    @@ -1024,10 +1030,10 @@

    Split within groups

    @@ -1084,52 +1090,52 @@

    Split within groups -8.547009 +7.26257

    @@ -1372,19 +1378,19 @@

    Basic Use -2206.0 +2227

    @@ -1392,19 +1398,19 @@

    Basic Use -2210.5 +2148

    @@ -1412,19 +1418,19 @@

    Basic Use -2220.0 +2214

    @@ -1432,7 +1438,7 @@

    Basic Use -2213.0 +2213

    @@ -1512,22 +1518,22 @@

    Additional Variables -2206.0 +2227

    @@ -1535,22 +1541,22 @@

    Additional Variables -2210.5 +2148

    @@ -1558,22 +1564,22 @@

    Additional Variables -2220.0 +2214

    @@ -1581,7 +1587,7 @@

    Additional Variables -2213.0 +2213

    @@ -1658,19 +1664,19 @@

    Calculate Median Absolute Deviatio sample2

    @@ -1678,19 +1684,19 @@

    Calculate Median Absolute Deviatio sample3

    @@ -1698,19 +1704,19 @@

    Calculate Median Absolute Deviatio sample4

    diff --git a/docs/articles/Statistics_files/figure-html/unnamed-chunk-22-1.png b/docs/articles/Statistics_files/figure-html/unnamed-chunk-22-1.png index 28a33e846b..d69eaeea9a 100644 Binary files a/docs/articles/Statistics_files/figure-html/unnamed-chunk-22-1.png and b/docs/articles/Statistics_files/figure-html/unnamed-chunk-22-1.png differ diff --git a/docs/articles/Update_Gene_Symbols.html b/docs/articles/Update_Gene_Symbols.html new file mode 100644 index 0000000000..65255d1562 --- /dev/null +++ b/docs/articles/Update_Gene_Symbols.html @@ -0,0 +1,626 @@ + + + + + + + +Updating Gene Symbols • scCustomize + + + + + + + + + + + + +
    +
    + + + + +
    +
    +
    + + + + +
    + +
    +

    Updating Human Gene Symbols +

    +

    The official gene symbols used in a dataset can change depending on +the reference version used in aligning that particular dataset. For +human genes the official symbols are set by HGNC.

    +

    In the absence of a more static identifier (Ensembl ID or Entrez ID +Numbers), the only way to update gene symbols is to examine the current +and past symbols for all genes in the HGNC database. However, many of +the functions that perform this task come with caveats that range from +difficulty updating to the newest HGNC data to, at worst, potentially +improperly renaming symbols.

    +
    +# Load Packages
    +library(Seurat)
    +library(scCustomize)
    +library(qs)
    +

    Load Seurat Object & Add QC Data

    +
    +# read object
    +pbmc <- pbmc3k.SeuratData::pbmc3k.final
    +pbmc <- UpdateSeuratObject(pbmc)
    +
    +

    Issues with other functions +

    +

    In order to understand how scCustomize’s +Updated_HGNC_Symbols() improves the process it is important to +be aware of the caveats of some other tools.

    +
    +

    Seurat’s UpdateSymbolList() +

    +

    The first is Seurat’s UpdateSymbolList(), which takes +an input vector of symbols and uses an active connection to HGNC to query +for updated symbols. However, there are two caveats with this function: +1) it requires the user to have an internet connection any time the +function is used, 2) it can potentially rename symbols incorrectly.

    +

    To illustrate the second issue I will use 3 gene symbols that have +been current for some time: MCM2, MCM7, CCNL1. However, let’s take a +look at some of the previous symbols for each of these genes:
    +- Previous Symbols for MCM2 are: CCNL1 & CDCL1
    +- Previous symbols for MCM7 are: MCM2
    +- Previous symbols for CCNL1 are: None

    +

    Now see what happens when we use UpdateSymbolList.

    +
    +test_symbols <- c("MCM2", "MCM7", "CCNL1")
    +
    +UpdateSymbolList(symbols = test_symbols)
    +
    ## [1] "MCM7" "MCM7" "MCM2"
    +

    As you can see the function does the following:
    +- Renames MCM2 > MCM7 because MCM2 is a previous symbol.
    +- Leaves MCM7 the same because no other gene has MCM7 as previous +symbol.
    +- Renames CCNL1 > MCM2 because CCNL1 is a previous symbol of MCM2.

    +

    The reason that this happens is because UpdateSymbolList +queries each symbol in isolation and not in the context of all of the +genes being queried.

    +
    +
    +

    HGNChelper Package +

    +

    After developing this function I was made aware of the HGNChelper +package which also aims to provide symbol updates. It solves the renaming +issue in a similar fashion to scCustomize (see below). It also provides a +solution for the requirement of internet access.

    +

    It does this by storing the HGNC dataset as package data so that it comes +bundled with the package. However, there is an issue with the way this +is implemented. First, the bundled data is from 2019 so is approaching 5 +years old. Updated data can be downloaded interactively using a package +function but this must be done in every R session where the data is +needed, requiring internet access to use current data. The authors do +provide a solution to this but it involves cloning the GitHub repo and +running source scripts which may be beyond many R users.

    +
    +
    +
    +

    Solving the Issue with scCustomize’s +Updated_HGNC_Symbols +

    +

    scCustomize now provides the function +Updated_HGNC_Symbols to attempt to solve both of these +caveats.

    +
    +

    Requirement of internet access +

    +

    Updated_HGNC_Symbols does require internet access the +first time the function is used, in order to download the most recent data from +HGNC. However, it then stores the downloaded data using the BiocFileCache +package, meaning subsequent uses don’t require any internet access. +This also significantly improves the speed of the function.

    +
    +
    +

    Inappropriate renaming +

    +

    Second, Updated_HGNC_Symbols uses the full input list and +first automatically approves any symbol that is already an approved gene +symbol so that there is not a chance of improperly updating any symbols. +It then checks the remaining symbols for any symbol updates.

    +

    Let’s run our test symbol set:

    +
    +results <- Updated_HGNC_Symbols(input_data = test_symbols)
    +
    ## Input features contained 3 gene symbols
    +##  3 were already approved symbols.
    +##  0 were updated to approved symbol.
    +##  0 were not found in HGNC dataset and remain unchanged.
    +
    -186 +167 -165 +186 -165 +172 -26.5007321 +25.2941176 -28.4839204 +25.972006 -25.3846154 +28.2245827 -25.3067485 +26.2195122
    -118 +113 -112 +114 -137 +128 -16.9838946 +18.8235294 -18.0704441 +17.573873 -17.2307692 +17.2989378 -21.0122699 +19.5121951
    -124 +121 -116 +107 -18.3016105 +18.3823529 -17.6110260 +19.751166 -19.0769231 +18.3611533 -17.7914110 +16.3109756
    -85 +84 -85 +88 -88 +84 -12.5915081 +12.9411765 -13.0168453 +13.063764 -13.0769231 +13.3535660 -13.4969325 +12.8048780
    -58 +53 -73 +68 -69 +75 -10.3953148 +11.0294118 -8.8820827 +8.242613 -11.2307692 +10.3186646 -10.5828221 +11.4329268
    -37 +41 -39 +37 40 -6.7349927 +6.4705882 -5.6661562 +6.376361 -6.0000000 +5.6145675 -6.1349693 +6.0975610
    -39 +43 -40 +33 -34 +40 -6.1493411 +5.7352941 -5.9724349 +6.687403 -6.1538462 +5.0075873 -5.2147239 +6.0975610
    12 -7 +9 -3 +4 -1.4641288 +1.0294118 -1.8376723 +1.866252 -1.0769231 +1.3657056 -0.4601227 +0.6097561
    3 -5 +3 -0 +6 -0.8784773 +0.2941176 -0.4594181 +0.466563 -0.7692308 +0.4552352 -0.0000000 +0.9146341
    -653 +643 -650 +659 -652 +656 100.0000000 -100.0000000 +100.000000 100.0000000 @@ -619,16 +625,16 @@

    Cells Per Identity -346 +358

    -351 +339 -25.9564891 +26.736370 -26.8965517 +26.0969977
    -255 +241 -17.1042761 +18.073189 -19.5402299 +18.5527329
    -231 +234 -18.6796699 +18.371919 -17.7011494 +18.0138568
    -173 +168 -12.8282071 +13.144137 -13.2567050 +12.9330254
    -127 +128 -10.8027007 +10.679612 -9.7318008 +9.8537336
    -77 +81 -6.3765941 +6.049290 -5.9003831 +6.2355658
    -73 +83 -6.1515379 +5.377147 -5.5938697 +6.3895304
    -15 +16 -1.2753188 +1.194922 -1.1494253 +1.2317167
    -3 +9 -0.8252063 +0.373413 -0.2298851 +0.6928406
    -1305 +1299 -100.0000000 +100.000000 100.0000000 @@ -962,16 +968,16 @@

    Change grouping variable

    -sample2 +sample3 -sample3 +sample1 sample4 -sample1 +sample2
    -13.32312 +11.07739 -10 +11.76471 -10.58282 +11.43293 -12.88433 +12.597201
    -11.33231 +12.13961 -12 +10.88235 -10.88957 +14.32927 -12.88433 +9.797823
    -Naive.CD4.T_Group.2 +Naive.CD4.T_Group.1 -Naive.CD4.T_Group.1 +Naive.CD4.T_Group.2 Memory.CD4.T_Group.1 @@ -1054,10 +1060,10 @@

    Split within groups -FCGR3A..Mono_Group.1 +FCGR3A..Mono_Group.2

    -FCGR3A..Mono_Group.2 +FCGR3A..Mono_Group.1 NK_Group.1 @@ -1072,10 +1078,10 @@

    Split within groups -Platelet_Group.1 +Platelet_Group.2

    -Platelet_Group.2 +Platelet_Group.1
    -4.913295 +6.19469 -18.85965 +16.115703 -13.333333 +15.767635 -22.489960 +21.951220 -26.406926 +26.923077 -1.754386 +0.5681818 -1.156069 +2.380952 -3.149606 +2.34375 -2.083333 +2.797203 -27.058823 +24.691358 -27.272727 +29.629630 0.000000 -1.369863 +1.204819 -20 +37.50 -47.058824 +31.25 0 @@ -1143,52 +1149,52 @@

    Split within groups -12.250712 +12.29050

    -14.450867 +14.45428 -8.77193 +9.090909 -9.019608 +8.713693 -1.204819 +1.626016 -2.597403 +2.136752 -1.754386 +1.7045455 -3.468208 +3.571429 -47.244095 +50.00000 -53.472222 +51.048951 -4.705882 +4.938272 -2.597403 +2.469136 -9.756098 +8.333333 -6.849315 +8.433735 -0 +6.25 -5.882353 +0.00 0 @@ -1352,19 +1358,19 @@

    Basic Use -2214.0 +2252

    -823.0 +821.0 -2.017654 +2.060924 -36.58009 +36.57870 -38.43844 +38.86638
    -816.0 +815.0 -2.005348 +2.031930 -37.98147 +36.54529 -39.96591 +38.75969
    -816.5 +815.0 -2.012259 +2.011096 -35.89979 +37.08812 -37.77841 +39.08969
    -817.5 +821.5 -1.994846 +1.951115 -37.67410 +37.37397 -39.62994 +39.33379
    819.0 @@ -1489,22 +1495,22 @@

    Additional Variables -2214.0 +2252

    -823.0 +821.0 -2.017654 +2.060924 -36.58009 +36.57870 -38.43844 +38.86638 --0.1017213 +-0.1011296
    -816.0 +815.0 -2.005348 +2.031930 -37.98147 +36.54529 -39.96591 +38.75969 --0.1369502 +-0.2372828
    -816.5 +815.0 -2.012259 +2.011096 -35.89979 +37.08812 -37.77841 +39.08969 --0.0850152 +-0.0895095
    -817.5 +821.5 -1.994846 +1.951115 -37.67410 +37.37397 -39.62994 +39.33379 --0.0392487 +-0.0212715
    819.0 @@ -1638,19 +1644,19 @@

    Calculate Median Absolute Deviatio sample1

    -760.5738 +656.0505 -201.6336 +180.8772 -0.7650783 +0.8373849 -12.29632 +11.36374 -12.14524 +11.36833
    -738.3348 +815.4300 -188.2902 +204.5988 -0.7760709 +0.7878454 -11.56004 +12.56448 -11.42002 +12.47948
    -729.4392 +699.7872 -182.3598 +194.2206 -0.7619156 +0.7821236 -12.58267 +11.90167 -12.36349 +11.87983
    -672.3591 +736.8522 -189.0315 +183.1011 -0.7728938 +0.7324823 -10.64233 +11.31652 -10.78957 +10.95798
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +input_features + +Approved_Symbol + +Not_Found_Symbol + +Updated_Symbol + +Output_Features +
    +MCM2 + +MCM2 + +NA + +NA + +MCM2 +
    +MCM7 + +MCM7 + +NA + +NA + +MCM7 +
    +CCNL1 + +CCNL1 + +NA + +NA + +CCNL1 +
    +

    As mentioned before the function is also very quick. Returning +updated symbols for 36,000 genes in ~1 second.

    +
    +# Read in full 10X reference genome feature list
    +features <- Read10X_h5("assets/Barcode_Rank_Example/sample1/outs/raw_feature_bc_matrix.h5")
    +
    +features <- rownames(features)
    +
    +# Load tictoc to give timing
    +library(tictoc)
    +
    +# Get updated symbols
    +tic()
    +results <- Updated_HGNC_Symbols(input_data = features)
    +
    ## Input features contained 36,601 gene symbols
    +##  23,360 were already approved symbols.
    +##  654 were updated to approved symbol.
    +##  12,587 were not found in HGNC dataset and remain unchanged.
    +
    +toc()
    +
    ## 0.688 sec elapsed
    +
    +
    +

    Examining the Results +

    +

    Now let’s take a look at the output from +Updated_HGNC_Symbols, which also has some detail advantages +vs other methods.

    +

    For this example I have picked a section of the results that contains +all 3 potential results.

    +
    +results[168:177, ]
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +input_features + +Approved_Symbol + +Not_Found_Symbol + +Updated_Symbol + +Output_Features +
    +168 + +NPHP4 + +NPHP4 + +NA + +NA + +NPHP4 +
    +169 + +KCNAB2 + +KCNAB2 + +NA + +NA + +KCNAB2 +
    +170 + +CHD5 + +CHD5 + +NA + +NA + +CHD5 +
    +171 + +RPL22 + +RPL22 + +NA + +NA + +RPL22 +
    +172 + +AL031847.1 + +NA + +AL031847.1 + +NA + +AL031847.1 +
    +173 + +RNF207 + +RNF207 + +NA + +NA + +RNF207 +
    +174 + +ICMT + +ICMT + +NA + +NA + +ICMT +
    +175 + +LINC00337 + +NA + +NA + +ICMT-DT + +ICMT-DT +
    +176 + +HES3 + +HES3 + +NA + +NA + +HES3 +
    +177 + +GPR153 + +GPR153 + +NA + +NA + +GPR153 +
    +

    As you can see the majority of these symbols are already updated so +the input symbol matches the output symbol.

    +

    In the case of “AL031847.1” that annotation was not found in HGNC and +therefore the symbol was left unchanged.

    +

    Finally in the case of “LINC00337” there was an updated symbol of +“ICMT-DT” so the output symbol was updated to that current symbol.

    +
    +
    + + + + + + + + + +
    + +
    +

    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + + diff --git a/docs/articles/Update_Gene_Symbols_files/kePrint-0.0.1/kePrint.js b/docs/articles/Update_Gene_Symbols_files/kePrint-0.0.1/kePrint.js new file mode 100644 index 0000000000..e6fbbfc44d --- /dev/null +++ b/docs/articles/Update_Gene_Symbols_files/kePrint-0.0.1/kePrint.js @@ -0,0 +1,8 @@ +$(document).ready(function(){ + if (typeof $('[data-toggle="tooltip"]').tooltip === 'function') { + $('[data-toggle="tooltip"]').tooltip(); + } + if ($('[data-toggle="popover"]').popover === 'function') { + $('[data-toggle="popover"]').popover(); + } +}); diff --git a/docs/articles/Update_Gene_Symbols_files/lightable-0.0.1/lightable.css b/docs/articles/Update_Gene_Symbols_files/lightable-0.0.1/lightable.css new file mode 100644 index 0000000000..3be3be9046 --- /dev/null +++ b/docs/articles/Update_Gene_Symbols_files/lightable-0.0.1/lightable.css @@ -0,0 +1,272 @@ +/*! + * lightable v0.0.1 + * Copyright 2020 Hao Zhu + * Licensed under MIT (https://github.com/haozhu233/kableExtra/blob/master/LICENSE) + */ + +.lightable-minimal { + border-collapse: separate; + border-spacing: 16px 1px; + width: 100%; + margin-bottom: 10px; +} + +.lightable-minimal td { + margin-left: 5px; + margin-right: 5px; +} + +.lightable-minimal th { + margin-left: 5px; + margin-right: 5px; +} + +.lightable-minimal thead tr:last-child th { + border-bottom: 2px solid #00000050; + empty-cells: hide; + +} + +.lightable-minimal tbody tr:first-child td { + padding-top: 0.5em; +} + +.lightable-minimal.lightable-hover tbody tr:hover { + background-color: #f5f5f5; +} + +.lightable-minimal.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-classic { + border-top: 0.16em solid #111111; + border-bottom: 0.16em solid #111111; + width: 100%; + margin-bottom: 10px; + margin: 10px 5px; +} + +.lightable-classic tfoot tr td { + border: 0; +} + +.lightable-classic tfoot tr:first-child td { + border-top: 0.14em solid #111111; +} + +.lightable-classic caption { + 
color: #222222; +} + +.lightable-classic td { + padding-left: 5px; + padding-right: 5px; + color: #222222; +} + +.lightable-classic th { + padding-left: 5px; + padding-right: 5px; + font-weight: normal; + color: #222222; +} + +.lightable-classic thead tr:last-child th { + border-bottom: 0.10em solid #111111; +} + +.lightable-classic.lightable-hover tbody tr:hover { + background-color: #F9EEC1; +} + +.lightable-classic.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-classic-2 { + border-top: 3px double #111111; + border-bottom: 3px double #111111; + width: 100%; + margin-bottom: 10px; +} + +.lightable-classic-2 tfoot tr td { + border: 0; +} + +.lightable-classic-2 tfoot tr:first-child td { + border-top: 3px double #111111; +} + +.lightable-classic-2 caption { + color: #222222; +} + +.lightable-classic-2 td { + padding-left: 5px; + padding-right: 5px; + color: #222222; +} + +.lightable-classic-2 th { + padding-left: 5px; + padding-right: 5px; + font-weight: normal; + color: #222222; +} + +.lightable-classic-2 tbody tr:last-child td { + border-bottom: 3px double #111111; +} + +.lightable-classic-2 thead tr:last-child th { + border-bottom: 1px solid #111111; +} + +.lightable-classic-2.lightable-hover tbody tr:hover { + background-color: #F9EEC1; +} + +.lightable-classic-2.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-material { + min-width: 100%; + white-space: nowrap; + table-layout: fixed; + font-family: Roboto, sans-serif; + border: 1px solid #EEE; + border-collapse: collapse; + margin-bottom: 10px; +} + +.lightable-material tfoot tr td { + border: 0; +} + +.lightable-material tfoot tr:first-child td { + border-top: 1px solid #EEE; +} + +.lightable-material th { + height: 56px; + padding-left: 16px; + padding-right: 16px; +} + +.lightable-material td { + height: 52px; + padding-left: 16px; + padding-right: 16px; + border-top: 1px solid #eeeeee; +} + 
+.lightable-material.lightable-hover tbody tr:hover { + background-color: #f5f5f5; +} + +.lightable-material.lightable-striped tbody tr:nth-child(even) { + background-color: #f5f5f5; +} + +.lightable-material.lightable-striped tbody td { + border: 0; +} + +.lightable-material.lightable-striped thead tr:last-child th { + border-bottom: 1px solid #ddd; +} + +.lightable-material-dark { + min-width: 100%; + white-space: nowrap; + table-layout: fixed; + font-family: Roboto, sans-serif; + border: 1px solid #FFFFFF12; + border-collapse: collapse; + margin-bottom: 10px; + background-color: #363640; +} + +.lightable-material-dark tfoot tr td { + border: 0; +} + +.lightable-material-dark tfoot tr:first-child td { + border-top: 1px solid #FFFFFF12; +} + +.lightable-material-dark th { + height: 56px; + padding-left: 16px; + padding-right: 16px; + color: #FFFFFF60; +} + +.lightable-material-dark td { + height: 52px; + padding-left: 16px; + padding-right: 16px; + color: #FFFFFF; + border-top: 1px solid #FFFFFF12; +} + +.lightable-material-dark.lightable-hover tbody tr:hover { + background-color: #FFFFFF12; +} + +.lightable-material-dark.lightable-striped tbody tr:nth-child(even) { + background-color: #FFFFFF12; +} + +.lightable-material-dark.lightable-striped tbody td { + border: 0; +} + +.lightable-material-dark.lightable-striped thead tr:last-child th { + border-bottom: 1px solid #FFFFFF12; +} + +.lightable-paper { + width: 100%; + margin-bottom: 10px; + color: #444; +} + +.lightable-paper tfoot tr td { + border: 0; +} + +.lightable-paper tfoot tr:first-child td { + border-top: 1px solid #00000020; +} + +.lightable-paper thead tr:last-child th { + color: #666; + vertical-align: bottom; + border-bottom: 1px solid #00000020; + line-height: 1.15em; + padding: 10px 5px; +} + +.lightable-paper td { + vertical-align: middle; + border-bottom: 1px solid #00000010; + line-height: 1.15em; + padding: 7px 5px; +} + +.lightable-paper.lightable-hover tbody tr:hover { + background-color: 
#F9EEC1; +} + +.lightable-paper.lightable-striped tbody tr:nth-child(even) { + background-color: #00000008; +} + +.lightable-paper.lightable-striped tbody td { + border: 0; +} + diff --git a/docs/articles/articles/Object_Conversion.html b/docs/articles/articles/Object_Conversion.html new file mode 100644 index 0000000000..2f5038d55a --- /dev/null +++ b/docs/articles/articles/Object_Conversion.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/docs/articles/articles/Update_Gene_Symbols.html b/docs/articles/articles/Update_Gene_Symbols.html new file mode 100644 index 0000000000..87aaec5b5e --- /dev/null +++ b/docs/articles/articles/Update_Gene_Symbols.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/docs/articles/index.html b/docs/articles/index.html index d921ce591e..f1f85e0619 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -127,6 +133,8 @@

    All vignettes

    Misc Functions
    +
    Object Format Conversion
    +
    Plotting #2: QC Plots
    Read & Write Data Functions
    @@ -135,6 +143,8 @@

    All vignettes

    Statistics Functions
    +
    Updating Gene Symbols
    +
    diff --git a/docs/authors.html b/docs/authors.html index df33730e9c..2db28bb309 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -130,15 +136,15 @@

    Citation

    -

    Marsh S (2023). +

    Marsh S (2024). scCustomize: Custom Visualizations & Functions for Streamlined Analyses of Single Cell Sequencing. -R package version 2.0.1, https://samuel-marsh.github.io/scCustomize/, https://doi.org/10.5281/zenodo.5706431, https://github.com/samuel-marsh/scCustomize. +R package version 2.1.0, https://samuel-marsh.github.io/scCustomize/, https://doi.org/10.5281/zenodo.5706431, https://github.com/samuel-marsh/scCustomize.

    @Manual{,
       title = {scCustomize: Custom Visualizations & Functions for Streamlined Analyses of Single Cell Sequencing},
       author = {Samuel Marsh},
    -  year = {2023},
    -  note = {R package version 2.0.1, https://samuel-marsh.github.io/scCustomize/, https://doi.org/10.5281/zenodo.5706431},
    +  year = {2024},
    +  note = {R package version 2.1.0, https://samuel-marsh.github.io/scCustomize/, https://doi.org/10.5281/zenodo.5706431},
       url = {https://github.com/samuel-marsh/scCustomize},
     }
    diff --git a/docs/index.html b/docs/index.html index 8b223bb414..fc2ed838db 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -77,12 +77,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -182,8 +188,8 @@

    Goals/About scCustomize
  • -

    Example of adding new parameters: Adding the percentage of counts aligning to mitochondrial (and/or ribosomal) genes is common early step in analysis. scCustomize provides Add_Mito_Ribo_Seurat() (and LIGER version) to simplify this. Basic use requires only one line of code and two parameters.

    -
    Add_Mito_Ribo_Seurat(seurat_object = obj_name, species = "Human") 
    +

    Example of adding new parameters: Adding the percentage of counts aligning to mitochondrial (and/or ribosomal) genes is a common early step in analysis. scCustomize provides Add_Mito_Ribo() to simplify this. Basic use requires only one line of code and two parameters.

    +
    Add_Mito_Ribo(object = obj_name, species = "Human") 
    • Function already knows the defaults for Human, Mouse, Rat, Zebrafish, Drosophila, Marmoset, and Rhesus Macaque (submit a PR if you would like more species added!).
    @@ -223,7 +229,7 @@

    Goals/About scCustomizescCustomize provides checks/warnings, using the cli/rlang packages, wrapped inside its functions to help and provide more informative error/warning messages. Two examples include:

  • -Add_Mito_Ribo_Seurat() will warn you if no mitochondrial or ribosomal features are found and won’t create new metadata column.
    +Add_Mito_Ribo() will warn you if no mitochondrial or ribosomal features are found and won’t create new metadata column.
  • Rename_Clusters() will check and make sure the right number of unique new names are provided and provide one of two error messages if not before attempting to rename the object idents.
  • diff --git a/docs/news/index.html b/docs/news/index.html index 2d41ecbcf0..921251a26a 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -104,6 +110,62 @@

    Changelog

    Source: NEWS.md +
    + +
    +

    Added

    +
    • Added title_prefix parameter to Iterate_DimPlot_bySample to unify with Meta_Highlight_Plot.
    • +
    • Added function Split_Vector to split vector in chunks of predetermined sizes.
    • +
    • Added new function Updated_HGNC_Symbols to update human gene symbols. After first use does not require internet connection.
    • +
    • Added command logging to QC metric-related commands using Seurat::LogSeuratCommand().
    • +
    • Added parameter plot_legend to plotFactors_scCustom to allow for removal of legend in factor loading plots.
    • +
    • Added new functionality to Iterate_FeaturePlot_scCustom to allow for plotting multiple plots per page when saving to single PDF document (see new parameters: features_per_page and landscape).
    • +
    • Added LIGER_Features utility function for LIGER objects (analogous to Seurat::Features()).
    • +
    • Added new generic as.LIGER() as enhanced method for conversion of Seurat objects or lists of Seurat objects to single LIGER object.
    • +
    • Added new generic as.anndata() to support conversion of Seurat and LIGER objects to anndata format.
    • +
    • Added function Convert_Assay() to perform easy conversions of Seurat assays between V3/4 (Assay) and V5 (Assay5) formats.
    • +
    • Added parameter assay_type to allow manual control of assay type when creating Seurat object from LIGER using Liger_to_Seurat.
    • +
    • Added param grid_color to Clustered_DotPlot to control the color of grid lines (default is no grid lines).
    • +
    • Added ability to split identities in Clustered_DotPlot by additional variable and maintain expression information.
    • +
    • Added Split_Layers() function for V5 objects.
    • +
    • Added Add_Alt_Feature_ID to add alternative feature ids to an Assay5 meta.data slot.
    • +
    +
    +

    Changed

    +
    • +BREAKING CHANGES Several methods have been moved to S3 generics to function for both Seurat and LIGER objects using single function name and therefore some function names have changed. Old functions will give deprecation warning and direct users to new functions. +
    • +
    • +BREAKING CHANGES Meta_Present_LIGER has been deprecated and wrapped inside of Meta_Present.
    • +
    • +SOFT-DEPRECATION The function Liger_to_Seurat() has been soft-deprecated. It is replaced by new extension of Seurat generic as.Seurat with added support for Liger objects, using all the same parameters as Liger_to_Seurat. Full deprecation will occur in v2.2.0.
    • +
    • +SOFT-DEPRECATION The function Gene_Present has been soft-deprecated. It is replaced by Feature_Present which functions identically but better reflects that features present may also be proteins. Full deprecation will occur in v2.2.0.
    • +
    • Parameter legend in Iterate_DimPlot_bySample has been inverted to no_legend to match Meta_Highlight_Plot parameters.
    • +
    • Updated Liger_to_Seurat() for compatibility with Seurat V5 structure (#153). Now part of as.Seurat.
    • +
    • Default color palette change from v2.0.0 when number of groups is between 3-8 has been reverted. Polychrome palette is default when number of groups is between 3-36.
    • +
    +
    +

    Fixes

    +
    • General typo and style fixes.
    • +
    • Fixed point size check in some QC functions to avoid unnecessary error message.
    • +
    • Fixed redundant warning messages in Stacked_VlnPlot due to rasterization defaults.
    • +
    • Fixed issue setting alpha_na_exp appropriately in FeaturePlot_scCustom.
    • +
    • Fixed issue setting alpha_exp between Seurat versions 4 and 5 (#144).
    • +
    • Fix duplicate legends in DimPlot_scCustom when levels are missing from a split plot.
    • +
    • Fixed bug in FeaturePlot_scCustom that could cause plots to be mislabeled when using split.by and depending on the order of features provided (#150).
    • +
    • Fixes issue with automatic point size calculation for Seurat Objects.
    • +
    • Added check for presence of dimensionality reduction in DimPlot_LIGER (#153).
    • +
    • Fixed bug in Add_Mito_Ribo_LIGER that caused it to return value of 0 for all cells (Now part of renamed Add_Mito_Ribo S3 generic).
    • +
    • Fixed legend display in Clustered_DotPlot to display percentage instead of proportion to match legend text.
    • +
    • Fixed Percent_Expressing error when group_by = "ident".
    • +
    • Fixed error that caused features in non-default assays to be returned as not found when attempting to plot.
    • +
    +
    @@ -329,7 +391,7 @@

    Added

    • Added mito_name parameter to QC_Plots_Mito to allow for custom specification of meta data column name that contains mitochondrial information.
    • Added QC_Plots_Combined_Vln() function to return patchwork layout of 3 QC plots.
    • -
    • Added Rhesus Macaque (macaca mulatta) to the accepted species list for Add_Mito_Ribo_Seurat() and Add_Mito_Ribo_LIGER() (#28).
    • +
    • Added Rhesus Macaque (macaca mulatta) to the accepted species list for Add_Mito_Ribo_Seurat() and Add_Mito_Ribo_LIGER() (#28).
    • Added alpha_exp and alpha_na_exp parameters to FeaturePlot_scCustom to allow for control of color scale transparency (#21).
    • *_Highlight_Plot functions can now plot multiple variables simultaneously using either one color for all variables or one color per variable (#34).
    • @@ -344,7 +406,7 @@

      Added#60).
    • Added Add_Sample_Meta function for addition of sample-level meta data to cell-level @meta.data slot of Seurat objects.
    • -
    • Added a matrix check in Read_GEO_Delim to check for issues with imported matrices. Check is modified version of SeuratObject::CheckMatrix called CheckMatrix_scCustom(). Will warn if infinite, logical, non-integer (whole), or NA/NaN values are detected in input matrix.
    • +
    • Added a matrix check in Read_GEO_Delim to check for issues with imported matrices. Check is modified version of SeuratObject::CheckMatrix called CheckMatrix_scCustom(). Will warn if infinite, logical, non-integer (whole), or NA/NaN values are detected in input matrix.
    • QC_Plot_UMIvsGene will now return filtered correlation value that takes into account meta_gradient_name if provided in addition to nFeature_RNA and nCount_RNA.
    • Added new function Variable_Features_ALL_LIGER which allows for detection/selection of variable genes from entire LIGER object instead of iterating by dataset.
    • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 71f235eb40..a9f7334034 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -12,11 +12,13 @@ articles: LIGER_Functions: LIGER_Functions.html Markers_and_Cluster_Annotation: Markers_and_Cluster_Annotation.html Misc_Functions: Misc_Functions.html + Object_Conversion: Object_Conversion.html QC_Plots: QC_Plots.html Read_and_Write_Functions: Read_and_Write_Functions.html Sequencing_QC_Plots: Sequencing_QC_Plots.html Statistics: Statistics.html -last_built: 2023-11-20T21:27Z + Update_Gene_Symbols: Update_Gene_Symbols.html +last_built: 2024-02-16T19:32Z urls: reference: https://samuel-marsh.github.io/scCustomize/reference article: https://samuel-marsh.github.io/scCustomize/articles diff --git a/docs/reference/Add_Alt_Feature_ID.html b/docs/reference/Add_Alt_Feature_ID.html new file mode 100644 index 0000000000..73f115cc3f --- /dev/null +++ b/docs/reference/Add_Alt_Feature_ID.html @@ -0,0 +1,195 @@ + +Add Alternative Feature IDs — Add_Alt_Feature_ID • scCustomize + + +
      +
      + + + +
      +
      + + +
      +

      Add alternative feature ids to the assay level meta.data slot in Assay5 compatible object (Seurat V5.0.0 or greater)

      +
      + +
      +
      Add_Alt_Feature_ID(
      +  seurat_object,
      +  features_tsv_file = NULL,
      +  hdf5_file = NULL,
      +  assay = NULL
      +)
      +
      + +
      +

      Arguments

      +
      seurat_object
      +

      object name.

      + + +
      features_tsv_file
      +

      output file from Cell Ranger used for creation of Seurat object. +(Either provide this or hdf5_file)

      + + +
      hdf5_file
      +

      output file from Cell Ranger used for creation of Seurat object. +(Either provide this or features_tsv_file)

      + + +
      assay
      +

      name of assay(s) to add the alternative features to. Can specify "all" +to add to all assays.

      + +
      +
      +

      Value

      + + +

      Seurat Object with new entries in the obj@assays$ASSAY@meta.data slot.

      +
      + +
      +

      Examples

      +
      if (FALSE) {
      +# Using features.tsv.gz file
      +   # Either file from filtered or raw outputs can be used as they are identical.
      +obj <- Add_Alt_Feature_ID(seurat_object = obj,
      +features_tsv_file = "sample01/outs/filtered_feature_bc_matrix/features.tsv.gz", assay = "RNA")
      +
      +# Using hdf5 file
      +   # Either filtered_feature_bc or raw_feature_bc can be used as the features slot is identical
      +   # Though it is faster to load filtered_feature_bc file due to droplet filtering
      +obj <- Add_Alt_Feature_ID(seurat_object = obj,
      +hdf5_file = "sample01/outs/filtered_feature_bc_matrix.h5", assay = "RNA")
      +}
      +
      +
      +
      +
      + +
      + + +
      + +
      +

      Site built with pkgdown 2.0.7.

      +
      + +
      + + + + + + + + diff --git a/docs/reference/Add_CellBender_Diff.html b/docs/reference/Add_CellBender_Diff.html index 9b68cb0332..114bad0fea 100644 --- a/docs/reference/Add_CellBender_Diff.html +++ b/docs/reference/Add_CellBender_Diff.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0

    @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Add_Cell_Complexity.html b/docs/reference/Add_Cell_Complexity.html new file mode 100644 index 0000000000..017188ca23 --- /dev/null +++ b/docs/reference/Add_Cell_Complexity.html @@ -0,0 +1,206 @@ + +Add Cell Complexity — Add_Cell_Complexity • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Add measure of cell complexity/novelty (log10GenesPerUMI) for data QC.

    +
    + +
    +
    Add_Cell_Complexity(object, ...)
    +
    +# S3 method for liger
    +Add_Cell_Complexity(
    +  object,
    +  meta_col_name = "log10GenesPerUMI",
    +  overwrite = FALSE,
    +  ...
    +)
    +
    +# S3 method for Seurat
    +Add_Cell_Complexity(
    +  object,
    +  meta_col_name = "log10GenesPerUMI",
    +  assay = "RNA",
    +  overwrite = FALSE,
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    object
    +

    Seurat or LIGER object

    + + +
    ...
    +

    Arguments passed to other methods

    + + +
    meta_col_name
    +

    name to use for new meta data column. Default is "log10GenesPerUMI".

    + + +
    overwrite
    +

    Logical. Whether to overwrite an existing meta.data column. Default is FALSE meaning that +function will abort if column with name provided to meta_col_name is present in meta.data slot.

    + + +
    assay
    +

    assay to use in calculation. Default is "RNA". Note This should only be changed if +storing corrected and uncorrected assays in same object (e.g. outputs of both Cell Ranger and Cell Bender).

    + +
    +
    +

    Value

    + + +

    An object of the same class as object with columns added to object meta data.

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Liger
    +liger_object <- Add_Cell_Complexity(object = liger_object)
    +}
    +
    +# Seurat
    +library(Seurat)
    +pbmc_small <- Add_Cell_Complexity(object = pbmc_small)
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Add_Cell_QC_Metrics.html b/docs/reference/Add_Cell_QC_Metrics.html index 17475dc3ac..166f40e12d 100644 --- a/docs/reference/Add_Cell_QC_Metrics.html +++ b/docs/reference/Add_Cell_QC_Metrics.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Add_Mito_Ribo.html b/docs/reference/Add_Mito_Ribo.html new file mode 100644 index 0000000000..7762012235 --- /dev/null +++ b/docs/reference/Add_Mito_Ribo.html @@ -0,0 +1,275 @@ + +Add Mito and Ribo percentages — Add_Mito_Ribo • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Add Mito, Ribo, & Mito+Ribo percentages to meta.data slot of Seurat Object or +cell.data slot of Liger object

    +
    + +
    +
    Add_Mito_Ribo(object, ...)
    +
    +# S3 method for liger
    +Add_Mito_Ribo(
    +  object,
    +  species,
    +  mito_name = "percent_mito",
    +  ribo_name = "percent_ribo",
    +  mito_ribo_name = "percent_mito_ribo",
    +  mito_pattern = NULL,
    +  ribo_pattern = NULL,
    +  mito_features = NULL,
    +  ribo_features = NULL,
    +  ensembl_ids = FALSE,
    +  overwrite = FALSE,
    +  list_species_names = FALSE,
    +  ...
    +)
    +
    +# S3 method for Seurat
    +Add_Mito_Ribo(
    +  object,
    +  species,
    +  mito_name = "percent_mito",
    +  ribo_name = "percent_ribo",
    +  mito_ribo_name = "percent_mito_ribo",
    +  mito_pattern = NULL,
    +  ribo_pattern = NULL,
    +  mito_features = NULL,
    +  ribo_features = NULL,
    +  ensembl_ids = FALSE,
    +  assay = NULL,
    +  overwrite = FALSE,
    +  list_species_names = FALSE,
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    object
    +

    Seurat or LIGER object

    + + +
    ...
    +

    Arguments passed to other methods

    + + +
    species
    +

    Species of origin for given Seurat Object. If mouse, human, marmoset, zebrafish, rat, +drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically +generate mito_pattern and ribo_pattern values.

    + + +
    mito_name
    +

    name to use for the new meta.data column containing percent mitochondrial counts. +Default is "percent_mito".

    + + +
    ribo_name
    +

    name to use for the new meta.data column containing percent ribosomal counts. +Default is "percent_ribo".

    + + +
    mito_ribo_name
    +

    name to use for the new meta.data column containing percent +mitochondrial+ribosomal counts. Default is "percent_mito_ribo".

    + + +
    mito_pattern
    +

    A regex pattern to match features against for mitochondrial genes (will set automatically if +species is mouse or human; marmoset features list saved separately).

    + + +
    ribo_pattern
    +

    A regex pattern to match features against for ribosomal genes +(will set automatically if species is mouse, human, or marmoset).

    + + +
    mito_features
    +

    A list of mitochondrial gene names to be used instead of using regex pattern. +Will override regex pattern if both are present (including default saved regex patterns).

    + + +
    ribo_features
    +

    A list of ribosomal gene names to be used instead of using regex pattern. +Will override regex pattern if both are present (including default saved regex patterns).

    + + +
    ensembl_ids
    +

    logical, whether feature names in the object are gene names or +ensembl IDs (default is FALSE; set TRUE if feature names are ensembl IDs).

    + + +
    overwrite
    +

    Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that +function will abort if a column with any one of the names provided to mito_name, ribo_name, or +mito_ribo_name is present in meta.data slot.

    + + +
    list_species_names
    +

    returns list of all accepted values to use for default species names which +contain internal regex/feature lists (human, mouse, marmoset, zebrafish, rat, drosophila, and +rhesus macaque). Default is FALSE.

    + + +
    assay
    +

    Assay to use (default is the current object default assay).

    + +
    +
    +

    Value

    + + +

    An object of the same class as object with columns added to object meta data.

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Liger
    +liger_object <- Add_Mito_Ribo(object = liger_object, species = "human")
    +}
    +
    +if (FALSE) {
    +# Seurat
    +seurat_object <- Add_Mito_Ribo(object = seurat_object, species = "human")
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Add_Pct_Diff.html b/docs/reference/Add_Pct_Diff.html index 8e5bc55727..1234a15ce7 100644 --- a/docs/reference/Add_Pct_Diff.html +++ b/docs/reference/Add_Pct_Diff.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Add_Sample_Meta.html b/docs/reference/Add_Sample_Meta.html index fd0b329927..09237ffbf9 100644 --- a/docs/reference/Add_Sample_Meta.html +++ b/docs/reference/Add_Sample_Meta.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Add_Top_Gene_Pct_Seurat.html b/docs/reference/Add_Top_Gene_Pct_Seurat.html index 80717eb52c..af1cbcc634 100644 --- a/docs/reference/Add_Top_Gene_Pct_Seurat.html +++ b/docs/reference/Add_Top_Gene_Pct_Seurat.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Barcode_Plot.html b/docs/reference/Barcode_Plot.html index 1c3b152dbc..682ba03562 100644 --- a/docs/reference/Barcode_Plot.html +++ b/docs/reference/Barcode_Plot.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Blank_Theme-1.png b/docs/reference/Blank_Theme-1.png index 7967cc21ba..7901ae927c 100644 Binary files a/docs/reference/Blank_Theme-1.png and b/docs/reference/Blank_Theme-1.png differ diff --git a/docs/reference/Blank_Theme.html b/docs/reference/Blank_Theme.html index 8aa315537c..36d5374e53 100644 --- a/docs/reference/Blank_Theme.html +++ b/docs/reference/Blank_Theme.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Case_Check.html b/docs/reference/Case_Check.html index 89f7c252a0..e2a97b61aa 100644 --- a/docs/reference/Case_Check.html +++ b/docs/reference/Case_Check.html @@ -20,7 +20,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -61,12 +61,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/CellBender_Diff_Plot.html b/docs/reference/CellBender_Diff_Plot.html index d5ef495fa5..f4607ad751 100644 --- a/docs/reference/CellBender_Diff_Plot.html +++ b/docs/reference/CellBender_Diff_Plot.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -200,7 +206,7 @@

    Arguments

    max.overlaps
    -

    passed to geom_text_repel, exclude text labels that +

    passed to geom_text_repel, exclude text labels that overlap too many things. Defaults to 100.

    @@ -213,19 +219,19 @@

    Arguments

    label_size
    -

    text size for feature labels (passed to geom_text_repel).

    +

    text size for feature labels (passed to geom_text_repel).

    bg.color
    -

    color to use for shadow/outline of text labels (passed to geom_text_repel) (Default is white).

    +

    color to use for shadow/outline of text labels (passed to geom_text_repel) (Default is white).

    bg.r
    -

    radius to use for shadow/outline of text labels (passed to geom_text_repel) (Default is 0.15).

    +

    radius to use for shadow/outline of text labels (passed to geom_text_repel) (Default is 0.15).

    ...
    -

    Extra parameters passed to geom_text_repel through +

    Extra parameters passed to geom_text_repel through LabelPoints.

    diff --git a/docs/reference/CellBender_Feature_Diff.html b/docs/reference/CellBender_Feature_Diff.html index 7dc98ba670..72840d2ba9 100644 --- a/docs/reference/CellBender_Feature_Diff.html +++ b/docs/reference/CellBender_Feature_Diff.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Cell_Highlight_Plot.html b/docs/reference/Cell_Highlight_Plot.html index 224ecf178d..740a4a4800 100644 --- a/docs/reference/Cell_Highlight_Plot.html +++ b/docs/reference/Cell_Highlight_Plot.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -204,8 +210,8 @@

    Examples

    library(Seurat)
     
     # Creating example non-overlapping vectors of cells
    -MS4A1 <- WhichCells(object = pbmc_small, expression = MS4A1 > 4)
    -GZMB <- WhichCells(object = pbmc_small, expression = GZMB > 4)
    +MS4A1 <- WhichCells(object = pbmc_small, expression = MS4A1 > 4)
    +GZMB <- WhichCells(object = pbmc_small, expression = GZMB > 4)
     
     # Format as named list
     cells <- list("MS4A1" = MS4A1,
    diff --git a/docs/reference/Change_Delim_All.html b/docs/reference/Change_Delim_All.html
    index 1d2d76cb18..0c9897165c 100644
    --- a/docs/reference/Change_Delim_All.html
    +++ b/docs/reference/Change_Delim_All.html
    @@ -17,7 +17,7 @@
           
           
             scCustomize
    -        2.0.1
    +        2.1.0
           
         
    @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Change_Delim_Prefix.html b/docs/reference/Change_Delim_Prefix.html index 3bfdd0ae71..a19025258e 100644 --- a/docs/reference/Change_Delim_Prefix.html +++ b/docs/reference/Change_Delim_Prefix.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Change_Delim_Suffix.html b/docs/reference/Change_Delim_Suffix.html index 2bb6ed6a4a..5e3457427a 100644 --- a/docs/reference/Change_Delim_Suffix.html +++ b/docs/reference/Change_Delim_Suffix.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/CheckMatrix_scCustom.html b/docs/reference/CheckMatrix_scCustom.html index d701f6a619..3226b50550 100644 --- a/docs/reference/CheckMatrix_scCustom.html +++ b/docs/reference/CheckMatrix_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -142,7 +148,7 @@

    Value

    References

    -

    Re-implementing CheckMatrix only for sparse matrices with modified warning messages. Original function from SeuratObject https://github.com/mojaveazure/seurat-object/blob/9c0eda946e162d8595696e5280a6ecda6284db39/R/utils.R#L625-L650 (License: MIT).

    +

    Re-implementing CheckMatrix only for sparse matrices with modified warning messages. Original function from SeuratObject https://github.com/satijalab/seurat-object/blob/9c0eda946e162d8595696e5280a6ecda6284db39/R/utils.R#L625-L650 (License: MIT).

    diff --git a/docs/reference/Cluster_Highlight_Plot.html b/docs/reference/Cluster_Highlight_Plot.html index 44d4a68609..47f7f217e4 100644 --- a/docs/reference/Cluster_Highlight_Plot.html +++ b/docs/reference/Cluster_Highlight_Plot.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Cluster_Stats_All_Samples.html b/docs/reference/Cluster_Stats_All_Samples.html index 8844ee97b2..deb90459e7 100644 --- a/docs/reference/Cluster_Stats_All_Samples.html +++ b/docs/reference/Cluster_Stats_All_Samples.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Clustered_DotPlot-1.png b/docs/reference/Clustered_DotPlot-1.png index 2a18fdec99..e850ddaa62 100644 Binary files a/docs/reference/Clustered_DotPlot-1.png and b/docs/reference/Clustered_DotPlot-1.png differ diff --git a/docs/reference/Clustered_DotPlot-3.png b/docs/reference/Clustered_DotPlot-3.png index 2a18fdec99..e850ddaa62 100644 Binary files a/docs/reference/Clustered_DotPlot-3.png and b/docs/reference/Clustered_DotPlot-3.png differ diff --git a/docs/reference/Clustered_DotPlot.html b/docs/reference/Clustered_DotPlot.html index c06c1fe126..e44517bd27 100644 --- a/docs/reference/Clustered_DotPlot.html +++ b/docs/reference/Clustered_DotPlot.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -113,21 +119,23 @@

    Clustered DotPlot

    Clustered_DotPlot(
       seurat_object,
       features,
    +  split.by = NULL,
       colors_use_exp = viridis_plasma_dark_high,
       exp_color_min = -2,
       exp_color_middle = NULL,
       exp_color_max = 2,
    +  exp_value_type = "scaled",
       print_exp_quantiles = FALSE,
       colors_use_idents = NULL,
       x_lab_rotate = TRUE,
    +  plot_padding = NULL,
       flip = FALSE,
       k = 1,
       feature_km_repeats = 1000,
       ident_km_repeats = 1000,
    -  row_km_repeats = deprecated(),
    -  column_km_repeats = deprecated(),
       row_label_size = 8,
       row_label_fontface = "plain",
    +  grid_color = NULL,
       cluster_feature = TRUE,
       cluster_ident = TRUE,
       column_label_size = 8,
    @@ -156,6 +164,10 @@ 

    Arguments

    Features to plot.

    +
    split.by
    +

    Variable in @meta.data to split the identities plotted by.

    + +
    colors_use_exp

    Color palette to use for plotting expression scale. Default is viridis::plasma(n = 20, direction = -1).

    @@ -175,6 +187,11 @@

    Arguments

    Default is 2.

    +
    exp_value_type
    +

    Whether to plot average normalized expression or +scaled average normalized expression. Only valid when split.by is provided.

    + +
    print_exp_quantiles

    Whether to print the quantiles of expression data in addition to plots. Default is FALSE. NOTE: These values will be altered by choices of exp_color_min and exp_color_max @@ -192,6 +209,14 @@

    Arguments

    If set FALSE rotation is set to 0 degrees. Users can also supply custom angle for text rotation.

    +
    plot_padding
    +

    if plot needs extra white space padding so no plot or labels are cut off. +The parameter accepts TRUE or numeric vector of length 4. If TRUE padding will be set to +c(2, 10, 0, 0) (bottom, left, top, right). Can also be customized further with numeric +vector of length 4 specifying the amount of padding in millimeters. +Default is NULL, no padding.

    + +
    flip

    logical, whether to flip the axes of final plot. Default is FALSE; rows = features and columns = idents.

    @@ -215,14 +240,6 @@

    Arguments

    feature_km_repeats. Default is 1000.

    -
    row_km_repeats
    -

    [Deprecated] soft-deprecated. See feature_km_repeats

    - - -
    column_km_repeats
    -

    [Deprecated] soft-deprecated. See ident_km_repeats

    - -
    row_label_size

    Size of the feature labels. Provided to row_names_gp in Heatmap call.

    @@ -231,6 +248,10 @@

    Arguments

    Fontface to use for row labels. Provided to row_names_gp in Heatmap call.

    +
    grid_color
    +

    color to use for heatmap grid. Default is NULL which "removes" grid by using NA color.

    + +
    cluster_feature

    logical, whether to cluster and reorder feature axis. Default is TRUE.

    diff --git a/docs/reference/ColorBlind_Pal.html b/docs/reference/ColorBlind_Pal.html index 86355ddf3b..977cd4d030 100644 --- a/docs/reference/ColorBlind_Pal.html +++ b/docs/reference/ColorBlind_Pal.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Convert_Assay.html b/docs/reference/Convert_Assay.html new file mode 100644 index 0000000000..676ee434f3 --- /dev/null +++ b/docs/reference/Convert_Assay.html @@ -0,0 +1,176 @@ + +Convert between Seurat Assay types — Convert_Assay • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Will convert assays within a Seurat object between "Assay" and "Assay5" types.

    +
    + +
    +
    Convert_Assay(seurat_object, assay = NULL, convert_to)
    +
    + +
    +

    Arguments

    +
    seurat_object
    +

    Seurat object name.

    + + +
    assay
    +

    name(s) of assays to convert. Default is NULL and will check with users +which assays they want to convert.

    + + +
    convert_to
    +

    value of what assay type to convert current assays to. +

    • Accepted values for V3/4 are: "Assay", "assay", "V3", or "v3".

    • +
    • Accepted values for V5 are: "Assay5", "assay5", "V5", or "v5".

    • +
    + +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Convert to V3/4 assay
    +obj <- Convert_Assay(seurat_object = obj, convert_to = "V3")
    +
    +# Convert to V5 assay
    +obj <- Convert_Assay(seurat_object = obj, convert_to = "V5")
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Copy_From_GCP.html b/docs/reference/Copy_From_GCP.html index f07739f045..b44dd6a373 100644 --- a/docs/reference/Copy_From_GCP.html +++ b/docs/reference/Copy_From_GCP.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Copy_To_GCP.html b/docs/reference/Copy_To_GCP.html index 40c1e95c54..a164180168 100644 --- a/docs/reference/Copy_To_GCP.html +++ b/docs/reference/Copy_To_GCP.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Create_10X_H5.html b/docs/reference/Create_10X_H5.html index 632c7f9ab2..928b5519bd 100644 --- a/docs/reference/Create_10X_H5.html +++ b/docs/reference/Create_10X_H5.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Create_CellBender_Merged_Seurat.html b/docs/reference/Create_CellBender_Merged_Seurat.html index 13a8cda3da..6a72d60369 100644 --- a/docs/reference/Create_CellBender_Merged_Seurat.html +++ b/docs/reference/Create_CellBender_Merged_Seurat.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -137,17 +143,17 @@

    Arguments

    min_cells
    -

    value to supply to min.cells parameter of CreateSeuratObject. +

    value to supply to min.cells parameter of CreateSeuratObject. Default is 5.

    min_features
    -

    value to supply to min.features parameter of CreateSeuratObject. +

    value to supply to min.features parameter of CreateSeuratObject. Default is 200.

    ...
    -

    Extra parameters passed to CreateSeuratObject.

    +

    Extra parameters passed to CreateSeuratObject.

    diff --git a/docs/reference/Create_Cluster_Annotation_File.html b/docs/reference/Create_Cluster_Annotation_File.html index 85d370b095..82daf08a55 100644 --- a/docs/reference/Create_Cluster_Annotation_File.html +++ b/docs/reference/Create_Cluster_Annotation_File.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0
    @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Dark2_Pal.html b/docs/reference/Dark2_Pal.html index 6b1ee2a307..ffc3061010 100644 --- a/docs/reference/Dark2_Pal.html +++ b/docs/reference/Dark2_Pal.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/DimPlot_All_Samples.html b/docs/reference/DimPlot_All_Samples.html index f7bb4d7d9d..c0077047aa 100644 --- a/docs/reference/DimPlot_All_Samples.html +++ b/docs/reference/DimPlot_All_Samples.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/DimPlot_LIGER.html b/docs/reference/DimPlot_LIGER.html index 5f4339152d..09c2573146 100644 --- a/docs/reference/DimPlot_LIGER.html +++ b/docs/reference/DimPlot_LIGER.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/DimPlot_scCustom-1.png b/docs/reference/DimPlot_scCustom-1.png index d62e83751f..68ac4ca57e 100644 Binary files a/docs/reference/DimPlot_scCustom-1.png and b/docs/reference/DimPlot_scCustom-1.png differ diff --git a/docs/reference/DimPlot_scCustom.html b/docs/reference/DimPlot_scCustom.html index 4008cabb6c..b00a40c09e 100644 --- a/docs/reference/DimPlot_scCustom.html +++ b/docs/reference/DimPlot_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/DiscretePalette_scCustomize.html b/docs/reference/DiscretePalette_scCustomize.html index 0214bca090..bcf2d0cfa5 100644 --- a/docs/reference/DiscretePalette_scCustomize.html +++ b/docs/reference/DiscretePalette_scCustomize.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/DotPlot_scCustom.html b/docs/reference/DotPlot_scCustom.html index 0da74a1c80..4ee6ab501e 100644 --- a/docs/reference/DotPlot_scCustom.html +++ b/docs/reference/DotPlot_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Extract_Modality.html b/docs/reference/Extract_Modality.html index b84dd1db6c..8705066694 100644 --- a/docs/reference/Extract_Modality.html +++ b/docs/reference/Extract_Modality.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Extract_Sample_Meta.html b/docs/reference/Extract_Sample_Meta.html index 411a75feae..ae98610e0f 100644 --- a/docs/reference/Extract_Sample_Meta.html +++ b/docs/reference/Extract_Sample_Meta.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Extract_Top_Markers.html b/docs/reference/Extract_Top_Markers.html index 2c50b227f4..5e02425cab 100644 --- a/docs/reference/Extract_Top_Markers.html +++ b/docs/reference/Extract_Top_Markers.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/FeaturePlot_DualAssay.html b/docs/reference/FeaturePlot_DualAssay.html index d1bdbe82f4..610d2ae89c 100644 --- a/docs/reference/FeaturePlot_DualAssay.html +++ b/docs/reference/FeaturePlot_DualAssay.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -126,7 +132,7 @@

    Customize FeaturePlot of two assays

    na_cutoff = 1e-09, raster = NULL, raster.dpi = c(512, 512), - slot = deprecated(), + slot = deprecated(), layer = "data", num_columns = NULL, alpha_exp = NULL, diff --git a/docs/reference/FeaturePlot_scCustom.html b/docs/reference/FeaturePlot_scCustom.html index b503ddb3db..14ed4265e1 100644 --- a/docs/reference/FeaturePlot_scCustom.html +++ b/docs/reference/FeaturePlot_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -126,7 +132,7 @@

    Customize FeaturePlot

    aspect_ratio = NULL, figure_plot = FALSE, num_columns = NULL, - slot = deprecated(), + slot = deprecated(), layer = "data", alpha_exp = NULL, alpha_na_exp = NULL, diff --git a/docs/reference/FeatureScatter_scCustom-1.png b/docs/reference/FeatureScatter_scCustom-1.png index 1d83a1b4b3..e7678e4c08 100644 Binary files a/docs/reference/FeatureScatter_scCustom-1.png and b/docs/reference/FeatureScatter_scCustom-1.png differ diff --git a/docs/reference/FeatureScatter_scCustom.html b/docs/reference/FeatureScatter_scCustom.html index 8406726c5f..207f7ad6ea 100644 --- a/docs/reference/FeatureScatter_scCustom.html +++ b/docs/reference/FeatureScatter_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Feature_Present.html b/docs/reference/Feature_Present.html new file mode 100644 index 0000000000..74fc03143e --- /dev/null +++ b/docs/reference/Feature_Present.html @@ -0,0 +1,214 @@ + +Check if genes/features are present — Feature_Present • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Check if genes are present in object and return vector of found genes. Return warning messages for +genes not found.

    +
    + +
    +
    Feature_Present(
    +  data,
    +  features,
    +  case_check = TRUE,
    +  case_check_msg = TRUE,
    +  print_msg = TRUE,
    +  omit_warn = TRUE,
    +  return_none = FALSE,
    +  seurat_assay = NULL
    +)
    +
    + +
    +

    Arguments

    +
    data
    +

    Name of input data. Currently only data of classes: Seurat, liger, data.frame, +dgCMatrix, dgTMatrix, tibble are accepted. Gene_IDs must be present in rownames of the data.

    + + +
    features
    +

    vector of features to check.

    + + +
    case_check
    +

    logical. Whether or not to check if features are found if the case is changed from the +input list (Sentence case to Upper and vice versa). Default is TRUE.

    + + +
    case_check_msg
    +

    logical. Whether to print message to console if alternate case features are found +in addition to inclusion in returned list. Default is TRUE.

    + + +
    print_msg
    +

    logical. Whether message should be printed if all features are found. Default is TRUE.

    + + +
    omit_warn
    +

    logical. Whether to print message about features that are not found in current object. +Default is TRUE.

    + + +
    return_none
    +

    logical. Whether list of found vs. bad features should still be returned if no +features are found. Default is FALSE.

    + + +
    seurat_assay
    +

    Name of assay to pull feature names from if data is Seurat Object. +Default is NULL which will check against features from all assays present.

    + +
    +
    +

    Value

    + + +

    A list of length 3 containing 1) found features, 2) not found features, 3) features found if +case was modified.

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +features <- Feature_Present(data = obj_name, features = DEG_list, print_msg = TRUE,
    +case_check = TRUE)
    +found_features <- features[[1]]
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Fetch_Meta.html b/docs/reference/Fetch_Meta.html index a6b3e97ecb..1b93dd519c 100644 --- a/docs/reference/Fetch_Meta.html +++ b/docs/reference/Fetch_Meta.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Gene_Present.html b/docs/reference/Gene_Present.html index 8ad3dd996a..3692ce44d9 100644 --- a/docs/reference/Gene_Present.html +++ b/docs/reference/Gene_Present.html @@ -1,5 +1,5 @@ -Check if genes/features are present — Gene_Present • scCustomizeCheck if genes/features are present — Gene_Present • scCustomizeExtract Features from LIGER Object — LIGER_Features • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Extract all unique features from LIGER object

    +
    + +
    +
    LIGER_Features(liger_object, by_dataset = FALSE)
    +
    + +
    +

    Arguments

    +
    liger_object
    +

    LIGER object name.

    + + +
    by_dataset
    +

    logical, whether to return list with vector of features for each dataset in +LIGER object or to return single vector of unique features across all datasets in object +(default is FALSE; return vector of unique features)

    + +
    +
    +

    Value

    + + +

    vector or list depending on by_dataset parameter

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# return single vector of all unique features
    +all_features <- LIGER_Features(liger_object = object, by_dataset = FALSE)
    +
    +# return list of vectors containing features from each individual dataset in object
    +dataset_features <- LIGER_Features(liger_object = object, by_dataset = TRUE)
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Liger_to_Seurat.html b/docs/reference/Liger_to_Seurat.html index 954af2b3ca..3aaff57ae7 100644 --- a/docs/reference/Liger_to_Seurat.html +++ b/docs/reference/Liger_to_Seurat.html @@ -1,5 +1,5 @@ -Create a Seurat object containing the data from a liger object — Liger_to_Seurat • scCustomizeCreate a Seurat object containing the data from a liger object — Liger_to_Seurat • scCustomizeSplit Seurat object into layers — Split_Layers • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Split Assay5 of Seurat object into layers by variable in meta.data

    +
    + +
    +
    Split_Layers(seurat_object, assay = "RNA", split.by)
    +
    + +
    +

    Arguments

    +
    seurat_object
    +

    Seurat object name.

    + + +
    assay
    +

    name(s) of assays to split into layers. Default is "RNA".

    + + +
    split.by
    +

    Variable in meta.data to use for splitting layers.

    + +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Split object by "treatment"
    +obj <- Split_Layers(seurat_object = obj, assay = "RNA", split.by = "treatment")
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Split_Vector.html b/docs/reference/Split_Vector.html new file mode 100644 index 0000000000..0fa68a2bbe --- /dev/null +++ b/docs/reference/Split_Vector.html @@ -0,0 +1,179 @@ + +Split vector into list — Split_Vector • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Splits vector into chunks of x sizes

    +
    + +
    +
    Split_Vector(x, chunk_size = 100, verbose = FALSE)
    +
    + +
    +

    Arguments

    +
    x
    +

    vector to split

    + + +
    chunk_size
    +

    size of chunks for vector to be split into, default is 100.

    + + +
    verbose
    +

    logical, print details of vector and split, default is FALSE.

    + +
    +
    +

    Value

    + + +

    list with vector of X length

    +
    +
    +

    References

    +

    Base code from stackoverflow post: +https://stackoverflow.com/a/3321659/15568251

    +
    + +
    +

    Examples

    +
    vector <- c("gene1", "gene2", "gene3", "gene4", "gene5", "gene6")
    +
    +vector_list <- Split_Vector(x = vector, chunk_size = 3)
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/Stacked_VlnPlot-1.png b/docs/reference/Stacked_VlnPlot-1.png index a6f40f6f6a..97527401df 100644 Binary files a/docs/reference/Stacked_VlnPlot-1.png and b/docs/reference/Stacked_VlnPlot-1.png differ diff --git a/docs/reference/Stacked_VlnPlot.html b/docs/reference/Stacked_VlnPlot.html index 0029ee0b19..bb14822105 100644 --- a/docs/reference/Stacked_VlnPlot.html +++ b/docs/reference/Stacked_VlnPlot.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Store_Misc_Info_Seurat.html b/docs/reference/Store_Misc_Info_Seurat.html index 948d4d8ce4..a1331ff087 100644 --- a/docs/reference/Store_Misc_Info_Seurat.html +++ b/docs/reference/Store_Misc_Info_Seurat.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Store_Palette_Seurat.html b/docs/reference/Store_Palette_Seurat.html index f7395acf4e..1b3fdc76cb 100644 --- a/docs/reference/Store_Palette_Seurat.html +++ b/docs/reference/Store_Palette_Seurat.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -162,7 +168,6 @@

    Examples

    #> Seurat Object now contains the following items in @misc slot: #> 'rd1_colors' -
    diff --git a/docs/reference/Top_Genes_Factor.html b/docs/reference/Top_Genes_Factor.html index 9fcab0d547..f64578fb74 100644 --- a/docs/reference/Top_Genes_Factor.html +++ b/docs/reference/Top_Genes_Factor.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/UnRotate_X.html b/docs/reference/UnRotate_X.html index e392ff2ae1..4e1ba35219 100644 --- a/docs/reference/UnRotate_X.html +++ b/docs/reference/UnRotate_X.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Updated_HGNC_Symbols.html b/docs/reference/Updated_HGNC_Symbols.html new file mode 100644 index 0000000000..879c9587dd --- /dev/null +++ b/docs/reference/Updated_HGNC_Symbols.html @@ -0,0 +1,194 @@ + +Update HGNC Gene Symbols — Updated_HGNC_Symbols • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Update human gene symbols using data from HGNC. This function will store cached data in package directory using (BiocFileCache). Use of this function requires internet connection on first use (or if setting update_symbol_data = TRUE). Subsequent use does not require connection and will pull from cached data.

    +
    + +
    +
    Updated_HGNC_Symbols(
    +  input_data,
    +  update_symbol_data = NULL,
    +  case_check_as_warn = FALSE,
    +  verbose = TRUE
    +)
    +
    + +
    +

    Arguments

    +
    input_data
    +

    Data source containing gene names. Accepted formats are:

    • character vector

    • +
    • Seurat Objects

    • +
    • data.frame: genes as rownames

    • +
    • dgCMatrix/dgTMatrix: genes as rownames

    • +
    • tibble: genes in first column

    • +
    + + +
    update_symbol_data
    +

    logical, whether to update cached HGNC data, default is NULL. +If NULL BiocFileCache will check and prompt for update if cache is stale. +If FALSE the BiocFileCache stale check will be skipped and current cache will be used. +If TRUE the BiocFileCache stale check will be skipped and HGNC data will be downloaded.

    + + +
    case_check_as_warn
    +

    logical, whether case checking of features should cause abort or +only warn, default is FALSE (abort). Set to TRUE if atypical names (i.e. old LOC naming) are +present in input_data.

    + + +
    verbose
    +

    logical, whether to print results detailing numbers of symbols found, updated, +and not found; default is TRUE.

    + +
    +
    +

    Value

    + + +

    data.frame containing columns: input_features, Approved_Symbol (already approved; output unchanged), Not_Found_Symbol (symbol not in HGNC; output unchanged), Updated_Symbol (new symbol from HGNC; output updated).

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +new_names <- Updated_HGNC_Symbols(input_data = Seurat_Object)
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/VariableFeaturePlot_scCustom.html b/docs/reference/VariableFeaturePlot_scCustom.html index 4a1fd08826..a30b64b32e 100644 --- a/docs/reference/VariableFeaturePlot_scCustom.html +++ b/docs/reference/VariableFeaturePlot_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/Variable_Features_ALL_LIGER.html b/docs/reference/Variable_Features_ALL_LIGER.html index ae9e31930d..78fdd792f8 100644 --- a/docs/reference/Variable_Features_ALL_LIGER.html +++ b/docs/reference/Variable_Features_ALL_LIGER.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/VlnPlot_scCustom-1.png b/docs/reference/VlnPlot_scCustom-1.png index 01575ce7a5..9a083c2a07 100644 Binary files a/docs/reference/VlnPlot_scCustom-1.png and b/docs/reference/VlnPlot_scCustom-1.png differ diff --git a/docs/reference/VlnPlot_scCustom.html b/docs/reference/VlnPlot_scCustom.html index 9f05b3cfc4..8747dab862 100644 --- a/docs/reference/VlnPlot_scCustom.html +++ b/docs/reference/VlnPlot_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/as.LIGER.html b/docs/reference/as.LIGER.html new file mode 100644 index 0000000000..5ee8663f66 --- /dev/null +++ b/docs/reference/as.LIGER.html @@ -0,0 +1,258 @@ + +Convert objects to LIGER objects — as.LIGER • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Convert objects (Seurat & lists of Seurat Objects) to LIGER objects

    +
    + +
    +
    as.LIGER(x, ...)
    +
    +# S3 method for Seurat
    +as.LIGER(
    +  x,
    +  group.by = "orig.ident",
    +  assay = "RNA",
    +  remove_missing = FALSE,
    +  renormalize = TRUE,
    +  use_seurat_var_genes = FALSE,
    +  use_seurat_dimreduc = FALSE,
    +  reduction = NULL,
    +  keep_meta = TRUE,
    +  verbose = TRUE,
    +  ...
    +)
    +
    +# S3 method for list
    +as.LIGER(
    +  x,
    +  group.by = "orig.ident",
    +  dataset_names = NULL,
    +  assay = "RNA",
    +  remove_missing = FALSE,
    +  renormalize = TRUE,
    +  use_seurat_var_genes = FALSE,
    +  var_genes_method = "intersect",
    +  keep_meta = TRUE,
    +  verbose = TRUE,
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    x
    +

    An object to convert to class liger

    + + +
    ...
    +

    Arguments passed to other methods

    + + +
    group.by
    +

    Variable in meta data which contains variable to split data by, (default is "orig.ident").

    + + +
    assay
    +

    Assay containing raw data to use, (default is "RNA").

    + + +
    remove_missing
    +

    logical, whether to remove missing genes with no counts when converting to +LIGER object (default is FALSE).

    + + +
    renormalize
    +

    logical, whether to perform normalization after LIGER object creation (default is TRUE).

    + + +
    use_seurat_var_genes
    +

    logical, whether to transfer variable features from Seurat object to +new LIGER object (default is FALSE).

    + + +
    use_seurat_dimreduc
    +

    logical, whether to transfer dimensionality reduction coordinates from +Seurat to new LIGER object (default is FALSE).

    + + +
    reduction
    +

    Name of Seurat reduction to transfer if use_seurat_dimreduc = TRUE.

    + + +
    keep_meta
    +

    logical, whether to transfer columns in Seurat meta.data slot to LIGER cell.data +slot (default is TRUE).

    + + +
    verbose
    +

    logical, whether to print status messages during object conversion (default is TRUE).

    + + +
    dataset_names
    +

    optional, vector of names to use for naming datasets.

    + + +
    var_genes_method
    +

    how variable genes should be selected from Seurat objects if use_seurat_var_genes = TRUE. Can be either "intersect" or "union", (default is "intersect").

    + +
    +
    +

    Value

    + + +

    a liger object generated from x

    + + +
    +
    +

    References

    +

    modified and enhanced version of rliger::seuratToLiger.

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +liger_object <- as.LIGER(x = seurat_object)
    +}
    +
    +if (FALSE) {
    +liger_object <- as.LIGER(x = seurat_object_list)
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/as.Seurat.html b/docs/reference/as.Seurat.html new file mode 100644 index 0000000000..9e8df705c6 --- /dev/null +++ b/docs/reference/as.Seurat.html @@ -0,0 +1,255 @@ + +Convert objects to Seurat objects — as.Seurat.liger • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Merges raw.data and scale.data of object, and creates Seurat object with these values along with slots +containing dimensionality reduction coordinates, iNMF factorization, and cluster assignments. +Supports Seurat V3/4 and V5.

    +
    + +
    +
    # S3 method for liger
    +as.Seurat(
    +  x,
    +  nms = names(x@H),
    +  renormalize = TRUE,
    +  use.liger.genes = TRUE,
    +  by.dataset = FALSE,
    +  keep_meta = TRUE,
    +  reduction_label = "UMAP",
    +  seurat_assay = "RNA",
    +  assay_type = NULL,
    +  add_barcode_names = FALSE,
    +  barcode_prefix = TRUE,
    +  barcode_cell_id_delimiter = "_",
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    x
    +

    liger object.

    + + +
    nms
    +

    By default, labels cell names with dataset of origin (this is to account for cells in +different datasets which may have same name). Other names can be passed here as vector, must have +same length as the number of datasets. (default names(H)).

    + + +
    renormalize
    +

    Whether to log-normalize raw data using Seurat defaults (default TRUE).

    + + +
    use.liger.genes
    +

    Whether to carry over variable genes (default TRUE).

    + + +
    by.dataset
    +

    Include dataset of origin in cluster identity in Seurat object (default FALSE).

    + + +
    keep_meta
    +

    logical. Whether to transfer additional metadata (nGene/nUMI/dataset already transferred) +to new Seurat Object. Default is TRUE.

    + + +
    reduction_label
    +

    Name of dimensionality reduction technique used. Enables accurate transfer +of name to Seurat object instead of defaulting to "tSNE".

    + + +
    seurat_assay
    +

    Name to set for assay in Seurat Object. Default is "RNA".

    + + +
    assay_type
    +

    what type of Seurat assay to create in new object (Assay vs Assay5). +Default is NULL which will default to the current user settings. +See Convert_Assay parameter convert_to for acceptable values.

    + + +
    add_barcode_names
    +

    logical, whether to add dataset names to the cell barcodes when +creating Seurat object, default is FALSE.

    + + +
    barcode_prefix
    +

    logical, if add_barcode_names = TRUE should the names be added as +prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).

    + + +
    barcode_cell_id_delimiter
    +

    The delimiter to use when adding dataset id to barcode +prefix/suffix. Default is "_".

    + + +
    ...
    +

    unused.

    + +
    +
    +

    Value

    + + +

    Seurat object with raw.data, scale.data, reduction_label, iNMF, and ident slots set.

    + + +

    Seurat object.

    +
    +
    +

    Details

    +

    Stores original dataset identity by default in new object metadata if dataset names are passed +in nms. iNMF factorization is stored in dim.reduction object with key "iNMF".

    +
    +
    +

    References

    +

    Original function is part of LIGER package https://github.com/welch-lab/liger (Licence: GPL-3). +Function was modified for use in scCustomize with additional parameters/functionality.

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +seurat_object <- as.Seurat(x = liger_object)
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/as.anndata.html b/docs/reference/as.anndata.html new file mode 100644 index 0000000000..252fdbfcf4 --- /dev/null +++ b/docs/reference/as.anndata.html @@ -0,0 +1,260 @@ + +Convert objects to anndata objects — as.anndata • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Convert objects (Seurat & LIGER) to anndata objects

    +
    + +
    +
    as.anndata(x, ...)
    +
    +# S3 method for Seurat
    +as.anndata(
    +  x,
    +  file_path,
    +  file_name,
    +  assay = "RNA",
    +  main_layer = "data",
    +  other_layers = "counts",
    +  transer_dimreduc = TRUE,
    +  verbose = TRUE,
    +  ...
    +)
    +
    +# S3 method for liger
    +as.anndata(
    +  x,
    +  file_path,
    +  file_name,
    +  transfer_norm.data = FALSE,
    +  reduction_label = NULL,
    +  add_barcode_names = FALSE,
    +  barcode_prefix = TRUE,
    +  barcode_cell_id_delimiter = "_",
    +  verbose = TRUE,
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    x
    +

    Seurat or LIGER object

    + + +
    ...
    +

    Arguments passed to other methods

    + + +
    file_path
    +

    directory file path and/or file name prefix. Defaults to current wd.

    + + +
    file_name
    +

    file name.

    + + +
    assay
    +

    Assay containing data to use, (default is "RNA").

    + + +
    main_layer
    +

    the layer of data to become default layer in anndata object (default is "data").

    + + +
    other_layers
    +

    other data layers to transfer to anndata object (default is "counts").

    + + +
    transer_dimreduc
    +

    logical, whether to transfer dimensionality reduction coordinates from +Seurat to anndata object (default is TRUE).

    + + +
    verbose
    +

    logical, whether to print status messages during object conversion (default is TRUE).

    + + +
    transfer_norm.data
    +

    logical, whether to transfer the norm.data in addition to +raw.data, default is FALSE.

    + + +
    reduction_label
    +

    What to label the visualization dimensionality reduction. +LIGER does not store name of technique and therefore needs to be set manually.

    + + +
    add_barcode_names
    +

    logical, whether to add dataset names to the cell barcodes when +merging object data, default is FALSE.

    + + +
    barcode_prefix
    +

    logical, if add_barcode_names = TRUE should the names be added as +prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).

    + + +
    barcode_cell_id_delimiter
    +

    The delimiter to use when adding dataset id to barcode +prefix/suffix. Default is "_".

    + +
    +
    +

    Value

    + + +

    an anndata object generated from x, saved at path provided.

    +
    +
    +

    References

    +

    Seurat version is a modified and enhanced version of sceasy::seurat2anndata (sceasy package: https://github.com/cellgeni/sceasy; License: GPL-3). Function has additional checks and supports Seurat V3 and V5 object structure.

    +

    LIGER version is inspired by sceasy::seurat2anndata, modified and updated to apply to LIGER objects (sceasy package: https://github.com/cellgeni/sceasy; License: GPL-3).

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +as.anndata(x = seurat_object, file_path = "/folder_name", file_name = "anndata_converted.h5ad")
    +}
    +
    +if (FALSE) {
    +as.anndata(x = liger_object, file_path = "/folder_name", file_name = "anndata_converted.h5ad")
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/deprecated.html b/docs/reference/deprecated.html new file mode 100644 index 0000000000..85707770e5 --- /dev/null +++ b/docs/reference/deprecated.html @@ -0,0 +1,166 @@ + +Deprecated functions — Split_FeatureScatter • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    Use FeatureScatter_scCustom() instead of Split_FeatureScatter().

    +

    Use Add_Mito_Ribo() instead of Add_Mito_Ribo_Seurat().

    +

    Use Add_Mito_Ribo() instead of Add_Mito_Ribo_LIGER().

    +

    Use Add_Cell_Complexity() instead of Add_Cell_Complexity_Seurat().

    +

    Use Add_Cell_Complexity() instead of Add_Cell_Complexity_LIGER().

    +

    Use Meta_Present() instead of Meta_Present_LIGER().

    +
    + +
    +
    Split_FeatureScatter(...)
    +
    +Add_Mito_Ribo_Seurat(...)
    +
    +Add_Mito_Ribo_LIGER(...)
    +
    +Add_Cell_Complexity_Seurat(...)
    +
    +Add_Cell_Complexity_LIGER(...)
    +
    +Meta_Present_LIGER(...)
    +
    + + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ensembl_mito_id.html b/docs/reference/ensembl_mito_id.html index 529e656f2c..047c29981f 100644 --- a/docs/reference/ensembl_mito_id.html +++ b/docs/reference/ensembl_mito_id.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/ensembl_ribo_id.html b/docs/reference/ensembl_ribo_id.html index 0b60b1b3ea..5a6460f644 100644 --- a/docs/reference/ensembl_ribo_id.html +++ b/docs/reference/ensembl_ribo_id.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/ieg_gene_list.html b/docs/reference/ieg_gene_list.html index 2c961ed57d..f79b526a01 100644 --- a/docs/reference/ieg_gene_list.html +++ b/docs/reference/ieg_gene_list.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/index.html b/docs/reference/index.html index 1e2d7a41ee..1df867ac51 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -155,6 +161,42 @@

    Read & Write Data Read_Metrics_10X()

    Read Overall Statistics from 10X Cell Ranger Count

    + +

    Merge Data Utilities

    +

    Utilities to merge raw data.

    + + +

    Extract_Modality()

    + +

    Extract multi-modal data into list by modality

    + +

    Merge_Sparse_Data_All()

    + +

    Merge a list of Sparse Matrices

    + +

    Merge_Sparse_Multimodal_All()

    + +

    Merge a list of Sparse Matrices containing multi-modal data.

    + +

    Edit Barcode Utilities

    +

    Utilities to edit cell barcodes

    + + +

    Change_Delim_All()

    + +

    Change all delimiters in cell name

    + +

    Change_Delim_Prefix()

    + +

    Change barcode prefix delimiter

    + +

    Change_Delim_Suffix()

    + +

    Change barcode suffix delimiter

    + +

    Replace_Suffix()

    + +

    Replace barcode suffixes

    Sequencing Metrics QC Plots

    Functions plotting various QC metrics provided by 10X Genomics Cell Ranger Outputs

    @@ -319,10 +361,6 @@

    Seurat Plotting Functions PC_Plotting()

    PC Plots

    - -

    Split_FeatureScatter()

    - -

    Split FeatureScatter

    Stacked_VlnPlot()

    @@ -459,139 +497,167 @@

    Plotting Utilities #2 (Them

    Helper Utilities (Seurat)

    Functions to provide ease of use for frequently used code from Seurat Objects.

    + +

    QC Utilities

    +

    Functions to add common QC variables to meta.data.

    + -

    Case_Check()

    +

    Add_CellBender_Diff()

    -

    Check for alternate case features -Checks Seurat object for the presence of features with the same spelling but alternate case.

    +

    Calculate and add differences post-cell bender analysis

    -

    Change_Delim_All()

    +

    Add_Cell_Complexity()

    -

    Change all delimiters in cell name

    +

    Add Cell Complexity

    -

    Change_Delim_Prefix()

    +

    Add_Cell_QC_Metrics()

    -

    Change barcode prefix delimiter

    +

    Add Multiple Cell Quality Control Values with Single Function

    -

    Change_Delim_Suffix()

    +

    Add_Mito_Ribo()

    -

    Change barcode suffix delimiter

    +

    Add Mito and Ribo percentages

    -

    CheckMatrix_scCustom()

    +

    Add_Top_Gene_Pct_Seurat()

    -

    Check Matrix Validity

    - -

    Extract_Modality()

    +

    Add Percent of High Abundance Genes

    + +

    Getters/Setters

    +

    Functions to extract data from or add additional data to Seurat objects

    + + +

    Add_Alt_Feature_ID()

    -

    Extract multi-modal data into list by modality

    +

    Add Alternative Feature IDs

    -

    Fetch_Meta()

    +

    Add_Sample_Meta()

    -

    Get meta data from object

    +

    Add Sample Level Meta Data

    -

    Gene_Present()

    +

    Extract_Sample_Meta()

    -

    Check if genes/features are present

    +

    Extract sample level meta.data

    -

    Merge_Sparse_Data_All()

    +

    Fetch_Meta()

    -

    Merge a list of Sparse Matrices

    +

    Get meta data from object

    -

    Merge_Sparse_Multimodal_All()

    +

    Meta_Remove_Seurat()

    -

    Merge a list of Sparse Matrices contain multi-modal data.

    +

    Remove meta data columns containing Seurat Defaults

    -

    Meta_Numeric()

    +

    Store_Misc_Info_Seurat()

    -

    Check if meta data columns are numeric

    +

    Store misc data in Seurat object

    -

    Meta_Present()

    +

    Store_Palette_Seurat()

    -

    Check if meta data are present

    - -

    Reduction_Loading_Present()

    +

    Store color palette in Seurat object

    + +

    Check Utilities

    +

    Functions to check validity of different aspects of object or object contents.

    + + +

    Case_Check()

    -

    Check if reduction loadings are present

    +

    Check for alternate case features +Checks Seurat object for the presence of features with the same spelling but alternate case.

    -

    Replace_Suffix()

    +

    CheckMatrix_scCustom()

    -

    Replace barcode suffixes

    +

    Check Matrix Validity

    -

    Add_CellBender_Diff()

    +

    Feature_Present()

    -

    Calculate and add differences post-cell bender analysis

    +

    Check if genes/features are present

    -

    Add_Cell_Complexity_Seurat()

    +

    Gene_Present()

    -

    Add Cell Complexity Value

    +

    Check if genes/features are present [Soft-deprecated]

    -

    Add_Cell_QC_Metrics()

    +

    Meta_Numeric()

    -

    Add Multiple Cell Quality Control Values with Single Function

    +

    Check if meta data columns are numeric

    -

    Add_Mito_Ribo_Seurat()

    +

    Meta_Present()

    -

    Add Mito and Ribo percentages

    +

    Check if meta data are present

    -

    Add_Sample_Meta()

    +

    Reduction_Loading_Present()

    -

    Add Sample Level Meta Data

    - -

    Add_Top_Gene_Pct_Seurat()

    +

    Check if reduction loadings are present

    + +

    Misc Utilities

    +

    Miscellaneous Utilities

    + + +

    Merge_Seurat_List()

    -

    Add Percent of High Abundance Genes

    +

    Merge a list of Seurat Objects

    -

    Extract_Sample_Meta()

    +

    Split_Vector()

    -

    Extract sample level meta.data

    +

    Split vector into list

    -

    Liger_to_Seurat()

    +

    Updated_HGNC_Symbols()

    -

    Create a Seurat object containing the data from a liger object

    +

    Update HGNC Gene Symbols

    + +

    Helper Utilities (LIGER)

    +

    Functions to provide ease of use for frequently used code from LIGER Objects.

    + + +

    Add_Cell_Complexity()

    + +

    Add Cell Complexity

    -

    Merge_Seurat_List()

    +

    Add_Mito_Ribo()

    -

    Merge a list of Seurat Objects

    +

    Add Mito and Ribo percentages

    -

    Meta_Remove_Seurat()

    +

    Fetch_Meta()

    -

    Remove meta data columns containing Seurat Defaults

    +

    Get meta data from object

    -

    Rename_Clusters()

    +

    LIGER_Features()

    -

    Rename Cluster Seurat

    +

    Extract Features from LIGER Object

    -

    Store_Misc_Info_Seurat()

    +

    Top_Genes_Factor()

    -

    Store misc data in Seurat object

    +

    Extract top loading genes for LIGER factor

    -

    Store_Palette_Seurat()

    +

    Variable_Features_ALL_LIGER()

    -

    Store color palette in Seurat object

    +

    Perform variable gene selection over whole dataset

    -

    Helper Utilities (LIGER)

    -

    Functions to provide ease of use for frequently used code from LIGER Objects.

    +

    Object Conversion Functions

    +

    Functions to convert between different single cell object formats (R & Python).

    -

    Add_Cell_Complexity_LIGER()

    +

    Convert_Assay()

    -

    Add Cell Complexity Value

    +

    Convert between Seurat Assay types

    -

    Add_Mito_Ribo_LIGER()

    +

    Liger_to_Seurat()

    -

    Add Mito and Ribo percentages to LIGER

    +

    Create a Seurat object containing the data from a liger object [Soft-deprecated]

    -

    Meta_Present_LIGER()

    +

    Split_Layers()

    -

    Check if meta data are present

    +

    Split Seurat object into layers

    -

    Top_Genes_Factor()

    +

    as.LIGER()

    -

    Extract top loading genes for LIGER factor

    +

    Convert objects to LIGER objects

    -

    Variable_Features_ALL_LIGER()

    +

    as.Seurat(<liger>)

    -

    Perform variable gene selection over whole dataset

    +

    Convert objects to Seurat objects

    + +

    as.anndata()

    + +

    Convert objects to anndata objects

    Cluster/Marker Annotation Utilities

    Functions to provide ease of use or add functionality to DEG and cluster annotation.

    @@ -612,6 +678,10 @@

    Cluster/Marker Annotation Utilities

    Pull_Cluster_Annotation()

    Pull cluster information from annotation csv file.

    + +

    Rename_Clusters()

    + +

    Rename Cluster Seurat

    Project Organization Utilities

    Functions to provide ease of use for organization of analysis projects.

    diff --git a/docs/reference/msigdb_qc_gene_list.html b/docs/reference/msigdb_qc_gene_list.html index b0b9ac0dab..3ef88b3cc8 100644 --- a/docs/reference/msigdb_qc_gene_list.html +++ b/docs/reference/msigdb_qc_gene_list.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/plotFactors_scCustom.html b/docs/reference/plotFactors_scCustom.html index 3f68e6fe37..61fe3024a0 100644 --- a/docs/reference/plotFactors_scCustom.html +++ b/docs/reference/plotFactors_scCustom.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -118,6 +124,7 @@

    Customized version of plotFactors

    pt.size_factors = 1, pt.size_dimreduc = 1, reduction_label = "UMAP", + plot_legend = TRUE, raster = TRUE, raster.dpi = c(512, 512), order = FALSE, @@ -166,6 +173,11 @@

    Arguments

    technique and therefore needs to be set manually. Default is "UMAP".

    +
    plot_legend
    +

    logical, whether to plot the legend on factor loading plots, default is TRUE. +Helpful if number of datasets is large to avoid crowding the plot with legend.

    + +
    raster

    Convert points to raster format. Default is NULL which will rasterize by default if greater than 200,000 cells.

    diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html new file mode 100644 index 0000000000..5ad7316950 --- /dev/null +++ b/docs/reference/reexports.html @@ -0,0 +1,159 @@ + +Objects exported from other packages — reexports • scCustomize + + +
    +
    + + + +
    +
    + + +
    +

    These objects are imported from other packages. Follow the links +below to see their documentation.

    +
    SeuratObject
    +

    as.Seurat

    + + +
    + + +
    +

    Note

    +

    See as.Seurat.liger for scCustomize extension of this generic to converting Liger objects.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/scCustomize-package.html b/docs/reference/scCustomize-package.html index adf34cbea1..79afed72b8 100644 --- a/docs/reference/scCustomize-package.html +++ b/docs/reference/scCustomize-package.html @@ -18,7 +18,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -59,12 +59,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/scCustomize_Palette.html b/docs/reference/scCustomize_Palette.html index 04f75802d7..eada346eab 100644 --- a/docs/reference/scCustomize_Palette.html +++ b/docs/reference/scCustomize_Palette.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • @@ -121,7 +127,6 @@

    Color Palette Selection for scCustomize

    Arguments

    num_groups

    number of groups to be plotted. If ggplot_default_colors = FALSE then by default:

    • If number of levels plotted equal to 2 then colors will be NavyAndOrange().

    • -
    • If number of levels plotted greater than 2 but less than or equal to 8 it will use ColorBlind_Pal().

    • If number of levels plotted greater than 2 but less than or equal to 36 it will use "polychrome" from DiscretePalette_scCustomize().

    • If greater than 36 will use "varibow" with shuffle = TRUE from DiscretePalette_scCustomize.

    diff --git a/docs/reference/theme_ggprism_mod.html b/docs/reference/theme_ggprism_mod.html index cacdbb1600..4cb70d7b23 100644 --- a/docs/reference/theme_ggprism_mod.html +++ b/docs/reference/theme_ggprism_mod.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/reference/viridis_shortcut.html b/docs/reference/viridis_shortcut.html index fd272b8d1f..d903ed3645 100644 --- a/docs/reference/viridis_shortcut.html +++ b/docs/reference/viridis_shortcut.html @@ -17,7 +17,7 @@ scCustomize - 2.0.1 + 2.1.0 @@ -58,12 +58,18 @@
  • Read & Write Data Functions
  • +
  • + Object/Assay Format Conversion +
  • Marker Identification & Cluster Annotation Helpers
  • Statistics Functions
  • +
  • + Updating Gene Symbols +
  • Misc Functions
  • diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 9386c38ee0..4c62db13ad 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -36,6 +36,9 @@ https://samuel-marsh.github.io/scCustomize/articles/Misc_Functions.html + + https://samuel-marsh.github.io/scCustomize/articles/Object_Conversion.html + https://samuel-marsh.github.io/scCustomize/articles/QC_Plots.html @@ -48,6 +51,9 @@ https://samuel-marsh.github.io/scCustomize/articles/Statistics.html + + https://samuel-marsh.github.io/scCustomize/articles/Update_Gene_Symbols.html + https://samuel-marsh.github.io/scCustomize/articles/articles/Cell_Bender_Functions.html @@ -78,6 +84,9 @@ https://samuel-marsh.github.io/scCustomize/articles/articles/Misc_Functions.html + + https://samuel-marsh.github.io/scCustomize/articles/articles/Object_Conversion.html + https://samuel-marsh.github.io/scCustomize/articles/articles/QC_Plots.html @@ -90,6 +99,9 @@ https://samuel-marsh.github.io/scCustomize/articles/articles/Statistics.html + + https://samuel-marsh.github.io/scCustomize/articles/articles/Update_Gene_Symbols.html + https://samuel-marsh.github.io/scCustomize/articles/index.html @@ -102,9 +114,15 @@ https://samuel-marsh.github.io/scCustomize/news/index.html + + https://samuel-marsh.github.io/scCustomize/reference/Add_Alt_Feature_ID.html + https://samuel-marsh.github.io/scCustomize/reference/Add_CellBender_Diff.html + + https://samuel-marsh.github.io/scCustomize/reference/Add_Cell_Complexity.html + https://samuel-marsh.github.io/scCustomize/reference/Add_Cell_Complexity_LIGER.html @@ -114,6 +132,9 @@ https://samuel-marsh.github.io/scCustomize/reference/Add_Cell_QC_Metrics.html + + https://samuel-marsh.github.io/scCustomize/reference/Add_Mito_Ribo.html + https://samuel-marsh.github.io/scCustomize/reference/Add_Mito_Ribo_LIGER.html @@ -171,6 +192,9 @@ https://samuel-marsh.github.io/scCustomize/reference/ColorBlind_Pal.html + + https://samuel-marsh.github.io/scCustomize/reference/Convert_Assay.html + 
https://samuel-marsh.github.io/scCustomize/reference/Copy_From_GCP.html @@ -222,6 +246,9 @@ https://samuel-marsh.github.io/scCustomize/reference/FeatureScatter_scCustom.html + + https://samuel-marsh.github.io/scCustomize/reference/Feature_Present.html + https://samuel-marsh.github.io/scCustomize/reference/Fetch_Meta.html @@ -261,6 +288,9 @@ https://samuel-marsh.github.io/scCustomize/reference/JCO_Four.html + + https://samuel-marsh.github.io/scCustomize/reference/LIGER_Features.html + https://samuel-marsh.github.io/scCustomize/reference/Liger_to_Seurat.html @@ -456,6 +486,12 @@ https://samuel-marsh.github.io/scCustomize/reference/Split_FeatureScatter.html + + https://samuel-marsh.github.io/scCustomize/reference/Split_Layers.html + + + https://samuel-marsh.github.io/scCustomize/reference/Split_Vector.html + https://samuel-marsh.github.io/scCustomize/reference/Stacked_VlnPlot.html @@ -471,6 +507,9 @@ https://samuel-marsh.github.io/scCustomize/reference/UnRotate_X.html + + https://samuel-marsh.github.io/scCustomize/reference/Updated_HGNC_Symbols.html + https://samuel-marsh.github.io/scCustomize/reference/VariableFeaturePlot_scCustom.html @@ -480,6 +519,18 @@ https://samuel-marsh.github.io/scCustomize/reference/VlnPlot_scCustom.html + + https://samuel-marsh.github.io/scCustomize/reference/as.LIGER.html + + + https://samuel-marsh.github.io/scCustomize/reference/as.Seurat.html + + + https://samuel-marsh.github.io/scCustomize/reference/as.anndata.html + + + https://samuel-marsh.github.io/scCustomize/reference/deprecated.html + https://samuel-marsh.github.io/scCustomize/reference/ensembl_mito_id.html @@ -498,6 +549,9 @@ https://samuel-marsh.github.io/scCustomize/reference/plotFactors_scCustom.html + + https://samuel-marsh.github.io/scCustomize/reference/reexports.html + https://samuel-marsh.github.io/scCustomize/reference/scCustomize-package.html diff --git a/index.md b/index.md index 65763709cb..cc1f1f4e32 100644 --- a/index.md +++ b/index.md @@ -90,11 +90,10 @@ 
scCustomize aims to achieve these goals through: errors in code reproducibility. - *Example of adding new parameters:* Adding the percentage of counts aligning to mitochondrial (and/or ribosomal) genes is common early - step in analysis. scCustomize provides `Add_Mito_Ribo_Seurat()` (and - LIGER version) to simplify this. Basic use requires only one line of - code and two parameters. + step in analysis. scCustomize provides `Add_Mito_Ribo()` to simplify + this. Basic use requires only one line of code and two parameters. - Add_Mito_Ribo_Seurat(seurat_object = obj_name, species = "Human") + Add_Mito_Ribo(object = obj_name, species = "Human") - Function already knows the defaults for Human, Mouse, Rat, Zebrafish, Drosophila, Marmoset, and Rhesus Macaque (submit a PR @@ -138,8 +137,8 @@ scCustomize aims to achieve these goals through: - scCustomize provides checks/warnings, using the cli/rlang packages, wrapped inside its functions to help and provide more informative error/warning messages. Two examples include: - - `Add_Mito_Ribo_Seurat()` will warn you if no mitochondrial or - ribosomal features are found and won’t create new metadata column. + - `Add_Mito_Ribo()` will warn you if no mitochondrial or ribosomal + features are found and won’t create new metadata column. - `Rename_Clusters()` will check and make sure the right number of unique new names are provided and provide one of two error messages if not before attempting to rename the object idents. 
diff --git a/inst/pkgdown.yml b/inst/pkgdown.yml index 71f235eb40..a9f7334034 100644 --- a/inst/pkgdown.yml +++ b/inst/pkgdown.yml @@ -12,11 +12,13 @@ articles: LIGER_Functions: LIGER_Functions.html Markers_and_Cluster_Annotation: Markers_and_Cluster_Annotation.html Misc_Functions: Misc_Functions.html + Object_Conversion: Object_Conversion.html QC_Plots: QC_Plots.html Read_and_Write_Functions: Read_and_Write_Functions.html Sequencing_QC_Plots: Sequencing_QC_Plots.html Statistics: Statistics.html -last_built: 2023-11-20T21:27Z + Update_Gene_Symbols: Update_Gene_Symbols.html +last_built: 2024-02-16T19:32Z urls: reference: https://samuel-marsh.github.io/scCustomize/reference article: https://samuel-marsh.github.io/scCustomize/articles diff --git a/man/Add_Alt_Feature_ID.Rd b/man/Add_Alt_Feature_ID.Rd new file mode 100644 index 0000000000..51ec203483 --- /dev/null +++ b/man/Add_Alt_Feature_ID.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Object_Utilities.R +\name{Add_Alt_Feature_ID} +\alias{Add_Alt_Feature_ID} +\title{Add Alternative Feature IDs} +\usage{ +Add_Alt_Feature_ID( + seurat_object, + features_tsv_file = NULL, + hdf5_file = NULL, + assay = NULL +) +} +\arguments{ +\item{seurat_object}{object name.} + +\item{features_tsv_file}{output file from Cell Ranger used for creation of Seurat object. +(Either provide this of \code{hdf5_file})} + +\item{hdf5_file}{output file from Cell Ranger used for creation of Seurat object. +(Either provide this of \code{features_tsv_file})} + +\item{assay}{name of assay(s) to add the alternative features to. Can specify "all" +to add to all assays.} +} +\value{ +Seurat Object with new entries in the \code{obj@assays$ASSAY@meta.data} slot. 
+} +\description{ +Add alternative feature ids to the assay level meta.data slot in Assay5 compatible object (Seurat V5.0.0 or greater) +} +\examples{ +\dontrun{ +# Using features.tsv.gz file + # Either file from filtered or raw outputs can be used as they are identical. +obj <- Add_Alt_Feature_ID(seurat_object = obj, +features_tsv = "sample01/outs/filtered_feature_bc_matrix/features.tsv.gz", assay = "RNA") + +#' # Using hdf5 file + # Either filtered_feature_bc or raw_feature_bc can be used as the features slot is identical + # Though it is faster to load filtered_feature_bc file due to droplet filtering +obj <- Add_Alt_Feature_ID(seurat_object = obj, +hdf5_file = "sample01/outs/outs/filtered_feature_bc_matrix.h5", assay = "RNA") +} + +} +\concept{get_set_util} diff --git a/man/Add_CellBender_Diff.Rd b/man/Add_CellBender_Diff.Rd index 5ed9d34a23..f6bb653c40 100644 --- a/man/Add_CellBender_Diff.Rd +++ b/man/Add_CellBender_Diff.Rd @@ -26,4 +26,4 @@ cell_bender_assay_name = "RNA") } } -\concept{object_util} +\concept{qc_util} diff --git a/man/Add_Cell_Complexity.Rd b/man/Add_Cell_Complexity.Rd new file mode 100644 index 0000000000..bb38ab5a49 --- /dev/null +++ b/man/Add_Cell_Complexity.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/LIGER_Utilities.R, +% R/Object_Utilities.R +\name{Add_Cell_Complexity} +\alias{Add_Cell_Complexity} +\alias{Add_Cell_Complexity.liger} +\alias{Add_Cell_Complexity.Seurat} +\title{Add Cell Complexity} +\usage{ +Add_Cell_Complexity(object, ...) + +\method{Add_Cell_Complexity}{liger}( + object, + meta_col_name = "log10GenesPerUMI", + overwrite = FALSE, + ... +) + +\method{Add_Cell_Complexity}{Seurat}( + object, + meta_col_name = "log10GenesPerUMI", + assay = "RNA", + overwrite = FALSE, + ... +) +} +\arguments{ +\item{object}{Seurat or LIGER object} + +\item{...}{Arguments passed to other methods} + +\item{meta_col_name}{name to use for new meta data column. 
Default is "log10GenesPerUMI".} + +\item{overwrite}{Logical. Whether to overwrite an existing meta.data column. Default is FALSE meaning that +function will abort if column with name provided to \code{meta_col_name} is present in meta.data slot.} + +\item{assay}{assay to use in calculation. Default is "RNA". \emph{Note} This should only be changed if +storing corrected and uncorrected assays in same object (e.g. outputs of both Cell Ranger and Cell Bender).} +} +\value{ +An object of the same class as \code{object} with columns added to object meta data. +} +\description{ +Add measure of cell complexity/novelty (log10GenesPerUMI) for data QC. +} +\examples{ +\dontrun{ +# Liger +liger_object <- Add_Cell_Complexity(object = liger_object) +} + +# Seurat +library(Seurat) +pbmc_small <- Add_Cell_Complexity(object = pbmc_small) + +} +\concept{liger_object_util} +\concept{qc_util} diff --git a/man/Add_Cell_Complexity_LIGER.Rd b/man/Add_Cell_Complexity_LIGER.Rd deleted file mode 100644 index 097143b234..0000000000 --- a/man/Add_Cell_Complexity_LIGER.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LIGER_Utilities.R -\name{Add_Cell_Complexity_LIGER} -\alias{Add_Cell_Complexity_LIGER} -\title{Add Cell Complexity Value} -\usage{ -Add_Cell_Complexity_LIGER( - liger_object, - meta_col_name = "log10GenesPerUMI", - overwrite = FALSE -) -} -\arguments{ -\item{liger_object}{object name.} - -\item{meta_col_name}{name to use for new meta data column. Default is "log10GenesPerUMI".} - -\item{overwrite}{Logical. Whether to overwrite existing an meta.data column. Default is FALSE meaning that -function will abort if column with name provided to \code{meta_col_name} is present in meta.data slot.} -} -\value{ -A LIGER Object -} -\description{ -Add measure of cell complexity/novelty (log10PerUMI) for data QC. 
-} -\examples{ -\dontrun{ -object <- Add_Cell_Complexity_LIGER(liger_object = object) -} - -} -\concept{liger_object_util} diff --git a/man/Add_Cell_Complexity_Seurat.Rd b/man/Add_Cell_Complexity_Seurat.Rd deleted file mode 100644 index 1da6c9c116..0000000000 --- a/man/Add_Cell_Complexity_Seurat.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Object_Utilities.R -\name{Add_Cell_Complexity_Seurat} -\alias{Add_Cell_Complexity_Seurat} -\title{Add Cell Complexity Value} -\usage{ -Add_Cell_Complexity_Seurat( - seurat_object, - meta_col_name = "log10GenesPerUMI", - assay = "RNA", - overwrite = FALSE -) -} -\arguments{ -\item{seurat_object}{object name.} - -\item{meta_col_name}{name to use for new meta data column. Default is "log10GenesPerUMI".} - -\item{assay}{assay to use in calculation. Default is "RNA". \emph{Note} This should only be changed if -storing corrected and uncorrected assays in same object (e.g. outputs of both Cell Ranger and Cell Bender).} - -\item{overwrite}{Logical. Whether to overwrite existing an meta.data column. Default is FALSE meaning that -function will abort if column with name provided to \code{meta_col_name} is present in meta.data slot.} -} -\value{ -A Seurat Object -} -\description{ -Add measure of cell complexity/novelty (log10PerUMI) for data QC. 
-} -\examples{ -library(Seurat) -pbmc_small <- Add_Cell_Complexity_Seurat(seurat_object = pbmc_small) - -} -\concept{object_util} diff --git a/man/Add_Cell_QC_Metrics.Rd b/man/Add_Cell_QC_Metrics.Rd index ee37095d07..7890b8afd2 100644 --- a/man/Add_Cell_QC_Metrics.Rd +++ b/man/Add_Cell_QC_Metrics.Rd @@ -117,4 +117,4 @@ obj <- Add_Cell_QC_Metrics(seurat_object = obj, species = "Human") } } -\concept{object_util} +\concept{qc_util} diff --git a/man/Add_Mito_Ribo_Seurat.Rd b/man/Add_Mito_Ribo.Rd similarity index 70% rename from man/Add_Mito_Ribo_Seurat.Rd rename to man/Add_Mito_Ribo.Rd index e15ac7eb01..b72a76a7b9 100644 --- a/man/Add_Mito_Ribo_Seurat.Rd +++ b/man/Add_Mito_Ribo.Rd @@ -1,11 +1,32 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Object_Utilities.R -\name{Add_Mito_Ribo_Seurat} -\alias{Add_Mito_Ribo_Seurat} +% Please edit documentation in R/Generics.R, R/LIGER_Utilities.R, +% R/Object_Utilities.R +\name{Add_Mito_Ribo} +\alias{Add_Mito_Ribo} +\alias{Add_Mito_Ribo.liger} +\alias{Add_Mito_Ribo.Seurat} \title{Add Mito and Ribo percentages} \usage{ -Add_Mito_Ribo_Seurat( - seurat_object, +Add_Mito_Ribo(object, ...) + +\method{Add_Mito_Ribo}{liger}( + object, + species, + mito_name = "percent_mito", + ribo_name = "percent_ribo", + mito_ribo_name = "percent_mito_ribo", + mito_pattern = NULL, + ribo_pattern = NULL, + mito_features = NULL, + ribo_features = NULL, + ensembl_ids = FALSE, + overwrite = FALSE, + list_species_names = FALSE, + ... +) + +\method{Add_Mito_Ribo}{Seurat}( + object, species, mito_name = "percent_mito", ribo_name = "percent_ribo", @@ -17,11 +38,14 @@ Add_Mito_Ribo_Seurat( ensembl_ids = FALSE, assay = NULL, overwrite = FALSE, - list_species_names = FALSE + list_species_names = FALSE, + ... ) } \arguments{ -\item{seurat_object}{object name.} +\item{object}{Seurat or LIGER object} + +\item{...}{Arguments passed to other methods} \item{species}{Species of origin for given Seurat Object. 
If mouse, human, marmoset, zebrafish, rat, drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically @@ -51,8 +75,6 @@ Will override regex pattern if both are present (including default saved regex p \item{ensembl_ids}{logical, whether feature names in the object are gene names or ensembl IDs (default is FALSE; set TRUE if feature names are ensembl IDs).} -\item{assay}{Assay to use (default is the current object default assay).} - \item{overwrite}{Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that function will abort if columns with any one of the names provided to \code{mito_name} \code{ribo_name} or \code{mito_ribo_name} is present in meta.data slot.} @@ -60,17 +82,27 @@ function will abort if columns with any one of the names provided to \code{mito_ \item{list_species_names}{returns list of all accepted values to use for default species names which contain internal regex/feature lists (human, mouse, marmoset, zebrafish, rat, drosophila, and rhesus macaque). Default is FALSE.} + +\item{assay}{Assay to use (default is the current object default assay).} } \value{ -A Seurat Object +An object of the same class as \code{object} with columns added to object meta data. 
} \description{ -Add Mito, Ribo, & Mito+Ribo percentages to meta.data slot of Seurat Object +Add Mito, Ribo, & Mito+Ribo percentages to meta.data slot of Seurat Object or +cell.data slot of Liger object } \examples{ \dontrun{ -obj <- Add_Mito_Ribo_Seurat(seurat_object = obj, species = "human") +# Liger +liger_object <- Add_Mito_Ribo(object = liger_object, species = "human") +} + +\dontrun{ +# Seurat +seurat_object <- Add_Mito_Ribo(object = seurat_object, species = "human") } } -\concept{object_util} +\concept{liger_object_util} +\concept{qc_util} diff --git a/man/Add_Mito_Ribo_LIGER.Rd b/man/Add_Mito_Ribo_LIGER.Rd deleted file mode 100644 index b6ed595a4e..0000000000 --- a/man/Add_Mito_Ribo_LIGER.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LIGER_Utilities.R -\name{Add_Mito_Ribo_LIGER} -\alias{Add_Mito_Ribo_LIGER} -\title{Add Mito and Ribo percentages to LIGER} -\usage{ -Add_Mito_Ribo_LIGER( - liger_object, - species, - mito_name = "percent_mito", - ribo_name = "percent_ribo", - mito_ribo_name = "percent_mito_ribo", - mito_pattern = NULL, - ribo_pattern = NULL, - mito_features = NULL, - ribo_features = NULL, - ensembl_ids = FALSE, - overwrite = FALSE, - list_species_names = FALSE -) -} -\arguments{ -\item{liger_object}{LIGER object name.} - -\item{species}{Species of origin for given Seurat Object. If mouse, human, marmoset, zebrafish, rat, -drosophila, or rhesus macaque (name or abbreviation) are provided the function will automatically -generate mito_pattern and ribo_pattern values.} - -\item{mito_name}{name to use for the new meta.data column containing percent mitochondrial counts. -Default is "percent_mito".} - -\item{ribo_name}{name to use for the new meta.data column containing percent ribosomal counts. -Default is "percent_ribo".} - -\item{mito_ribo_name}{name to use for the new meta.data column containing percent mitochondrial+ribosomal -counts. 
Default is "percent_mito_ribo".} - -\item{mito_pattern}{A regex pattern to match features against for mitochondrial genes (will set automatically -if species is mouse or human; marmoset features list saved separately).} - -\item{ribo_pattern}{A regex pattern to match features against for ribosomal genes (will set automatically -if species is mouse, human, or marmoset).} - -\item{mito_features}{A list of mitochondrial gene names to be used instead of using regex pattern. -Will override regex pattern if both are present (including default saved regex patterns).} - -\item{ribo_features}{A list of ribosomal gene names to be used instead of using regex pattern. -Will override regex pattern if both are present (including default saved regex patterns).} - -\item{ensembl_ids}{logical, whether feature names in the object are gene names or -ensembl IDs (default is FALSE; set TRUE if feature names are ensembl IDs).} - -\item{overwrite}{Logical. Whether to overwrite existing meta.data columns. Default is FALSE meaning that -function will abort if columns with any one of the names provided to \code{mito_name} \code{ribo_name} or \code{mito_ribo_name} -is present in meta.data slot.} - -\item{list_species_names}{returns list of all accepted values to use for default species names which -contain internal regex/feature lists (human, mouse, marmoset, zebrafish, rat, drosophila, and -rhesus macaque). 
Default is FALSE.} -} -\value{ -A LIGER Object -} -\description{ -Add Mito, Ribo, percentages to meta.data slot of LIGER Object -} -\examples{ -\dontrun{ -object <- Add_Mito_Ribo_LIGER(liger_object = object, species = "mouse") -} - -} -\concept{liger_object_util} diff --git a/man/Add_Sample_Meta.Rd b/man/Add_Sample_Meta.Rd index 27a5259577..fddab31938 100644 --- a/man/Add_Sample_Meta.Rd +++ b/man/Add_Sample_Meta.Rd @@ -52,4 +52,4 @@ join_by_seurat = "orig.ident", join_by_meta = "sample_ID") } } -\concept{object_util} +\concept{get_set_util} diff --git a/man/Add_Top_Gene_Pct_Seurat.Rd b/man/Add_Top_Gene_Pct_Seurat.Rd index 8b688c37b7..375225b883 100644 --- a/man/Add_Top_Gene_Pct_Seurat.Rd +++ b/man/Add_Top_Gene_Pct_Seurat.Rd @@ -54,4 +54,4 @@ normalisation and visualisation of single-cell RNA-seq data in R.” Bioinformat \seealso{ \url{https://bioconductor.org/packages/release/bioc/html/scuttle.html} } -\concept{object_util} +\concept{qc_util} diff --git a/man/Case_Check.Rd b/man/Case_Check.Rd index 79bde243f2..3366c12665 100644 --- a/man/Case_Check.Rd +++ b/man/Case_Check.Rd @@ -39,4 +39,4 @@ alt_features <- Case_Check(seurat_object = obj_name, gene_list = DEG_list) } } -\concept{helper_util} +\concept{check_util} diff --git a/man/Change_Delim_All.Rd b/man/Change_Delim_All.Rd index b0cde973e6..693356f7ce 100644 --- a/man/Change_Delim_All.Rd +++ b/man/Change_Delim_All.Rd @@ -25,4 +25,4 @@ dge_matrix <- Change_Delim_All(data = dge_matrix, current_delim = ".", new_delim } } -\concept{helper_util} +\concept{barcode_util} diff --git a/man/Change_Delim_Prefix.Rd b/man/Change_Delim_Prefix.Rd index 90e31b4a9c..ffd3b43b20 100644 --- a/man/Change_Delim_Prefix.Rd +++ b/man/Change_Delim_Prefix.Rd @@ -25,4 +25,4 @@ dge_matrix <- Change_Delim_Prefix(data = dge_matrix, current_delim = ".", new_de } } -\concept{helper_util} +\concept{barcode_util} diff --git a/man/Change_Delim_Suffix.Rd b/man/Change_Delim_Suffix.Rd index abdf3ec95a..8ed50b897c 100644 --- 
a/man/Change_Delim_Suffix.Rd +++ b/man/Change_Delim_Suffix.Rd @@ -25,4 +25,4 @@ dge_matrix <- Change_Delim_Suffix(data = dge_matrix, current_delim = ".", new_de } } -\concept{helper_util} +\concept{barcode_util} diff --git a/man/CheckMatrix_scCustom.Rd b/man/CheckMatrix_scCustom.Rd index d7880799fb..667989922b 100644 --- a/man/CheckMatrix_scCustom.Rd +++ b/man/CheckMatrix_scCustom.Rd @@ -36,6 +36,6 @@ CheckMatrix_scCustom(object = mat) } \references{ -Re-implementing \code{CheckMatrix} only for sparse matrices with modified warning messages. Original function from SeuratObject \url{https://github.com/mojaveazure/seurat-object/blob/9c0eda946e162d8595696e5280a6ecda6284db39/R/utils.R#L625-L650} (License: MIT). +Re-implementing \code{CheckMatrix} only for sparse matrices with modified warning messages. Original function from SeuratObject \url{https://github.com/satijalab/seurat-object/blob/9c0eda946e162d8595696e5280a6ecda6284db39/R/utils.R#L625-L650} (License: MIT). } -\concept{helper_util} +\concept{check_util} diff --git a/man/Clustered_DotPlot.Rd b/man/Clustered_DotPlot.Rd index 541e23c48b..4deca1b5ac 100644 --- a/man/Clustered_DotPlot.Rd +++ b/man/Clustered_DotPlot.Rd @@ -7,21 +7,23 @@ Clustered_DotPlot( seurat_object, features, + split.by = NULL, colors_use_exp = viridis_plasma_dark_high, exp_color_min = -2, exp_color_middle = NULL, exp_color_max = 2, + exp_value_type = "scaled", print_exp_quantiles = FALSE, colors_use_idents = NULL, x_lab_rotate = TRUE, + plot_padding = NULL, flip = FALSE, k = 1, feature_km_repeats = 1000, ident_km_repeats = 1000, - row_km_repeats = deprecated(), - column_km_repeats = deprecated(), row_label_size = 8, row_label_fontface = "plain", + grid_color = NULL, cluster_feature = TRUE, cluster_ident = TRUE, column_label_size = 8, @@ -44,6 +46,8 @@ Clustered_DotPlot( \item{features}{Features to plot.} +\item{split.by}{Variable in \verb{@meta.data} to split the identities plotted by.} + \item{colors_use_exp}{Color palette to use for plotting 
expression scale. Default is \code{viridis::plasma(n = 20, direction = -1)}.} \item{exp_color_min}{Minimum scaled average expression threshold (everything smaller will be set to this). @@ -55,6 +59,9 @@ By default will be set to value in middle of \code{exp_color_min} and \code{exp_ \item{exp_color_max}{Minimum scaled average expression threshold (everything smaller will be set to this). Default is 2.} +\item{exp_value_type}{Whether to plot average normalized expression or +scaled average normalized expression. Only valid when \code{split.by} is provided.} + \item{print_exp_quantiles}{Whether to print the quantiles of expression data in addition to plots. Default is FALSE. NOTE: These values will be altered by choices of \code{exp_color_min} and \code{exp_color_min} if there are values below or above those cutoffs, respectively.} @@ -66,6 +73,12 @@ will use "varibow" with shuffle = TRUE both from \code{DiscretePalette_scCustomi \item{x_lab_rotate}{How to rotate column labels. By default set to \code{TRUE} which rotates labels 45 degrees. If set \code{FALSE} rotation is set to 0 degrees. Users can also supply custom angle for text rotation.} +\item{plot_padding}{if plot needs extra white space padding so no plot or labels are cutoff. +The parameter accepts TRUE or numeric vector of length 4. If TRUE padding will be set to +c(2, 10, 0, 0) (bottom, left, top, right). Can also be customized further with numeric +vector of length 4 specifying the amount of padding in millimeters. +Default is NULL, no padding.} + \item{flip}{logical, whether to flip the axes of final plot. Default is FALSE; rows = features and columns = idents.} @@ -81,14 +94,12 @@ smaller than row_km, but this might mean the original row_km is not a good choic \item{ident_km_repeats}{Number of k-means runs to get a consensus k-means clustering. Similar to \code{feature_km_repeats}. 
Default is 1000.} -\item{row_km_repeats}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} soft-deprecated. See \code{feature_km_repeats}} - -\item{column_km_repeats}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} soft-deprecated. See \code{ident_km_repeats}} - \item{row_label_size}{Size of the feature labels. Provided to \code{row_names_gp} in Heatmap call.} \item{row_label_fontface}{Fontface to use for row labels. Provided to \code{row_names_gp} in Heatmap call.} +\item{grid_color}{color to use for heatmap grid. Default is NULL which "removes" grid by using NA color.} + \item{cluster_feature}{logical, whether to cluster and reorder feature axis. Default is TRUE.} \item{cluster_ident}{logical, whether to cluster and reorder identity axis. Default is TRUE.} diff --git a/man/Convert_Assay.Rd b/man/Convert_Assay.Rd new file mode 100644 index 0000000000..1a44c48496 --- /dev/null +++ b/man/Convert_Assay.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Object_Conversion.R +\name{Convert_Assay} +\alias{Convert_Assay} +\title{Convert between Seurat Assay types} +\usage{ +Convert_Assay(seurat_object, assay = NULL, convert_to) +} +\arguments{ +\item{seurat_object}{Seurat object name.} + +\item{assay}{name(s) of assays to convert. Default is NULL and will check with users +which assays they want to convert.} + +\item{convert_to}{value of what assay type to convert current assays to. +\itemize{ +\item Accepted values for V3/4 are: "Assay", "assay", "V3", or "v3". +\item Accepted values for V5 are: "Assay5", "assay5", "V5", or "v5". +}} +} +\description{ +Will convert assays within a Seurat object between "Assay" and "Assay5" types. 
+} +\examples{ +\dontrun{ +# Convert to V3/4 assay +obj <- Convert_Assay(seurat_object = obj, convert_to = "V3") + +# Convert to V5 assay +obj <- Convert_Assay(seurat_object = obj, convert_to = "V5") +} + +} +\concept{object_conversion} diff --git a/man/Extract_Modality.Rd b/man/Extract_Modality.Rd index 4d694fa4d6..cbc19ca292 100644 --- a/man/Extract_Modality.Rd +++ b/man/Extract_Modality.Rd @@ -23,4 +23,4 @@ new_multi_mat <- Extract_Modality(matrix_list = multi_mat) } } -\concept{helper_util} +\concept{read_merge_util} diff --git a/man/Extract_Sample_Meta.Rd b/man/Extract_Sample_Meta.Rd index b5f72af801..e336625f85 100644 --- a/man/Extract_Sample_Meta.Rd +++ b/man/Extract_Sample_Meta.Rd @@ -56,4 +56,4 @@ sample_meta3 <- Extract_Sample_Meta(object = pbmc_small, sample_name = "orig.ide include_all = TRUE) } -\concept{object_util} +\concept{get_set_util} diff --git a/man/Feature_Present.Rd b/man/Feature_Present.Rd new file mode 100644 index 0000000000..2579914b13 --- /dev/null +++ b/man/Feature_Present.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Utilities.R +\name{Feature_Present} +\alias{Feature_Present} +\title{Check if genes/features are present} +\usage{ +Feature_Present( + data, + features, + case_check = TRUE, + case_check_msg = TRUE, + print_msg = TRUE, + omit_warn = TRUE, + return_none = FALSE, + seurat_assay = NULL +) +} +\arguments{ +\item{data}{Name of input data. Currently only data of classes: Seurat, liger, data.frame, +dgCMatrix, dgTMatrix, tibble are accepted. Gene_IDs must be present in rownames of the data.} + +\item{features}{vector of features to check.} + +\item{case_check}{logical. Whether or not to check if features are found if the case is changed from the +input list (Sentence case to Upper and vice versa). Default is TRUE.} + +\item{case_check_msg}{logical. Whether to print message to console if alternate case features are found +in addition to inclusion in returned list. 
Default is TRUE.} + +\item{print_msg}{logical. Whether message should be printed if all features are found. Default is TRUE.} + +\item{omit_warn}{logical. Whether to print message about features that are not found in current object. +Default is TRUE.} + +\item{return_none}{logical. Whether list of found vs. bad features should still be returned if no +features are found. Default is FALSE.} + +\item{seurat_assay}{Name of assay to pull feature names from if \code{data} is Seurat Object. +Default is NULL which will check against features from all assays present.} +} +\value{ +A list of length 3 containing 1) found features, 2) not found features, 3) features found if +case was modified. +} +\description{ +Check if genes are present in object and return vector of found genes. Return warning messages for +genes not found. +} +\examples{ +\dontrun{ +features <- Feature_Present(data = obj_name, features = DEG_list, print_msg = TRUE, +case_check = TRUE) +found_features <- features[[1]] +} + +} +\concept{check_util} diff --git a/man/Fetch_Meta.Rd b/man/Fetch_Meta.Rd index f700200b89..27c5d04dd4 100644 --- a/man/Fetch_Meta.Rd +++ b/man/Fetch_Meta.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Utilities.R +% Please edit documentation in R/Object_Utilities.R \name{Fetch_Meta} \alias{Fetch_Meta} \alias{Fetch_Meta.Seurat} @@ -27,4 +27,5 @@ meta_data <- Fetch_Meta(object = pbmc_small) head(meta_data, 5) } -\concept{helper_util} +\concept{get_set_util} +\concept{liger_object_util} diff --git a/man/Gene_Present.Rd b/man/Gene_Present.Rd index 8fc00253a6..ea8cddc9cd 100644 --- a/man/Gene_Present.Rd +++ b/man/Gene_Present.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Utilities.R \name{Gene_Present} \alias{Gene_Present} -\title{Check if genes/features are present} +\title{Check if genes/features are present 
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#soft-deprecated}{\figure{lifecycle-soft-deprecated.svg}{options: alt='[Soft-deprecated]'}}}{\strong{[Soft-deprecated]}}} \usage{ Gene_Present( data, @@ -36,7 +36,7 @@ Default is TRUE.} features are found. Default is FALSE.} \item{seurat_assay}{Name of assay to pull feature names from if \code{data} is Seurat Object. -Defaults to \code{DefaultAssay(OBJ)} if NULL.} +Default is NULL which will check against features from all assays present.} } \value{ A list of length 3 containing 1) found features, 2) not found features, 3) features found if @@ -53,4 +53,4 @@ found_features <- features[[1]] } } -\concept{helper_util} +\concept{check_util} diff --git a/man/Iterate_DimPlot_bySample.Rd b/man/Iterate_DimPlot_bySample.Rd index 90f6edc77f..e1fe6d4c33 100644 --- a/man/Iterate_DimPlot_bySample.Rd +++ b/man/Iterate_DimPlot_bySample.Rd @@ -13,10 +13,12 @@ Iterate_DimPlot_bySample( single_pdf = FALSE, dpi = 600, color = "black", - legend = TRUE, + no_legend = TRUE, + title_prefix = NULL, reduction = NULL, dims = c(1, 2), pt.size = NULL, + raster = NULL, ... ) } @@ -37,7 +39,10 @@ Iterate_DimPlot_bySample( \item{color}{color scheme to use.} -\item{legend}{logical, whether or not to include plot legend, default is TRUE.} +\item{no_legend}{logical, whether or not to remove plot legend, default is TRUE.} + +\item{title_prefix}{Value that should be used for plot title prefix if \code{no_legend = TRUE}. +If NULL the value of \code{meta_data_column} will be used. Default is NULL.} \item{reduction}{Dimensionality Reduction to use (default is object default).} @@ -45,6 +50,9 @@ Iterate_DimPlot_bySample( \item{pt.size}{Adjust point size for plotting.} +\item{raster}{Convert points to raster format. 
Default is NULL which will rasterize by default if +greater than 200,000 cells.} + \item{...}{Extra parameters passed to \code{\link[Seurat]{DimPlot}}.} } \value{ diff --git a/man/Iterate_FeaturePlot_scCustom.Rd b/man/Iterate_FeaturePlot_scCustom.Rd index 83fc7e621a..91056cff6e 100644 --- a/man/Iterate_FeaturePlot_scCustom.Rd +++ b/man/Iterate_FeaturePlot_scCustom.Rd @@ -18,6 +18,9 @@ Iterate_FeaturePlot_scCustom( file_name = NULL, file_type = NULL, single_pdf = FALSE, + features_per_page = 1, + num_columns = NULL, + landscape = TRUE, dpi = 600, pt.size = NULL, reduction = NULL, @@ -53,7 +56,15 @@ will be incorporated into plot title if \code{single_pdf = TRUE} or into file na \item{file_type}{File type to save output as. Must be one of following: ".pdf", ".png", ".tiff", ".jpeg", or ".svg".} -\item{single_pdf}{saves all plots to single PDF file (default = FALSE). `file_type`` must be .pdf.} +\item{single_pdf}{saves all plots to single PDF file (default = FALSE).} + +\item{features_per_page}{numeric, number of features to plot on single page if \code{single_pdf = TRUE}. 
Default is 1.} + +\item{num_columns}{Number of columns in plot layout (only applicable if \code{single_pdf = TRUE} AND +\code{features_per_page} > 1).} + +\item{landscape}{logical, when plotting multiple features per page in single PDF whether to use landscape or portrait +page dimensions (default is TRUE).} \item{dpi}{dpi for image saving.} diff --git a/man/LIGER_Features.Rd b/man/LIGER_Features.Rd new file mode 100644 index 0000000000..3c3108b5a8 --- /dev/null +++ b/man/LIGER_Features.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LIGER_Utilities.R +\name{LIGER_Features} +\alias{LIGER_Features} +\title{Extract Features from LIGER Object} +\usage{ +LIGER_Features(liger_object, by_dataset = FALSE) +} +\arguments{ +\item{liger_object}{LIGER object name.} + +\item{by_dataset}{logical, whether to return list with vector of features for each dataset in +LIGER object or to return single vector of unique features across all datasets in object +(default is FALSE; return vector of unique features)} +} +\value{ +vector or list depending on \code{by_dataset} parameter +} +\description{ +Extract all unique features from LIGER object +} +\examples{ +\dontrun{ +# return single vector of all unique features +all_features <- LIGER_Features(liger_object = object, by_dataset = FALSE) + +# return list of vectors containing features from each individual dataset in object +dataset_features <- LIGER_Features(liger_object = object, by_dataset = TRUE) +} + +} +\concept{liger_object_util} diff --git a/man/Liger_to_Seurat.Rd b/man/Liger_to_Seurat.Rd index 70c3ea294f..a197291eb8 100644 --- a/man/Liger_to_Seurat.Rd +++ b/man/Liger_to_Seurat.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LIGER_Utilities.R +% Please edit documentation in R/Object_Conversion.R \name{Liger_to_Seurat} \alias{Liger_to_Seurat} -\title{Create a Seurat object containing the data from a liger object} +\title{Create 
a Seurat object containing the data from a liger object \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#soft-deprecated}{\figure{lifecycle-soft-deprecated.svg}{options: alt='[Soft-deprecated]'}}}{\strong{[Soft-deprecated]}}} \usage{ Liger_to_Seurat( liger_object, @@ -12,7 +12,11 @@ Liger_to_Seurat( by.dataset = FALSE, keep_meta = TRUE, reduction_label = "UMAP", - seurat_assay = "RNA" + seurat_assay = "RNA", + assay_type = NULL, + add_barcode_names = FALSE, + barcode_prefix = TRUE, + barcode_cell_id_delimiter = "_" ) } \arguments{ @@ -35,6 +39,19 @@ to new Seurat Object. Default is TRUE.} or name to Seurat object instead of defaulting to "tSNE".} \item{seurat_assay}{Name to set for assay in Seurat Object. Default is "RNA".} + +\item{assay_type}{what type of Seurat assay to create in new object (Assay vs Assay5). +Default is NULL which will default to the current user settings. +See \code{\link{Convert_Assay}} parameter \code{convert_to} for acceptable values.} + +\item{add_barcode_names}{logical, whether to add dataset names to the cell barcodes when +creating Seurat object, default is FALSE.} + +\item{barcode_prefix}{logical, if \code{add_barcode_names = TRUE} should the names be added as +prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).} + +\item{barcode_cell_id_delimiter}{The delimiter to use when adding dataset id to barcode +prefix/suffix. Default is "_".} } \value{ Seurat object with raw.data, scale.data, reduction_label, iNMF, and ident slots set. @@ -57,4 +74,4 @@ Original function is part of LIGER package \url{https://github.com/welch-lab/lig Function was slightly modified for use in scCustomize with keep.meta parameter. Also posted as PR to liger GitHub. 
} -\concept{object_util} +\concept{object_conversion} diff --git a/man/Merge_Seurat_List.Rd b/man/Merge_Seurat_List.Rd index d562adb37e..605080ec5b 100644 --- a/man/Merge_Seurat_List.Rd +++ b/man/Merge_Seurat_List.Rd @@ -36,4 +36,4 @@ merged_object <- Merge_Seurat_List(list_seurat = object_list) } } -\concept{object_util} +\concept{misc_util} diff --git a/man/Merge_Sparse_Data_All.Rd b/man/Merge_Sparse_Data_All.Rd index 361a132669..965b71f6f0 100644 --- a/man/Merge_Sparse_Data_All.Rd +++ b/man/Merge_Sparse_Data_All.Rd @@ -40,4 +40,4 @@ Original function is part of LIGER package as non-exported function \url{https://github.com/welch-lab/liger/blob/master/R/utilities.R} (License: GPL-3). Function was modified for use in scCustomize (add progress bar, prefix vs. suffix, and delimiter options). } -\concept{helper_util} +\concept{read_merge_util} diff --git a/man/Merge_Sparse_Multimodal_All.Rd b/man/Merge_Sparse_Multimodal_All.Rd index be94f991ee..cc7f0721b3 100644 --- a/man/Merge_Sparse_Multimodal_All.Rd +++ b/man/Merge_Sparse_Multimodal_All.Rd @@ -35,4 +35,4 @@ prefix = TRUE, cell_id_delimiter = "_") } } -\concept{helper_util} +\concept{read_merge_util} diff --git a/man/Meta_Numeric.Rd b/man/Meta_Numeric.Rd index 044ce69486..aae517547f 100644 --- a/man/Meta_Numeric.Rd +++ b/man/Meta_Numeric.Rd @@ -22,4 +22,4 @@ numeric_meta_columns <- Meta_Numeric(data = meta_data) } } -\concept{helper_util} +\concept{check_util} diff --git a/man/Meta_Present.Rd b/man/Meta_Present.Rd index cdd5433b2d..a07bba23b0 100644 --- a/man/Meta_Present.Rd +++ b/man/Meta_Present.Rd @@ -5,7 +5,8 @@ \title{Check if meta data are present} \usage{ Meta_Present( - seurat_object, + object, + seurat_object = deprecated(), meta_col_names, print_msg = TRUE, omit_warn = TRUE, @@ -13,7 +14,9 @@ Meta_Present( ) } \arguments{ -\item{seurat_object}{object name.} +\item{object}{Seurat or Liger object name.} + 
+\item{seurat_object}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} deprecated. Please use \code{object} instead.} \item{meta_col_names}{vector of column names to check.} @@ -33,8 +36,8 @@ Return warning messages for meta data columns not found. } \examples{ \dontrun{ -meta_variables <- Meta_Present(seurat_object = obj_name, gene_list = DEG_list, print_msg = TRUE) +meta_variables <- Meta_Present(object = obj_name, meta_col_names = "percent_mito", print_msg = TRUE) } } -\concept{helper_util} +\concept{check_util} diff --git a/man/Meta_Present_LIGER.Rd b/man/Meta_Present_LIGER.Rd deleted file mode 100644 index 503108bd27..0000000000 --- a/man/Meta_Present_LIGER.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LIGER_Utilities.R -\name{Meta_Present_LIGER} -\alias{Meta_Present_LIGER} -\title{Check if meta data are present} -\usage{ -Meta_Present_LIGER(liger_object, meta_col_names, print_msg = TRUE) -} -\arguments{ -\item{liger_object}{object name.} - -\item{meta_col_names}{vector of column names to check.} - -\item{print_msg}{logical. Whether message should be printed if all features are found. Default is TRUE.} -} -\value{ -vector of meta data columns that are present -} -\description{ -Check if meta data columns are present in object and return vector of found columns Return warning -messages for meta data columns not found. 
-} -\examples{ -\dontrun{ -meta_variables <- Meta_Present_LIGER(liger_object = obj, gene_list = DEG_list, print_msg = TRUE) -} - -} -\concept{liger_object_util} diff --git a/man/Meta_Remove_Seurat.Rd b/man/Meta_Remove_Seurat.Rd index a3e070e95e..174bc8f037 100644 --- a/man/Meta_Remove_Seurat.Rd +++ b/man/Meta_Remove_Seurat.Rd @@ -34,4 +34,4 @@ object <- AddMetaData(object = object, metadata = new_meta) } } -\concept{object_util} +\concept{get_set_util} diff --git a/man/QC_Plots_Complexity.Rd b/man/QC_Plots_Complexity.Rd index e0e1ac8917..a504a8a271 100644 --- a/man/QC_Plots_Complexity.Rd +++ b/man/QC_Plots_Complexity.Rd @@ -77,7 +77,7 @@ Custom VlnPlot for initial QC checks including lines for thresholding } \examples{ library(Seurat) -pbmc_small <- Add_Cell_Complexity_Seurat(pbmc_small) +pbmc_small <- Add_Cell_Complexity(pbmc_small) QC_Plots_Complexity(seurat_object = pbmc_small) diff --git a/man/Reduction_Loading_Present.Rd b/man/Reduction_Loading_Present.Rd index 870d2ff2ab..df976fa035 100644 --- a/man/Reduction_Loading_Present.Rd +++ b/man/Reduction_Loading_Present.Rd @@ -39,4 +39,4 @@ found_features <- features[[1]] } } -\concept{helper_util} +\concept{check_util} diff --git a/man/Rename_Clusters.Rd b/man/Rename_Clusters.Rd index b9e6cbde76..e7c1c840da 100644 --- a/man/Rename_Clusters.Rd +++ b/man/Rename_Clusters.Rd @@ -30,4 +30,4 @@ meta_col_name = "Round01_Res0.6_Idents") } } -\concept{object_util} +\concept{marker_annotation_util} diff --git a/man/Replace_Suffix.Rd b/man/Replace_Suffix.Rd index 3c8f02e7ad..115c7ac23c 100644 --- a/man/Replace_Suffix.Rd +++ b/man/Replace_Suffix.Rd @@ -28,4 +28,4 @@ dge_matrix <- Replace_Suffix(data = dge_matrix, current_suffix = "-1", new_suffi } } -\concept{helper_util} +\concept{barcode_util} diff --git a/man/Split_FeatureScatter.Rd b/man/Split_FeatureScatter.Rd deleted file mode 100644 index 3d9d4f98ef..0000000000 --- a/man/Split_FeatureScatter.Rd +++ /dev/null @@ -1,85 +0,0 @@ -% Generated by roxygen2: do not edit by hand 
-% Please edit documentation in R/Seurat_Plotting.R -\name{Split_FeatureScatter} -\alias{Split_FeatureScatter} -\title{Split FeatureScatter} -\usage{ -Split_FeatureScatter( - seurat_object, - feature1 = NULL, - feature2 = NULL, - split.by = NULL, - group.by = NULL, - colors_use = NULL, - pt.size = NULL, - aspect_ratio = NULL, - title_size = 15, - num_columns = NULL, - raster = NULL, - raster.dpi = c(512, 512), - ggplot_default_colors = FALSE, - color_seed = 123, - ... -) -} -\arguments{ -\item{seurat_object}{Seurat object name.} - -\item{feature1}{First feature to plot.} - -\item{feature2}{Second feature to plot.} - -\item{split.by}{Feature to split plots by (i.e. "orig.ident").} - -\item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident). -Use 'ident' to group.by active.ident class.} - -\item{colors_use}{color for the points on plot.} - -\item{pt.size}{Adjust point size for plotting.} - -\item{aspect_ratio}{Control the aspect ratio (y:x axes ratio length). Must be numeric value; -Default is NULL.} - -\item{title_size}{size for plot title labels.} - -\item{num_columns}{number of columns in final layout plot.} - -\item{raster}{Convert points to raster format. Default is NULL which will rasterize by default if -greater than 100,000 cells.} - -\item{raster.dpi}{Pixel resolution for rasterized plots, passed to geom_scattermore(). -Default is c(512, 512).} - -\item{ggplot_default_colors}{logical. If \code{colors_use = NULL}, Whether or not to return plot using -default ggplot2 "hue" palette instead of default "polychrome" or "varibow" palettes.} - -\item{color_seed}{random seed for the "varibow" palette shuffle if \code{colors_use = NULL} and number of -groups plotted is greater than 36. 
Default = 123.} - -\item{...}{Extra parameters passed to \code{\link[Seurat]{FeatureScatter}}.} -} -\value{ -A ggplot object -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Create FeatureScatter using split.by -} -\examples{ -\dontrun{ -# Function now DEPRECATED. -library(Seurat) -pbmc_small$sample_id <- sample(c("sample1", "sample2"), size = ncol(pbmc_small), replace = TRUE) - -# OLD Code -Split_FeatureScatter(seurat_object = pbmc_small, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", -split.by = "sample_id") - -# NEW Code -FeatureScatter_scCustom(seurat_object = pbmc_small, feature1 = "nCount_RNA", -feature2 = "nFeature_RNA", split.by = "sample_id") -} - -} -\concept{seurat_plotting} diff --git a/man/Split_Layers.Rd b/man/Split_Layers.Rd new file mode 100644 index 0000000000..8c3cfa72e9 --- /dev/null +++ b/man/Split_Layers.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Object_Conversion.R +\name{Split_Layers} +\alias{Split_Layers} +\title{Split Seurat object into layers} +\usage{ +Split_Layers(seurat_object, assay = "RNA", split.by) +} +\arguments{ +\item{seurat_object}{Seurat object name.} + +\item{assay}{name(s) of assays to convert. 
Default is "RNA".} + +\item{split.by}{Variable in meta.data to use for splitting layers.} +} +\description{ +Split Assay5 of Seurat object into layers by variable in meta.data +} +\examples{ +\dontrun{ +# Split object by "treatment" +obj <- Split_Layers(seurat_object = obj, assay = "RNA", split.by = "treatment") +} + +} +\concept{object_conversion} diff --git a/man/Split_Vector.Rd b/man/Split_Vector.Rd new file mode 100644 index 0000000000..293a1c9188 --- /dev/null +++ b/man/Split_Vector.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Utilities.R +\name{Split_Vector} +\alias{Split_Vector} +\title{Split vector into list} +\usage{ +Split_Vector(x, chunk_size = 100, verbose = FALSE) +} +\arguments{ +\item{x}{vector to split} + +\item{chunk_size}{size of chunks for vector to be split into, default is 100.} + +\item{verbose}{logical, print details of vector and split, default is FALSE.} +} +\value{ +list with vector of X length +} +\description{ +Splits vector into chunks of x sizes +} +\examples{ +vector <- c("gene1", "gene2", "gene3", "gene4", "gene5", "gene6") + +vector_list <- Split_Vector(x = vector, chunk_size = 3) + +} +\references{ +Base code from stackoverflow post: +\url{https://stackoverflow.com/a/3321659/15568251} +} +\concept{misc_util} diff --git a/man/Store_Misc_Info_Seurat.Rd b/man/Store_Misc_Info_Seurat.Rd index 5344a36095..840baf932b 100644 --- a/man/Store_Misc_Info_Seurat.Rd +++ b/man/Store_Misc_Info_Seurat.Rd @@ -41,4 +41,4 @@ pbmc_small <- Store_Misc_Info_Seurat(seurat_object = pbmc_small, data_to_store = data_name = "rd1_colors") } -\concept{object_util} +\concept{get_set_util} diff --git a/man/Store_Palette_Seurat.Rd b/man/Store_Palette_Seurat.Rd index 59f5eef282..8e3718ba5e 100644 --- a/man/Store_Palette_Seurat.Rd +++ b/man/Store_Palette_Seurat.Rd @@ -40,6 +40,5 @@ clu_pal <- c("red", "green", "blue") pbmc_small <- Store_Misc_Info_Seurat(seurat_object = pbmc_small, data_to_store = 
clu_pal, data_name = "rd1_colors") - } -\concept{object_util} +\concept{get_set_util} diff --git a/man/Updated_HGNC_Symbols.Rd b/man/Updated_HGNC_Symbols.Rd new file mode 100644 index 0000000000..85297d8d93 --- /dev/null +++ b/man/Updated_HGNC_Symbols.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Utilities.R +\name{Updated_HGNC_Symbols} +\alias{Updated_HGNC_Symbols} +\title{Update HGNC Gene Symbols} +\usage{ +Updated_HGNC_Symbols( + input_data, + update_symbol_data = NULL, + case_check_as_warn = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{input_data}{Data source containing gene names. Accepted formats are: +\itemize{ +\item \code{character vector} +\item \code{Seurat Objects} +\item \code{data.frame}: genes as rownames +\item \code{dgCMatrix/dgTMatrix}: genes as rownames +\item \code{tibble}: genes in first column +}} + +\item{update_symbol_data}{logical, whether to update cached HGNC data, default is NULL. +If \code{NULL} BiocFileCache will check and prompt for update if cache is stale. +If \code{FALSE} the BiocFileCache stale check will be skipped and current cache will be used. +If \code{TRUE} the BiocFileCache stale check will be skipped and HGNC data will be downloaded.} + +\item{case_check_as_warn}{logical, whether case checking of features should cause abort or +only warn, default is FALSE (abort). Set to TRUE if atypical names (i.e. old LOC naming) are +present in input_data.} + +\item{verbose}{logical, whether to print results detailing numbers of symbols found, updated, +and not found; default is TRUE.} +} +\value{ +data.frame containing columns: input_features, Approved_Symbol (already approved; output unchanged), Not_Found_Symbol (symbol not in HGNC; output unchanged), Updated_Symbol (new symbol from HGNC; output updated). +} +\description{ +Update human gene symbols using data from HGNC. This function will store cached data in package directory using (BiocFileCache). 
Use of this function requires internet connection on first use (or if setting \code{update_symbol_data = TRUE}). Subsequent use does not require connection and will pull from cached data. +} +\examples{ +\dontrun{ +new_names <- Updated_HGNC_Symbols(input_data = Seurat_Object) +} + +} +\concept{misc_util} diff --git a/man/as.LIGER.Rd b/man/as.LIGER.Rd new file mode 100644 index 0000000000..763518bca1 --- /dev/null +++ b/man/as.LIGER.Rd @@ -0,0 +1,92 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/Object_Conversion.R +\name{as.LIGER} +\alias{as.LIGER} +\alias{as.LIGER.Seurat} +\alias{as.LIGER.list} +\title{Convert objects to LIGER objects} +\usage{ +as.LIGER(x, ...) + +\method{as.LIGER}{Seurat}( + x, + group.by = "orig.ident", + layers_name = NULL, + assay = "RNA", + remove_missing = FALSE, + renormalize = TRUE, + use_seurat_var_genes = FALSE, + use_seurat_dimreduc = FALSE, + reduction = NULL, + keep_meta = TRUE, + verbose = TRUE, + ... +) + +\method{as.LIGER}{list}( + x, + group.by = "orig.ident", + dataset_names = NULL, + assay = "RNA", + remove_missing = FALSE, + renormalize = TRUE, + use_seurat_var_genes = FALSE, + var_genes_method = "intersect", + keep_meta = TRUE, + verbose = TRUE, + ... 
+) +} +\arguments{ +\item{x}{An object to convert to class \code{liger}} + +\item{...}{Arguments passed to other methods} + +\item{group.by}{Variable in meta data which contains variable to split data by, (default is "orig.ident").} + +\item{layers_name}{name of meta.data column used to split layers if setting \code{group.by = "layers"}.} + +\item{assay}{Assay containing raw data to use, (default is "RNA").} + +\item{remove_missing}{logical, whether to remove missing genes with no counts when converting to +LIGER object (default is FALSE).} + +\item{renormalize}{logical, whether to perform normalization after LIGER object creation (default is TRUE).} + +\item{use_seurat_var_genes}{logical, whether to transfer variable features from Seurat object to +new LIGER object (default is FALSE).} + +\item{use_seurat_dimreduc}{logical, whether to transfer dimensionality reduction coordinates from +Seurat to new LIGER object (default is FALSE).} + +\item{reduction}{Name of Seurat reduction to transfer if \code{use_seurat_dimreduc = TRUE}.} + +\item{keep_meta}{logical, whether to transfer columns in Seurat meta.data slot to LIGER cell.data +slot (default is TRUE).} + +\item{verbose}{logical, whether to print status messages during object conversion (default is TRUE).} + +\item{dataset_names}{optional, vector of names to use for naming datasets.} + +\item{var_genes_method}{how variable genes should be selected from Seurat objects if \code{use_seurat_var_genes = TRUE}. Can be either "intersect" or "union", (default is "intersect").} +} +\value{ +a liger object generated from \code{x} +} +\description{ +Convert objects (Seurat & lists of Seurat Objects) to LIGER objects +} +\examples{ +\dontrun{ +liger_object <- as.LIGER(x = seurat_object) +} + +\dontrun{ +liger_object <- as.LIGER(x = seurat_object_list) +} + +} +\references{ +modified and enhanced version of \code{rliger::seuratToLiger}. 
+} +\concept{object_conversion} diff --git a/man/as.Seurat.Rd b/man/as.Seurat.Rd new file mode 100644 index 0000000000..fd6b918a38 --- /dev/null +++ b/man/as.Seurat.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Object_Conversion.R +\name{as.Seurat.liger} +\alias{as.Seurat.liger} +\title{Convert objects to \code{Seurat} objects} +\usage{ +\method{as.Seurat}{liger}( + x, + nms = names(x@H), + renormalize = TRUE, + use.liger.genes = TRUE, + by.dataset = FALSE, + keep_meta = TRUE, + reduction_label = "UMAP", + seurat_assay = "RNA", + assay_type = NULL, + add_barcode_names = FALSE, + barcode_prefix = TRUE, + barcode_cell_id_delimiter = "_", + ... +) +} +\arguments{ +\item{x}{\code{liger} object.} + +\item{nms}{By default, labels cell names with dataset of origin (this is to account for cells in +different datasets which may have same name). Other names can be passed here as vector, must have +same length as the number of datasets. (default names(H)).} + +\item{renormalize}{Whether to log-normalize raw data using Seurat defaults (default TRUE).} + +\item{use.liger.genes}{Whether to carry over variable genes (default TRUE).} + +\item{by.dataset}{Include dataset of origin in cluster identity in Seurat object (default FALSE).} + +\item{keep_meta}{logical. Whether to transfer additional metadata (nGene/nUMI/dataset already transferred) +to new Seurat Object. Default is TRUE.} + +\item{reduction_label}{Name of dimensionality reduction technique used. Enables accurate transfer +or name to Seurat object instead of defaulting to "tSNE".} + +\item{seurat_assay}{Name to set for assay in Seurat Object. Default is "RNA".} + +\item{assay_type}{what type of Seurat assay to create in new object (Assay vs Assay5). +Default is NULL which will default to the current user settings. 
+See \code{\link{Convert_Assay}} parameter \code{convert_to} for acceptable values.} + +\item{add_barcode_names}{logical, whether to add dataset names to the cell barcodes when +creating Seurat object, default is FALSE.} + +\item{barcode_prefix}{logical, if \code{add_barcode_names = TRUE} should the names be added as +prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).} + +\item{barcode_cell_id_delimiter}{The delimiter to use when adding dataset id to barcode +prefix/suffix. Default is "_".} + +\item{...}{unused.} +} +\value{ +Seurat object with raw.data, scale.data, reduction_label, iNMF, and ident slots set. + +Seurat object. +} +\description{ +Merges raw.data and scale.data of object, and creates Seurat object with these values along with slots +containing dimensionality reduction coordinates, iNMF factorization, and cluster assignments. +Supports Seurat V3/4 and V5. +} +\details{ +Stores original dataset identity by default in new object metadata if dataset names are passed +in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". +} +\examples{ +\dontrun{ +seurat_object <- as.Seurat(x = liger_object) +} + +} +\references{ +Original function is part of LIGER package \url{https://github.com/welch-lab/liger} (Licence: GPL-3). +Function was modified for use in scCustomize with additional parameters/functionality. +} +\concept{object_conversion} diff --git a/man/as.anndata.Rd b/man/as.anndata.Rd new file mode 100644 index 0000000000..b971a6939b --- /dev/null +++ b/man/as.anndata.Rd @@ -0,0 +1,92 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Generics.R, R/Object_Conversion.R +\name{as.anndata} +\alias{as.anndata} +\alias{as.anndata.Seurat} +\alias{as.anndata.liger} +\title{Convert objects to anndata objects} +\usage{ +as.anndata(x, ...) 
+ +\method{as.anndata}{Seurat}( + x, + file_path, + file_name, + assay = "RNA", + main_layer = "data", + other_layers = "counts", + transer_dimreduc = TRUE, + verbose = TRUE, + ... +) + +\method{as.anndata}{liger}( + x, + file_path, + file_name, + transfer_norm.data = FALSE, + reduction_label = NULL, + add_barcode_names = FALSE, + barcode_prefix = TRUE, + barcode_cell_id_delimiter = "_", + verbose = TRUE, + ... +) +} +\arguments{ +\item{x}{Seurat or LIGER object} + +\item{...}{Arguments passed to other methods} + +\item{file_path}{directory file path and/or file name prefix. Defaults to current wd.} + +\item{file_name}{file name.} + +\item{assay}{Assay containing data to use, (default is "RNA").} + +\item{main_layer}{the layer of data to become default layer in anndata object (default is "data").} + +\item{other_layers}{other data layers to transfer to anndata object (default is "counts").} + +\item{transer_dimreduc}{logical, whether to transfer dimensionality reduction coordinates from +Seurat to anndata object (default is TRUE).} + +\item{verbose}{logical, whether to print status messages during object conversion (default is TRUE).} + +\item{transfer_norm.data}{logical, whether to transfer the norm.data in addition to +raw.data, default is FALSE.} + +\item{reduction_label}{What to label the visualization dimensionality reduction. +LIGER does not store name of technique and therefore needs to be set manually.} + +\item{add_barcode_names}{logical, whether to add dataset names to the cell barcodes when +merging object data, default is FALSE.} + +\item{barcode_prefix}{logical, if \code{add_barcode_names = TRUE} should the names be added as +prefix to current cell barcodes/names or a suffix (default is TRUE; prefix).} + +\item{barcode_cell_id_delimiter}{The delimiter to use when adding dataset id to barcode +prefix/suffix. Default is "_".} +} +\value{ +an anndata object generated from \code{x}, saved at path provided. 
+} +\description{ +Convert objects (Seurat & LIGER) to anndata objects +} +\examples{ +\dontrun{ +as.anndata(x = seurat_object, file_path = "/folder_name", file_name = "anndata_converted.h5ad") +} + +\dontrun{ +as.anndata(x = liger_object, file_path = "/folder_name", file_name = "anndata_converted.h5ad") +} + +} +\references{ +Seurat version modified and enhanced version of \code{sceasy::seurat2anndata} (sceasy package: \url{https://github.com/cellgeni/sceasy}; License: GPL-3. Function has additional checks and supports Seurat V3 and V5 object structure. + +LIGER version inspired by \code{sceasy::seurat2anndata} modified and updated to apply to LIGER objects (sceasy package: \url{https://github.com/cellgeni/sceasy}; License: GPL-3. +} +\concept{object_conversion} diff --git a/man/deprecated.Rd b/man/deprecated.Rd new file mode 100644 index 0000000000..810a0a0d00 --- /dev/null +++ b/man/deprecated.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Deprecated.R +\name{Split_FeatureScatter} +\alias{Split_FeatureScatter} +\alias{deprecated} +\alias{Add_Mito_Ribo_Seurat} +\alias{Add_Mito_Ribo_LIGER} +\alias{Add_Cell_Complexity_Seurat} +\alias{Add_Cell_Complexity_LIGER} +\alias{Meta_Present_LIGER} +\title{Deprecated functions \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} +\usage{ +Split_FeatureScatter(...) + +Add_Mito_Ribo_Seurat(...) + +Add_Mito_Ribo_LIGER(...) + +Add_Cell_Complexity_Seurat(...) + +Add_Cell_Complexity_LIGER(...) + +Meta_Present_LIGER(...) +} +\description{ +Use \code{\link[=FeatureScatter_scCustom]{FeatureScatter_scCustom()}} instead of \code{Split_FeatureScatter()}. + +Use \code{\link[=Add_Mito_Ribo]{Add_Mito_Ribo()}} instead of \code{Add_Mito_Ribo_Seurat()}. + +Use \code{\link[=Add_Mito_Ribo]{Add_Mito_Ribo()}} instead of \code{Add_Mito_Ribo_LIGER()}. 
+ +Use \code{\link[=Add_Cell_Complexity]{Add_Cell_Complexity()}} instead of \code{Add_Cell_Complexity_Seurat()}. + +Use \code{\link[=Add_Cell_Complexity]{Add_Cell_Complexity()}} instead of \code{Add_Cell_Complexity_LIGER()}. + +Use \code{\link[=Meta_Present]{Meta_Present()}} instead of \code{Meta_Present_LIGER()}. +} +\keyword{internal} diff --git a/man/plotFactors_scCustom.Rd b/man/plotFactors_scCustom.Rd index d24979ce5c..8e8ff5260e 100644 --- a/man/plotFactors_scCustom.Rd +++ b/man/plotFactors_scCustom.Rd @@ -12,6 +12,7 @@ plotFactors_scCustom( pt.size_factors = 1, pt.size_dimreduc = 1, reduction_label = "UMAP", + plot_legend = TRUE, raster = TRUE, raster.dpi = c(512, 512), order = FALSE, @@ -44,6 +45,9 @@ coordinates (tSNE/UMAP). Default is c('lemonchiffon', 'red'),} \item{reduction_label}{What to label the x and y axes of resulting plots. LIGER does not store name of technique and therefore needs to be set manually. Default is "UMAP".} +\item{plot_legend}{logical, whether to plot the legend on factor loading plots, default is TRUE. +Helpful if number of datasets is large to avoid crowding the plot with legend.} + \item{raster}{Convert points to raster format. Default is NULL which will rasterize by default if greater than 200,000 cells.} diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 0000000000..dbdff0fcba --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Reexports.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{as.Seurat} +\title{Objects exported from other packages} +\note{ +See \code{\link{as.Seurat.liger}} for scCustomize extension of this generic to converting Liger objects. +} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. 
+ +\describe{ + \item{SeuratObject}{\code{\link[SeuratObject]{as.Seurat}}} +}} + diff --git a/man/scCustomize_Palette.Rd b/man/scCustomize_Palette.Rd index a6e1172fb6..4bb2a00911 100644 --- a/man/scCustomize_Palette.Rd +++ b/man/scCustomize_Palette.Rd @@ -14,7 +14,6 @@ scCustomize_Palette( \item{num_groups}{number of groups to be plotted. If \code{ggplot_default_colors = FALSE} then by default: \itemize{ \item If number of levels plotted equal to 2 then colors will be \code{NavyAndOrange()}. -\item If number of levels plotted greater than 2 but less than or equal to 8 it will use \code{ColorBlind_Pal()}. \item If number of levels plotted greater than 2 but less than or equal to 36 it will use "polychrome" from \code{DiscretePalette_scCustomize()}. \item If greater than 36 will use "varibow" with shuffle = TRUE from \code{DiscretePalette_scCustomize}. }} diff --git a/vignettes/articles/Gene_Expression_Plotting.Rmd b/vignettes/articles/Gene_Expression_Plotting.Rmd index ca9c0e4dba..eac90845d5 100644 --- a/vignettes/articles/Gene_Expression_Plotting.Rmd +++ b/vignettes/articles/Gene_Expression_Plotting.Rmd @@ -524,6 +524,50 @@ plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8) plots[[2]] ``` +#### Clustered_DotPlot() split by additional grouping variable +`Clustered_DotPlot` can now plot with additional grouping variable provided to `split.by` parameter. 
+ +```{r eval=FALSE} +Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method") +``` + +```{r include=FALSE, fig.align='center', fig.height=10, fig.width=10} +plot_split <- Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method") +``` + +```{r echo=FALSE, fig.align='center', fig.height=10, fig.width=10} +plot_split[[2]] +``` + +However, you'll notice that the labels on the bottom get cutoff on the left-hand side of the plot. There are two solutions to this. + +Keep bottom labels rotated but add extra white-space padding on left +```{r eval=FALSE} +Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", plot_padding = TRUE) +``` + +```{r include=FALSE, fig.align='center', fig.height=10, fig.width=10} +plot_split <- Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", plot_padding = TRUE) +``` + +```{r echo=FALSE, fig.align='center', fig.height=10, fig.width=10} +plot_split[[2]] +``` + +Or simply remove the bottom label text rotation +```{r eval=FALSE} +Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", x_lab_rotate = 90) +``` + +```{r include=FALSE, fig.align='center', fig.height=10, fig.width=10} +plot_split <- Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", x_lab_rotate = 90) +``` + +```{r echo=FALSE, fig.align='center', fig.height=10, fig.width=10} +plot_split[[2]] +``` + + #### Clustered_DotPlot() k-means Clustering Optional Parameters **Determining Optimal k Value** diff --git 
a/vignettes/articles/Helpers_and_Utilities.Rmd b/vignettes/articles/Helpers_and_Utilities.Rmd index 19929e5291..0988eb7ead 100644 --- a/vignettes/articles/Helpers_and_Utilities.Rmd +++ b/vignettes/articles/Helpers_and_Utilities.Rmd @@ -68,22 +68,22 @@ pbmc$batch <- sample(c("Batch1", "Batch2"), size = ncol(pbmc), replace = TRUE) ## Add Mitochondrial & Ribosomal Percentages As discussed in [QC Plotting vignette](https://samuel-marsh.github.io/scCustomize/articles/QC_Plots.html) one the first steps after creating object if often to calculate and add mitochondrial and ribosomal count percentages per cell/nucleus. -### Seurat Objects: `Add_Mito_Ribo_Seurat()` +### `Add_Mito_Ribo()` scCustomize contains easy wrapper function to automatically add both Mitochondrial and Ribosomal percentages to meta.data slot. If you are using mouse, human, marmoset, zebrafish, rat, drosophila, or rhesus macaque data all you need to do is specify the `species` parameter. ```{r eval=FALSE} # These defaults can be run just by providing accepted species name -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "human") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "human") ``` To view list of accepted values for default species names simply set `list_species_names = TRUE`. ```{r eval=FALSE} -Add_Mito_Ribo_Seurat(list_species_names = TRUE) +Add_Mito_Ribo(list_species_names = TRUE) ``` ```{r echo=FALSE} -accepted_names <- Add_Mito_Ribo_Seurat(list_species_names = TRUE) +accepted_names <- Add_Mito_Ribo(object = pbmc, list_species_names = TRUE) accepted_names %>% kableExtra::kbl(row.names = TRUE) %>% @@ -95,22 +95,22 @@ However custom prefixes can be used for non-human/mouse/marmoset/rat/zebrafish/d *NOTE: If desired please submit issue on GitHub for additional default species. 
Please include regex pattern or list of genes for both mitochondrial and ribosomal genes and I will add additional built-in defaults to the function.* ```{r eval=FALSE} # Using gene name patterns -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "other", mito_pattern = "regexp_pattern", ribo_pattern = "regexp_pattern") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "other", mito_pattern = "regexp_pattern", ribo_pattern = "regexp_pattern") # Using feature name lists mito_gene_list <- c("gene1", "gene2", "etc") ribo_gene_list <- c("gene1", "gene2", "etc") -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "other", mito_features = mito_gene_list, ribo_features = ribo_gene_list) +pbmc <- Add_Mito_Ribo(object = pbmc, species = "other", mito_features = mito_gene_list, ribo_features = ribo_gene_list) # Using combination of gene lists and gene name patterns -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "Human", mito_features = mito_gene_list, ribo_pattern = "regexp_pattern") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "Human", mito_features = mito_gene_list, ribo_pattern = "regexp_pattern") ``` ### Warning Messages -The added benefit of `Add_Mito_Ribo_Seurat` & `Add_Mito_Ribo_LIGER` is that they will return informative warnings if no Mitochondrial or Ribosomal features are found using the current species, features, or pattern specification. +The added benefit of `Add_Mito_Ribo` is that it will return informative warnings if no Mitochondrial or Ribosomal features are found using the current species, features, or pattern specification. 
```{r message=TRUE, warning=TRUE, error=TRUE} # For demonstration purposes we can set `species = mouse` for this object of human cells -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "mouse") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "mouse") ``` ```{r include=FALSE} @@ -121,26 +121,26 @@ pbmc <- UpdateSeuratObject(object = pbmc) ```{r message=TRUE, warning=TRUE, error=TRUE} # Or if providing custom patterns/lists and features not found -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "other", mito_pattern = "^MT-", ribo_pattern = "BAD_PATTERN") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "other", mito_pattern = "^MT-", ribo_pattern = "BAD_PATTERN") ``` -`Add_Mito_Ribo_Seurat` and `Add_Mito_Ribo_LIGER()` will also return warnings if columns are already present in `@meta.data` slot and prompt you to provide override if you want to run the function. +`Add_Mito_Ribo` will also return warnings if columns are already present in `@meta.data` slot and prompt you to provide override if you want to run the function. ```{r include=FALSE} pbmc <- pbmc3k.SeuratData::pbmc3k pbmc <- UpdateSeuratObject(object = pbmc) -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "human") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "human") ``` ```{r message=TRUE, warning=TRUE, error=TRUE} -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "human") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "human") ``` -### LIGER Objects: `Add_Mito_Ribo_LIGER()` -scCustomize also contains identical function for use with LIGER objects. `Add_Mito_Ribo_LIGER` contains equivalent parameters and capabilities as `Add_Mito_Ribo_Seurat` +### LIGER Objects: `Add_Mito_Ribo()` +scCustomize `Add_Mito_Ribo` also works seamlessly with LIGER objects. 
```{r eval=FALSE} -liger_obj <- Add_Mito_Ribo_Seurat(seurat_object = liger_obj, species = "human") +liger_obj <- Add_Mito_Ribo(object = liger_obj, species = "human") ``` @@ -151,9 +151,9 @@ In addition to metrics like number of features and UMIs it can often be helpful scCustomize contains easy shortcut function to add a measure of cell complexity/novelty that can sometimes be useful to filter low quality cells. The metric is calculated by calculating the result of log10(nFeature) / log10(nCount). ```{r eval = FALSE} # These defaults can be run just by providing accepted species name -pbmc <- Add_Cell_Complexity_Seurat(seurat_object = pbmc) +pbmc <- Add_Cell_Complexity(object = pbmc) ``` -*NOTE: There is analogous function for LIGER objects (see: `Add_Cell_Complexity_LIGER()`).* +*NOTE: The function also works seamlessly with LIGER objects.* ### Add Top Percent Expression QC Metric @@ -270,6 +270,24 @@ obj <- Add_Sample_Meta(seurat_object = obj, meta_data = sample_meta, join_by_seu ``` +### Add feature meta data +Starting in Seurat V5 each assay now possesses its own meta.data slot which is feature-level meta data. During the course of normal analysis this is where information on variable features is stored. However, we can also use it to store alternate feature names, in most cases this is Ensembl IDs matching the symbols used in object creation/analysis. + +scCustomize provides the function `Add_Alt_Feature_ID()` to automatically match and add these features using the same files used in object creation. Users only need to supply either path to the features.tsv.gz file or the hdf5 file produced from Cell Ranger output. 
+ +```{r eval=FALSE} +# Using features.tsv.gz file +obj <- Add_Alt_Feature_ID(seurat_object = obj, +features_tsv = "sample01/outs/filtered_feature_bc_matrix/features.tsv.gz", assay = "RNA") + +# Using hdf5 file +obj <- Add_Alt_Feature_ID(seurat_object = obj, +hdf5_file = "sample01/outs/filtered_feature_bc_matrix.h5", assay = "RNA") +``` + +*NOTE:* If using features.tsv.gz file the file from either filtered or raw outputs can be used as they are identical. + +*NOTE:* If using hdf5 file the file from either filtered_feature_bc or raw_feature_bc can be used as the features slot is identical. Though it is faster to load filtered_feature_bc file due to droplet filtering. ## Check for Features/Genes @@ -316,7 +334,7 @@ check_symbols <- UpdateSymbolList(symbols = genes_present[[2]], verbose = TRUE) ``` -## Merging Data/Objects +## Merging & Splitting Data/Objects ### Merging raw data It can often be advantageous to merge raw data before creating analysis objects vs creating lots of objects and merging them all later. scCustomize features a modified version of the internal LIGER function `MergeSparseDataAll()`. @@ -351,6 +369,16 @@ merged_seurat <- Merge_Seurat_List(list_seurat = list_of_objects, add.cell.ids = ``` +### Splitting Seurat V5 Layers +Seurat V5 objects now have the ability to split within the object into layers. However, I find that the syntax to do this is not the most intuitive and can be simplified with a new simple wrapper function: `Split_Layers()` + +```{r message=TRUE, warning=TRUE} +pbmc <- Split_Layers(seurat_object = pbmc, split.by = "sample_id") +``` + +`Split_Layers()` defaults to "RNA" assay but can be used for any assay present in object (users should check whether splitting assay other than "RNA" is valid before proceeding). + + ## Storing Misc Information in Seurat Objects Seurat objects contain an extra empty slot that can be used to store any extra information desired. 
scCustomize contains two functions `Store_Misc_Info_Seurat` and a wrapper around that function `Store_Palette_Seurat` to make this process easy. diff --git a/vignettes/articles/LIGER_Functions.Rmd b/vignettes/articles/LIGER_Functions.Rmd index 0843601d2d..d479402943 100644 --- a/vignettes/articles/LIGER_Functions.Rmd +++ b/vignettes/articles/LIGER_Functions.Rmd @@ -258,25 +258,26 @@ head(top20_factor6, 5) ## Convert LIGER to Seurat -The liger/rliger package already contains a function `seuratToLiger()` to convert LIGER objects to Seurat Objects. However, during this transfer a few things have issues crop up: +The liger/rliger package already contains a function `ligerToSeurat()` to convert LIGER objects to Seurat Objects. However, during this transfer a few things have issues crop up: * All meta data except the "dataset" column from liger_object\@cell.data is lost. * Seurat dimensionality reduction is set to tSNE regardless of method used in LIGER analysis * Seurat assay name not specified -scCustomize contains modified version of this function named `Liger_to_Seurat()` that solves these issues with 3 extra parameters: +scCustomize contains modified version of this function which extends the Seurat function [`as.Seurat()`](https://github.com/samuel-marsh/scCustomize/blob/develop/docs/reference/as.Seurat.html) that solves these issues with some extra parameters: * `keep_meta` logical. Whether to keep meta data from the \@cell.data slot in LIGER object. Default is TRUE. * `reduction_label` Name of dimensionality reduction technique used (e.g., tSNE, UMAP, etc). Ensures dim names are set correctly in Seurat object. * `seurat_assay` Name of assay to use for data in Seurat object. Default is "RNA". +* `assay_type` Specify whether to create V3/4 vs V5 Seurat assays. 
```{r eval=FALSE} -seurat_obj <- Liger_to_Seurat(liger_object = liger_object, reduction_label = "UMAP") +seurat_obj <- as.Seurat(x = liger_object, reduction_label = "UMAP") ``` ## Add Mitochondrial & Ribosomal Percentages -For more information on adding mitochondrial and ribosomal count percentages to LIGER objects see [General Utilities and Helpers Vignette](https://samuel-marsh.github.io/scCustomize/articles/Helpers_and_Utilities.html) for information on `Add_Mito_Ribo_LIGER` which is analogous to `Add_Mito_Ribo_Seurat`. +For more information on adding mitochondrial and ribosomal count percentages to LIGER objects see [General Utilities and Helpers Vignette](https://samuel-marsh.github.io/scCustomize/articles/Helpers_and_Utilities.html) for information on `Add_Mito_Ribo`. ## Variable Gene Selection diff --git a/vignettes/articles/Object_Conversion.Rmd b/vignettes/articles/Object_Conversion.Rmd new file mode 100644 index 0000000000..bef3da87fd --- /dev/null +++ b/vignettes/articles/Object_Conversion.Rmd @@ -0,0 +1,166 @@ +--- +title: "Object Format Conversion" +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +output: rmarkdown::html_vignette +theme: united +df_print: kable +vignette: > + %\VignetteIndexEntry{Object Format Conversion} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- +*** + + + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +# Converting between scRNA-seq package formats +While many packages have some object converters they are not always as feature complete as desired.
scCustomize provides a few straightforward converter functions. + +***FYI: Object converters can be fragile and/or not very flexible. I have tried to write these functions to avoid those issues. However, if that proves too hard a task to maintain long-term I may move them to separate package/deprecate them.*** + + +```{r init} +# Load Packages +library(Seurat) +library(rliger) +library(scCustomize) +library(qs) +``` + +Load Seurat Object & Add QC Data +```{r} +# read object +pbmc <- pbmc3k.SeuratData::pbmc3k.final +pbmc <- UpdateSeuratObject(pbmc) + +pbmc <- Add_Cell_QC_Metrics(seurat_object = pbmc, species = "human") +``` + +We'll also add some random meta data variables to pbmc data for use in this vignette +```{r} +pbmc$sample_id <- sample(c("sample1", "sample2", "sample3", "sample4", "sample5", "sample6"), size = ncol(pbmc), replace = TRUE) +``` + + +## Convert Seurat Objects to LIGER Objects +scCustomize contains the conversion function `as.LIGER()`. This function has a few advantages over the conversion function `rliger::seuratToLiger()`. + +- `as.LIGER` will *keep all meta data* and transfer it to LIGER by default. +- If a dimensionality reduction is transferred `as.LIGER` will also store a reduction key for use with scCustomize LIGER plotting functions to correctly set plot axes. +- If converting a list of Seurat objects and keeping Seurat variable genes then `as.LIGER` can either take union or intersection of genes between objects. +- `as.LIGER` has additional internal checks to ensure compatibility with both Seurat V3/4 & V5 object structures. + +```{r message=TRUE} +pbmc_liger <- as.LIGER(x = pbmc, group.by = "sample_id") + +pbmc_liger +``` + +Confirm that information from meta.data slot was transferred to LIGER object. +```{r} +head(colnames(x = pbmc_liger@cell.data), 10) +``` + + + +## Convert LIGER objects to Seurat Objects +The liger/rliger package already contains a function `rliger::ligerToSeurat()` to convert LIGER objects to Seurat Objects.
However, a few issues crop up during this transfer: + +* All meta data except the "dataset" column from liger_object\@cell.data is lost. +* Seurat dimensionality reduction is set to tSNE regardless of method used in LIGER analysis +* Seurat assay name not specified + +As of scCustomize v2.1.0 converting to Seurat objects from Liger can be accomplished using the `as.Seurat` function which functions identically to previous function `Liger_to_Seurat()`. `Liger_to_Seurat()` will continue to work until v2.2.0 at which point it will be completely deprecated. + +scCustomize contains a modified version of this function which extends the Seurat function [`as.Seurat()`](https://github.com/samuel-marsh/scCustomize/blob/develop/docs/reference/as.Seurat.html) that solves these issues with some extra parameters: + +* `keep_meta` logical. Whether to keep meta data from the \@cell.data slot in LIGER object. Default is TRUE. +* `reduction_label` Name of dimensionality reduction technique used (e.g., tSNE, UMAP, etc). Ensures dim names are set correctly in Seurat object. +* `seurat_assay` Name of assay to use for data in Seurat object. Default is "RNA". +* `assay_type` Specify whether to create V3/4 vs V5 Seurat assays. + +```{r message=TRUE} +new_seurat <- as.Seurat(x = pbmc_liger, reduction_label = "UMAP") +``` + + +## Convert Seurat or LIGER objects to Anndata objects +scCustomize also allows for the conversion of Seurat or LIGER objects to python [anndata](https://anndata.readthedocs.io/en/latest/) objects for analysis in [scanpy](https://scanpy.readthedocs.io/en/stable/) or other compatible python packages via the function `as.anndata`. These functions were inspired/modified/updated from [sceasy R package](https://github.com/cellgeni/sceasy) (see `as.anndata` documentation). + +* `as.anndata` works with Seurat V3/4, Seurat V5, and LIGER objects.
+* `as.anndata` requires users have [reticulate R package](https://CRAN.R-project.org/package=reticulate) and linked python installation with anndata installed. + - See [reticulate website](https://rstudio.github.io/reticulate/) for more information on installation and setup of reticulate in R. + - See [anndata website](https://anndata.readthedocs.io/en/latest/) for more information on installation and use of anndata in python. + +```{r include=FALSE} +Sys.setenv(RETICULATE_PYTHON = "/Users/marsh_mbp/.virtualenvs/r-reticulate/bin/python") +``` + +```{r message=TRUE} +as.anndata(x = pbmc, file_path = "~/Desktop", file_name = "pbmc_anndata.h5ad") +``` + +## Convert Seurat assay type within an object +The release of Seurat V5+ has brought about two different types of assay structure that can exist within a Seurat object. However, some community tools that interact with Seurat objects have not been updated to work with both assay formats. Therefore it becomes necessary to change assay format for use with certain tools. + +scCustomize provides `Convert_Assay()` for easy method to convert from Assay>Assay5 (V3/4>5) or Assay5>Assay (V5>V3/4). + +### Convert V3/4 to V5 + +```{r} +# Convert to V5/Assay5 structure +pbmc_V5 <- Convert_Assay(seurat_object = pbmc, convert_to = "V5") + +pbmc_V5[["RNA"]] +``` + + +### Convert V5 > V3/4 + +```{r} +# Convert to V3/4/Assay structure +pbmc_V3 <- Convert_Assay(seurat_object = pbmc_V5, convert_to = "V3") + +pbmc_V3[["RNA"]] +``` + +### Accepted values for `convert_to` +`Convert_Assay` will accept a range of accepted values for the `convert_to` parameter to specify desired format. 
+ + +```{r echo=FALSE} +library(magrittr) +accepted_format_names <- data.frame(V3_Assay_Options = c("Assay", "assay", "V3", "v3"), + V5_Assay5_Options = c("Assay5", "assay5", "V5", "v5")) + +accepted_format_names %>% + kableExtra::kbl() %>% + kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped")) +``` + diff --git a/vignettes/articles/QC_Plots.Rmd b/vignettes/articles/QC_Plots.Rmd index f5916fc863..38d8dec496 100644 --- a/vignettes/articles/QC_Plots.Rmd +++ b/vignettes/articles/QC_Plots.Rmd @@ -69,7 +69,7 @@ hca_bm <- UpdateSeuratObject(hca_bm) ```{r include=FALSE} -accepted_names <- Add_Mito_Ribo_Seurat(list_species_names = TRUE) +accepted_names <- Add_Mito_Ribo(object = hca_bm, list_species_names = TRUE) ``` ## Adding QC Metrics @@ -79,9 +79,9 @@ This is the starting point of nearly every single cell analysis and scCustomize scCustomize contains easy wrapper function to automatically add both Mitochondrial and Ribosomal count percentages to meta.data slot. If you are using mouse, human, rat, zebrafish, drosophila, marmoset, or macaque data all you need to do is specify the `species` parameter. ```{r eval=FALSE} # These defaults can be run just by providing accepted species name -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "Human") +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "Human") ``` -*NOTE: There is analogous function for LIGER objects (see: `Add_Mito_Ribo_LIGER`) but QC plotting functions currently only support Seurat objects.* +*NOTE: This function works seamlessly for both Seurat and LIGER objects.* To view list of accepted values for default species names simply set `list_species_names = TRUE`. ```{r echo=FALSE} @@ -94,22 +94,22 @@ However custom prefixes can be used for non-human/mouse/marmoset species with di *NOTE: If desired please submit issue on GitHub for additional default species.
Please include regex pattern or list of genes for both mitochondrial and ribosomal genes and I will add additional built-in defaults to the function.* ```{r eval=FALSE} # Using gene name patterns -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "other", mito_pattern = "regexp_pattern", ribo_pattern = "regexp_pattern") +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "other", mito_pattern = "regexp_pattern", ribo_pattern = "regexp_pattern") # Using feature name lists mito_gene_list <- c("gene1", "gene2", "etc") ribo_gene_list <- c("gene1", "gene2", "etc") -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "other", mito_features = mito_gene_list, ribo_features = ribo_gene_list) +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "other", mito_features = mito_gene_list, ribo_features = ribo_gene_list) # Using combination of gene lists and gene name patterns -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "Human", mito_features = mito_gene_list, ribo_pattern = "regexp_pattern") +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "Human", mito_features = mito_gene_list, ribo_pattern = "regexp_pattern") ``` #### Use of Ensembl IDs scCustomize contains built in list of ensembl IDs that correspond to mitochondrial and ribosomal genes for all default species. If your object using ensembl IDs as features names then simply add `ensembl_ids` parameter. ```{r eval=FALSE} # Using gene name patterns -hca_bm <- Add_Mito_Ribo_Seurat(seurat_object = hca_bm, species = "Human", ensembl_ids = TRUE) +hca_bm <- Add_Mito_Ribo(object = hca_bm, species = "Human", ensembl_ids = TRUE) ``` @@ -120,9 +120,9 @@ In addition to metrics like number of features and UMIs it can often be helpful scCustomize contains easy shortcut function to add a measure of cell complexity/novelty that can sometimes be useful to filter low quality cells. The metric is calculated by calculating the result of log10(nFeature) / log10(nCount). 
```{r eval = FALSE} # These defaults can be run just by providing accepted species name -hca_bm <- Add_Cell_Complexity_Seurat(seurat_object = hca_bm) +hca_bm <- Add_Cell_Complexity(object = hca_bm) ``` -*NOTE: There is analogous function for LIGER objects (see: `Add_Cell_Complexity_LIGER`) but QC plotting functions currently only support Seurat objects.* +*NOTE: This function works seamlessly for both Seurat and LIGER objects.* ### Add Top Percent Expression QC Metric diff --git a/vignettes/articles/Statistics.Rmd b/vignettes/articles/Statistics.Rmd index 0cfaaffb4a..6d59f4301e 100644 --- a/vignettes/articles/Statistics.Rmd +++ b/vignettes/articles/Statistics.Rmd @@ -65,7 +65,7 @@ pbmc <- UpdateSeuratObject(pbmc) Now let's add some extra meta data for use with tutorial ```{r} # Add mito and ribo data -pbmc <- Add_Mito_Ribo_Seurat(seurat_object = pbmc, species = "human") +pbmc <- Add_Mito_Ribo(object = pbmc, species = "human") # Add random sample and group variables pbmc$orig.ident <- sample(c("sample1", "sample2", "sample3", "sample4"), size = ncol(pbmc), replace = TRUE) diff --git a/vignettes/articles/Update_Gene_Symbols.Rmd b/vignettes/articles/Update_Gene_Symbols.Rmd new file mode 100644 index 0000000000..ab7eed25de --- /dev/null +++ b/vignettes/articles/Update_Gene_Symbols.Rmd @@ -0,0 +1,154 @@ +--- +title: "Updating Gene Symbols" +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +output: rmarkdown::html_vignette +theme: united +df_print: kable +vignette: > + %\VignetteIndexEntry{Updating Gene Symbols} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- +*** + + + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts =
list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +# Updating Human Gene Symbols +The official gene symbols used in a dataset can change depending on the reference version used in aligning that particular dataset. For human genes the official symbols are set by [HGNC](https://www.genenames.org/). + +In the absence of a more static identifier (Ensembl ID or Entrez ID Numbers) the only way to update gene symbols is to examine the current and past symbols for all genes in the HGNC database. However, many of the functions that perform this task come with caveats that range from difficulty updating to the newest HGNC data to, at worst, potentially improperly renaming symbols. + + +```{r init} +# Load Packages +library(Seurat) +library(scCustomize) +library(qs) +``` + +Load Seurat Object & Add QC Data +```{r} +# read object +pbmc <- pbmc3k.SeuratData::pbmc3k.final +pbmc <- UpdateSeuratObject(pbmc) +``` + +## Issues with other functions +In order to understand how scCustomize's `Updated_HGNC_Symbols()` improves the process it is important to be aware of the caveats of some other tools. + +### Seurat's `UpdateSymbolList()` +The first is Seurat's `UpdateSymbolList()` which takes an input vector of symbols and uses an active connection to HGNC to query for updated symbols. However, there are two caveats with this function 1) it requires user to have internet connection anytime using the function, 2) it can potentially rename symbols incorrectly. + +To illustrate the second issue I will use 3 gene symbols that have been current for some time: MCM2, MCM7, CCNL1. However, let's take a look at some of the previous symbols for each of these genes: +- Previous Symbols for MCM2 are: CCNL1 & CDCL1 +- Previous symbols for MCM7 are: MCM2 +- Previous symbols for CCNL1 are: None + +Now see what happens when we use `UpdateSymbolList`.
+ +```{r} +test_symbols <- c("MCM2", "MCM7", "CCNL1") + +UpdateSymbolList(symbols = test_symbols) +``` + +As you can see the function does the following: +- Renames MCM2 > MCM7 because MCM2 is a previous symbol. +- Leaves MCM7 the same because no other gene has MCM7 as a previous symbol. +- Renames CCNL1 > MCM2 because CCNL1 is a previous symbol. + +This happens because `UpdateSymbolList` queries each symbol in isolation and not in the context of all of the genes being queried. + + +### HGNChelper Package +After developing this function I was made aware of the HGNChelper package which also aims to provide symbol updates. It solves the renaming issue in a similar fashion to scCustomize (see below). It also provides a solution for the requirement of internet access. + +It does this by storing the HGNC dataset as package data so that it comes bundled with the package. However, there is an issue with the way this is implemented. First, the bundled data is from 2019 so is approaching 5 years old. Updated data can be downloaded interactively using a package function but this must be done in every R session where the data is needed, requiring internet access to use current data. The authors do provide a solution to this but it involves cloning the github repo and running source scripts which may be beyond many R users. + + +## Solving the Issue with scCustomize's `Updated_HGNC_Symbols` +scCustomize now provides the function `Updated_HGNC_Symbols` to attempt to solve both of these caveats. + +### Requirement of internet access +`Updated_HGNC_Symbols` does require internet access the first time the function is being used to download the most recent data from HGNC. However, it then stores the downloaded data using [BiocFileCache package](https://bioconductor.org/packages/release/bioc/html/BiocFileCache.html), meaning subsequent uses don't require any internet access. This also significantly improves the speed of the function.
+ +### Inappropriate renaming +Second, `Updated_HGNC_Symbols` uses the full input list and first automatically approves any symbol that is already an approved gene symbol so that there is no chance of improperly updating any symbols. It then checks the remaining symbols for any symbol updates. + +Let's run our test symbol set: +```{r message=TRUE} +results <- Updated_HGNC_Symbols(input_data = test_symbols) +``` + +```{r echo=FALSE} +library(magrittr) + +results %>% + kableExtra::kbl() %>% + kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped")) +``` + + +As mentioned before the function is also very quick, returning updated symbols for 36,000 genes in ~1 second. +```{r message=TRUE} +# Read in full 10X reference genome feature list +features <- Read10X_h5("assets/Barcode_Rank_Example/sample1/outs/raw_feature_bc_matrix.h5") + +features <- rownames(features) + +# Load tictoc to give timing +library(tictoc) + +# Get updated symbols +tic() +results <- Updated_HGNC_Symbols(input_data = features) +toc() +``` + + +### Examining the Results +Now let's take a look at the output from `Updated_HGNC_Symbols`, which also has some advantages in detail vs other methods. + +For this example I have picked a section of the results that contains all 3 potential results. +```{r eval=FALSE} +results[168:177,] +``` + +```{r echo=FALSE} +results[168:177,] %>% + kableExtra::kbl() %>% + kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped")) +``` + + +As you can see the majority of these symbols are already updated so the input symbol matches the output symbol. + +In the case of "AL031847.1" that annotation was not found in HGNC and therefore the symbol was left unchanged. + +Finally in the case of "LINC00337" there was an updated symbol of "ICMT-DT" so the output symbol was updated to that current symbol.