diff --git a/DESCRIPTION b/DESCRIPTION index c8c8594bf..a8426d509 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Giotto Title: Spatial Single-Cell Transcriptomics Toolbox -Version: 4.0.6 +Version: 4.0.9 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7650-7754")), @@ -27,11 +27,11 @@ URL: https://drieslab.github.io/Giotto/, https://github.com/drieslab/Giotto BugReports: https://github.com/drieslab/Giotto/issues RoxygenNote: 7.3.1 Depends: - base (>= 3.5.0), - utils (>= 3.5.0), - R (>= 3.5.0), + base (>= 4.1.0), + utils (>= 4.1.0), + R (>= 4.1.0), methods, - GiottoClass (>= 0.3.0) + GiottoClass (>= 0.3.1) Imports: BiocParallel, BiocSingular, @@ -41,12 +41,12 @@ Imports: dbscan (>= 1.1-3), ggplot2 (>= 3.1.1), ggrepel, - GiottoUtils (>= 0.1.7), - GiottoVisuals (>= 0.2.0), + GiottoUtils (>= 0.1.9), + GiottoVisuals (>= 0.2.2), igraph (>= 1.2.4.1), jsonlite, limma, - Matrix (>= 1.6.2), + Matrix (>= 1.6-2), MatrixGenerics, plotly, progressr, diff --git a/NAMESPACE b/NAMESPACE index 7b3eb6556..818eb8c4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(print,combIcfObject) +S3method(print,icfObject) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") @@ -145,13 +147,12 @@ export(create_jackstrawplot) export(create_screeplot) export(crop) export(cropGiottoLargeImage) -export(crossSectionGenePlot) -export(crossSectionGenePlot3D) +export(crossSectionFeatPlot) +export(crossSectionFeatPlot3D) export(crossSectionPlot) export(crossSectionPlot3D) export(detectSpatialCorFeats) export(detectSpatialCorFeatsMatrix) -export(detectSpatialCorGenes) export(detectSpatialPatterns) export(dimCellPlot) export(dimCellPlot2D) @@ -258,7 +259,7 @@ export(heatmSpatialCorGenes) export(hexVertices) export(hyperGeometricEnrich) export(initHMRF_V2) -export(insertCrossSectionGenePlot3D) +export(insertCrossSectionFeatPlot3D) export(insertCrossSectionSpatPlot3D) export(installGiottoEnvironment) export(instructions) diff --git a/NEWS.md b/NEWS.md index fcc84a1f9..5fe9c1560 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,40 @@ +# Giotto 4.0.10 TBD + +## Bug fixes +* Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 +* Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. + +# Giotto 4.0.9 + +## Breaking changes +* Deprecated `detectSpatialCorGenes()` removed. Use `detectSpatialCorFeats()` instead +* Deprecated `findInteractionChangedGenes()` removed. Use `findInteractionChangedFeats()` instead +* Deprecated `findCellProximityGenes()` removed. Use `findInteractionChangedFeats()` instead + +## Bug fixes +* Add catch in `runPCAprojectionBatch()` for when ncp requested exceeds number of feats used +* Make `spatCellCellcom()` respect `verbose` flag [#949](https://github.com/drieslab/Giotto/issues/949) by rbutleriii + +## Enhancements +* `print()` methods for `icfObject` and `combIcfObject` + +## Changes +* require GiottoUtils (>= 0.1.9) + +# Giotto 4.0.8 (2024/05/22) + +## Breaking changes +* `crossSectionGenePlot()` removed. Use `crossSectionFeatPlot()` instead +* `crossSectionGenePlot3D()` removed. Use `crossSectionFeatPlot3D()` instead +* `insertCrossSectionGenePlot3D()` removed Use `insertCrossSectionFeatPlot3D()` instead + +## Bug fixes +* `binSpect()` param passing error introduced in _v4.0.6_ +* updated `viewHMRFresults3D()` and `viewHMRFresults2D()` +* updated `createCrossSections()`, `insertCrossSectionSpatPlot3D()`, `crossSectionPlot()`, `crossSectionFeatPlot3D()`, `insertCrossSectionFeatPlot3D()`, `crossSectionPlot3D()`, `crossSectionFeatPlot()` + +## Changes +* GiottoVisuals (>= 0.2.2), GiottoClass (>= 0.3.1), GiottoUtils (>= 0.1.8) are now required. # Giotto 4.0.6 (2024/05/13) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 3ce6d6296..11dafbe7a 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -113,24 +113,25 @@ #' #' filterDistributions(g) #' @export -filterDistributions <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - method = c("threshold", "sum", "mean"), - expression_threshold = 1, - detection = c("feats", "cells"), - plot_type = c("histogram", "violin"), - scale_y = NULL, - nr_bins = 30, - fill_color = "lightblue", - scale_axis = "identity", - axis_offset = 0, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "filterDistributions") { +filterDistributions <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + method = c("threshold", "sum", "mean"), + expression_threshold = 1, + detection = c("feats", "cells"), + plot_type = c("histogram", "violin"), + scale_y = NULL, + nr_bins = 30, + fill_color = "lightblue", + scale_axis = "identity", + axis_offset = 0, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "filterDistributions") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -145,7 +146,8 @@ filterDistributions <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values))) + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -170,15 +172,18 @@ filterDistributions <- function(gobject, if (detection == "feats") { if (method == "threshold") { feat_detection_levels <- data.table::as.data.table( - rowSums_flex(expr_values >= expression_threshold)) + rowSums_flex(expr_values >= expression_threshold) + ) mytitle <- "feat detected in # of cells" } else if (method == "sum") { feat_detection_levels <- data.table::as.data.table( - rowSums_flex(expr_values)) + rowSums_flex(expr_values) + ) mytitle <- "total sum of feature detected in all cells" } else if (method == "mean") { feat_detection_levels <- data.table::as.data.table( - rowMeans_flex(expr_values)) + rowMeans_flex(expr_values) + ) mytitle <- "average of feature detected in all cells" } @@ -216,15 +221,18 @@ filterDistributions <- function(gobject, } else if (detection == "cells") { if (method == "threshold") { cell_detection_levels <- data.table::as.data.table( - colSums_flex(expr_values >= expression_threshold)) + colSums_flex(expr_values >= expression_threshold) + ) mytitle <- "feats detected per cell" } else if (method == "sum") { cell_detection_levels <- data.table::as.data.table( - colSums_flex(expr_values)) + colSums_flex(expr_values) + ) mytitle <- "total features per cell" } else if (method == "mean") { cell_detection_levels <- data.table::as.data.table( - colMeans_flex(expr_values)) + colMeans_flex(expr_values) + ) mytitle <- "average number of features per cell" } @@ -302,22 +310,23 @@ filterDistributions <- function(gobject, #' #' filterCombinations(g) #' @export -filterCombinations <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - expression_thresholds = c(1, 2), - feat_det_in_min_cells = c(5, 50), - min_det_feats_per_cell = c(200, 400), - scale_x_axis = "identity", - x_axis_offset = 0, - scale_y_axis = "identity", - y_axis_offset = 0, - show_plot = TRUE, - return_plot = FALSE, - save_plot = NULL, - save_param = list(), - default_save_name = "filterCombinations") { +filterCombinations <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + expression_thresholds = c(1, 2), + feat_det_in_min_cells = c(5, 50), + min_det_feats_per_cell = c(200, 400), + scale_x_axis = "identity", + x_axis_offset = 0, + scale_y_axis = "identity", + y_axis_offset = 0, + show_plot = TRUE, + return_plot = FALSE, + save_plot = NULL, + save_param = list(), + default_save_name = "filterCombinations") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -333,7 +342,8 @@ filterCombinations <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values))) + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -361,16 +371,20 @@ filterCombinations <- function(gobject, # first remove feats filter_index_feats <- rowSums_flex( - expr_values >= threshold) >= min_cells_for_feat + expr_values >= threshold + ) >= min_cells_for_feat removed_feats <- length(filter_index_feats[ - filter_index_feats == FALSE]) + filter_index_feats == FALSE + ]) det_cells_res[[combn_i]] <- removed_feats # then remove cells filter_index_cells <- colSums_flex(expr_values[ - filter_index_feats, ] >= threshold) >= min_feats_per_cell + filter_index_feats, + ] >= threshold) >= min_feats_per_cell removed_cells <- length(filter_index_cells[ - filter_index_cells == FALSE]) + filter_index_cells == FALSE + ]) det_feats_res[[combn_i]] <- removed_cells } @@ -393,7 +407,8 @@ filterCombinations <- function(gobject, result_DT[["min_detected_feats_per_cell"]] <- min_det_feats_per_cell result_DT[["combination"]] <- paste0( result_DT$feat_detected_in_min_cells, "-", - result_DT$min_detected_feats_per_cell) + result_DT$min_detected_feats_per_cell + ) result_DT <- result_DT[, .( threshold, @@ -420,18 +435,22 @@ filterCombinations <- function(gobject, color = as.factor(threshold) )) pl <- pl + scale_color_discrete( - guide = guide_legend(title = "threshold(s)")) + guide = guide_legend(title = "threshold(s)") + ) pl <- pl + ggrepel::geom_text_repel(data = result_DT, aes( x = removed_cells + x_axis_offset, y = removed_feats + y_axis_offset, label = combination )) pl <- pl + ggplot2::scale_x_continuous( - trans = scale_x_axis, limits = c(0, maximum_x_value)) + trans = scale_x_axis, limits = c(0, maximum_x_value) + ) pl <- pl + ggplot2::scale_y_continuous( - trans = scale_y_axis, limits = c(0, maximum_y_value)) + trans = scale_y_axis, limits = c(0, maximum_y_value) + ) pl <- pl + ggplot2::labs( - x = "number of removed cells", y = "number of removed feats") + x = "number of removed cells", y = "number of removed feats" + ) return(plot_output_handler( @@ -491,23 +510,24 @@ filterCombinations <- function(gobject, #' #' filterGiotto(g) #' @export -filterGiotto <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - expression_threshold = 1, - feat_det_in_min_cells = 100, - min_det_feats_per_cell = 100, - spat_unit_fsub = ":all:", - feat_type_ssub = ":all:", - all_spat_units = NULL, - all_feat_types = NULL, - poly_info = NULL, - tag_cells = FALSE, - tag_cell_name = "tag", - tag_feats = FALSE, - tag_feats_name = "tag", - verbose = TRUE) { +filterGiotto <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + expression_threshold = 1, + feat_det_in_min_cells = 100, + min_det_feats_per_cell = 100, + spat_unit_fsub = ":all:", + feat_type_ssub = ":all:", + all_spat_units = NULL, + all_feat_types = NULL, + poly_info = NULL, + tag_cells = FALSE, + tag_cell_name = "tag", + tag_feats = FALSE, + tag_feats_name = "tag", + verbose = TRUE) { # data.table vars cell_ID <- feat_ID <- NULL @@ -574,7 +594,8 @@ filterGiotto <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values))) + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) # get expression values to perform filtering on # Only the first spat_unit and feat_type provided are filtered. @@ -596,14 +617,16 @@ filterGiotto <- function(gobject, ## filter features filter_index_feats <- rowSums_flex( - expr_values >= expression_threshold) >= feat_det_in_min_cells + expr_values >= expression_threshold + ) >= feat_det_in_min_cells selected_feat_ids <- names(filter_index_feats[filter_index_feats == TRUE]) ## filter cells filter_index_cells <- colSums_flex(expr_values[ - filter_index_feats, ] >= expression_threshold) >= min_det_feats_per_cell + filter_index_feats, + ] >= expression_threshold) >= min_det_feats_per_cell selected_cell_ids <- names(filter_index_cells[filter_index_cells == TRUE]) @@ -612,7 +635,8 @@ filterGiotto <- function(gobject, if (isTRUE(tag_cells)) { cell_meta <- getCellMetadata(gobject = gobject, copy_obj = TRUE) cell_meta[][, c(tag_cell_name) := ifelse( - cell_ID %in% selected_cell_ids, 0, 1)] + cell_ID %in% selected_cell_ids, 0, 1 + )] gobject <- setCellMetadata( gobject = gobject, x = cell_meta, initialize = FALSE ) @@ -624,7 +648,8 @@ filterGiotto <- function(gobject, if (isTRUE(tag_feats)) { feat_meta <- getFeatureMetadata(gobject = gobject, copy_obj = TRUE) feat_meta[][, c(tag_feats_name) := ifelse( - feat_ID %in% selected_feat_ids, 0, 1)] + feat_ID %in% selected_feat_ids, 0, 1 + )] gobject <- setFeatureMetadata( gobject = gobject, x = feat_meta, initialize = FALSE ) @@ -660,19 +685,27 @@ filterGiotto <- function(gobject, cat("Feature type: ", feat_type, "\n") if (isTRUE(tag_cells)) { - cat("Number of cells tagged: ", removed_cells, " out of ", - total_cells, "\n") + cat( + "Number of cells tagged: ", removed_cells, " out of ", + total_cells, "\n" + ) } else { - cat("Number of cells removed: ", removed_cells, " out of ", - total_cells, "\n") + cat( + "Number of cells removed: ", removed_cells, " out of ", + total_cells, "\n" + ) } if (isTRUE(tag_feats)) { - cat("Number of feats tagged: ", removed_feats, " out of ", - total_feats, "\n") + cat( + "Number of feats tagged: ", removed_feats, " out of ", + total_feats, "\n" + ) } else { - cat("Number of feats removed: ", removed_feats, " out of ", - total_feats, "\n") + cat( + "Number of feats removed: ", removed_feats, " out of ", + total_feats, "\n" + ) } } @@ -695,7 +728,9 @@ filterGiotto <- function(gobject, # If this function call is not downstream of processGiotto, update normally newGiottoObject <- update_giotto_params( - newGiottoObject, description = "_filter") + newGiottoObject, + description = "_filter" + ) return(newGiottoObject) } @@ -711,19 +746,20 @@ filterGiotto <- function(gobject, #' @description standard function for RNA normalization #' @returns giotto object #' @keywords internal -.rna_standard_normalization <- function(gobject, - raw_expr, - feat_type, - spat_unit, - library_size_norm = TRUE, - scalefactor = 6e3, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - verbose = TRUE) { +.rna_standard_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + library_size_norm = TRUE, + scalefactor = 6e3, + log_norm = TRUE, + log_offset = 1, + logbase = 2, + scale_feats = TRUE, + scale_cells = TRUE, + scale_order = c("first_feats", "first_cells"), + verbose = TRUE) { # check feature type compatibility if (!feat_type %in% c("rna", "RNA")) { warning("Caution: Standard normalization was developed for RNA data \n") @@ -765,37 +801,42 @@ filterGiotto <- function(gobject, ## 3. scale if (scale_feats == TRUE & scale_cells == TRUE) { scale_order <- match.arg( - arg = scale_order, choices = c("first_feats", "first_cells")) + arg = scale_order, choices = c("first_feats", "first_cells") + ) if (scale_order == "first_feats") { - if (isTRUE(verbose)) + if (isTRUE(verbose)) { wrap_msg("\n first scale feats and then cells \n") + } norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_expr), center = TRUE, scale = TRUE)) + x = t_flex(norm_expr), center = TRUE, scale = TRUE + )) norm_scaled_expr <- standardise_flex( - x = norm_scaled_expr, center = TRUE, scale = TRUE) - + x = norm_scaled_expr, center = TRUE, scale = TRUE + ) } else if (scale_order == "first_cells") { - if (isTRUE(verbose)) + if (isTRUE(verbose)) { wrap_msg("\n first scale cells and then feats \n") + } norm_scaled_expr <- standardise_flex( - x = norm_expr, center = TRUE, scale = TRUE) + x = norm_expr, center = TRUE, scale = TRUE + ) norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_scaled_expr), center = TRUE, scale = TRUE)) - + x = t_flex(norm_scaled_expr), center = TRUE, scale = TRUE + )) } else { stop("\n scale order must be given \n") } } else if (scale_feats == TRUE) { norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_expr), center = TRUE, scale = TRUE)) - + x = t_flex(norm_expr), center = TRUE, scale = TRUE + )) } else if (scale_cells == TRUE) { norm_scaled_expr <- standardise_flex( - x = norm_expr, center = TRUE, scale = TRUE) - + x = norm_expr, center = TRUE, scale = TRUE + ) } else { norm_scaled_expr <- NULL } @@ -853,12 +894,13 @@ filterGiotto <- function(gobject, #' @description function for RNA normalization according to osmFISH paper #' @returns giotto object #' @keywords internal -.rna_osmfish_normalization <- function(gobject, - raw_expr, - feat_type, - spat_unit, - name = "custom", - verbose = TRUE) { +.rna_osmfish_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + name = "custom", + verbose = TRUE) { # check feature type compatibility if (!feat_type %in% c("rna", "RNA")) { warning("Caution: osmFISH normalization was developed for RNA in situ @@ -869,12 +911,15 @@ filterGiotto <- function(gobject, norm_feats <- (raw_expr[] / rowSums_flex(raw_expr[])) * nrow(raw_expr[]) # 2. normalize per cells with scale-factor equal to number of cells norm_feats_cells <- t_flex((t_flex(norm_feats) / - colSums_flex(norm_feats)) * ncol(raw_expr[])) + colSums_flex(norm_feats)) * ncol(raw_expr[])) # return results to Giotto object - if (verbose == TRUE) - message("\n osmFISH-like normalized data will be returned to the", - name, "Giotto slot \n") + if (verbose == TRUE) { + message( + "\n osmFISH-like normalized data will be returned to the", + name, "Giotto slot \n" + ) + } norm_feats_cells <- create_expr_obj( name = name, @@ -903,20 +948,22 @@ filterGiotto <- function(gobject, #' Adapted from https://gist.github.com/hypercompetent/51a3c428745e1c06d826d76c3671797c#file-pearson_residuals-r #' @returns giotto object #' @keywords internal -.rna_pears_resid_normalization <- function(gobject, - raw_expr, - feat_type, - spat_unit, - theta = 100, - name = "scaled", - verbose = TRUE) { +.rna_pears_resid_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + theta = 100, + name = "scaled", + verbose = TRUE) { # print message with information # - if (verbose) - message("using 'Lause/Kobak' method to normalize count matrix If used in + if (verbose) { + message("using 'Lause/Kobak' method to normalize count matrix If used in published research, please cite: Jan Lause, Philipp Berens, Dmitry Kobak (2020). 'Analytic Pearson residuals for normalization of single-cell RNA-seq UMI data' ") + } # check feature type compatibility @@ -927,9 +974,13 @@ filterGiotto <- function(gobject, if (methods::is(raw_expr[], "HDF5Matrix")) { counts_sum0 <- methods::as(matrix( - MatrixGenerics::colSums2(raw_expr[]), nrow = 1), "HDF5Matrix") + MatrixGenerics::colSums2(raw_expr[]), + nrow = 1 + ), "HDF5Matrix") counts_sum1 <- methods::as(matrix( - MatrixGenerics::rowSums2(raw_expr[]), ncol = 1), "HDF5Matrix") + MatrixGenerics::rowSums2(raw_expr[]), + ncol = 1 + ), "HDF5Matrix") counts_sum <- sum(raw_expr[]) # get residuals @@ -942,9 +993,11 @@ filterGiotto <- function(gobject, z[z < -sqrt(n)] <- -sqrt(n) } else { counts_sum0 <- methods::as(matrix(Matrix::colSums( - raw_expr[]), nrow = 1), "dgCMatrix") + raw_expr[] + ), nrow = 1), "dgCMatrix") counts_sum1 <- methods::as(matrix(Matrix::rowSums( - raw_expr[]), ncol = 1), "dgCMatrix") + raw_expr[] + ), ncol = 1), "dgCMatrix") counts_sum <- sum(raw_expr[]) # get residuals @@ -958,9 +1011,12 @@ filterGiotto <- function(gobject, } # return results to Giotto object - if (verbose == TRUE) - message("\n Pearson residual normalized data will be returned to the ", - name, " Giotto slot \n") + if (verbose == TRUE) { + message( + "\n Pearson residual normalized data will be returned to the ", + name, " Giotto slot \n" + ) + } z <- create_expr_obj( name = name, @@ -1033,23 +1089,24 @@ filterGiotto <- function(gobject, #' #' normalizeGiotto(g) #' @export -normalizeGiotto <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = "raw", - norm_methods = c("standard", "pearson_resid", "osmFISH"), - library_size_norm = TRUE, - scalefactor = 6e3, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_genes = NULL, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - theta = 100, - update_slot = "scaled", - verbose = TRUE) { +normalizeGiotto <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = "raw", + norm_methods = c("standard", "pearson_resid", "osmFISH"), + library_size_norm = TRUE, + scalefactor = 6e3, + log_norm = TRUE, + log_offset = 1, + logbase = 2, + scale_feats = TRUE, + scale_genes = NULL, + scale_cells = TRUE, + scale_order = c("first_feats", "first_cells"), + theta = 100, + update_slot = "scaled", + verbose = TRUE) { ## deprecated arguments if (!is.null(scale_genes)) { scale_feats <- scale_genes @@ -1078,7 +1135,8 @@ normalizeGiotto <- function(gobject, ) norm_methods <- match.arg( - arg = norm_methods, choices = c("standard", "pearson_resid", "osmFISH")) + arg = norm_methods, choices = c("standard", "pearson_resid", "osmFISH") + ) # normalization according to standard methods if (norm_methods == "standard") { @@ -1163,14 +1221,15 @@ normalizeGiotto <- function(gobject, #' #' adjustGiottoMatrix(g, covariate_columns = "leiden_clus") #' @export -adjustGiottoMatrix <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - batch_columns = NULL, - covariate_columns = NULL, - return_gobject = TRUE, - update_slot = c("custom")) { +adjustGiottoMatrix <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + batch_columns = NULL, + covariate_columns = NULL, + return_gobject = TRUE, + update_slot = c("custom")) { # Catch for both batch and covariate being null if (is.null(batch_columns) & is.null(covariate_columns)) { stop("Metadata for either different batches or covariates must be @@ -1210,12 +1269,14 @@ adjustGiottoMatrix <- function(gobject, } update_slot <- match.arg( - update_slot, c("normalized", "scaled", "custom", update_slot)) + update_slot, c("normalized", "scaled", "custom", update_slot) + ) # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1241,7 +1302,8 @@ adjustGiottoMatrix <- function(gobject, # covariate columns if (!is.null(covariate_columns)) { covariates <- as.matrix( - cell_metadata[, covariate_columns, with = FALSE]) + cell_metadata[, covariate_columns, with = FALSE] + ) } else { covariates <- NULL } @@ -1318,43 +1380,51 @@ adjustGiottoMatrix <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' processGiotto(gobject = g, -#' adjust_params = list(covariate_columns = "leiden_clus")) +#' processGiotto( +#' gobject = g, +#' adjust_params = list(covariate_columns = "leiden_clus") +#' ) #' @export -processGiotto <- function(gobject, - filter_params = list(), - norm_params = list(), - stat_params = list(), - adjust_params = list(), - verbose = TRUE) { +processGiotto <- function( + gobject, + filter_params = list(), + norm_params = list(), + stat_params = list(), + adjust_params = list(), + verbose = TRUE) { # filter Giotto if (verbose == TRUE) message("1. start filter step") - if (!inherits(filter_params, "list")) + if (!inherits(filter_params, "list")) { stop("filter_params need to be a list of parameters for filterGiotto") + } gobject <- do.call("filterGiotto", c(gobject = gobject, filter_params)) # normalize Giotto if (verbose == TRUE) message("2. start normalization step") - if (!inherits(norm_params, "list")) + if (!inherits(norm_params, "list")) { stop("norm_params need to be a list of parameters for normalizeGiotto") + } gobject <- do.call("normalizeGiotto", c(gobject = gobject, norm_params)) # add Statistics if (verbose == TRUE) message("3. start cell and gene statistics step") - if (!inherits(stat_params, "list")) + if (!inherits(stat_params, "list")) { stop("stat_params need to be a list of parameters for addStatistics ") + } stat_params[["return_gobject"]] <- TRUE # force this to be true gobject <- do.call("addStatistics", c(gobject = gobject, stat_params)) # adjust Giotto, if applicable if (!is.null(adjust_params)) { if (verbose == TRUE) message("4. start adjusted matrix step") - if (!inherits(adjust_params, "list")) + if (!inherits(adjust_params, "list")) { stop("adjust_params need to be a list of parameters for adjustGiottoMatrix") + } adjust_params[["return_gobject"]] <- TRUE # force this to be true gobject <- do.call( - "adjustGiottoMatrix", c(gobject = gobject, adjust_params)) + "adjustGiottoMatrix", c(gobject = gobject, adjust_params) + ) } gobject <- update_giotto_params(gobject, description = "_process") @@ -1387,6 +1457,7 @@ processGiotto <- function(gobject, #' @param expression_values expression values to use #' @param detection_threshold detection threshold to consider a gene detected #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object if return_gobject = TRUE #' @details #' This function will add the following statistics to feature metadata: @@ -1406,12 +1477,14 @@ processGiotto <- function(gobject, #' #' addFeatStatistics(g) #' @export -addFeatStatistics <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - detection_threshold = 0, - return_gobject = TRUE) { +addFeatStatistics <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + detection_threshold = 0, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1426,7 +1499,8 @@ addFeatStatistics <- function(gobject, # expression values to be used expression_values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1441,7 +1515,7 @@ addFeatStatistics <- function(gobject, feats = rownames(expr_data[]), nr_cells = rowSums_flex(expr_data[] > detection_threshold), perc_cells = (rowSums_flex(expr_data[] > detection_threshold) / - ncol(expr_data[])) * 100, + ncol(expr_data[])) * 100, total_expr = rowSums_flex(expr_data[]), mean_expr = rowMeans_flex(expr_data[]) ) @@ -1450,7 +1524,9 @@ addFeatStatistics <- function(gobject, mean_expr_det <- NULL mean_expr_detected <- .mean_expr_det_test( - expr_data[], detection_threshold = detection_threshold) + expr_data[], + detection_threshold = detection_threshold + ) feat_stats[, mean_expr_det := mean_expr_detected] @@ -1475,11 +1551,14 @@ addFeatStatistics <- function(gobject, metadata_names <- colnames(feat_metadata[]) if ("nr_cells" %in% metadata_names) { - message("feat statistics has already been applied once, will be - overwritten") + vmsg( + .v = verbose, "feat statistics has already been applied", + "once; overwriting" + ) feat_metadata[][, c( "nr_cells", "perc_cells", "total_expr", "mean_expr", - "mean_expr_det") := NULL] + "mean_expr_det" + ) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### gobject <- set_feature_metadata(gobject, metadata = feat_metadata, @@ -1520,16 +1599,19 @@ addFeatStatistics <- function(gobject, # normally if (is.null(cl)) { gobject <- update_giotto_params(gobject, - description = "_feat_stats") + description = "_feat_stats" + ) } else { fname <- as.character(cl[[1]]) if (fname == "addStatistics") { gobject <- update_giotto_params(gobject, - description = "_feat_stats", - toplevel = 3) + description = "_feat_stats", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_feat_stats") + description = "_feat_stats" + ) } } @@ -1553,6 +1635,7 @@ addFeatStatistics <- function(gobject, #' @param expression_values expression values to use #' @param detection_threshold detection threshold to consider a gene detected #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object if return_gobject = TRUE #' @details #' This function will add the following statistics to cell metadata: @@ -1569,12 +1652,14 @@ addFeatStatistics <- function(gobject, #' #' addCellStatistics(g) #' @export -addCellStatistics <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - detection_threshold = 0, - return_gobject = TRUE) { +addCellStatistics <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + detection_threshold = 0, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1589,7 +1674,8 @@ addCellStatistics <- function(gobject, # expression values to be used expression_values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1605,7 +1691,7 @@ addCellStatistics <- function(gobject, cells = colnames(expr_data[]), nr_feats = colSums_flex(expr_data[] > detection_threshold), perc_feats = (colSums_flex(expr_data[] > detection_threshold) / - nrow(expr_data[])) * 100, + nrow(expr_data[])) * 100, total_expr = colSums_flex(expr_data[]) ) @@ -1628,8 +1714,10 @@ addCellStatistics <- function(gobject, metadata_names <- colnames(cell_metadata[]) if ("nr_feats" %in% metadata_names) { - message("cells statistics has already been applied once, will be - overwritten") + vmsg( + .v = verbose, "cells statistics has already been applied", + "once; overwriting" + ) cell_metadata[][, c("nr_feats", "perc_feats", "total_expr") := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### gobject <- set_cell_metadata(gobject, @@ -1673,16 +1761,19 @@ addCellStatistics <- function(gobject, # normally if (is.null(cl)) { gobject <- update_giotto_params(gobject, - description = "_cell_stats") + description = "_cell_stats" + ) } else { fname <- as.character(cl[[1]]) if (fname == "addStatistics") { gobject <- update_giotto_params(gobject, - description = "_cell_stats", - toplevel = 3) + description = "_cell_stats", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_cell_stats") + description = "_cell_stats" + ) } } @@ -1703,6 +1794,7 @@ addCellStatistics <- function(gobject, #' @param expression_values expression values to use #' @param detection_threshold detection threshold to consider a feature detected #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object if return_gobject = TRUE, else a list with results #' @details See \code{\link{addFeatStatistics}} and #' \code{\link{addCellStatistics}} @@ -1711,12 +1803,14 @@ addCellStatistics <- function(gobject, #' #' addStatistics(g) #' @export -addStatistics <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - detection_threshold = 0, - return_gobject = TRUE) { +addStatistics <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + detection_threshold = 0, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1735,7 +1829,8 @@ addStatistics <- function(gobject, spat_unit = spat_unit, expression_values = expression_values, detection_threshold = detection_threshold, - return_gobject = return_gobject + return_gobject = return_gobject, + verbose = verbose ) if (return_gobject == TRUE) { @@ -1749,7 +1844,8 @@ addStatistics <- function(gobject, spat_unit = spat_unit, expression_values = expression_values, detection_threshold = detection_threshold, - return_gobject = return_gobject + return_gobject = return_gobject, + verbose = verbose ) if (return_gobject == TRUE) { @@ -1781,13 +1877,14 @@ addStatistics <- function(gobject, #' #' addFeatsPerc(g, feats = c("Gm19935", "9630013A20Rik", "2900040C04Rik")) #' @export -addFeatsPerc <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats = NULL, - vector_name = "feat_perc", - return_gobject = TRUE) { +addFeatsPerc <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats = NULL, + vector_name = "feat_perc", + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1812,7 +1909,8 @@ addFeatsPerc <- function(gobject, # expression values to be used expression_values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1838,7 +1936,8 @@ addFeatsPerc <- function(gobject, ## update parameters used ## temp_gobj <- update_giotto_params(temp_gobj, - description = "_feats_perc") + description = "_feats_perc" + ) return(temp_gobj) } else { @@ -1870,14 +1969,17 @@ addFeatsPerc <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findNetworkNeighbors(gobject = g, spatial_network_name = "spatial_network", -#' source_cell_ids = c("AACTCGATGGCGCAGT-1", "GGCTGGCTAGCTTAAA-1")) +#' findNetworkNeighbors( +#' gobject = g, spatial_network_name = "spatial_network", +#' source_cell_ids = c("AACTCGATGGCGCAGT-1", "GGCTGGCTAGCTTAAA-1") +#' ) #' @export -findNetworkNeighbors <- function(gobject, - spat_unit = NULL, - spatial_network_name = NULL, - source_cell_ids = NULL, - name = "nb_cells") { +findNetworkNeighbors <- function( + gobject, + spat_unit = NULL, + spatial_network_name = NULL, + source_cell_ids = NULL, + name = "nb_cells") { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit @@ -1905,11 +2007,14 @@ findNetworkNeighbors <- function(gobject, full_network_DT <- convert_to_full_spatial_network(spatial_network) potential_target_cells <- full_network_DT[ - source %in% source_cells][["target"]] + source %in% source_cells + ][["target"]] source_and_target_cells <- potential_target_cells[ - potential_target_cells %in% source_cells] + potential_target_cells %in% source_cells + ] target_cells <- potential_target_cells[ - !potential_target_cells %in% source_and_target_cells] + !potential_target_cells %in% source_and_target_cells + ] cell_meta <- pDataDT(gobject) diff --git a/R/cell_segmentation.R b/R/cell_segmentation.R index 6c5df37ad..05ff56040 100644 --- a/R/cell_segmentation.R +++ b/R/cell_segmentation.R @@ -16,14 +16,15 @@ #' of the tile: sx (start x), ex (end x), sy, and ey. #' #' @export -doCellSegmentation <- function(raster_img, - folder_path, - reduce_resolution = 4, - overlapping_pixels = 50, - python_path = NULL) { +doCellSegmentation <- function( + raster_img, + folder_path, + reduce_resolution = 4, + overlapping_pixels = 50, + python_path = NULL) { package_check("deepcell", repository = "pip") package_check("PIL", repository = "pip") - + # prepare python path and segmentation script reticulate::use_python(required = TRUE, python = python_path) python_segmentation_function <- system.file("python", @@ -51,10 +52,10 @@ doCellSegmentation <- function(raster_img, # sliding window start_x <- 0 end_x <- start_x + tile_dim - for (i in 1:nxwindow) { + for (i in seq_len(nxwindow)) { start_y <- 0 end_y <- start_y + tile_dim - for (j in 1:nywindow) { + for (j in seq_len(nywindow)) { ext_crop <- terra::ext(c(start_x, end_x, start_y, end_y)) img_crop <- terra::crop(raster_img, ext_crop, snap = "in") img_crop_rescaled <- terra::aggregate(img_crop, reduce_resolution) diff --git a/R/clustering.R b/R/clustering.R index 625db7e35..950debf62 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -47,24 +47,25 @@ #' #' doLeidenCluster(g) #' @export -doLeidenCluster <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "leiden_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = "weight", - partition_type = c( - "RBConfigurationVertexPartition", - "ModularityVertexPartition" - ), - init_membership = NULL, - n_iterations = 1000, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doLeidenCluster <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "leiden_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = "weight", + partition_type = c( + "RBConfigurationVertexPartition", + "ModularityVertexPartition" + ), + init_membership = NULL, + n_iterations = 1000, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -94,7 +95,8 @@ doLeidenCluster <- function(gobject, ## select partition type partition_type <- match.arg(partition_type, choices = c( - "RBConfigurationVertexPartition", "ModularityVertexPartition") + "RBConfigurationVertexPartition", "ModularityVertexPartition" + ) ) ## check or make paths @@ -106,19 +108,21 @@ doLeidenCluster <- function(gobject, ## prepare python path and louvain script reticulate::use_python(required = TRUE, python = python_path) python_leiden_function <- system.file("python", "python_leiden.py", - package = "Giotto") + package = "Giotto" + ) reticulate::source_python(file = python_leiden_function) ## set seed if (isTRUE(set_seed)) { seed_number <- as.integer(seed_number) } else { - seed_number <- as.integer(sample(x = 1:10000, size = 1)) + seed_number <- as.integer(sample(x = seq(10000), size = 1)) } ## extract NN network network_edge_dt <- data.table::as.data.table( - igraph::as_data_frame(x = igraph_object, what = "edges")) + igraph::as_data_frame(x = igraph_object, what = "edges") + ) # data.table variables weight <- NULL @@ -130,7 +134,9 @@ doLeidenCluster <- function(gobject, } else { # weight is defined by attribute of igraph object network_edge_dt <- network_edge_dt[ - , c("from", "to", weight_col), with = FALSE] + , c("from", "to", weight_col), + with = FALSE + ] data.table::setnames(network_edge_dt, weight_col, "weight") } } else { @@ -143,8 +149,10 @@ doLeidenCluster <- function(gobject, ## do python leiden clustering - reticulate::py_set_seed(seed = seed_number, - disable_hash_randomization = TRUE) + reticulate::py_set_seed( + seed = seed_number, + disable_hash_randomization = TRUE + ) pyth_leid_result <- python_leiden( df = network_edge_dt, partition_type = partition_type, @@ -156,7 +164,8 @@ doLeidenCluster <- function(gobject, ) ident_clusters_DT <- data.table::data.table( - cell_ID = pyth_leid_result[[1]], "name" = pyth_leid_result[[2]]) + cell_ID = pyth_leid_result[[1]], "name" = pyth_leid_result[[2]] + ) data.table::setnames(ident_clusters_DT, "name", name) @@ -252,22 +261,23 @@ doLeidenCluster <- function(gobject, #' #' doLeidenClusterIgraph(g) #' @export -doLeidenClusterIgraph <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "leiden_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - objective_function = c("modularity", "CPM"), - weights = NULL, - resolution_parameter = 1, - beta = 0.01, - initial_membership = NULL, - n_iterations = 1000, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234, - ...) { +doLeidenClusterIgraph <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "leiden_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + objective_function = c("modularity", "CPM"), + weights = NULL, + resolution_parameter = 1, + beta = 0.01, + initial_membership = NULL, + n_iterations = 1000, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -321,7 +331,8 @@ doLeidenClusterIgraph <- function(gobject, # summarize results ident_clusters_DT <- data.table::data.table( - "cell_ID" = leiden_clusters$names, "name" = leiden_clusters$membership) + "cell_ID" = leiden_clusters$names, "name" = leiden_clusters$membership + ) data.table::setnames(ident_clusters_DT, "name", name) @@ -410,20 +421,23 @@ doLeidenClusterIgraph <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' doGiottoClustree(gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, -#' show_plot = FALSE, save_plot = FALSE) +#' doGiottoClustree( +#' gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, +#' show_plot = FALSE, save_plot = FALSE +#' ) #' @export -doGiottoClustree <- function(gobject, - res_vector = NULL, - res_seq = NULL, - return_gobject = FALSE, - show_plot = NULL, - save_plot = NULL, - return_plot = NULL, - save_param = list(), - default_save_name = "clustree", - verbose = TRUE, - ...) { +doGiottoClustree <- function( + gobject, + res_vector = NULL, + res_seq = NULL, + return_gobject = FALSE, + show_plot = NULL, + save_plot = NULL, + return_plot = NULL, + save_param = list(), + default_save_name = "clustree", + verbose = TRUE, + ...) { package_check(pkg_name = "clustree", repository = "CRAN") ## setting resolutions to use if (is.null(res_vector)) { @@ -496,20 +510,21 @@ doGiottoClustree <- function(gobject, #' Set \emph{weight_col = NULL} to give equal weight (=1) to each edge. #' @md #' @keywords internal -.doLouvainCluster_community <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = NULL, - louv_random = FALSE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234, - ...) { +.doLouvainCluster_community <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = NULL, + louv_random = FALSE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -544,18 +559,21 @@ doGiottoClustree <- function(gobject, # prepare python path and louvain script reticulate::use_python(required = TRUE, python = python_path) python_louvain_function <- system.file( - "python", "python_louvain.py", package = "Giotto") + "python", "python_louvain.py", + package = "Giotto" + ) reticulate::source_python(file = python_louvain_function) # set seed if (isTRUE(set_seed)) { seed_number <- as.integer(seed_number) } else { - seed_number <- as.integer(sample(x = 1:10000, size = 1)) + seed_number <- as.integer(sample(x = seq(10000), size = 1)) } network_edge_dt <- data.table::as.data.table(igraph::as_data_frame( - x = igraph_object, what = "edges")) + x = igraph_object, what = "edges" + )) # data.table variables weight <- NULL @@ -566,7 +584,9 @@ doGiottoClustree <- function(gobject, } else { # weight is defined by attribute of igraph object network_edge_dt <- network_edge_dt[ - , c("from", "to", weight_col), with = FALSE] + , c("from", "to", weight_col), + with = FALSE + ] setnames(network_edge_dt, weight_col, "weight") } } else { @@ -578,19 +598,24 @@ doGiottoClustree <- function(gobject, # do python louvain clustering if (louv_random == FALSE) { reticulate::py_set_seed( - seed = seed_number, disable_hash_randomization = TRUE) + seed = seed_number, disable_hash_randomization = TRUE + ) pyth_louv_result <- python_louvain( - df = network_edge_dt, resolution = resolution, randomize = FALSE) + df = network_edge_dt, resolution = resolution, randomize = FALSE + ) } else { reticulate::py_set_seed( - seed = seed_number, disable_hash_randomization = TRUE) + seed = seed_number, disable_hash_randomization = TRUE + ) pyth_louv_result <- python_louvain( df = network_edge_dt, resolution = resolution, - random_state = seed_number) + random_state = seed_number + ) } ident_clusters_DT <- data.table::data.table( - cell_ID = rownames(pyth_louv_result), "name" = pyth_louv_result[[1]]) + cell_ID = rownames(pyth_louv_result), "name" = pyth_louv_result[[1]] + ) data.table::setnames(ident_clusters_DT, "name", name) @@ -647,11 +672,13 @@ doGiottoClustree <- function(gobject, fname <- as.character(cl[[1]]) if (fname == "doLouvainCluster") { gobject <- update_giotto_params(gobject, - description = "_cluster", - toplevel = 3) + description = "_cluster", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_cluster") + description = "_cluster" + ) } } @@ -686,17 +713,18 @@ doGiottoClustree <- function(gobject, #' in R for more information. #' #' @keywords internal -.doLouvainCluster_multinet <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - gamma = 1, - omega = 1, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +.doLouvainCluster_multinet <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + gamma = 1, + omega = 1, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { if ("multinet" %in% rownames(installed.packages()) == FALSE) { stop( "package 'multinet' is not yet installed \n", @@ -731,10 +759,11 @@ doGiottoClustree <- function(gobject, # create mlnetworkobject mln_object <- multinet::ml_empty() - multinet::add_vertices_ml( - n = mln_object, vertices = igraph::V(igraph_object)) + # multinet::add_vertices_ml( + # n = mln_object, vertices = igraph::V(igraph_object)) multinet::add_igraph_layer_ml( - n = mln_object, g = igraph_object, name = name) + n = mln_object, g = igraph_object, name = name + ) # start seed if (isTRUE(set_seed)) { @@ -745,7 +774,8 @@ doGiottoClustree <- function(gobject, cell_ID <- actor <- weight_col <- NULL louvain_clusters <- multinet::glouvain_ml( - n = mln_object, gamma = gamma, omega = omega) + n = mln_object, gamma = gamma, omega = omega + ) ident_clusters_DT <- data.table::as.data.table(louvain_clusters) ident_clusters_DT[, cell_ID := actor] data.table::setnames(ident_clusters_DT, "cid", name) @@ -808,11 +838,13 @@ doGiottoClustree <- function(gobject, fname <- as.character(cl[[1]]) if (fname == "doLouvainCluster") { gobject <- update_giotto_params(gobject, - description = "_cluster", - toplevel = 3) + description = "_cluster", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_cluster") + description = "_cluster" + ) } } return(gobject) @@ -861,23 +893,24 @@ doGiottoClustree <- function(gobject, #' #' doLouvainCluster(g) #' @export -doLouvainCluster <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - version = c("community", "multinet"), - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = NULL, - gamma = 1, - omega = 1, - louv_random = FALSE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234, - ...) { +doLouvainCluster <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + version = c("community", "multinet"), + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = NULL, + gamma = 1, + omega = 1, + louv_random = FALSE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -957,18 +990,20 @@ doLouvainCluster <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' doRandomWalkCluster(g) +#' g <- doRandomWalkCluster(g) +#' pDataDT(g) #' @export -doRandomWalkCluster <- function(gobject, - name = "random_walk_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - walk_steps = 4, - walk_clusters = 10, - walk_weights = NA, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +doRandomWalkCluster <- function( + gobject, + name = "random_walk_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + walk_steps = 4, + walk_clusters = 10, + walk_weights = NA, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { ## get cell IDs ## cell_ID_vec <- gobject@cell_ID @@ -976,7 +1011,8 @@ doRandomWalkCluster <- function(gobject, igraph_object <- getNearestNetwork( gobject, nn_type = nn_network_to_use, - name = network_name + name = network_name, + output = "igraph" ) @@ -986,13 +1022,16 @@ doRandomWalkCluster <- function(gobject, } randomwalk_clusters <- igraph::cluster_walktrap( - graph = igraph_object, steps = walk_steps, weights = walk_weights) + graph = igraph_object, steps = walk_steps, weights = walk_weights + ) randomwalk_clusters <- as.factor(igraph::cut_at( - communities = randomwalk_clusters, no = walk_clusters)) + communities = randomwalk_clusters, no = walk_clusters + )) ident_clusters_DT <- data.table::data.table( "cell_ID" = igraph::V(igraph_object)$name, - "name" = randomwalk_clusters) + "name" = randomwalk_clusters + ) data.table::setnames(ident_clusters_DT, "name", name) # exit seed @@ -1000,21 +1039,13 @@ doRandomWalkCluster <- function(gobject, set.seed(Sys.time()) } - ## return if (return_gobject == TRUE) { - cluster_names <- names(gobject@cell_metadata) - if (name %in% cluster_names) { - cat(name, " has already been used, will be overwritten") - cell_metadata <- gobject@cell_metadata - cell_metadata[, eval(name) := NULL] - gobject@cell_metadata <- cell_metadata - } - gobject <- addCellMetadata( gobject = gobject, new_metadata = ident_clusters_DT[, c("cell_ID", name), - with = FALSE], + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -1022,7 +1053,8 @@ doRandomWalkCluster <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_randomwalk_cluster") + description = "_randomwalk_cluster" + ) return(gobject) } else { # else return clustering result @@ -1058,17 +1090,18 @@ doRandomWalkCluster <- function(gobject, #' #' doSNNCluster(g) #' @export -doSNNCluster <- function(gobject, - name = "sNN_clus", - nn_network_to_use = "kNN", - network_name = "kNN.pca", - k = 20, - eps = 4, - minPts = 16, - borderPoints = TRUE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +doSNNCluster <- function( + gobject, + name = "sNN_clus", + nn_network_to_use = "kNN", + network_name = "kNN.pca", + k = 20, + eps = 4, + minPts = 16, + borderPoints = TRUE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { ## get cell IDs ## cell_ID_vec <- gobject@cell_ID @@ -1096,18 +1129,24 @@ doSNNCluster <- function(gobject, ## SNN clust igraph_DT <- data.table::as.data.table(igraph::as_data_frame( - igraph_object, what = "edges")) + igraph_object, + what = "edges" + )) igraph_DT <- igraph_DT[order(from)] cell_id_numeric <- unique(x = c(igraph_DT$from, igraph_DT$to)) names(cell_id_numeric) <- seq_along(cell_id_numeric) igraph_DT[, from_T := as.numeric(names(cell_id_numeric[ - cell_id_numeric == from])), by = 1:nrow(igraph_DT)] + cell_id_numeric == from + ])), by = 1:nrow(igraph_DT)] igraph_DT[, to_T := as.numeric(names(cell_id_numeric[ - cell_id_numeric == to])), by = 1:nrow(igraph_DT)] + cell_id_numeric == to + ])), by = 1:nrow(igraph_DT)] temp_igraph_DT <- igraph_DT[, .(from_T, to_T, weight, distance)] data.table::setnames( - temp_igraph_DT, old = c("from_T", "to_T"), new = c("from", "to")) + temp_igraph_DT, + old = c("from_T", "to_T"), new = c("from", "to") + ) kNN_object <- nnDT_to_kNN(nnDT = temp_igraph_DT) sNN_clusters <- dbscan::sNNclust( @@ -1116,8 +1155,9 @@ doSNNCluster <- function(gobject, ) ident_clusters_DT <- data.table::data.table( - "cell_ID" = cell_id_numeric[1:nrow(kNN_object$dist)], - "name" = sNN_clusters$cluster) + "cell_ID" = cell_id_numeric[seq_len(nrow(kNN_object$dist))], + "name" = sNN_clusters$cluster + ) data.table::setnames(ident_clusters_DT, "name", name) # exit seed @@ -1138,7 +1178,8 @@ doSNNCluster <- function(gobject, gobject <- addCellMetadata( gobject = gobject, new_metadata = ident_clusters_DT[, c("cell_ID", name), - with = FALSE], + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -1189,27 +1230,28 @@ doSNNCluster <- function(gobject, #' #' doKmeans(g) #' @export -doKmeans <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "original", "pearson", "spearman", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - centers = 10, - iter_max = 100, - nstart = 1000, - algorithm = "Hartigan-Wong", - name = "kmeans", - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doKmeans <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "original", "pearson", "spearman", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + centers = 10, + iter_max = 100, + nstart = 1000, + algorithm = "Hartigan-Wong", + name = "kmeans", + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1223,7 +1265,9 @@ doKmeans <- function(gobject, dim_reduction_to_use <- match.arg( - dim_reduction_to_use, choices = c("cells", "pca", "umap", "tsne")) + dim_reduction_to_use, + choices = c("cells", "pca", "umap", "tsne") + ) distance_method <- match.arg(distance_method, choices = c( "original", "pearson", "spearman", "euclidean", "maximum", "manhattan", @@ -1247,12 +1291,14 @@ doKmeans <- function(gobject, ) dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% 1:ncol(dim_coord[])] + dimensions_to_use %in% seq_len(ncol(dim_coord[])) + ] matrix_to_use <- dim_coord[][, dimensions_to_use] } else { values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) ## using original matrix ## expr_values <- getExpression( @@ -1266,7 +1312,8 @@ doKmeans <- function(gobject, # subset expression matrix if (!is.null(feats_to_use)) { expr_values[] <- expr_values[][ - rownames(expr_values[]) %in% feats_to_use, ] + rownames(expr_values[]) %in% feats_to_use, + ] } # features as columns @@ -1280,7 +1327,8 @@ doKmeans <- function(gobject, celldist <- matrix_to_use } else if (distance_method %in% c("spearman", "pearson")) { celldist <- stats::as.dist(1 - cor_flex( - x = t_flex(matrix_to_use), method = distance_method)) + x = t_flex(matrix_to_use), method = distance_method + )) } else if (distance_method %in% c( "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski" @@ -1317,7 +1365,6 @@ doKmeans <- function(gobject, ## add clusters to metadata ## if (isTRUE(return_gobject)) { - cluster_names <- names(pDataDT( gobject = gobject, spat_unit = spat_unit, @@ -1357,7 +1404,8 @@ doKmeans <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_kmeans_cluster") + description = "_kmeans_cluster" + ) return(gobject) } else { return(ident_clusters_DT) @@ -1395,30 +1443,31 @@ doKmeans <- function(gobject, #' #' doHclust(g) #' @export -doHclust <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "pearson", "spearman", "original", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - agglomeration_method = c( - "ward.D2", "ward.D", "single", - "complete", "average", "mcquitty", - "median", "centroid" - ), - k = 10, - h = NULL, - name = "hclust", - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doHclust <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "pearson", "spearman", "original", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + agglomeration_method = c( + "ward.D2", "ward.D", "single", + "complete", "average", "mcquitty", + "median", "centroid" + ), + k = 10, + h = NULL, + name = "hclust", + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1470,7 +1519,8 @@ doHclust <- function(gobject, ) dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% 1:ncol(dim_coord)] + dimensions_to_use %in% seq_len(ncol(dim_coord)) + ] matrix_to_use <- dim_coord[, dimensions_to_use] } else { ## using original matrix ## @@ -1485,7 +1535,8 @@ doHclust <- function(gobject, # subset expression matrix if (!is.null(feats_to_use)) { expr_values <- expr_values[ - rownames(expr_values) %in% feats_to_use, ] + rownames(expr_values) %in% feats_to_use, + ] } # features as columns @@ -1498,7 +1549,8 @@ doHclust <- function(gobject, celldist <- matrix_to_use } else if (distance_method %in% c("spearman", "pearson")) { celldist <- stats::as.dist(1 - cor_flex(x = t_flex( - matrix_to_use), method = distance_method)) + matrix_to_use + ), method = distance_method)) } else if (distance_method %in% c( "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski" @@ -1569,7 +1621,8 @@ doHclust <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_hierarchical_cluster") + description = "_hierarchical_cluster" + ) return(gobject) } else { return(list("hclust" = hclusters, "DT" = ident_clusters_DT)) @@ -1635,59 +1688,62 @@ doHclust <- function(gobject, #' #' clusterCells(g) #' @export -clusterCells <- function(gobject, - cluster_method = c( - "leiden", - "louvain_community", "louvain_multinet", - "randomwalk", "sNNclust", - "kmeans", "hierarchical" - ), - name = "cluster_name", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - pyth_leid_resolution = 1, - pyth_leid_weight_col = "weight", - pyth_leid_part_type = c("RBConfigurationVertexPartition", - "ModularityVertexPartition"), - pyth_leid_init_memb = NULL, - pyth_leid_iterations = 1000, - pyth_louv_resolution = 1, - pyth_louv_weight_col = NULL, - python_louv_random = FALSE, - python_path = NULL, - louvain_gamma = 1, - louvain_omega = 1, - walk_steps = 4, - walk_clusters = 10, - walk_weights = NA, - sNNclust_k = 20, - sNNclust_eps = 4, - sNNclust_minPts = 16, - borderPoints = TRUE, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "original", "pearson", "spearman", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - km_centers = 10, - km_iter_max = 100, - km_nstart = 1000, - km_algorithm = "Hartigan-Wong", - hc_agglomeration_method = c( - "ward.D2", "ward.D", "single", - "complete", "average", "mcquitty", - "median", "centroid" - ), - hc_k = 10, - hc_h = NULL, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +clusterCells <- function( + gobject, + cluster_method = c( + "leiden", + "louvain_community", "louvain_multinet", + "randomwalk", "sNNclust", + "kmeans", "hierarchical" + ), + name = "cluster_name", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + pyth_leid_resolution = 1, + pyth_leid_weight_col = "weight", + pyth_leid_part_type = c( + "RBConfigurationVertexPartition", + "ModularityVertexPartition" + ), + pyth_leid_init_memb = NULL, + pyth_leid_iterations = 1000, + pyth_louv_resolution = 1, + pyth_louv_weight_col = NULL, + python_louv_random = FALSE, + python_path = NULL, + louvain_gamma = 1, + louvain_omega = 1, + walk_steps = 4, + walk_clusters = 10, + walk_weights = NA, + sNNclust_k = 20, + sNNclust_eps = 4, + sNNclust_minPts = 16, + borderPoints = TRUE, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "original", "pearson", "spearman", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + km_centers = 10, + km_iter_max = 100, + km_nstart = 1000, + km_algorithm = "Hartigan-Wong", + hc_agglomeration_method = c( + "ward.D2", "ward.D", "single", + "complete", "average", "mcquitty", + "median", "centroid" + ), + hc_k = 10, + hc_h = NULL, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { ## select cluster method cluster_method <- match.arg( arg = cluster_method, @@ -1816,113 +1872,250 @@ clusterCells <- function(gobject, +# subclustering #### -#' @title doLeidenSubCluster -#' @name doLeidenSubCluster -#' @description Further subcluster cells using a NN-network and the Leiden -#' algorithm -#' @param gobject giotto object -#' @param feat_type feature type +#' @title Cell subclustering +#' @name subClusterCells +#' @description Perform cell subclustering by taking an annotated group of +#' cells and performing another round of clustering on just that subset. +#' Several methods are implemented. `subClusterCells()` is the main wrapper +#' function. `doLeidenSubCluster()` and `doLouvainSubCluster()` are more +#' specific implementations. +#' @param gobject `giotto` object #' @param name name for new clustering result +#' @param cluster_method clustering method to use. Currently one of "leiden" +#' (default), "louvain_community", "louvain_multinet" #' @param cluster_column cluster column to subcluster #' @param selected_clusters only do subclustering on these clusters -#' @param hvf_param parameters for calculateHVf -#' @param hvg_param deprecatd, use hvf_param +#' @param hvf_param list of parameters for [calculateHVF()] +#' @param hvg_param deprecated #' @param hvf_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_min_perc_cells deprecated, use hvf_min_perc_cells +#' @param hvg_min_perc_cells deprecated #' @param hvf_mean_expr_det threshold for mean expression level in cells with #' detection -#' @param hvg_mean_expr_det deprecated, use hvf_mean_expr_det +#' @param hvg_mean_expr_det deprecated #' @param use_all_feats_as_hvf forces all features to be HVF and to be used as #' input for PCA -#' @param use_all_genes_as_hvg deprecated, use use_all_feats_as_hvf +#' @param use_all_genes_as_hvg deprecated #' @param min_nr_of_hvf minimum number of HVF, or all features will be used as #' input for PCA -#' @param min_nr_of_hvg deprecated, use min_nr_of_hvf -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution of Leiden clustering -#' @param n_iterations number of interations to run the Leiden algorithm. +#' @param min_nr_of_hvg deprecated +#' @param pca_param list of parameters for [runPCA()] +#' @param nn_param list of parameters for [createNearestNetwork()] +#' @param k_neighbors number of k for [createNearestNetwork()] +#' @param resolution resolution for community algorithm +#' @param n_iterations number of iterations to run the Leiden algorithm. +#' @param gamma gamma +#' @param omega omega #' @param python_path specify specific path to python if required #' @param nn_network_to_use type of NN network to use (kNN vs sNN) #' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param return_gobject logical. return `giotto` object (default = TRUE) #' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Leiden algorithm on -#' selected clusters. +#' @returns `giotto` object with new subclusters appended to cell metadata +#' @details This function performs subclustering on selected clusters. #' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable fetures} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Leiden clustering} -#' } -#' @seealso \code{\link{doLeidenCluster}} +#' 1. subset Giotto object +#' 2. identify highly variable genes +#' 3. run PCA +#' 4. create nearest neighbouring network +#' 5. do clustering +#' #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' +#' # Run some subclusterings based on "leiden_clus" annotations that already +#' # exist in the visium mini object +#' +#' # default method is leiden subclustering +#' subClusterCells(g, cluster_column = "leiden_clus") +#' +#' # use louvain instead +#' subClusterCells(g, +#' cluster_column = "leiden_clus", +#' cluster_method = "louvain_community" +#' ) +#' +#' # directly call the more specific functions #' doLeidenSubCluster(g, cluster_column = "leiden_clus") +#' +#' doLouvainSubCluster(g, cluster_column = "leiden_clus") +#' @md +NULL + + + + +#' @rdname subClusterCells #' @export -doLeidenSubCluster <- function(gobject, - feat_type = NULL, - name = "sub_pleiden_clus", +subClusterCells <- function(gobject, + name = "sub_clus", + cluster_method = c( + "leiden", + "louvain_community", + "louvain_multinet" + ), cluster_column = NULL, selected_clusters = NULL, - hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_param = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), hvf_min_perc_cells = 5, - hvg_min_perc_cells = NULL, + hvg_mean_expr_det = deprecated(), hvf_mean_expr_det = 1, - hvg_mean_expr_det = NULL, + use_all_genes_as_hvg = deprecated(), use_all_feats_as_hvf = FALSE, - use_all_genes_as_hvg = NULL, + min_nr_of_hvg = deprecated(), min_nr_of_hvf = 5, - min_nr_of_hvg = NULL, pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, - resolution = 0.5, - n_iterations = 500, + resolution = 1, + n_iterations = 1000, + gamma = 1, + omega = 1, python_path = NULL, nn_network_to_use = "sNN", network_name = "sNN.pca", return_gobject = TRUE, verbose = TRUE) { + ## select cluster method + cluster_method <- match.arg(arg = cluster_method, choices = c( + "leiden", + "louvain_community", + "louvain_multinet" + )) + + # deprecations + .dep_param <- function(...) { + GiottoUtils::deprecate_param( + ..., + fun = "subClusterCells", when = "4.0.9" + ) + } + + hvf_param <- .dep_param(hvg_param, hvf_param) + hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) + hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) + + # gather common args + common_args <- get_args_list(keep = c( + "gobject", + "cluster_column", + "selected_clusters", + "hvf_param", + "hvf_min_perc_cells", + "hvf_mean_expr_det", + "use_all_feats_as_hvf", + "min_nr_of_hvf", + "pca_param", + "nn_param", + "k_neighbors", + "nn_network_to_use", + "network_name", + "name", + "return_gobject", + "verbose" + )) + + result <- switch(cluster_method, + "leiden" = { + do.call(doLeidenSubCluster, args = c( + common_args, + list( + resolution = resolution, + n_iterations = n_iterations, + python_path = python_path, + toplevel = 4 + ) + )) + }, + "louvain_community" = { + do.call(.doLouvainSubCluster_community, args = c( + common_args, + list( + resolution = resolution, + python_path = python_path + ) + )) + }, + "louvain_multinet" = { + do.call(.doLouvainSubCluster_multinet, args = c( + common_args, + list( + gamma = gamma, + omega = omega + ) + )) + } + ) + + return(result) +} + + + + + +#' @describeIn subClusterCells Further subcluster cells using a NN-network and +#' the Leiden algorithm +#' @param toplevel do not use +#' @param feat_type feature type +#' @export +doLeidenSubCluster <- function( + gobject, + feat_type = NULL, + name = "sub_leiden_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_param = deprecated(), + hvf_min_perc_cells = 5, + hvg_min_perc_cells = deprecated(), + hvf_mean_expr_det = 1, + hvg_mean_expr_det = deprecated(), + use_all_feats_as_hvf = FALSE, + use_all_genes_as_hvg = deprecated(), + min_nr_of_hvf = 5, + min_nr_of_hvg = deprecated(), + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + n_iterations = 500, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + toplevel = 2, + verbose = TRUE) { # specify feat_type if (is.null(feat_type)) { feat_type <- gobject@expression_feat[[1]] } - ## deprecated arguments - if (!is.null(hvg_param)) { - hvf_param <- hvg_param - warning("hvg_param is deprecated, use hvf_param in the future") - } - if (!is.null(hvg_min_perc_cells)) { - hvf_min_perc_cells <- hvg_min_perc_cells - warning("hvg_min_perc_cells is deprecated, use hvf_min_perc_cells in - the future") - } - if (!is.null(hvg_mean_expr_det)) { - hvf_mean_expr_det <- hvg_mean_expr_det - warning("hvg_mean_expr_det is deprecated, use hvf_mean_expr_det in the - future") - } - if (!is.null(use_all_genes_as_hvg)) { - use_all_feats_as_hvf <- use_all_genes_as_hvg - warning("use_all_genes_as_hvg is deprecated, use use_all_feats_as_hvf - in the future") - } - if (!is.null(min_nr_of_hvg)) { - min_nr_of_hvf <- min_nr_of_hvg - warning("min_nr_of_hvg is deprecated, use min_nr_of_hvf in the future") + # deprecated arguments + .dep_param <- function(x, y) { + GiottoUtils::deprecate_param( + x, y, + fun = "doLeidenSubCluster", when = "4.0.9" + ) } + hvf_param <- .dep_param(hvg_param, hvf_param) + hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) + hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) iter_list <- list() @@ -1943,11 +2136,12 @@ doLeidenSubCluster <- function(gobject, for (cluster in unique_clusters) { - if (verbose == TRUE) cat("start with cluster: ", cluster) + vmsg(.v = verbose, "start with cluster: ", cluster, "\n") ## get subset subset_cell_IDs <- cell_metadata[ - get(cluster_column) == cluster][["cell_ID"]] + get(cluster_column) == cluster + ][["cell_ID"]] temp_giotto <- subsetGiotto( gobject = gobject, feat_type = feat_type, @@ -1959,7 +2153,8 @@ doLeidenSubCluster <- function(gobject, temp_cluster <- data.table( "cell_ID" = subset_cell_IDs, "tempclus" = 1, - "parent_cluster" = cluster) + "parent_cluster" = cluster + ) iter_list[[cluster]] <- temp_cluster } else { # continue for selected clusters or all clusters if there is no @@ -1968,45 +2163,58 @@ doLeidenSubCluster <- function(gobject, ## calculate stats temp_giotto <- addStatistics( gobject = temp_giotto, - feat_type = feat_type + feat_type = feat_type, + verbose = FALSE ) ## calculate variable feats + hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVF", c(gobject = temp_giotto, hvf_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param) + ) ## get hvg feat_metadata <- fDataDT(temp_giotto, feat_type = feat_type ) - featfeats <- feat_metadata[ + usefeats <- feat_metadata[ hvf == "yes" & perc_cells >= hvf_min_perc_cells & - mean_expr_det >= hvf_mean_expr_det]$feat_ID + mean_expr_det >= hvf_mean_expr_det + ]$feat_ID ## catch too low number of hvg if (use_all_feats_as_hvf == TRUE) { - featfeats == feat_metadata$feat_ID + usefeats == feat_metadata$feat_ID } else { - if (verbose == TRUE) - cat(length(featfeats), - "highly variable feats have been selected") - if (length(featfeats) <= min_nr_of_hvf) { + if (verbose == TRUE) { + cat( + length(usefeats), + "highly variable feats have been selected\n" + ) + } + if (length(usefeats) <= min_nr_of_hvf) { message("too few feats, will continue with all feats instead") - featfeats <- feat_metadata$feat_ID + usefeats <- feat_metadata$feat_ID } } ## run PCA + pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, feats_to_use = list(featfeats), - pca_param)) + c( + gobject = temp_giotto, feats_to_use = list(usefeats), + pca_param + ) + ) ## nearest neighbor and clustering + nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", - c(gobject = temp_giotto, k = k_neighbors, nn_param)) + c(gobject = temp_giotto, k = k_neighbors, nn_param) + ) ## Leiden Cluster ## TO DO: expand to all clustering options @@ -2049,7 +2257,10 @@ doLeidenSubCluster <- function(gobject, ) ## update parameters used ## - gobject <- update_giotto_params(gobject, description = "_sub_cluster") + gobject <- update_giotto_params( + gobject, + description = "_sub_cluster", toplevel = toplevel + ) return(gobject) } else { return(together) @@ -2057,63 +2268,31 @@ doLeidenSubCluster <- function(gobject, } -#' @title doLouvainSubCluster community -#' @name .doLouvainSubCluster_community -#' @description subcluster cells using a NN-network and the Louvain community -#' detection algorithm -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution -#' @param python_path specify specific path to python if required -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject Boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Louvain community -#' algorithm on selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Louvain community clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_community}} -#' @keywords internal -.doLouvainSubCluster_community <- function(gobject, - name = "sub_louvain_comm_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +# subcluster cells using a NN-network and the Louvain community +# detection algorithm +.doLouvainSubCluster_community <- function( + gobject, + name = "sub_louvain_comm_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, + difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { iter_list <- list() cell_metadata <- pDataDT(gobject) @@ -2127,68 +2306,86 @@ doLeidenSubCluster <- function(gobject, index_offset <- ifelse(0 %in% unique_clusters, 1, 0) for (cluster in unique_clusters) { - if (verbose == TRUE) cat("start with cluster: ", cluster) + if (verbose == TRUE) cat("start with cluster: ", cluster, "\n") ## get subset subset_cell_IDs <- cell_metadata[ - get(cluster_column) == cluster][["cell_ID"]] - temp_giotto <- subsetGiotto(gobject = gobject, - cell_ids = subset_cell_IDs) + get(cluster_column) == cluster + ][["cell_ID"]] + temp_giotto <- subsetGiotto( + gobject = gobject, + cell_ids = subset_cell_IDs + ) ## if cluster is not selected if (!is.null(selected_clusters) & !cluster %in% selected_clusters) { temp_cluster <- data.table( "cell_ID" = subset_cell_IDs, "tempclus" = 1, - "parent_cluster" = cluster) + "parent_cluster" = cluster + ) iter_list[[cluster + index_offset]] <- temp_cluster } else { # continue for selected clusters or all clusters if there is no # selection ## calculate stats - temp_giotto <- addStatistics(gobject = temp_giotto) + temp_giotto <- addStatistics( + gobject = temp_giotto, verbose = FALSE + ) ## calculate variable genes + hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVG", c(gobject = temp_giotto, hvg_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param) + ) - ## get hvg - gene_metadata <- fDataDT(temp_giotto) + ## get hvf + feat_metadata <- fDataDT(temp_giotto) - # data.table variables - hvg <- perc_cells <- mean_expr_det <- NULL + # NSE variables + hvf <- perc_cells <- mean_expr_det <- NULL - featgenes <- gene_metadata[ - hvg == "yes" & perc_cells >= hvg_min_perc_cells & - mean_expr_det >= hvg_mean_expr_det]$gene_ID + usefeats <- feat_metadata[ + hvf == "yes" & + perc_cells >= hvf_min_perc_cells & + mean_expr_det >= hvf_mean_expr_det + ]$feat_ID - ## catch too low number of hvg - if (use_all_genes_as_hvg == TRUE) { - featgenes == gene_metadata$gene_ID + ## catch too low number of hvf + if (isTRUE(use_all_feats_as_hvf)) { + usefeats == feat_metadata$feat_ID } else { - if (verbose == TRUE) - cat(length(featgenes), - "highly variable genes have been selected") - if (length(featgenes) <= min_nr_of_hvg) { - message("too few genes, will continue with all genes - instead") - featgenes <- gene_metadata$gene_ID + if (isTRUE(verbose)) { + cat( + length(usefeats), + "highly variable features have been selected\n" + ) + } + if (length(usefeats) <= min_nr_of_hvf) { + wrap_msg("too few features + will continue with all features instead") + usefeats <- feat_metadata$feat_ID } } ## run PCA + pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, genes_to_use = list(featgenes), - pca_param)) + c( + gobject = temp_giotto, feats_to_use = list(usefeats), + pca_param + ) + ) ## nearest neighbor and clustering + nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", - c(gobject = temp_giotto, k = k_neighbors, nn_param)) + c(gobject = temp_giotto, k = k_neighbors, nn_param) + ) - ## Leiden Cluster ## TO DO: expand to all clustering options temp_cluster <- .doLouvainCluster_community( gobject = temp_giotto, @@ -2256,63 +2453,30 @@ doLeidenSubCluster <- function(gobject, -#' @title doLouvainSubCluster multinet -#' @name .doLouvainSubCluster_multinet -#' @description subcluster cells using a NN-network and the Louvain multinet -#' detection algorithm -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param gamma gamma -#' @param omega omega -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Louvain multinet -#' algorithm on selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Louvain multinet clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_multinet}} -#' @keywords internal -.doLouvainSubCluster_multinet <- function(gobject, - name = "sub_louvain_mult_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - gamma = 1, - omega = 1, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +# subcluster cells using a NN-network and the Louvain multinet +# detection algorithm +.doLouvainSubCluster_multinet <- function( + gobject, + name = "sub_louvain_mult_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + gamma = 1, + omega = 1, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { if ("multinet" %in% rownames(installed.packages()) == FALSE) { stop( "package 'multinet' is not yet installed \n", @@ -2335,69 +2499,86 @@ doLeidenSubCluster <- function(gobject, # data.table variables - hvg <- perc_cells <- mean_expr_det <- parent_cluster <- cell_ID <- + hvf <- perc_cells <- mean_expr_det <- parent_cluster <- cell_ID <- comb <- tempclus <- NULL for (cluster in unique_clusters) { - if (verbose == TRUE) cat("start with cluster: ", cluster) + if (verbose == TRUE) cat("start with cluster: ", cluster, "\n") ## get subset subset_cell_IDs <- cell_metadata[ - get(cluster_column) == cluster][["cell_ID"]] - temp_giotto <- subsetGiotto(gobject = gobject, - cell_ids = subset_cell_IDs) + get(cluster_column) == cluster + ][["cell_ID"]] + temp_giotto <- subsetGiotto( + gobject = gobject, + cell_ids = subset_cell_IDs + ) ## if cluster is not selected if (!is.null(selected_clusters) & !cluster %in% selected_clusters) { temp_cluster <- data.table( "cell_ID" = subset_cell_IDs, "tempclus" = 1, - "parent_cluster" = cluster) + "parent_cluster" = cluster + ) iter_list[[cluster + index_offset]] <- temp_cluster } else { # continue for selected clusters or all clusters if there is no # selection ## calculate stats - temp_giotto <- addStatistics(gobject = temp_giotto) + temp_giotto <- addStatistics( + gobject = temp_giotto, verbose = FALSE + ) ## calculate variable genes + hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVG", c(gobject = temp_giotto, hvg_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param) + ) - ## get hvg - gene_metadata <- fDataDT(temp_giotto) - featgenes <- gene_metadata[ - hvg == "yes" & perc_cells >= hvg_min_perc_cells & - mean_expr_det >= hvg_mean_expr_det]$gene_ID + ## get hvf + feat_metadata <- fDataDT(temp_giotto) + usefeats <- feat_metadata[ + hvf == "yes" & perc_cells >= hvf_min_perc_cells & + mean_expr_det >= hvf_mean_expr_det + ]$feat_ID - ## catch too low number of hvg - if (use_all_genes_as_hvg == TRUE) { - featgenes == gene_metadata$gene_ID + ## catch too low number of hvf + if (use_all_feats_as_hvf == TRUE) { + usefeats == feat_metadata$feat_ID } else { - if (verbose == TRUE) - cat(length(featgenes), - "highly variable genes have been selecteds") - if (length(featgenes) <= min_nr_of_hvg) { - message("too few genes, will continue with all genes + if (verbose == TRUE) { + cat( + length(usefeats), + "highly variable features have been selected\n" + ) + } + if (length(usefeats) <= min_nr_of_hvf) { + message("too few features, will continue with all features instead") - featgenes <- gene_metadata$gene_ID + usefeats <- feat_metadata$feat_ID } } ## run PCA + pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, genes_to_use = list(featgenes), - pca_param)) + c( + gobject = temp_giotto, feats_to_use = list(usefeats), + pca_param + ) + ) ## nearest neighbor and clustering + nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", - c(gobject = temp_giotto, k = k_neighbors, nn_param)) + c(gobject = temp_giotto, k = k_neighbors, nn_param) + ) - ## Leiden Cluster ## TO DO: expand to all clustering options temp_cluster <- .doLouvainCluster_multinet( gobject = temp_giotto, @@ -2460,269 +2641,99 @@ doLeidenSubCluster <- function(gobject, -#' @title doLouvainSubCluster -#' @name doLouvainSubCluster -#' @description subcluster cells using a NN-network and the Louvain algorithm -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param version version of Louvain algorithm to use -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution for community algorithm -#' @param gamma gamma -#' @param omega omega -#' @param python_path specify specific path to python if required -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Louvain algorithm on -#' selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Louvain clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_multinet}} and -#' \code{\link{.doLouvainCluster_community}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' doLouvainSubCluster(g, cluster_column = "leiden_clus") + +#' @describeIn subClusterCells subcluster cells using a NN-network and the +#' Louvain algorithm +#' @param version version of Louvain algorithm to use. One of "community" or +#' "multinet", with the default being "community" #' @export -doLouvainSubCluster <- function(gobject, - name = "sub_louvain_clus", - version = c("community", "multinet"), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +doLouvainSubCluster <- function( + gobject, + name = "sub_louvain_clus", + version = c("community", "multinet"), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { ## louvain clustering version to use version <- match.arg(version, c("community", "multinet")) - - # python community implementation - if (version == "community") { - result <- .doLouvainSubCluster_community( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_mean_expr_det = hvg_mean_expr_det, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - resolution = resolution, - python_path = python_path, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } else if (version == "multinet") { - result <- .doLouvainSubCluster_multinet( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_mean_expr_det = hvg_mean_expr_det, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - gamma = gamma, - omega = omega, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose + # deprecations + .dep_param <- function(x, y) { + GiottoUtils::deprecate_param( + x, y, + fun = "doLouvainSubCluster", when = "4.0.9" ) } - return(result) -} - - - - - -#' @title subClusterCells -#' @name subClusterCells -#' @description subcluster cells -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param cluster_method clustering method to use -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution -#' @param n_iterations number of interations to run the Leiden algorithm. -#' @param gamma gamma -#' @param omega omega -#' @param python_path specify specific path to python if required -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering on selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_multinet}}, -#' \code{\link{.doLouvainCluster_community}} -#' and @seealso \code{\link{doLeidenCluster}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' subClusterCells(g, cluster_column = "leiden_clus") -#' @export -subClusterCells <- function(gobject, - name = "sub_clus", - cluster_method = c( - "leiden", - "louvain_community", - "louvain_multinet" - ), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 1, - n_iterations = 1000, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { - ## select cluster method - cluster_method <- match.arg(arg = cluster_method, choices = c( - "leiden", - "louvain_community", - "louvain_multinet" + hvf_param <- .dep_param(hvg_param, hvf_param) + hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) + hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) + + # get common args + common_args <- get_args_list(keep = c( + "gobject", + "cluster_column", + "selected_clusters", + "hvf_param", + "hvf_min_perc_cells", + "hvf_mean_expr_det", + "use_all_feats_as_hvf", + "min_nr_of_hvf", + "pca_param", + "nn_param", + "k_neighbors", + "nn_network_to_use", + "network_name", + "name", + "return_gobject", + "verbose" )) - - if (cluster_method == "leiden") { - result <- doLeidenSubCluster( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_min_perc_cells = hvg_min_perc_cells, - hvg_mean_expr_det = hvg_mean_expr_det, - use_all_genes_as_hvg = use_all_genes_as_hvg, - min_nr_of_hvg = min_nr_of_hvg, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - resolution = resolution, - n_iterations = n_iterations, - python_path = python_path, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } else if (cluster_method == "louvain_community") { - result <- .doLouvainSubCluster_community( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_min_perc_cells = hvg_min_perc_cells, - hvg_mean_expr_det = hvg_mean_expr_det, - use_all_genes_as_hvg = use_all_genes_as_hvg, - min_nr_of_hvg = min_nr_of_hvg, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - resolution = resolution, - python_path = python_path, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } else if (cluster_method == "louvain_multinet") { - result <- .doLouvainSubCluster_multinet( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_min_perc_cells = hvg_min_perc_cells, - hvg_mean_expr_det = hvg_mean_expr_det, - use_all_genes_as_hvg = use_all_genes_as_hvg, - min_nr_of_hvg = min_nr_of_hvg, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - gamma = gamma, - omega = omega, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } + result <- switch(version, + "community" = { + do.call(.doLouvainSubCluster_community, args = c( + common_args, + list( + resolution = resolution, + python_path = python_path + ) + )) + }, + "multinet" = { + do.call(.doLouvainSubCluster_multinet, args = c( + common_args, + list( + gamma = gamma, + omega = omega + ) + )) + } + ) return(result) } @@ -2734,6 +2745,9 @@ subClusterCells <- function(gobject, +# cluster manipulation #### + + #' @title getClusterSimilarity #' @name getClusterSimilarity #' @description Creates data.table with pairwise correlation scores between @@ -2754,12 +2768,13 @@ subClusterCells <- function(gobject, #' #' getClusterSimilarity(g, cluster_column = "leiden_clus") #' @export -getClusterSimilarity <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman")) { +getClusterSimilarity <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman")) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2777,7 +2792,8 @@ getClusterSimilarity <- function(gobject, cor <- match.arg(cor, c("pearson", "spearman")) values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) metadata <- pDataDT(gobject, feat_type = feat_type, @@ -2802,25 +2818,38 @@ getClusterSimilarity <- function(gobject, metadata_cols = cluster_column ) dcast_metatable <- data.table::dcast.data.table( - metatable, formula = variable ~ uniq_ID, value.var = "value") + metatable, + formula = variable ~ uniq_ID, value.var = "value" + ) testmatrix <- dt_to_matrix(x = dcast_metatable) # correlation matrix cormatrix <- cor_flex(x = testmatrix, method = cor) cor_table <- data.table::as.data.table(reshape2::melt(cormatrix)) data.table::setnames( - cor_table, old = c("Var1", "Var2"), c("group1", "group2")) + cor_table, + old = c("Var1", "Var2"), c("group1", "group2") + ) cor_table[, c("group1", "group2") := list( - as.character(group1), as.character(group2))] + as.character(group1), as.character(group2) + )] cor_table[, unified_group := paste( - sort(c(group1, group2)), collapse = "--"), by = 1:nrow(cor_table)] + sort(c(group1, group2)), + collapse = "--" + ), + by = 1:nrow(cor_table) + ] cor_table <- cor_table[!duplicated(cor_table[, .(value, unified_group)])] cor_table <- merge( - cor_table, by.x = "group1", clustersize, by.y = "clusters") + cor_table, + by.x = "group1", clustersize, by.y = "clusters" + ) setnames(cor_table, "size", "group1_size") cor_table <- merge( - cor_table, by.x = "group2", clustersize, by.y = "clusters") + cor_table, + by.x = "group2", clustersize, by.y = "clusters" + ) setnames(cor_table, "size", "group2_size") return(cor_table) @@ -2864,19 +2893,20 @@ getClusterSimilarity <- function(gobject, #' #' mergeClusters(g, cluster_column = "leiden_clus") #' @export -mergeClusters <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman"), - new_cluster_name = "merged_cluster", - min_cor_score = 0.8, - max_group_size = 20, - force_min_group_size = 10, - max_sim_clusters = 10, - return_gobject = TRUE, - verbose = TRUE) { +mergeClusters <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman"), + new_cluster_name = "merged_cluster", + min_cor_score = 0.8, + max_group_size = 20, + force_min_group_size = 10, + max_sim_clusters = 10, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2891,7 +2921,8 @@ mergeClusters <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # correlation score to be used cor <- match.arg(cor, c("pearson", "spearman")) @@ -2914,12 +2945,16 @@ mergeClusters <- function(gobject, min_reached <- cumsum_reached <- NULL filter_set_first <- similarityDT[group1 != group2][ - group1_size < max_group_size][value >= min_cor_score] + group1_size < max_group_size + ][value >= min_cor_score] # 2. small clusters minimum_set <- similarityDT[group1 != group2][ - group1_size < force_min_group_size][order(-value)][ - , head(.SD, max_sim_clusters), by = group1] + group1_size < force_min_group_size + ][order(-value)][ + , head(.SD, max_sim_clusters), + by = group1 + ] # 2.1 take all clusters necessary to reach force_min_group_size minimum_set[, cumsum_val := cumsum(group2_size) + group1_size, by = group1] @@ -2933,7 +2968,7 @@ mergeClusters <- function(gobject, ## get list of correlated groups finallist <- list() start_i <- 1 - for (row in 1:nrow(filter_set)) { + for (row in seq_len(nrow(filter_set))) { first_clus <- filter_set[row][["group1"]] second_clus <- filter_set[row][["group2"]] @@ -2949,7 +2984,8 @@ mergeClusters <- function(gobject, } else { who <- which(res == TRUE)[[1]] finallist[[who]] <- unique( - c(finallist[[who]], first_clus, second_clus)) + c(finallist[[who]], first_clus, second_clus) + ) } } @@ -2996,7 +3032,9 @@ mergeClusters <- function(gobject, spat_unit = spat_unit, feat_type = feat_type, new_metadata = metadata[ - , c("cell_ID", new_cluster_name), with = FALSE], + , c("cell_ID", new_cluster_name), + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -3034,10 +3072,12 @@ mergeClusters <- function(gobject, dend_1 <- dendextend::find_dendrogram( dend = dend, - selected_labels = names(numerical_leaves[selected_labels_ind_1])) + selected_labels = names(numerical_leaves[selected_labels_ind_1]) + ) dend_2 <- dendextend::find_dendrogram( dend = dend, - selected_labels = names(numerical_leaves[selected_labels_ind_2])) + selected_labels = names(numerical_leaves[selected_labels_ind_2]) + ) return(list(theight = top_height, dend1 = dend_1, dend2 = dend_2)) } @@ -3074,7 +3114,9 @@ mergeClusters <- function(gobject, # check which heights are available available_h <- as.numeric(unlist(lapply( - dend_list, FUN = function(x) attributes(x)$height))) + dend_list, + FUN = function(x) attributes(x)$height + ))) # get dendrogram associated with height and split in two select_dend_ind <- which.min(abs(available_h - n_height)) @@ -3084,13 +3126,19 @@ mergeClusters <- function(gobject, # find leave labels toph <- tempres[[1]] first_group <- dendextend::get_leaves_attr( - tempres[[2]], attribute = "label") + tempres[[2]], + attribute = "label" + ) second_group <- dendextend::get_leaves_attr( - tempres[[3]], attribute = "label") + tempres[[3]], + attribute = "label" + ) - result_list[[j]] <- list("height" = toph, - "first" = first_group, - "sec" = second_group) + result_list[[j]] <- list( + "height" = toph, + "first" = first_group, + "sec" = second_group + ) j <- j + 1 @@ -3136,17 +3184,18 @@ mergeClusters <- function(gobject, #' #' getDendrogramSplits(g, cluster_column = "leiden_clus") #' @export -getDendrogramSplits <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman"), - distance = "ward.D", - h = NULL, - h_color = "red", - show_dend = TRUE, - verbose = TRUE) { +getDendrogramSplits <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman"), + distance = "ward.D", + h = NULL, + h_color = "red", + show_dend = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -3167,7 +3216,8 @@ getDendrogramSplits <- function(gobject, cor <- match.arg(cor, c("pearson", "spearman")) values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # create average expression matrix per cluster metatable <- calculateMetaTable( @@ -3178,7 +3228,9 @@ getDendrogramSplits <- function(gobject, metadata_cols = cluster_column ) dcast_metatable <- data.table::dcast.data.table( - metatable, formula = variable ~ uniq_ID, value.var = "value") + metatable, + formula = variable ~ uniq_ID, value.var = "value" + ) testmatrix <- dt_to_matrix(x = dcast_metatable) # correlation @@ -3203,9 +3255,10 @@ getDendrogramSplits <- function(gobject, splitList <- .node_clusters(hclus_obj = corclus, verbose = verbose) splitDT <- data.table::as.data.table(t_flex( - data.table::as.data.table(splitList[[2]]))) + data.table::as.data.table(splitList[[2]]) + )) colnames(splitDT) <- c("node_h", "tree_1", "tree_2") - splitDT[, nodeID := paste0("node_", 1:.N)] + splitDT[, nodeID := paste0("node_", seq_len(.N))] return(splitDT) } @@ -3256,27 +3309,30 @@ getDendrogramSplits <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- pDataDT(g) -#' g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID,300)) -#' doClusterProjection(target_gobject = g, source_gobject = g_small, -#' source_cluster_labels = "leiden_clus") +#' g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID, 300)) +#' doClusterProjection( +#' target_gobject = g, source_gobject = g_small, +#' source_cluster_labels = "leiden_clus" +#' ) #' @export -doClusterProjection <- function(target_gobject, - target_cluster_label_name = "knn_labels", - spat_unit = NULL, - feat_type = NULL, - source_gobject, - source_cluster_labels = NULL, - reduction = "cells", - reduction_method = "pca", - reduction_name = "pca", - dimensions_to_use = 1:10, - knn_k = 10, - prob = FALSE, - algorithm = c( - "kd_tree", - "cover_tree", "brute" - ), - return_gobject = TRUE) { +doClusterProjection <- function( + target_gobject, + target_cluster_label_name = "knn_labels", + spat_unit = NULL, + feat_type = NULL, + source_gobject, + source_cluster_labels = NULL, + reduction = "cells", + reduction_method = "pca", + reduction_name = "pca", + dimensions_to_use = 1:10, + knn_k = 10, + prob = FALSE, + algorithm = c( + "kd_tree", + "cover_tree", "brute" + ), + return_gobject = TRUE) { # NSE vars cell_ID <- temp_name_prob <- NULL @@ -3317,7 +3373,8 @@ doClusterProjection <- function(target_gobject, dim_coord <- dim_obj[] dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% 1:ncol(dim_coord)] + dimensions_to_use %in% seq_len(ncol(dim_coord)) + ] matrix_to_use <- dim_coord[, dimensions_to_use] ## create the training and testset from the matrix @@ -3326,12 +3383,14 @@ doClusterProjection <- function(target_gobject, # (w/ labels) # and target giotto object train <- matrix_to_use[ - rownames(matrix_to_use) %in% names(source_annot_vec), ] + rownames(matrix_to_use) %in% names(source_annot_vec), + ] train <- train[match(names(source_annot_vec), rownames(train)), ] # the test set are the remaining cell_IDs that need a label test <- matrix_to_use[ - !rownames(matrix_to_use) %in% names(source_annot_vec), ] + !rownames(matrix_to_use) %in% names(source_annot_vec), + ] cl <- source_annot_vec # make prediction @@ -3369,14 +3428,18 @@ doClusterProjection <- function(target_gobject, if (isTRUE(prob)) { cell_meta_target[, temp_name_prob := probs[cell_ID]] cell_meta_target <- cell_meta_target[ - , .(cell_ID, temp_name, temp_name_prob)] + , .(cell_ID, temp_name, temp_name_prob) + ] cell_meta_target[, temp_name_prob := ifelse( - is.na(temp_name_prob), 1, temp_name_prob)] + is.na(temp_name_prob), 1, temp_name_prob + )] data.table::setnames(cell_meta_target, old = c("temp_name", "temp_name_prob"), - new = c(target_cluster_label_name, - paste0(target_cluster_label_name, "_prob")) + new = c( + target_cluster_label_name, + paste0(target_cluster_label_name, "_prob") + ) ) } else { cell_meta_target <- cell_meta_target[, .(cell_ID, temp_name)] @@ -3397,7 +3460,8 @@ doClusterProjection <- function(target_gobject, feat_type = feat_type, new_metadata = cell_meta_target[ , c("cell_ID", target_cluster_label_name, prob_label), - with = FALSE], + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -3407,7 +3471,9 @@ doClusterProjection <- function(target_gobject, spat_unit = spat_unit, feat_type = feat_type, new_metadata = cell_meta_target[ - , c("cell_ID", target_cluster_label_name), with = FALSE], + , c("cell_ID", target_cluster_label_name), + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) diff --git a/R/convenience.R b/R/convenience.R index e94f8e20c..69fe5feee 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -13,12 +13,12 @@ #' reader functions should be built using it as a base. #' @param spat_method spatial method for which the data is being read #' @param data_dir exported data directory to read from -#' @param dir_items named list of directory items to expect and keywords to +#' @param dir_items named list of directory items to expect and keywords to #' match #' @param data_to_use character. Which type(s) of expression data to build the #' gobject with. Values should match with a *workflow* item in require_data_DT #' (see details) -#' @param require_data_DT data.table detailing if expected data items are +#' @param require_data_DT data.table detailing if expected data items are #' required or optional for each \code{data_to_use} *workflow* #' @param cores cores to use #' @param verbose be verbose @@ -31,10 +31,10 @@ #' \item{1. detection of items within \code{data_dir} by looking for keywords #' assigned through \code{dir_items}} #' \item{2. check of detected items to see if everything needed has been found. -#' Dictionary of necessary vs optional items for each \code{data_to_use} +#' Dictionary of necessary vs optional items for each \code{data_to_use} #' *workflow* is provided through \code{require_data_DT}} -#' \item{3. if multiple filepaths are found to be matching then select the -#' first one. This function is only intended to find the first level +#' \item{3. if multiple filepaths are found to be matching then select the +#' first one. This function is only intended to find the first level #' subdirectories and files.} #' } #' @@ -82,27 +82,32 @@ NULL #' @describeIn read_data_folder Should not be used directly #' @keywords internal -.read_data_folder <- function(spat_method = NULL, - data_dir = NULL, - dir_items, - data_to_use, - load_format = NULL, - require_data_DT, - cores = NA, - verbose = NULL, - toplevel = 2L) { +.read_data_folder <- function( + spat_method = NULL, + data_dir = NULL, + dir_items, + data_to_use, + load_format = NULL, + require_data_DT, + cores = NA, + verbose = NULL, + toplevel = 2L) { ch <- box_chars() # 0. check params if (is.null(data_dir) || !dir.exists(data_dir)) { - .gstop(.n = toplevel, "The full path to a", spat_method, - "directory must be given.") + .gstop( + .n = toplevel, "The full path to a", spat_method, + "directory must be given." + ) } vmsg(.v = verbose, "A structured", spat_method, "directory will be used") if (!data_to_use %in% require_data_DT$workflow) { - .gstop(.n = toplevel, - "Data requirements for data_to_use not found in require_data_DT") + .gstop( + .n = toplevel, + "Data requirements for data_to_use not found in require_data_DT" + ) } # 1. detect items @@ -126,10 +131,12 @@ NULL .initial = paste0(ch$s, "> "), item, " found" ) - for (item_i in seq_along(dir_items[[item]])) { + for (item_i in seq_along(dir_items[[item]])) { # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) + subItem <- gsub( + pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]] + ) vmsg( .v = verbose, .is_debug = TRUE, .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), @@ -147,13 +154,16 @@ NULL require_data_DT <- require_data_DT[workflow == data_to_use, ] - if (!is.null(load_format)) + if (!is.null(load_format)) { require_data_DT <- require_data_DT[filetype == load_format, ] + } - if (item %in% require_data_DT[needed == TRUE, item]) + if (item %in% require_data_DT[needed == TRUE, item]) { stop(item, " is missing") - if (item %in% require_data_DT[needed == FALSE, item]) + } + if (item %in% require_data_DT[needed == FALSE, item]) { warning(item, "is missing (optional)") + } } } @@ -193,7 +203,7 @@ NULL #' @title Create a giotto object from 10x visium data #' @name createGiottoVisiumObject -#' @description Create Giotto object directly from a 10X visium folder. Also +#' @description Create Giotto object directly from a 10X visium folder. Also #' accepts visium H5 outputs. #' #' @param visium_dir path to the 10X visium directory [required] @@ -202,7 +212,7 @@ NULL #' @param h5_visium_path path to visium 10X .h5 file #' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids #' @param h5_tissue_positions_path path to tissue locations (.csv file) -#' @param h5_image_png_path path to tissue .png file (optional). Image +#' @param h5_image_png_path path to tissue .png file (optional). Image #' autoscaling looks for matches in the filename for either 'hires' or 'lowres' #' @param h5_json_scalefactors_path path to .json scalefactors (optional) #' @param png_name select name of png to use (see details) @@ -211,11 +221,11 @@ NULL #' @param xmin_adj deprecated #' @param ymax_adj deprecated #' @param ymin_adj deprecated -#' @param instructions list of instructions or output result from +#' @param instructions list of instructions or output result from #' \code{\link[GiottoClass]{createGiottoInstructions}} -#' @param cores how many cores or threads to use to read data if paths are +#' @param cores how many cores or threads to use to read data if paths are #' provided -#' @param expression_matrix_class class of expression matrix to use +#' @param expression_matrix_class class of expression matrix to use #' (e.g. 'dgCMatrix', 'DelayedArray') #' @param h5_file optional path to create an on-disk h5 file #' @param verbose be verbose @@ -239,31 +249,32 @@ NULL #' } #' #' @export -createGiottoVisiumObject <- function(visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - h5_visium_path = NULL, - h5_gene_ids = c("symbols", "ensembl"), - h5_tissue_positions_path = NULL, - h5_image_png_path = NULL, - h5_json_scalefactors_path = NULL, - png_name = NULL, - do_manual_adj = FALSE, # deprecated - xmax_adj = 0, # deprecated - xmin_adj = 0, # deprecated - ymax_adj = 0, # deprecated - ymin_adj = 0, # deprecated - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - cores = NA, - verbose = NULL) { +createGiottoVisiumObject <- function( + visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + h5_visium_path = NULL, + h5_gene_ids = c("symbols", "ensembl"), + h5_tissue_positions_path = NULL, + h5_image_png_path = NULL, + h5_json_scalefactors_path = NULL, + png_name = NULL, + do_manual_adj = FALSE, # deprecated + xmax_adj = 0, # deprecated + xmin_adj = 0, # deprecated + ymax_adj = 0, # deprecated + ymin_adj = 0, # deprecated + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + cores = NA, + verbose = NULL) { # NSE vars barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL # handle deprecations - img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', - 'ymax_adj', 'ymin_adj' are no longer used. + img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', + 'ymax_adj', 'ymin_adj' are no longer used. Please use the automated workflow." if (!isFALSE(do_manual_adj) || xmax_adj != 0 || @@ -316,18 +327,17 @@ createGiottoVisiumObject <- function(visium_dir = NULL, -.visium_create <- function( - expr_counts_path, - h5_gene_ids = NULL, # h5 - gene_column_index = NULL, # folder - tissue_positions_path, - image_path = NULL, - scale_json_path = NULL, - png_name = NULL, - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - verbose = NULL) { +.visium_create <- function(expr_counts_path, + h5_gene_ids = NULL, # h5 + gene_column_index = NULL, # folder + tissue_positions_path, + image_path = NULL, + scale_json_path = NULL, + png_name = NULL, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + verbose = NULL) { # NSE vars barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL @@ -348,12 +358,16 @@ createGiottoVisiumObject <- function(visium_dir = NULL, } # if expr_results is not a list, make it a list compatible with downstream - if (!is.list(expr_results)) expr_results <- list( - "Gene Expression" = expr_results) + if (!is.list(expr_results)) { + expr_results <- list( + "Gene Expression" = expr_results + ) + } # format expected data into list to be used with readExprData() raw_matrix_list <- list("cell" = list("rna" = list( - "raw" = expr_results[["Gene Expression"]]))) + "raw" = expr_results[["Gene Expression"]] + ))) # add protein expression data to list if it exists if ("Antibody Capture" %in% names(expr_results)) { @@ -363,12 +377,15 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # 2. spatial locations spatial_results <- data.table::fread(tissue_positions_path) - colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", - "array_col", "col_pxl", "row_pxl") + colnames(spatial_results) <- c( + "barcode", "in_tissue", "array_row", + "array_col", "col_pxl", "row_pxl" + ) spatial_results <- spatial_results[match(colnames( - raw_matrix_list$cell[[1]]$raw), barcode)] + raw_matrix_list$cell[[1]]$raw + ), barcode)] data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") - spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] + spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] # flip x and y colnames(spatial_locs) <- c("cell_ID", "sdimx", "sdimy") @@ -388,7 +405,8 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # 5. metadata meta_results <- spatial_results[ - , .(cell_ID, in_tissue, array_row, array_col)] + , .(cell_ID, in_tissue, array_row, array_col) + ] expr_types <- names(raw_matrix_list$cell) meta_list <- list() for (etype in expr_types) { @@ -427,17 +445,17 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # Find and check the filepaths within a structured visium directory -.visium_read_folder <- function( - visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - png_name = NULL, - verbose = NULL) { +.visium_read_folder <- function(visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { vmsg(.v = verbose, "A structured visium directory will be used") ## check arguments - if (is.null(visium_dir)) + if (is.null(visium_dir)) { .gstop("visium_dir needs to be a path to a visium directory") + } visium_dir <- path.expand(visium_dir) if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") expr_data <- match.arg(expr_data, choices = c("raw", "filter")) @@ -448,14 +466,16 @@ createGiottoVisiumObject <- function(visium_dir = NULL, "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") ) - if (!file.exists(expr_counts_path)) + if (!file.exists(expr_counts_path)) { .gstop(expr_counts_path, "does not exist!") + } ## 2. check spatial locations spatial_dir <- paste0(visium_dir, "/", "spatial/") tissue_positions_path <- Sys.glob( - paths = file.path(spatial_dir, "tissue_positions*")) + paths = file.path(spatial_dir, "tissue_positions*") + ) ## 3. check spatial image @@ -469,8 +489,9 @@ createGiottoVisiumObject <- function(visium_dir = NULL, ## 4. check scalefactors scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") - if (!file.exists(scalefactors_path)) + if (!file.exists(scalefactors_path)) { .gstop(scalefactors_path, "does not exist!") + } list( @@ -484,36 +505,44 @@ createGiottoVisiumObject <- function(visium_dir = NULL, -.visium_read_h5 <- function( - h5_visium_path = h5_visium_path, # expression matrix - h5_gene_ids = h5_gene_ids, - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = NULL) { +.visium_read_h5 <- function(h5_visium_path = h5_visium_path, # expression matrix + h5_gene_ids = h5_gene_ids, + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = NULL) { # 1. filepaths - vmsg(.v = verbose, - "A path to an .h5 10X file was provided and will be used") - if (!file.exists(h5_visium_path)) + vmsg( + .v = verbose, + "A path to an .h5 10X file was provided and will be used" + ) + if (!file.exists(h5_visium_path)) { .gstop("The provided path ", h5_visium_path, " does not exist") - if (is.null(h5_tissue_positions_path)) - .gstop("A path to the tissue positions (.csv) needs to be provided to + } + if (is.null(h5_tissue_positions_path)) { + .gstop("A path to the tissue positions (.csv) needs to be provided to h5_tissue_positions_path") - if (!file.exists(h5_tissue_positions_path)) - .gstop("The provided path ", h5_tissue_positions_path, - " does not exist") + } + if (!file.exists(h5_tissue_positions_path)) { + .gstop( + "The provided path ", h5_tissue_positions_path, + " does not exist" + ) + } if (!is.null(h5_image_png_path)) { if (!file.exists(h5_image_png_path)) { - .gstop("The provided h5 image path ", h5_image_png_path, - "does not exist. - Set to NULL to exclude or provide the correct path.") + .gstop( + "The provided h5 image path ", h5_image_png_path, + "does not exist. + Set to NULL to exclude or provide the correct path." + ) } } if (!is.null(h5_json_scalefactors_path)) { if (!file.exists(h5_json_scalefactors_path)) { warning(wrap_txt( "No file found at h5_json_scalefactors_path. - Scalefactors are needed for proper image alignment and + Scalefactors are needed for proper image alignment and polygon generation" )) } @@ -549,8 +578,9 @@ createGiottoVisiumObject <- function(visium_dir = NULL, #' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object #' for the "cell" spatial unit. #' @export -addVisiumPolygons <- function(gobject, - scalefactor_path = NULL) { +addVisiumPolygons <- function( + gobject, + scalefactor_path = NULL) { assert_giotto(gobject) visium_spat_locs <- getSpatialLocations( @@ -591,8 +621,10 @@ addVisiumPolygons <- function(gobject, .visium_read_scalefactors <- function(json_path = NULL) { if (!checkmate::test_file_exists(json_path)) { if (!is.null(json_path)) { - warning("scalefactors not discovered at: \n", - json_path, call. = FALSE) + warning("scalefactors not discovered at: \n", + json_path, + call. = FALSE + ) } return(NULL) } @@ -640,7 +672,7 @@ addVisiumPolygons <- function(gobject, #' @title Calculate Pixel to Micron Scalefactor #' @name visium_micron_scalefactor -#' @param json_scalefactors list of scalefactors from +#' @param json_scalefactors list of scalefactors from #' .visium_read_scalefactors() #' @returns scale factor for converting pixel to micron #' @details @@ -662,7 +694,7 @@ addVisiumPolygons <- function(gobject, #' @name .visium_spot_poly #' @param spatlocs spatial locations data.table or `spatLocsObj` containing #' centroid locations of visium spots -#' @param json_scalefactors list of scalefactors from +#' @param json_scalefactors list of scalefactors from #' .visium_read_scalefactors() #' @returns giottoPolygon object #' @details @@ -670,8 +702,9 @@ addVisiumPolygons <- function(gobject, #' Visium spots. #' @keywords internal #' @md -.visium_spot_poly <- function(spatlocs = NULL, - json_scalefactors) { +.visium_spot_poly <- function( + spatlocs = NULL, + json_scalefactors) { if (inherits(spatlocs, "spatLocsObj")) { spatlocs <- spatlocs[] } @@ -699,11 +732,10 @@ addVisiumPolygons <- function(gobject, # json_info expects the list read output from .visium_read_scalefactors # image_path should be expected to be full filepath # should only be used when do_manual_adj (deprecated) is FALSE -.visium_image <- function( - image_path, - json_info = NULL, - micron_scale = FALSE, - verbose = NULL) { +.visium_image <- function(image_path, + json_info = NULL, + micron_scale = FALSE, + verbose = NULL) { # assume image already checked vmsg(.v = verbose, .initial = " - ", "found image") @@ -730,8 +762,8 @@ addVisiumPolygons <- function(gobject, if (is.null(visium_img_type)) { # if not recognized visium image type .gstop( - "\'image_path\' filename did not partial match either - \'lowres\' or \'hires\'. Ensure specified image is either the + "\'image_path\' filename did not partial match either + \'lowres\' or \'hires\'. Ensure specified image is either the Visium lowres or hires image and rename it accordingly" ) } @@ -793,9 +825,10 @@ addVisiumPolygons <- function(gobject, #' if image_file is a list. #' @returns giottoLargeImage #' @export -createMerscopeLargeImage <- function(image_file, - transforms_file, - name = "image") { +createMerscopeLargeImage <- function( + image_file, + transforms_file, + name = "image") { checkmate::assert_character(transforms_file) tfsDT <- data.table::fread(transforms_file) if (inherits(image_file, "character")) { @@ -836,12 +869,12 @@ createMerscopeLargeImage <- function(image_file, #' @title Create Vizgen MERSCOPE Giotto Object #' @name createGiottoMerscopeObject -#' @description Given the path to a MERSCOPE experiment directory, creates a +#' @description Given the path to a MERSCOPE experiment directory, creates a #' Giotto object. #' @param merscope_dir full path to the exported merscope directory -#' @param data_to_use which of either the 'subcellular' or 'aggregate' +#' @param data_to_use which of either the 'subcellular' or 'aggregate' #' information to use for object creation -#' @param FOVs which FOVs to use when building the subcellular object. +#' @param FOVs which FOVs to use when building the subcellular object. #' (default is NULL) #' NULL loads all FOVs (very slow) #' @param calculate_overlap whether to run \code{\link{calculateOverlapRaster}} @@ -851,9 +884,9 @@ createMerscopeLargeImage <- function(image_file, #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns a giotto object #' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a MERSCOPE output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a MERSCOPE output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this #' function matches against: #' \itemize{ #' \item{\strong{cell_boundaries} (folder .hdf5 files)} @@ -863,21 +896,22 @@ createMerscopeLargeImage <- function(image_file, #' \item{detected_transcripts\strong{metadata_file}.csv (file)} #' } #' @export -createGiottoMerscopeObject <- function(merscope_dir, - data_to_use = c("subcellular", "aggregate"), - FOVs = NULL, - poly_z_indices = 1:7, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoMerscopeObject <- function( + merscope_dir, + data_to_use = c("subcellular", "aggregate"), + FOVs = NULL, + poly_z_indices = 1:7, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + instructions = NULL, + cores = NA, + verbose = TRUE) { fovs <- NULL # 0. setup @@ -893,7 +927,8 @@ createGiottoMerscopeObject <- function(merscope_dir, # determine data to use data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) + arg = data_to_use, choices = c("subcellular", "aggregate") + ) # 1. test if folder structure exists and is as expected dir_items <- .read_merscope_folder( @@ -931,8 +966,10 @@ createGiottoMerscopeObject <- function(merscope_dir, verbose = verbose ) } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', + sep = "" + )) } return(merscope_gobject) @@ -941,21 +978,22 @@ createGiottoMerscopeObject <- function(merscope_dir, -#' @describeIn createGiottoMerscopeObject Create giotto object with +#' @describeIn createGiottoMerscopeObject Create giotto object with #' 'subcellular' workflow #' @param data_list list of loaded data from \code{\link{load_merscope_folder}} #' @keywords internal -.createGiottoMerscopeObject_subcellular <- function(data_list, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - cores = NA, - verbose = TRUE) { +.createGiottoMerscopeObject_subcellular <- function( + data_list, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + cores = NA, + verbose = TRUE) { feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL # unpack data_list @@ -977,10 +1015,12 @@ createGiottoMerscopeObject <- function(merscope_dir, blank_dt <- tx_dt[gene %in% blank_id, ] # extract transcript_id col and store as feature meta - feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) - blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) + feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE + ]) + blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE + ]) feat_dt[, c("transcript_id", "barcode_id") := NULL] blank_dt[, c("transcript_id", "barcode_id") := NULL] @@ -1011,13 +1051,14 @@ createGiottoMerscopeObject <- function(merscope_dir, -#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' +#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' #' workflow #' @param data_list list of loaded data from \code{\link{load_merscope_folder}} #' @keywords internal -.createGiottoMerscopeObject_aggregate <- function(data_list, - cores = NA, - verbose = TRUE) { +.createGiottoMerscopeObject_aggregate <- function( + data_list, + cores = NA, + verbose = TRUE) { # unpack data_list micronToPixelScale <- data_list$micronToPixelScale expr_dt <- data_list$expr_dt @@ -1037,14 +1078,15 @@ createGiottoMerscopeObject <- function(merscope_dir, #' @title Create Spatial Genomics Giotto Object #' @name createSpatialGenomicsObject #' @param sg_dir full path to the exported Spatial Genomics directory -#' @param instructions new instructions +#' @param instructions new instructions #' (e.g. result from createGiottoInstructions) #' @returns giotto object #' @description Given the path to a Spatial Genomics data directory, creates a #' Giotto object. #' @export -createSpatialGenomicsObject <- function(sg_dir = NULL, - instructions = NULL) { +createSpatialGenomicsObject <- function( + sg_dir = NULL, + instructions = NULL) { # Find files in Spatial Genomics directory dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") @@ -1085,20 +1127,20 @@ createSpatialGenomicsObject <- function(sg_dir = NULL, #' object. #' @param cosmx_dir full path to the exported cosmx directory #' @param data_to_use which type(s) of expression data to build the gobject with -#' Default is \code{'all'} information available. \code{'subcellular'} loads -#' the transcript coordinates only. \code{'aggregate'} loads the provided +#' Default is \code{'all'} information available. \code{'subcellular'} loads +#' the transcript coordinates only. \code{'aggregate'} loads the provided #' aggregated expression matrix. #' @param FOVs field of views to load (only affects subcellular data and images) -#' @param remove_background_polygon try to remove background polygon +#' @param remove_background_polygon try to remove background polygon #' (default: FALSE) #' @param background_algo algorithm to remove background polygon #' @param remove_unvalid_polygons remove unvalid polygons (default: TRUE) #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns a giotto object #' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a cosmx output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a cosmx output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this #' function matches against: #' \itemize{ #' \item{\strong{CellComposite} (folder of images)} @@ -1113,49 +1155,51 @@ createSpatialGenomicsObject <- function(sg_dir = NULL, #' #' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param #' \itemize{ -#' \item{'all' - loads and requires subcellular information from tx_file and +#' \item{'all' - loads and requires subcellular information from tx_file and #' fov_positions_file -#' and also the existing aggregated information +#' and also the existing aggregated information #' (expression, spatial locations, and metadata) #' from exprMat_file and metadata_file.} -#' \item{'subcellular' - loads and requires subcellular information from +#' \item{'subcellular' - loads and requires subcellular information from #' tx_file and #' fov_positions_file only.} -#' \item{'aggregate' - loads and requires the existing aggregate information -#' (expression, spatial locations, and metadata) from exprMat_file and +#' \item{'aggregate' - loads and requires the existing aggregate information +#' (expression, spatial locations, and metadata) from exprMat_file and #' metadata_file.} #' } #' -#' [\strong{Images}] Images in the default CellComposite, CellLabels, +#' [\strong{Images}] Images in the default CellComposite, CellLabels, #' CompartmentLabels, and CellOverlay -#' folders will be loaded as giotto largeImage objects in all workflows as -#' long as they are available. Additionally, CellComposite images will be +#' folders will be loaded as giotto largeImage objects in all workflows as +#' long as they are available. Additionally, CellComposite images will be #' converted to giotto image objects, making plotting with #' these image objects more responsive when accessing them from a server. #' \code{\link{showGiottoImageNames}} can be used to see the available images. #' @export -createGiottoCosMxObject <- function(cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE) { +createGiottoCosMxObject <- function( + cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE) { # 0. setup cosmx_dir <- path.expand(cosmx_dir) # determine data to use data_to_use <- match.arg( - arg = data_to_use, choices = c("all", "subcellular", "aggregate")) + arg = data_to_use, choices = c("all", "subcellular", "aggregate") + ) if (data_to_use %in% c("all", "aggregate")) { - stop(wrap_txt('Convenience workflows "all" and "aggregate" are not + stop(wrap_txt('Convenience workflows "all" and "aggregate" are not available yet')) } # Define for data.table - fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- + fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- CenterX_global_px <- CenterY_global_px <- CenterX_local_px <- CenterY_local_px <- NULL @@ -1219,15 +1263,14 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' @inheritParams createGiottoCosMxObject #' @returns giotto object #' @keywords internal -.createGiottoCosMxObject_subcellular <- function( - dir_items, - FOVs = NULL, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL) { +.createGiottoCosMxObject_subcellular <- function(dir_items, + FOVs = NULL, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL) { target <- fov <- NULL # load tx detections and FOV offsets ------------------------------------- # @@ -1247,7 +1290,8 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] data.table::setcolorder( - tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) + tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov") + ) # feature detection type splitting --------------------------------------- # @@ -1273,13 +1317,17 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, if (isTRUE(verbose)) message("Loading image information...") composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("*", x, "*"))) + dir_items$`CellComposite folder`, paste0("*", x, "*") + )) cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("*", x, "*"))) + dir_items$`CellLabels folder`, paste0("*", x, "*") + )) compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) + dir_items$`CompartmentLabels folder`, paste0("*", x, "*") + )) cellOverlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("*", x, "*"))) + dir_items$`CellOverlay folder`, paste0("*", x, "*") + )) # Missing warnings if (length(composite_dir) == 0) { @@ -1314,11 +1362,15 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, feat_coord <- feat_coords_all[fov == as.numeric(x)] data.table::setnames( - feat_coord, old = coord_oldnames, new = coord_newnames) + feat_coord, + old = coord_oldnames, new = coord_newnames + ) # neg probe info neg_coord <- neg_coords_all[fov == as.numeric(x)] data.table::setnames( - neg_coord, old = coord_oldnames, new = coord_newnames) + neg_coord, + old = coord_oldnames, new = coord_newnames + ) # build giotto object -------------------------------------- # @@ -1344,8 +1396,9 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, # find centroids as spatial locations ---------------------- # - if (isTRUE(verbose)) + if (isTRUE(verbose)) { message("Finding polygon centroids as cell spatial locations...") + } fov_subset <- addSpatialCentroidLocations( fov_subset, poly_info = "cell", @@ -1394,7 +1447,7 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, images = gImage_list ) - # convert to MG for faster loading (particularly relevant for + # convert to MG for faster loading (particularly relevant for # pulling from server) # TODO remove this fov_subset <- convertGiottoLargeImageToMG( @@ -1439,10 +1492,11 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' @inheritParams createGiottoCosMxObject #' @returns giotto object #' @keywords internal -.createGiottoCosMxObject_aggregate <- function(dir_items, - cores, - verbose = TRUE, - instructions = NULL) { +.createGiottoCosMxObject_aggregate <- function( + dir_items, + cores, + verbose = TRUE, + instructions = NULL) { data_to_use <- fov <- NULL data_list <- .load_cosmx_folder_aggregate( @@ -1479,19 +1533,25 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, # load in images img_ID <- data.table::data.table( fov = fov_shifts[, fov], - img_name = paste0("fov", - sprintf("%03d", fov_shifts[, fov]), "-image") + img_name = paste0( + "fov", + sprintf("%03d", fov_shifts[, fov]), "-image" + ) ) if (isTRUE(verbose)) message("Attaching image files...") composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("/*"))) + dir_items$`CellComposite folder`, paste0("/*") + )) cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("/*"))) + dir_items$`CellLabels folder`, paste0("/*") + )) compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("/*"))) + dir_items$`CompartmentLabels folder`, paste0("/*") + )) overlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("/*"))) + dir_items$`CellOverlay folder`, paste0("/*") + )) if (length(cellLabel_imgList) > 0) { cellLabel_imgList <- lapply(cellLabel_dir, function(x) { @@ -1506,8 +1566,9 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, if (length(compartmentLabel_dir) > 0) { compartmentLabel_imgList <- lapply( compartmentLabel_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + } + ) } if (length(overlay_dir) > 0) { overlay_imgList <- lapply(overlay_dir, function(x) { @@ -1520,30 +1581,31 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, -#' @title Load and create a CosMx Giotto object from subcellular and aggregate +#' @title Load and create a CosMx Giotto object from subcellular and aggregate #' info #' @name .createGiottoCosMxObject_all #' @param dir_items list of full directory paths from \code{.read_cosmx_folder} #' @inheritParams createGiottoCosMxObject #' @returns giotto object -#' @details Both \emph{subcellular} +#' @details Both \emph{subcellular} #' (subellular transcript detection information) and -#' \emph{aggregate} (aggregated detection count matrices by cell polygon from +#' \emph{aggregate} (aggregated detection count matrices by cell polygon from #' NanoString) #' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' #' spatial units in order to denote the difference in origin of the two. #' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate #' .createGiottoCosMxObject_subcellular #' @keywords internal -.createGiottoCosMxObject_all <- function(dir_items, - FOVs, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL, - ...) { +.createGiottoCosMxObject_all <- function( + dir_items, + FOVs, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL, + ...) { # 1. create subcellular giotto as spat_unit 'cell' cosmx_gobject <- .createGiottoCosMxObject_subcellular( dir_items = dir_items, @@ -1570,15 +1632,18 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, protM <- agg_data$protM spM <- agg_data$spM - # add in pre-generated aggregated expression matrix information for 'all' + # add in pre-generated aggregated expression matrix information for 'all' # workflow # Add aggregate expression information - if (isTRUE(verbose)) wrap_msg( - 'Appending provided aggregate expression data as... + if (isTRUE(verbose)) { + wrap_msg( + 'Appending provided aggregate expression data as... spat_unit: "cell_agg" feat_type: "rna" - name: "raw"') + name: "raw"' + ) + } # add expression data to expression slot s4_expr <- createExprObj( name = "raw", @@ -1591,13 +1656,19 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) # Add spatial locations - if (isTRUE(verbose)) wrap_msg( - 'Appending metadata provided spatial locations data as... + if (isTRUE(verbose)) { + wrap_msg( + 'Appending metadata provided spatial locations data as... --> spat_unit: "cell_agg" name: "raw" - --> spat_unit: "cell" name: "raw_fov"') - if (isTRUE(verbose)) wrap_msg( - 'Polygon centroid derived spatial locations assigned as... - --> spat_unit: "cell" name: "raw" (default)') + --> spat_unit: "cell" name: "raw_fov"' + ) + } + if (isTRUE(verbose)) { + wrap_msg( + 'Polygon centroid derived spatial locations assigned as... + --> spat_unit: "cell" name: "raw" (default)' + ) + } locsObj <- create_spat_locs_obj( name = "raw", @@ -1613,8 +1684,9 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, ) cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) - cosmx_gobject <- set_spatial_locations(cosmx_gobject, - spatlocs = locsObj_fov) + cosmx_gobject <- set_spatial_locations(cosmx_gobject, + spatlocs = locsObj_fov + ) # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit agg_cell_ID <- colnames(s4_expr[]) @@ -1658,24 +1730,24 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' @title Create 10x Xenium Giotto Object #' @name createGiottoXeniumObject -#' @description Given the path to a Xenium experiment output folder, creates a +#' @description Given the path to a Xenium experiment output folder, creates a #' Giotto object #' @param xenium_dir full path to the exported xenium directory #' @param data_to_use which type(s) of expression data to build the gobject with #' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') #' @param load_format files formats from which to load the data. Either `csv` or #' `parquet` currently supported. -#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 +#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 #' file. Default is \code{TRUE} #' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene #' expression matrix -#' @param bounds_to_load vector of boundary information to load +#' @param bounds_to_load vector of boundary information to load #' (e.g. \code{'cell'} #' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both #' at the same time.) -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included #' as a subcellular transcript detection (default = 20) -#' @param key_list (advanced) list of grep-based keywords to split the +#' @param key_list (advanced) list of grep-based keywords to split the #' subcellular feature detections by feature type. See details #' @inheritParams get10Xmatrix #' @inheritParams GiottoClass::createGiottoObjectSubcellular @@ -1686,20 +1758,20 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' Xenium provides info on feature detections that include more than only the #' Gene Expression specific probes. Additional probes for QC are included: #' \emph{blank codeword}, \emph{negative control codeword}, and -#' \emph{negative control probe}. These additional QC probes each occupy and -#' are treated as their own feature types so that they can largely remain +#' \emph{negative control probe}. These additional QC probes each occupy and +#' are treated as their own feature types so that they can largely remain #' independent of the gene expression information. #' #' [\strong{key_list}] #' Related to \code{data_to_use = 'subcellular'} workflow only: -#' Additional QC probe information is in the subcellular feature detections -#' information and must be separated from the gene expression information +#' Additional QC probe information is in the subcellular feature detections +#' information and must be separated from the gene expression information #' during processing. -#' The QC probes have prefixes that allow them to be selected from the rest of +#' The QC probes have prefixes that allow them to be selected from the rest of #' the feature IDs. -#' Giotto uses a named list of keywords (\code{key_list}) to select these QC -#' probes, with the list names being the names that will be assigned as the -#' feature type of these feature detections. The default list is used when +#' Giotto uses a named list of keywords (\code{key_list}) to select these QC +#' probes, with the list names being the names that will be assigned as the +#' feature type of these feature detections. The default list is used when #' \code{key_list} = NULL. #' #' Default list: @@ -1713,30 +1785,33 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' map to any of the keys. #' #' @export -createGiottoXeniumObject <- function(xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoXeniumObject <- function( + xenium_dir, + data_to_use = c("subcellular", "aggregate"), + load_format = "csv", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), + gene_column_index = 1, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, + verbose = TRUE) { # 0. setup xenium_dir <- path.expand(xenium_dir) # Determine data to load data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) + arg = data_to_use, choices = c("subcellular", "aggregate") + ) # Determine load formats - load_format <- "csv" # TODO Remove this and add as param once other options + load_format <- "csv" # TODO Remove this and add as param once other options # are available load_format <- match.arg( - arg = load_format, choices = c("csv", "parquet", "zarr")) + arg = load_format, choices = c("csv", "parquet", "zarr") + ) # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) @@ -1837,19 +1912,20 @@ createGiottoXeniumObject <- function(xenium_dir, #' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} #' @param key_list regex-based search keys for feature IDs to allow separation #' into separate giottoPoints objects by feat_type -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included #' as a subcellular transcript detection (default = 20) #' @inheritParams get10Xmatrix #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate #' @keywords internal -.createGiottoXeniumObject_subcellular <- function(data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_subcellular <- function( + data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, + verbose = TRUE) { # data.table vars qv <- NULL @@ -1866,8 +1942,10 @@ createGiottoXeniumObject <- function(xenium_dir, vmsg("> points data prep...", .v = verbose) # filter by qv_threshold - vmsg("> filtering feature detections for Phred score >= ", - qv_threshold, .v = verbose) + vmsg("> filtering feature detections for Phred score >= ", + qv_threshold, + .v = verbose + ) n_before <- tx_dt[, .N] tx_dt_filtered <- tx_dt[qv >= qv_threshold] n_after <- tx_dt_filtered[, .N] @@ -1884,7 +1962,8 @@ createGiottoXeniumObject <- function(xenium_dir, # discover feat_IDs for each feat_type all_IDs <- tx_dt_filtered[, unique(feat_ID)] feat_types_IDs <- lapply( - key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) + key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)] + ) rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) feat_types_IDs <- append(rna, feat_types_IDs) @@ -1937,11 +2016,12 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular #' @keywords internal -.createGiottoXeniumObject_aggregate <- function(data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_aggregate <- function( + data_list, + # include_analysis = FALSE, + instructions = NULL, + cores = NA, + verbose = TRUE) { # Unpack data_list info feat_meta <- data_list$feat_meta cell_meta <- data_list$cell_meta @@ -2002,10 +2082,11 @@ createGiottoXeniumObject <- function(xenium_dir, #' @describeIn read_data_folder Read a structured MERSCOPE folder #' @keywords internal -.read_merscope_folder <- function(merscope_dir, - data_to_use, - cores = NA, - verbose = NULL) { +.read_merscope_folder <- function( + merscope_dir, + data_to_use, + cores = NA, + verbose = NULL) { # prepare dir_items list dir_items <- list( `boundary info` = "*cell_boundaries*", @@ -2064,12 +2145,14 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns path_list a list of cosmx files discovered and their filepaths. NULL #' values denote missing items #' @keywords internal -.read_cosmx_folder <- function(cosmx_dir, - verbose = TRUE) { +.read_cosmx_folder <- function( + cosmx_dir, + verbose = TRUE) { ch <- box_chars() - if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) + if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) { stop("The full path to a cosmx directory must be given.") + } vmsg("A structured CosMx directory will be used\n", .v = verbose) # find directories (length = 1 if present, length = 0 if missing) @@ -2084,7 +2167,8 @@ createGiottoXeniumObject <- function(xenium_dir, `metadata file` = "*metadata_file*" ) dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) + dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x)) + ) dir_items_lengths <- lengths(dir_items) if (isTRUE(verbose)) { @@ -2100,7 +2184,7 @@ createGiottoXeniumObject <- function(xenium_dir, # select first directory in list if multiple are detected if (any(dir_items_lengths > 1)) { - warning("Multiple matches for expected subdirectory item(s).\n + warning("Multiple matches for expected subdirectory item(s).\n First matching item selected") multiples <- which(dir_items_lengths > 1) @@ -2124,12 +2208,13 @@ createGiottoXeniumObject <- function(xenium_dir, #' @keywords internal #' @returns path_list a list of xenium files discovered and their filepaths. NULL #' values denote missing items -.read_xenium_folder <- function(xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE) { +.read_xenium_folder <- function( + xenium_dir, + data_to_use = "subcellular", + bounds_to_load = c("cell"), + load_format = "csv", + h5_expression = FALSE, + verbose = TRUE) { # Check needed packages if (load_format == "parquet") { package_check(pkg_name = "arrow", repository = "CRAN") @@ -2145,8 +2230,9 @@ createGiottoXeniumObject <- function(xenium_dir, # 0. test if folder structure exists and is as expected - if (is.null(xenium_dir) | !dir.exists(xenium_dir)) + if (is.null(xenium_dir) | !dir.exists(xenium_dir)) { stop("The full path to a xenium directory must be given.") + } vmsg("A structured Xenium directory will be used\n", .v = verbose) # find items (length = 1 if present, length = 0 if missing) @@ -2162,7 +2248,8 @@ createGiottoXeniumObject <- function(xenium_dir, ) dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) + dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x)) + ) dir_items_lengths <- lengths(dir_items) if (isTRUE(verbose)) { @@ -2172,10 +2259,12 @@ createGiottoXeniumObject <- function(xenium_dir, if (dir_items_lengths[[item]] > 0) { message(ch$s, "> ", item, " found") - for (item_i in seq_along(dir_items[[item]])) { + for (item_i in seq_along(dir_items[[item]])) { # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) + subItem <- gsub( + pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]] + ) message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) } } else { @@ -2186,24 +2275,30 @@ createGiottoXeniumObject <- function(xenium_dir, if (data_to_use == "subcellular") { # necessary items - if (item %in% c("boundary info", "raw transcript info")) + if (item %in% c("boundary info", "raw transcript info")) { stop(item, " is missing") + } # optional items if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata")) + "image info", "experiment info (.xenium)", + "panel metadata" + )) { warning(item, " is missing (optional)") - # items to ignore: analysis info, cell feature matrix, + } + # items to ignore: analysis info, cell feature matrix, # cell metadata } else if (data_to_use == "aggregate") { # necessary items - if (item %in% c("cell feature matrix", "cell metadata")) + if (item %in% c("cell feature matrix", "cell metadata")) { stop(item, " is missing") + } # optional items if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata", "analysis info")) + "image info", "experiment info (.xenium)", + "panel metadata", "analysis info" + )) { warning(item, " is missing (optional)") + } # items to ignore: boundary info, raw transcript info } } @@ -2217,45 +2312,55 @@ createGiottoXeniumObject <- function(xenium_dir, # **** transcript info **** tx_path <- NULL tx_path <- dir_items$`raw transcript info`[grepl( - pattern = load_format, dir_items$`raw transcript info`)] + pattern = load_format, dir_items$`raw transcript info` + )] # **** cell metadata **** cell_meta_path <- NULL cell_meta_path <- dir_items$`cell metadata`[grepl( - pattern = load_format, dir_items$`cell metadata`)] + pattern = load_format, dir_items$`cell metadata` + )] # **** boundary info **** # Select bound load format if (load_format != "zarr") { # No zarr available for boundary info dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = load_format, dir_items$`boundary info`)] + pattern = load_format, dir_items$`boundary info` + )] } else { dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = "csv", dir_items$`boundary info`)] + pattern = "csv", dir_items$`boundary info` + )] } # Organize bound paths by type of bound (bounds_to_load param) bound_paths <- NULL bound_names <- bounds_to_load bounds_to_load <- as.list(bounds_to_load) - bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ - grepl(pattern = x, dir_items$`boundary info`)]) + bound_paths <- lapply(bounds_to_load, function(x) { + dir_items$`boundary info`[ + grepl(pattern = x, dir_items$`boundary info`) + ] + }) names(bound_paths) <- bound_names # **** aggregated expression info **** agg_expr_path <- NULL if (isTRUE(h5_expression)) { # h5 expression matrix loading is default agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "h5", dir_items$`cell feature matrix`)] + pattern = "h5", dir_items$`cell feature matrix` + )] } else if (load_format == "zarr") { agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "zarr", dir_items$`cell feature matrix`)] + pattern = "zarr", dir_items$`cell feature matrix` + )] } else { # No parquet for aggregated expression - default to normal 10x loading agg_expr_path <- dir_items$`cell feature matrix`[sapply( - dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] + dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x) + )] if (length(agg_expr_path) == 0) { stop(wrap_txt( "Expression matrix cannot be loaded.\n - Has cell_feature_matrix(.tar.gz) been unpacked into a + Has cell_feature_matrix(.tar.gz) been unpacked into a directory?" )) } @@ -2300,7 +2405,7 @@ createGiottoXeniumObject <- function(xenium_dir, #' @title Load MERSCOPE data from folder #' @name load_merscope_folder -#' @param dir_items list of full filepaths from +#' @param dir_items list of full filepaths from #' \code{\link{.read_merscope_folder}} #' @inheritParams createGiottoMerscopeObject #' @returns list of loaded-in MERSCOPE data @@ -2308,12 +2413,13 @@ NULL #' @rdname load_merscope_folder #' @keywords internal -.load_merscope_folder <- function(dir_items, - data_to_use, - fovs = NULL, - poly_z_indices = 1L:7L, - cores = NA, - verbose = TRUE) { +.load_merscope_folder <- function( + dir_items, + data_to_use, + fovs = NULL, + poly_z_indices = 1L:7L, + cores = NA, + verbose = TRUE) { # 1. load data_to_use-specific if (data_to_use == "subcellular") { data_list <- .load_merscope_folder_subcellular( @@ -2332,17 +2438,22 @@ NULL verbose = verbose ) } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', + sep = "" + )) } # 2. Load images if available if (!is.null(dir_items$`image info`)) { ## micron to px scaling factor micronToPixelScale <- Sys.glob(paths = file.path( - dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] + dir_items$`image info`, "*micron_to_mosaic_pixel_transform*" + ))[[1]] micronToPixelScale <- data.table::fread( - micronToPixelScale, nThread = cores) + micronToPixelScale, + nThread = cores + ) # add to data_list data_list$micronToPixelScale <- micronToPixelScale @@ -2350,14 +2461,17 @@ NULL ## determine types of stains images_filenames <- list.files(dir_items$`image info`) bound_stains_filenames <- images_filenames[ - grep(pattern = ".tif", images_filenames)] + grep(pattern = ".tif", images_filenames) + ] bound_stains_types <- sapply(strsplit( - bound_stains_filenames, "_"), `[`, 2) + bound_stains_filenames, "_" + ), `[`, 2) bound_stains_types <- unique(bound_stains_types) img_list <- lapply_flex(bound_stains_types, function(stype) { img_paths <- Sys.glob(paths = file.path( - dir_items$`image info`, paste0("*", stype, "*"))) + dir_items$`image info`, paste0("*", stype, "*") + )) lapply_flex(img_paths, function(img) { createGiottoLargeImage(raster_object = img) @@ -2376,16 +2490,19 @@ NULL #' @describeIn load_merscope_folder Load items for 'subcellular' workflow #' @keywords internal -.load_merscope_folder_subcellular <- function(dir_items, - data_to_use, - cores = NA, - poly_z_indices = 1L:7L, - verbose = TRUE, - fovs = NULL) { +.load_merscope_folder_subcellular <- function( + dir_items, + data_to_use, + cores = NA, + poly_z_indices = 1L:7L, + verbose = TRUE, + fovs = NULL) { if (isTRUE(verbose)) message("Loading transcript level info...") if (is.null(fovs)) { tx_dt <- data.table::fread( - dir_items$`raw transcript info`, nThread = cores) + dir_items$`raw transcript info`, + nThread = cores + ) } else { message("Selecting FOV subset transcripts") tx_dt <- fread_colmatch( @@ -2398,7 +2515,8 @@ NULL } tx_dt[, c("x", "y") := NULL] # remove unneeded cols data.table::setcolorder( - tx_dt, c("gene", "global_x", "global_y", "global_z")) + tx_dt, c("gene", "global_x", "global_y", "global_z") + ) if (isTRUE(verbose)) message("Loading polygon info...") poly_info <- readPolygonFilesVizgenHDF5( @@ -2422,18 +2540,23 @@ NULL #' @describeIn load_merscope_folder Load items for 'aggregate' workflow #' @keywords internal -.load_merscope_folder_aggregate <- function(dir_items, - data_to_use, - cores = NA, - verbose = TRUE) { +.load_merscope_folder_aggregate <- function( + dir_items, + data_to_use, + cores = NA, + verbose = TRUE) { # metadata is polygon-related measurements vmsg("Loading cell metadata...", .v = verbose) cell_metadata_file <- data.table::fread( - dir_items$`cell metadata`, nThread = cores) + dir_items$`cell metadata`, + nThread = cores + ) vmsg("Loading expression matrix", .v = verbose) expr_dt <- data.table::fread( - dir_items$`cell feature matrix`, nThread = cores) + dir_items$`cell feature matrix`, + nThread = cores + ) data_list <- list( @@ -2457,15 +2580,16 @@ NULL #' @title Load CosMx folder subcellular info #' @name .load_cosmx_folder_subcellular #' @description loads in the feature detections information. Note that the mask -#' images are still required for a working subcellular object, and those are +#' images are still required for a working subcellular object, and those are #' loaded in \code{\link{.createGiottoCosMxObject_subcellular}} #' @inheritParams createGiottoCosMxObject #' @returns list #' @keywords internal -.load_cosmx_folder_subcellular <- function(dir_items, - FOVs = NULL, - cores, - verbose = TRUE) { +.load_cosmx_folder_subcellular <- function( + dir_items, + FOVs = NULL, + cores, + verbose = TRUE) { vmsg(.v = verbose, "Loading subcellular information...") # subcellular checks @@ -2479,7 +2603,8 @@ NULL # FOVs to load vmsg(.v = verbose, "Loading FOV offsets...") fov_offset_file <- fread( - input = dir_items$`fov positions file`, nThread = cores) + input = dir_items$`fov positions file`, nThread = cores + ) if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs FOV_ID <- as.list(sprintf("%03d", FOVs)) @@ -2487,7 +2612,8 @@ NULL vmsg(.v = verbose, "Loading transcript level info...") tx_coord_all <- fread( - input = dir_items$`transcript locations file`, nThread = cores) + input = dir_items$`transcript locations file`, nThread = cores + ) vmsg(.v = verbose, "Subcellular load done") data_list <- list( @@ -2506,11 +2632,12 @@ NULL #' @inheritParams createGiottoCosMxObject #' @returns list #' @keywords internal -.load_cosmx_folder_aggregate <- function(dir_items, - cores, - verbose = TRUE) { +.load_cosmx_folder_aggregate <- function( + dir_items, + cores, + verbose = TRUE) { # data.table vars - fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- + fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- CenterY_global_px <- CenterX_local_px <- CenterY_local_px <- x_shift <- y_shift <- NULL @@ -2518,15 +2645,18 @@ NULL vmsg(.v = verbose, "Loading provided aggregated information...") # aggregate checks - if (!file.exists(dir_items$`expression matrix file`)) + if (!file.exists(dir_items$`expression matrix file`)) { stop(wrap_txt("No expression matrix file (.csv) detected")) - if (!file.exists(dir_items$`metadata file`)) - stop(wrap_txt("No metadata file (.csv) detected. Needed for cell + } + if (!file.exists(dir_items$`metadata file`)) { + stop(wrap_txt("No metadata file (.csv) detected. Needed for cell spatial locations.")) + } # read in aggregate data expr_mat <- fread( - input = dir_items$`expression matrix file`, nThread = cores) + input = dir_items$`expression matrix file`, nThread = cores + ) metadata <- fread(input = dir_items$`metadata file`, nThread = cores) # setorder expression and spatlocs @@ -2536,12 +2666,14 @@ NULL # generate unique cell IDs expr_mat[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID + )] expr_mat <- expr_mat[, fov := NULL] metadata[, fov_cell_ID := cell_ID] metadata[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID + )] # reorder data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) @@ -2562,11 +2694,15 @@ NULL spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) data.table::setnames( - spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) + spatlocs_fov, + old = spatloc_oldnames_fov, new = spatloc_newnames + ) # cleanup metadata and spatlocs - metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", - "CenterX_local_px", "CenterY_local_px") := NULL] + metadata <- metadata[, c( + "CenterX_global_px", "CenterY_global_px", + "CenterX_local_px", "CenterY_local_px" + ) := NULL] # find unique cell_IDs present in both expression and metadata giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) @@ -2579,27 +2715,35 @@ NULL # take all mean intensity protein information except for MembraneStain and DAPI protein_meta_cols <- colnames(metadata) protein_meta_cols <- protein_meta_cols[ - grepl(pattern = "Mean.*", x = protein_meta_cols)] + grepl(pattern = "Mean.*", x = protein_meta_cols) + ] protein_meta_cols <- protein_meta_cols[ - !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] + !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI") + ] protein_meta_cols <- c("cell_ID", protein_meta_cols) prot_expr <- metadata[, protein_meta_cols, with = FALSE] prot_cell_ID <- metadata[, cell_ID] - protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), - dimnames = list(prot_expr[[1]], - colnames(prot_expr[, -1])), - sparse = FALSE) + protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), + dimnames = list( + prot_expr[[1]], + colnames(prot_expr[, -1]) + ), + sparse = FALSE + ) protM <- t_flex(protM) # convert expression to sparse matrix - spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), - dimnames = list(expr_mat[[1]], - colnames(expr_mat[, -1])), - sparse = TRUE) + spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), + dimnames = list( + expr_mat[[1]], + colnames(expr_mat[, -1]) + ), + sparse = TRUE + ) spM <- t_flex(spM) - ## Ready for downstream aggregate gobject creation or appending into + ## Ready for downstream aggregate gobject creation or appending into # existing subcellular Giotto object ## data_list <- list( @@ -2631,14 +2775,15 @@ NULL #' @rdname load_xenium_folder #' @keywords internal -.load_xenium_folder <- function(path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE) { +.load_xenium_folder <- function( + path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE) { data_list <- switch(load_format, "csv" = .load_xenium_folder_csv( path_list = path_list, @@ -2667,13 +2812,14 @@ NULL #' @describeIn load_xenium_folder Load from csv files #' @keywords internal -.load_xenium_folder_csv <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_csv <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL @@ -2682,8 +2828,10 @@ NULL fdata_path <- path_list$panel_meta_path[[1]] fdata_ext <- GiottoUtils::file_extension(fdata_path) if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json(path = fdata_path, - gene_ids = h5_gene_ids) + feat_meta <- .load_xenium_panel_json( + path = fdata_path, + gene_ids = h5_gene_ids + ) } else { feat_meta <- data.table::fread(fdata_path, nThread = cores) colnames(feat_meta)[[1]] <- "feat_ID" @@ -2715,7 +2863,9 @@ NULL } colnames(features_dt) <- c("id", "feat_ID", "feat_class") feat_meta <- merge( - features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + features_dt[, c(2, 3)], feat_meta, + all.x = TRUE, by = "feat_ID" + ) GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) @@ -2735,7 +2885,9 @@ NULL # **** aggregate info **** GiottoUtils::vmsg("loading cell metadata...", .v = verbose) cell_meta <- data.table::fread( - path_list$cell_meta_path[[1]], nThread = cores) + path_list$cell_meta_path[[1]], + nThread = cores + ) if (data_to_use == "aggregate") { GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) @@ -2772,13 +2924,14 @@ NULL #' @describeIn load_xenium_folder Load from parquet files #' @keywords internal -.load_xenium_folder_parquet <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_parquet <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL # dplyr variable @@ -2790,7 +2943,8 @@ NULL fdata_ext <- GiottoUtils::file_extension(fdata_path) if ("json" %in% fdata_ext) { feat_meta <- .load_xenium_panel_json( - path = fdata_path, gene_ids = h5_gene_ids) + path = fdata_path, gene_ids = h5_gene_ids + ) } else { feat_meta <- data.table::fread(fdata_path, nThread = cores) colnames(feat_meta)[[1]] <- "feat_ID" @@ -2818,15 +2972,18 @@ NULL h5$close_all() }) } else { - features_dt <- arrow::read_tsv_arrow(paste0( - path_list$agg_expr_path, "/features.tsv.gz"), + features_dt <- arrow::read_tsv_arrow( + paste0( + path_list$agg_expr_path, "/features.tsv.gz" + ), col_names = FALSE ) %>% data.table::setDT() } colnames(features_dt) <- c("id", "feat_ID", "feat_class") feat_meta <- merge(features_dt[ - , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + , c(2, 3) + ], feat_meta, all.x = TRUE, by = "feat_ID") vmsg("Loading transcript level info...", .v = verbose) tx_dt <- arrow::read_parquet( @@ -2834,10 +2991,12 @@ NULL as_data_frame = FALSE ) %>% dplyr::mutate( - transcript_id = cast(transcript_id, arrow::string())) %>% + transcript_id = cast(transcript_id, arrow::string()) + ) %>% dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% dplyr::mutate( - feature_name = cast(feature_name, arrow::string())) %>% + feature_name = cast(feature_name, arrow::string()) + ) %>% as.data.frame() %>% data.table::setDT() data.table::setnames( @@ -2939,47 +3098,48 @@ NULL #' (ii) fragment files, or (iii) bam files. #' @param genome A string indicating the default genome to be used for all ArchR #' functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -#' This value is stored as a global environment variable, not part of the +#' This value is stored as a global environment variable, not part of the #' ArchRProject. #' This can be overwritten on a per-function basis using the given function's #' geneAnnotationand genomeAnnotation parameter. For something other than one of -#' the currently supported, see createGeneAnnnotation() and +#' the currently supported, see createGeneAnnnotation() and #' createGenomeAnnnotation() -#' @param createArrowFiles_params list of parameters passed to +#' @param createArrowFiles_params list of parameters passed to #' `ArchR::createArrowFiles` #' @param ArchRProject_params list of parameters passed to `ArchR::ArchRProject` -#' @param addIterativeLSI_params list of parameters passed to +#' @param addIterativeLSI_params list of parameters passed to #' `ArchR::addIterativeLSI` #' @param threads number of threads to use. Default = `ArchR::getArchRThreads()` #' @param force Default = FALSE #' @param verbose Default = TRUE #' -#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and +#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and #' TileMatrix-based LSI #' @export -createArchRProj <- function(fragmentsPath, - genome = c("hg19", "hg38", "mm9", "mm10"), - createArrowFiles_params = list( - sampleNames = "sample1", - minTSS = 0, - minFrags = 0, - maxFrags = 1e+07, - minFragSize = 10, - maxFragSize = 2000, - offsetPlus = 0, - offsetMinus = 0, - TileMatParams = list(tileSize = 5000) - ), - ArchRProject_params = list( - outputDirectory = getwd(), - copyArrows = FALSE - ), - addIterativeLSI_params = list(), - threads = ArchR::getArchRThreads(), - force = FALSE, - verbose = TRUE) { +createArchRProj <- function( + fragmentsPath, + genome = c("hg19", "hg38", "mm9", "mm10"), + createArrowFiles_params = list( + sampleNames = "sample1", + minTSS = 0, + minFrags = 0, + maxFrags = 1e+07, + minFragSize = 10, + maxFragSize = 2000, + offsetPlus = 0, + offsetMinus = 0, + TileMatParams = list(tileSize = 5000) + ), + ArchRProject_params = list( + outputDirectory = getwd(), + copyArrows = FALSE + ), + addIterativeLSI_params = list(), + threads = ArchR::getArchRThreads(), + force = FALSE, + verbose = TRUE) { if (!requireNamespace("ArchR")) { - message('ArchR is needed. Install the package using + message('ArchR is needed. Install the package using remotes::install_github("GreenleafLab/ArchR")') } @@ -3029,27 +3189,32 @@ createArchRProj <- function(fragmentsPath, #' @param archRproj ArchR project #' @param expression expression information #' @param expression_feat Giotto object available features (e.g. atac, rna, ...) -#' @param spatial_locs data.table or data.frame with coordinates for cell +#' @param spatial_locs data.table or data.frame with coordinates for cell #' centroids -#' @param sampleNames A character vector containing the ArchR project sample +#' @param sampleNames A character vector containing the ArchR project sample #' name #' @param ... additional arguments passed to `createGiottoObject` #' #' @returns A Giotto object with at least an atac or epigenetic modality #' #' @export -createGiottoObjectfromArchR <- function(archRproj, - expression = NULL, - expression_feat = "atac", - spatial_locs = NULL, - sampleNames = "sample1", - ...) { +createGiottoObjectfromArchR <- function( + archRproj, + expression = NULL, + expression_feat = "atac", + spatial_locs = NULL, + sampleNames = "sample1", + ...) { # extract GeneScoreMatrix GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( - archRproj) - GeneScoreMatrix <- slot(slot( - GeneScoreMatrix_summarizedExperiment, "assays"), - "data")[["GeneScoreMatrix"]] + archRproj + ) + GeneScoreMatrix <- slot( + slot( + GeneScoreMatrix_summarizedExperiment, "assays" + ), + "data" + )[["GeneScoreMatrix"]] ## get cell names cell_names <- colnames(GeneScoreMatrix) @@ -3057,8 +3222,10 @@ createGiottoObjectfromArchR <- function(archRproj, cell_names <- gsub("-1", "", cell_names) ## get gene names - gene_names <- slot(GeneScoreMatrix_summarizedExperiment, - "elementMetadata")[["name"]] + gene_names <- slot( + GeneScoreMatrix_summarizedExperiment, + "elementMetadata" + )[["name"]] ## replace colnames with cell names colnames(GeneScoreMatrix) <- cell_names diff --git a/R/cross_section.R b/R/cross_section.R index f1fac6960..ff93ccaf2 100644 --- a/R/cross_section.R +++ b/R/cross_section.R @@ -8,43 +8,44 @@ #' @description create a crossSection object #' @param name name of cross section object. (default = cross_section) #' @param method method to define the cross section plane. -#' @param thickness_unit unit of the virtual section thickness. If "cell", -#' average size of the observed cells is used as length unit. If "natural", +#' @param thickness_unit unit of the virtual section thickness. If "cell", +#' average size of the observed cells is used as length unit. If "natural", #' the unit of cell location coordinates is used.(default = cell) #' @param slice_thickness thickness of slice -#' @param cell_distance_estimate_method method to estimate average distance +#' @param cell_distance_estimate_method method to estimate average distance #' between neighboring cells. (default = mean) -#' @param extend_ratio deciding the span of the cross section meshgrid, as a -#' ratio of extension compared to the borders of the virtual tissue section. +#' @param extend_ratio deciding the span of the cross section meshgrid, as a +#' ratio of extension compared to the borders of the virtual tissue section. #' (default = 0.2) -#' @param plane_equation a numerical vector of length 4, in the form of +#' @param plane_equation a numerical vector of length 4, in the form of #' c(A,B,C,D), which defines plane Ax+By+Cz=D. -#' @param mesh_grid_n number of meshgrid lines to generate along both +#' @param mesh_grid_n number of meshgrid lines to generate along both #' directions for the cross section plane. #' @param mesh_obj object that stores the cross section meshgrid information. #' @param cell_subset cells selected by the cross section -#' @param cell_subset_spatial_locations locations of cells selected by the +#' @param cell_subset_spatial_locations locations of cells selected by the #' cross section -#' @param cell_subset_projection_locations 3D projection coordinates of +#' @param cell_subset_projection_locations 3D projection coordinates of #' selected cells onto the cross section plane #' @param cell_subset_projection_PCA pca of projection coordinates -#' @param cell_subset_projection_coords 2D PCA coordinates of selected cells +#' @param cell_subset_projection_coords 2D PCA coordinates of selected cells #' in the cross section plane #' @returns crossSection object -create_crossSection_object <- function(name = NULL, - method = NULL, - thickness_unit = NULL, - slice_thickness = NULL, - cell_distance_estimate_method = NULL, - extend_ratio = NULL, - plane_equation = NULL, - mesh_grid_n = NULL, - mesh_obj = NULL, - cell_subset = NULL, - cell_subset_spatial_locations = NULL, - cell_subset_projection_locations = NULL, - cell_subset_projection_PCA = NULL, - cell_subset_projection_coords = NULL) { +create_crossSection_object <- function( + name = NULL, + method = NULL, + thickness_unit = NULL, + slice_thickness = NULL, + cell_distance_estimate_method = NULL, + extend_ratio = NULL, + plane_equation = NULL, + mesh_grid_n = NULL, + mesh_obj = NULL, + cell_subset = NULL, + cell_subset_spatial_locations = NULL, + cell_subset_projection_locations = NULL, + cell_subset_projection_PCA = NULL, + cell_subset_projection_coords = NULL) { crossSection_obj <- list( "method" = method, "thickness_unit" = thickness_unit, @@ -64,42 +65,58 @@ create_crossSection_object <- function(name = NULL, #' @name read_crossSection #' @description read a cross section object from a giotto object #' @param gobject gobject +#' @param spat_unit spatial unit #' @param name name #' @param spatial_network_name spatial_network_name #' @returns crossSectionObjects #' @keywords internal -read_crossSection <- function(gobject, - name = NULL, - spatial_network_name = NULL) { +read_crossSection <- function( + gobject, + spat_unit = NULL, + name = NULL, + spatial_network_name = NULL) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + if (is.null(spatial_network_name)) { stop("spatial_network_name is not specified.") - } else if (!is.element( - spatial_network_name, names(slot(gobject, "spatial_network")))) { - stop(paste0(spatial_network_name, " has not been created.")) - } else { - sp_network_obj <- get_spatialNetwork(gobject, - name = spatial_network_name, - output = "spatialNetworkObj" + } + + sn <- getSpatialNetwork( + gobject = gobject, + spat_unit = spat_unit, + name = spatial_network_name, + set_defaults = FALSE, + copy_obj = FALSE, + verbose = FALSE, + output = "spatialNetworkObj" + ) + + cs_list <- slot(sn, "crossSectionObjects") + + if (length(cs_list) == 0L) { + stop("No cross section object has been created.") + } + + if (is.null(name)) { + name <- names(cs_list)[length(cs_list)] + + default_name_msg <- sprintf( + "cross section object is not specified, \n%s \n'%s'", + "reading the last one from the existing list:", name ) - if (length(slot(sp_network_obj, "crossSectionObjects")) == 0) { - stop("No cross section object has been created.") - } else if (is.null(name)) { - sprintf( - "cross section object is not specified, reading the last one %s - from the existing list", - names(slot(sp_network_obj, "crossSectionObjects"))[ - length(slot(sp_network_obj, "crossSectionObjects"))] - ) - crossSection_obj <- slot(sp_network_obj, "crossSectionObjects")[[ - length(slot(sp_network_obj, "crossSectionObjects"))]] - } else if (!is.element(name, names(slot( - sp_network_obj, "crossSectionObjects")))) { - stop(paste0(name, " has not been created.")) - } else { - crossSection_obj <- slot( - sp_network_obj, "crossSectionObjects")[[name]] - } } + + if (!name %in% names(cs_list)) { + stop(sprintf( + "crossSectionObject '%s' has not been created.", + name + )) + } + + crossSection_obj <- cs_list[[name]] + return(crossSection_obj) } @@ -109,49 +126,69 @@ read_crossSection <- function(gobject, #' @name estimateCellCellDistance #' @description estimate average distance between neighboring cells #' @param gobject gobject +#' @param spat_unit spatial unit #' @param spatial_network_name spatial_network_name #' @param method method #' @returns matrix #' @keywords internal -estimateCellCellDistance <- function(gobject, - spatial_network_name = "Delaunay_network", - method = c("mean", "median")) { - delaunay_network_DT <- gobject@spatial_network[["thickness_unit"]][[ - spatial_network_name]]@networkDT +estimateCellCellDistance <- function( + gobject, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + method = c("mean", "median")) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + + net <- getSpatialNetwork( + gobject = gobject, + spat_unit = spat_unit, + name = spatial_network_name, + output = "networkDT" + ) CellCellDistance <- get_distance( - networkDT = delaunay_network_DT, + networkDT = net, method = method ) + return(CellCellDistance) } #' @title get_sectionThickness #' @name get_sectionThickness #' @description get section thickness #' @param gobject gobject +#' @param spat_unit spatial unit #' @param thickness_unit thickness_unit #' @param spatial_network_name spatial_network_name #' @param cell_distance_estimate_method cell_distance_estimate_method #' @param plane_equation plane_equation #' @returns numeric #' @keywords internal -get_sectionThickness <- function(gobject, thickness_unit = c("cell", "natural"), - slice_thickness = 2, - spatial_network_name = "Delaunay_network", - cell_distance_estimate_method = c("mean", "median"), - plane_equation = NULL) { +get_sectionThickness <- function( + gobject, + spat_unit = NULL, + thickness_unit = c("cell", "natural"), + slice_thickness = 2, + spatial_network_name = "Delaunay_network", + cell_distance_estimate_method = c("mean", "median"), + plane_equation = NULL) { thickness_unit <- match.arg(thickness_unit, c("cell", "natural")) - if (thickness_unit == "cell") { - CellCellDistance <- estimateCellCellDistance(gobject, - method = cell_distance_estimate_method, - spatial_network_name = spatial_network_name - ) - sectionThickness <- CellCellDistance * slice_thickness - } else if (thickness_unit == "natural") { - sectionThickness <- slice_thickness - } - return(sectionThickness) + section_thickness <- switch(thickness_unit, + "cell" = { + CellCellDistance <- estimateCellCellDistance( + gobject = gobject, + spat_unit = spat_unit, + method = cell_distance_estimate_method, + spatial_network_name = spatial_network_name + ) + CellCellDistance * slice_thickness + }, + "natural" = slice_thickness + ) + + return(section_thickness) } #' @title projection_fun @@ -182,7 +219,7 @@ projection_fun <- function(point_to_project, plane_point, plane_norm) { #' @title adapt_aspect_ratio #' @name adapt_aspect_ratio -#' @description adapt the aspact ratio after inserting cross section mesh grid +#' @description adapt the aspact ratio after inserting cross section mesh grid #' lines #' @param current_ratio current_ratio #' @param cell_locations cell_locations @@ -192,19 +229,23 @@ projection_fun <- function(point_to_project, plane_point, plane_norm) { #' @param mesh_obj mesh_obj #' @returns numeric #' @keywords internal -adapt_aspect_ratio <- function(current_ratio, cell_locations, - sdimx = NULL, sdimy = NULL, sdimz = NULL, - mesh_obj = NULL) { +adapt_aspect_ratio <- function( + current_ratio, cell_locations, + sdimx = NULL, sdimy = NULL, sdimz = NULL, + mesh_obj = NULL) { x_range <- max(cell_locations[[sdimx]]) - min(cell_locations[[sdimx]]) y_range <- max(cell_locations[[sdimy]]) - min(cell_locations[[sdimy]]) z_range <- max(cell_locations[[sdimz]]) - min(cell_locations[[sdimz]]) x_mesh_range <- max(mesh_obj$mesh_grid_lines$mesh_grid_lines_X) - min( - mesh_obj$mesh_grid_lines$mesh_grid_lines_X) + mesh_obj$mesh_grid_lines$mesh_grid_lines_X + ) y_mesh_range <- max(mesh_obj$mesh_grid_lines$mesh_grid_lines_Y) - min( - mesh_obj$mesh_grid_lines$mesh_grid_lines_Y) + mesh_obj$mesh_grid_lines$mesh_grid_lines_Y + ) z_mesh_range <- max(mesh_obj$mesh_grid_lines$mesh_grid_lines_Z) - min( - mesh_obj$mesh_grid_lines$mesh_grid_lines_Z) + mesh_obj$mesh_grid_lines$mesh_grid_lines_Z + ) if (x_mesh_range > x_range) { x_adapt <- x_mesh_range / x_range @@ -223,7 +264,8 @@ adapt_aspect_ratio <- function(current_ratio, cell_locations, } new_ratio <- as.numeric(current_ratio) * c( - as.numeric(x_adapt), as.numeric(y_adapt), as.numeric(z_adapt)) + as.numeric(x_adapt), as.numeric(y_adapt), as.numeric(z_adapt) + ) new_ratio <- new_ratio / min(new_ratio) return(new_ratio) } @@ -278,8 +320,7 @@ find_x_y_ranges <- function(data, extend_ratio) { #' @param mesh_grid_n mesh_grid_n #' @returns 2d mesh grid line object #' @keywords internal -create_2d_mesh_grid_line_obj <- function( - x_min, x_max, y_min, y_max, mesh_grid_n) { +create_2d_mesh_grid_line_obj <- function(x_min, x_max, y_min, y_max, mesh_grid_n) { x_grid <- seq(x_min, x_max, length.out = mesh_grid_n) y_grid <- seq(y_min, y_max, length.out = mesh_grid_n) @@ -333,9 +374,13 @@ reshape_to_data_point <- function(mesh_grid_obj) { reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { if (dim(data_points)[2] == 2) { mesh_grid_lines_X <- matrix( - data_points[, 1], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 1], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_lines_Y <- matrix( - data_points[, 2], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 2], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_obj <- list( "mesh_grid_lines_X" = mesh_grid_lines_X, @@ -343,11 +388,17 @@ reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { ) } else if (dim(data_points)[2] == 3) { mesh_grid_lines_X <- matrix( - data_points[, 1], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 1], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_lines_Y <- matrix( - data_points[, 2], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 2], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_lines_Z <- matrix( - data_points[, 3], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 3], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_obj <- list( "mesh_grid_lines_X" = mesh_grid_lines_X, "mesh_grid_lines_Y" = mesh_grid_lines_Y, @@ -367,17 +418,19 @@ reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { #' @param mesh_grid_n mesh_grid_n #' @returns 3d mesh #' @keywords internal -transform_2d_mesh_to_3d_mesh <- function( - mesh_line_obj_2d, pca_out, center_vec, mesh_grid_n) { +transform_2d_mesh_to_3d_mesh <- function(mesh_line_obj_2d, pca_out, center_vec, mesh_grid_n) { data_point_2d <- reshape_to_data_point(mesh_line_obj_2d) center_mat <- matrix( - rep(center_vec, dim(data_point_2d)[1]), - nrow = dim(data_point_2d)[1], byrow = TRUE) + rep(center_vec, dim(data_point_2d)[1]), + nrow = dim(data_point_2d)[1], byrow = TRUE + ) data_point_3d <- cbind( - data_point_2d, - rep(0, dim(data_point_2d)[1])) %*% t((pca_out$rotation)) + center_mat + data_point_2d, + rep(0, dim(data_point_2d)[1]) + ) %*% t((pca_out$rotation)) + center_mat mesh_grid_line_obj_3d <- reshape_to_mesh_grid_obj( - data_point_3d, mesh_grid_n) + data_point_3d, mesh_grid_n + ) return(mesh_grid_line_obj_3d) } @@ -390,10 +443,12 @@ transform_2d_mesh_to_3d_mesh <- function( #' @keywords internal get_cross_section_coordinates <- function(cell_subset_projection_locations) { cell_subset_projection_PCA <- stats::prcomp( - cell_subset_projection_locations) + cell_subset_projection_locations + ) cell_subset_projection_coords <- cell_subset_projection_PCA$x[ - , c("PC1", "PC2")] + , c("PC1", "PC2") + ] return(cell_subset_projection_coords) } @@ -406,13 +461,14 @@ get_cross_section_coordinates <- function(cell_subset_projection_locations) { #' @param mesh_grid_n mesh_grid_n #' @returns mesh grid lines #' @keywords internal -create_mesh_grid_lines <- function( - cell_subset_projection_locations, extend_ratio, mesh_grid_n) { +create_mesh_grid_lines <- function(cell_subset_projection_locations, extend_ratio, mesh_grid_n) { cell_subset_projection_PCA <- stats::prcomp( - cell_subset_projection_locations) + cell_subset_projection_locations + ) cell_subset_projection_coords <- cell_subset_projection_PCA$x[ - , c("PC1", "PC2")] + , c("PC1", "PC2") + ] x_y_ranges <- find_x_y_ranges(cell_subset_projection_coords, extend_ratio) @@ -424,7 +480,8 @@ create_mesh_grid_lines <- function( mesh_grid_n ) center_vec <- apply( - cell_subset_projection_locations, 2, function(x) mean(x)) + cell_subset_projection_locations, 2, function(x) mean(x) + ) mesh_grid_line_obj_3d <- transform_2d_mesh_to_3d_mesh( mesh_line_obj_2d, cell_subset_projection_PCA, @@ -440,136 +497,162 @@ create_mesh_grid_lines <- function( #' @title createCrossSection #' @description Create a virtual 2D cross section. #' @param gobject giotto object +#' @param spat_unit spatial unit #' @param spat_loc_name name of spatial locations #' @param name name of cress section object. (default = cross_sectino) -#' @param spatial_network_name name of spatial network object. +#' @param spatial_network_name name of spatial network object. #' (default = Delaunay_network) -#' @param thickness_unit unit of the virtual section thickness. If "cell", -#' average size of the observed cells is used as length unit. If "natural", -#' the unit of cell location coordinates is used.(default = cell) +#' @param thickness_unit unit of the virtual section thickness. If "cell", +#' average size of the observed cells is used as length unit. If "natural", +#' the unit of cell location coordinates is used. (default = cell) #' @param slice_thickness thickness of slice. default = 2 -#' @param cell_distance_estimate_method method to estimate average distance +#' @param cell_distance_estimate_method method to estimate average distance #' between neighobring cells. (default = mean) -#' @param extend_ratio deciding the span of the cross section meshgrid, as a -#' ratio of extension compared to the borders of the vitural tissue section. +#' @param extend_ratio deciding the span of the cross section meshgrid, as a +#' ratio of extension compared to the borders of the vitural tissue section. #' (default = 0.2) #' @param method method to define the cross section plane. -#' If equation, the plane is defined by a four element numerical vector -#' (equation) in the form of c(A,B,C,D), corresponding to a plane with +#' If equation, the plane is defined by a four element numerical vector +#' (equation) in the form of c(A,B,C,D), corresponding to a plane with #' equation Ax+By+Cz=D. -#' If 3 points, the plane is define by the coordinates of 3 points, as given by +#' If 3 points, the plane is define by the coordinates of 3 points, as given by #' point1, point2, and point3. -#' If point and norm vector, the plane is defined by the coordinates of one -#' point (point1) in the plane and the coordinates of one norm vector +#' If point and norm vector, the plane is defined by the coordinates of one +#' point (point1) in the plane and the coordinates of one norm vector #' (normVector) to the plane. -#' If point and two plane vector, the plane is defined by the coordinates of -#' one point (point1) in the plane and the coordinates of two vectors +#' If point and two plane vector, the plane is defined by the coordinates of +#' one point (point1) in the plane and the coordinates of two vectors #' (planeVector1, planeVector2) in the plane. #' (default = equation) -#' @param equation equation required by method "equation".equations needs to be -#' a numerical vector of length 4, in the form of c(A,B,C,D), which defines +#' @param equation equation required by method "equation".equations needs to be +#' a numerical vector of length 4, in the form of c(A,B,C,D), which defines #' plane Ax+By+Cz=D. -#' @param point1 coordinates of the first point required by method +#' @param point1 coordinates of the first point required by method #' "3 points","point and norm vector", and "point and two plane vectors". #' @param point2 coordinates of the second point required by method "3 points" #' @param point3 coordinates of the third point required by method "3 points" -#' @param normVector coordinates of the norm vector required by method +#' @param normVector coordinates of the norm vector required by method #' "point and norm vector" -#' @param planeVector1 coordinates of the first plane vector required by +#' @param planeVector1 coordinates of the first plane vector required by #' method "point and two plane vectors" -#' @param planeVector2 coordinates of the second plane vector required by +#' @param planeVector2 coordinates of the second plane vector required by #' method "point and two plane vectors" -#' @param mesh_grid_n numer of meshgrid lines to generate along both directions +#' @param mesh_grid_n numer of meshgrid lines to generate along both directions #' for the cross section plane. #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object with updated spatial network slot -#' @details Creates a virtual 2D cross section object for a given spatial -#' network object. The users need to provide the definition of the cross +#' @details Creates a virtual 2D cross section object for a given spatial +#' network object. The users need to provide the definition of the cross #' section plane (see method). #' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' createCrossSection(gobject = g, spatial_network_name = "spatial_network") +#' g <- GiottoData::loadGiottoMini("starmap") +#' +#' g <- createCrossSection( +#' gobject = g, +#' method = "equation", +#' equation = c(0, 1, 0, 600), +#' extend_ratio = 0.6, +#' name = "new_cs", +#' return_gobject = TRUE +#' ) +#' +#' crossSectionPlot(g, name = "new_cs") #' @export -createCrossSection <- function(gobject, - spat_loc_name = "raw", - name = "cross_section", - spatial_network_name = "Delaunay_network", - thickness_unit = c("cell", "natural"), - slice_thickness = 2, - cell_distance_estimate_method = "mean", - extend_ratio = 0.2, - method = c("equation", "3 points", "point and norm vector", - "point and two plane vectors"), - equation = NULL, - point1 = NULL, point2 = NULL, point3 = NULL, - normVector = NULL, - planeVector1 = NULL, planeVector2 = NULL, - mesh_grid_n = 20, - return_gobject = TRUE) { - # read spatial locations - spatial_locations <- getSpatialLocations(gobject, - name = spat_loc_name) - cell_IDs <- spatial_locations[, "cell_ID"] - cell_IDs <- cell_IDs$cell_ID - - colnames_to_extract <- c("sdimx", "sdimy", "sdimz") - spatial_locations <- spatial_locations[, colnames_to_extract] +createCrossSection <- function( + gobject, + spat_unit = NULL, + spat_loc_name = "raw", + name = "cross_section", + spatial_network_name = "Delaunay_network", + thickness_unit = c("cell", "natural"), + slice_thickness = 2, + cell_distance_estimate_method = "mean", + extend_ratio = 0.2, + method = c( + "equation", "3 points", "point and norm vector", + "point and two plane vectors" + ), + equation = NULL, + point1 = NULL, point2 = NULL, point3 = NULL, + normVector = NULL, + planeVector1 = NULL, planeVector2 = NULL, + mesh_grid_n = 20, + return_gobject = TRUE, + verbose = NULL) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) - spatial_locations <- spatial_locations@coordinates + # read spatial locations + spatial_locations <- getSpatialLocations( + gobject = gobject, spat_unit = spat_unit, name = spat_loc_name, + set_defaults = FALSE, verbose = FALSE, output = "spatLocsObj" + ) - spatial_locations <- as.matrix(spatial_locations) - rownames(spatial_locations) <- cell_IDs - cell_ID_vec <- c(1:nrow(spatial_locations)) + spatial_locations <- as.matrix(spatial_locations, id_rownames = TRUE) + cell_ID_vec <- seq_len(nrow(spatial_locations)) names(cell_ID_vec) <- rownames(spatial_locations) # generate section plane equation method <- match.arg( - method, - c("equation", "3 points", "point and norm vector", - "point and two plane vectors")) - - if (method == "equation") { - if (is.null(equation)) { - message("equation was not provided.") - } else { - plane_equation <- equation - plane_equation[4] <- -equation[4] - } - } else if (method == "point and norm vector") { - if (is.null(point1) | is.null(normVector)) { - message("either point or norm vector was not provided.") - } else { - plane_equation <- c() - plane_equation[1:3] <- normVector - plane_equation[4] <- -point1 %*% normVector - } - } else if (method == "point and two plane vectors") { - if (is.null(point1) | is.null(planeVector1) | is.null(planeVector2)) { - message("either point or any of the two plane vectors was not + method, + c( + "equation", "3 points", "point and norm vector", + "point and two plane vectors" + ) + ) + + switch(method, + "equation" = { + if (is.null(equation)) { + message("equation was not provided.") + } else { + plane_equation <- equation + plane_equation[4] <- -equation[4] + } + }, + "point and norm vector" = { + if (is.null(point1) || is.null(normVector)) { + message("either point or norm vector was not provided.") + } else { + plane_equation <- c() + plane_equation[seq_len(3)] <- normVector + plane_equation[4] <- -point1 %*% normVector + } + }, + "point and two plane vectors" = { + if (is.null(point1) || + is.null(planeVector1) || + is.null(planeVector2)) { + message("either point or any of the two plane vectors was not provided.") - } else { - normVector <- crossprod(planeVector1, planeVector2) - plane_equation[1:3] <- normVector - plane_equation[4] <- -point1 %*% normVector - } - } else if (method == "3 points") { - if (is.null(point1) | is.null(point2) | is.null(point3)) { - message("not all three points were provided.") - } else { - planeVector1 <- point2 - point1 - planeVector2 <- point3 - point1 - normVector <- crossprod(planeVector1, planeVector2) - plane_equation[1:3] <- normVector - plane_equation[4] <- -point1 %*% normVector + } else { + normVector <- crossprod(planeVector1, planeVector2) + plane_equation[seq_len(3)] <- normVector + plane_equation[4] <- -point1 %*% normVector + } + }, + "3 points" = { + if (is.null(point1) || is.null(point2) || is.null(point3)) { + message("not all three points were provided.") + } else { + planeVector1 <- point2 - point1 + planeVector2 <- point3 - point1 + normVector <- crossprod(planeVector1, planeVector2) + plane_equation[seq_len(3)] <- normVector + plane_equation[4] <- -point1 %*% normVector + } } - } + ) + names(plane_equation) <- c("A", "B", "C", "D") # determine section thickness thickness_unit <- match.arg(thickness_unit, c("cell", "natural")) sectionThickness <- get_sectionThickness(gobject, + spat_unit = spat_unit, thickness_unit = thickness_unit, slice_thickness = slice_thickness, spatial_network_name = spatial_network_name, @@ -581,10 +664,12 @@ createCrossSection <- function(gobject, # calculate distances to cross section spatial_locations_mat <- cbind( - spatial_locations, as.matrix(rep(1, dim(spatial_locations)[1]))) + spatial_locations, as.matrix(rep(1, dim(spatial_locations)[1])) + ) norm_vec <- function(x) sqrt(sum(x^2)) distance_to_plane_vector <- abs(spatial_locations_mat %*% as.matrix( - plane_equation) / norm_vec(plane_equation[1:3])) + plane_equation + ) / norm_vec(plane_equation[1:3])) # select cells within section ### cell_subset <- distance_to_plane_vector <= max_distance_to_section_plane @@ -602,19 +687,27 @@ createCrossSection <- function(gobject, } ## find the projection Xp,Yp,Zp coordinates ## cell_subset_projection_locations <- t(apply( - cell_subset_spatial_locations, 1, - function(x) projection_fun(x, plane_point = plane_point, - plane_norm = plane_equation[1:3]))) + cell_subset_spatial_locations, 1, + function(x) { + projection_fun(x, + plane_point = plane_point, + plane_norm = plane_equation[1:3] + ) + } + )) # get the local coordinates of selected cells on the section plane cell_subset_projection_PCA <- stats::prcomp( - cell_subset_projection_locations) + cell_subset_projection_locations + ) cell_subset_projection_coords <- get_cross_section_coordinates( - cell_subset_projection_locations) + cell_subset_projection_locations + ) # create mesh grid lines for the cross section ### mesh_grid_lines <- create_mesh_grid_lines( - cell_subset_projection_locations, extend_ratio, mesh_grid_n) + cell_subset_projection_locations, extend_ratio, mesh_grid_n + ) mesh_obj <- list("mesh_grid_lines" = mesh_grid_lines) ### save and update the spatial object ### @@ -634,14 +727,30 @@ createCrossSection <- function(gobject, ) - if (return_gobject == TRUE) { - cs_names <- names(gobject@spatial_network[[ - spatial_network_name]]$crossSectionObjects) + if (return_gobject) { + sn <- getSpatialNetwork( + gobject = gobject, + spat_unit = spat_unit, + name = spatial_network_name, + copy_obj = FALSE, + set_defaults = FALSE, + verbose = FALSE, + output = "spatialNetworkObj" + ) + + cs_names <- names(sn@crossSectionObjects) if (name %in% cs_names) { - cat(name, " has already been used, will be overwritten") + vmsg(.v = verbose, sprintf( + "name '%s' has already been used, will be overwritten", + name + )) } - gobject@spatial_network[[spatial_network_name]]$crossSectionObjects[[ - name]] <- crossSection_obj + + sn@crossSectionObjects[[name]] <- crossSection_obj + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <- setGiotto(gobject, sn, verbose = FALSE) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### return(gobject) } else { @@ -653,16 +762,18 @@ createCrossSection <- function(gobject, # cross section visual functions #### #### -#' @title crossSectionGenePlot -#' @name crossSectionGenePlot -#' @description Visualize cells and gene expression in a virtual cross section -#' according to spatial coordinates +#' @title crossSectionFeatPlot +#' @name crossSectionFeatPlot +#' @description Visualize cells and feature expression in a virtual cross +#' section according to spatial coordinates #' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type #' @param spat_loc_name name of spatial locations #' @param crossSection_obj crossSection object #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use -#' @param default_save_name default save name for saving, don't change, +#' @param default_save_name default save name for saving, don't change, #' change save_name in save_param #' @param ... parameters for spatFeatPlot2D #' @returns ggplot @@ -670,36 +781,66 @@ createCrossSection <- function(gobject, #' @md #' @seealso [GiottoVisuals::spatGenePlot3D] and [GiottoVisuals::spatFeatPlot2D] #' @export -crossSectionGenePlot <- function( - gobject = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - default_save_name = "crossSectionGenePlot", - ...) { +crossSectionFeatPlot <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + default_save_name = "crossSectionGenePlot", + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + # load cross section object - if (!is.null(crossSection_obj)) { - crossSection_obj <- crossSection_obj - } else { + if (is.null(crossSection_obj)) { crossSection_obj <- read_crossSection( gobject, + spat_unit = spat_unit, name = name, spatial_network_name = spatial_network_name ) } cell_subset <- crossSection_obj$cell_subset - cell_subset_projection_coords <- crossSection_obj$cell_subset_projection_coords + cell_subset <- rownames(cell_subset)[which(cell_subset)] + cell_subset_projection_coords <- + crossSection_obj$cell_subset_projection_coords + # modify gobject based on crossSection object - subset_cell_IDs <- gobject@cell_metadata$cell_ID[cell_subset] - temp_gobject <- subsetGiotto(gobject = gobject, cell_ids = subset_cell_IDs) - temp_gobject@spatial_locs[[spat_loc_name]]$sdimx <- cell_subset_projection_coords[, 1] - temp_gobject@spatial_locs[[spat_loc_name]]$sdimy <- cell_subset_projection_coords[, 2] - temp_gobject@spatial_locs[[spat_loc_name]]$sdimz <- rep(0, dim(cell_subset_projection_coords)[1]) + gobj_sids <- spatIDs(gobject, spat_unit = spat_unit) + subset_cell_ids <- gobj_sids[gobj_sids %in% cell_subset] + temp_gobject <- subsetGiotto(gobject, + spat_unit = spat_unit, + feat_type = feat_type, + cell_ids = subset_cell_ids + ) + + sl <- getSpatialLocations( + gobject = temp_gobject, + spat_unit = spat_unit, + name = spat_loc_name, + output = "spatLocsObj", + copy_obj = TRUE, + verbose = FALSE + ) + + sl[]$sdimx <- cell_subset_projection_coords[, 1] + sl[]$sdimy <- cell_subset_projection_coords[, 2] + sl[]$sdimz <- rep(0, dim(cell_subset_projection_coords)[1]) + + temp_gobject <- setGiotto(temp_gobject, sl, verbose = FALSE) + # call spatFeatPlot2D to generate the plots GiottoVisuals::spatFeatPlot2D( gobject = temp_gobject, + spat_unit = spat_unit, + feat_type = feat_type, spatial_network_name = spatial_network_name, default_save_name = default_save_name, ... @@ -709,62 +850,83 @@ crossSectionGenePlot <- function( #' @title crossSectionPlot #' @name crossSectionPlot -#' @description Visualize cells in a virtual cross section according to +#' @description Visualize cells in a virtual cross section according to #' spatial coordinates #' @param gobject giotto object -#' @param spat_loc_name name of spatial locations +#' @param spat_unit spatial unit #' @param feat_type feature type -#' @param crossSection_obj cross section object as alternative input. +#' @param spat_loc_name name of spatial locations +#' @param crossSection_obj cross section object as alternative input. #' default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use -#' @param default_save_name default save name for saving, don't change, +#' @param default_save_name default save name for saving, don't change, #' change save_name in save_param #' @param ... parameters for spatPlot2D #' @returns ggplot #' @details Description of parameters. #' @export #' @seealso \code{\link{crossSectionPlot}} -crossSectionPlot <- function(gobject, - spat_loc_name = "raw", - feat_type = NULL, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - default_save_name = "crossSectionPlot", - ...) { - # specify feat_type - if (is.null(feat_type)) { - feat_type <- gobject@expression_feat[[1]] - } +crossSectionPlot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + default_save_name = "crossSectionPlot", + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) # load cross section object - if (!is.null(crossSection_obj)) { - crossSection_obj <- crossSection_obj - } else { - crossSection_obj <- read_crossSection(gobject, + if (is.null(crossSection_obj)) { + crossSection_obj <- read_crossSection( + gobject = gobject, + spat_unit = spat_unit, name = name, spatial_network_name = spatial_network_name ) } - cell_subset <- crossSection_obj$cell_subset - cell_subset_projection_coords <- crossSection_obj$cell_subset_projection_coords + cell_subset <- rownames(cell_subset)[which(cell_subset)] + cell_subset_projection_coords <- + crossSection_obj$cell_subset_projection_coords # modify gobject based on crossSection object - subset_cell_IDs <- gobject@cell_metadata[[feat_type]]$cell_ID[cell_subset] + gobj_sids <- spatIDs(gobject, spat_unit = spat_unit) + subset_cell_ids <- gobj_sids[gobj_sids %in% cell_subset] temp_gobject <- subsetGiotto(gobject, + spat_unit = spat_unit, feat_type = feat_type, - cell_ids = subset_cell_IDs + cell_ids = subset_cell_ids ) - temp_gobject@spatial_locs[[spat_loc_name]]$sdimx <- cell_subset_projection_coords[, 1] - temp_gobject@spatial_locs[[spat_loc_name]]$sdimy <- cell_subset_projection_coords[, 2] - temp_gobject@spatial_locs[[spat_loc_name]]$sdimz <- rep(0, dim(cell_subset_projection_coords)[1]) + + sl <- getSpatialLocations( + gobject = temp_gobject, + spat_unit = spat_unit, + name = spat_loc_name, + output = "spatLocsObj", + copy_obj = TRUE, + verbose = FALSE + ) + + sl[]$sdimx <- cell_subset_projection_coords[, 1] + sl[]$sdimy <- cell_subset_projection_coords[, 2] + sl[]$sdimz <- rep(0, dim(cell_subset_projection_coords)[1]) + + temp_gobject <- setGiotto(temp_gobject, sl, verbose = FALSE) # call spatFeatPlot2D to generate the plots spatPlot2D( gobject = temp_gobject, + spat_unit = spat_unit, feat_type = feat_type, spatial_network_name = spatial_network_name, default_save_name = default_save_name, @@ -773,94 +935,141 @@ crossSectionPlot <- function(gobject, } #### -#' @title crossSectionGenePlot3D -#' @name crossSectionGenePlot3D -#' @description Visualize cells and gene expression in a virtual cross section -#' according to spatial coordinates +#' @title crossSectionFeatPlot3D +#' @name crossSectionFeatPlot3D +#' @description Visualize cells and feature expression in a virtual cross +#' section according to spatial coordinates #' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type #' @param crossSection_obj cross section object as alternative input. default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use -#' @param other_cell_color color of cells outside the cross section. +#' @param show_other_cells logical. Default = TRUE +#' @param other_cell_color color of cells outside the cross section. #' default = transparent. -#' @param default_save_name default save name for saving, don't change, change +#' @param default_save_name default save name for saving, don't change, change #' save_name in save_param #' @param ... parameters for spatGenePlot3D #' @return ggplot #' @details Description of parameters. #' @export -crossSectionGenePlot3D <- function(gobject, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - other_cell_color = alpha("lightgrey", 0), - default_save_name = "crossSectionGenePlot3D", ...) { +crossSectionFeatPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, + other_cell_color = alpha("lightgrey", 0), + default_save_name = "crossSectionGenePlot3D", + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + # load cross section object - if (!is.null(crossSection_obj)) { - crossSection_obj <- crossSection_obj - } else { + if (is.null(crossSection_obj)) { crossSection_obj <- read_crossSection( - gobject, name = name, spatial_network_name = spatial_network_name) + gobject = gobject, + spat_unit = spat_unit, + name = name, + spatial_network_name = spatial_network_name + ) } - cell_subset <- crossSection_obj$cell_subset - cell_subset_projection_coords <- crossSection_obj$cell_subset_projection_coords + cell_subset <- rownames(cell_subset)[which(cell_subset)] + cell_subset_projection_coords <- + crossSection_obj$cell_subset_projection_coords + # modify gobject based on crossSection object - subset_cell_IDs <- gobject@cell_metadata$cell_ID[cell_subset] + gobj_sids <- spatIDs(gobject, spat_unit = spat_unit) + subset_cell_ids <- gobj_sids[gobj_sids %in% cell_subset] + # call spatGenePlot3D to generate the plots - spatGenePlot3D(gobject, - select_cells = subset_cell_IDs, + spatFeatPlot3D(gobject, + spat_unit = spat_unit, + feat_type = feat_type, + select_cells = subset_cell_ids, + show_other_cells = show_other_cells, other_cell_color = other_cell_color, - default_save_name = default_save_name, ... + default_save_name = default_save_name, + ... ) } #### #' @title crossSectionPlot3D #' @name crossSectionPlot3D -#' @description Visualize cells in a virtual cross section according to spatial +#' @description Visualize cells in a virtual cross section according to spatial #' coordinates #' @param gobject giotto object -#' @param crossSection_obj cross section object as alternative input. +#' @param spat_unit spatial unit +#' @param feat_type feature type +#' @param crossSection_obj cross section object as alternative input. #' default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use #' @param show_other_cells display not selected cells -#' @param other_cell_color color of cells outside the cross section. +#' @param other_cell_color color of cells outside the cross section. #' default = transparent. -#' @param default_save_name default save name for saving, don't change, +#' @param default_save_name default save name for saving, don't change, #' change save_name in save_param #' @param ... parameters for spatPlot3D #' @returns ggplot #' @details Description of parameters. #' @export -crossSectionPlot3D <- function(gobject, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - show_other_cells = TRUE, - other_cell_color = alpha("lightgrey", 0), - default_save_name = "crossSection3D", ...) { +crossSectionPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, + other_cell_color = alpha("lightgrey", 0), + default_save_name = "crossSection3D", + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + # load cross section object - if (!is.null(crossSection_obj)) { - crossSection_obj <- crossSection_obj - } else { + if (is.null(crossSection_obj)) { crossSection_obj <- read_crossSection( - gobject, name = name, spatial_network_name = spatial_network_name) + gobject, + spat_unit = spat_unit, + name = name, + spatial_network_name = spatial_network_name + ) } cell_subset <- crossSection_obj$cell_subset - cell_subset_projection_coords <- crossSection_obj$cell_subset_projection_coords + cell_subset <- rownames(cell_subset)[which(cell_subset)] + cell_subset_projection_coords <- + crossSection_obj$cell_subset_projection_coords + # modify gobject based on crossSection object - subset_cell_IDs <- gobject@cell_metadata$cell_ID[cell_subset] - + gobj_sids <- spatIDs(gobject, spat_unit = spat_unit) + subset_cell_ids <- gobj_sids[gobj_sids %in% cell_subset] + # call spatPlot3D to generate the plots spatPlot3D( gobject = gobject, - select_cells = subset_cell_IDs, + spat_unit = spat_unit, + feat_type = feat_type, + select_cells = subset_cell_ids, show_other_cells = show_other_cells, other_cell_color = other_cell_color, - default_save_name = default_save_name, ... + default_save_name = default_save_name, + ... ) } @@ -868,11 +1077,13 @@ crossSectionPlot3D <- function(gobject, #### #' @title insertCrossSectionSpatPlot3D #' @name insertCrossSectionSpatPlot3D -#' @description Visualize the meshgrid lines of cross section together with +#' @description Visualize the meshgrid lines of cross section together with #' cells #' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type #' @param spat_loc_name name of spatial locations -#' @param crossSection_obj cross section object as alternative input. +#' @param crossSection_obj cross section object as alternative input. #' default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use @@ -885,36 +1096,52 @@ crossSectionPlot3D <- function(gobject, #' @param show_other_cells display not selected cells #' @param axis_scale axis_scale #' @param custom_ratio custom_ratio -#' @param default_save_name default save name for saving, don't change, +#' @param default_save_name default save name for saving, don't change, #' change save_name in save_param #' @param ... parameters for spatPlot3D #' @returns ggplot #' @details Description of parameters. #' @export -insertCrossSectionSpatPlot3D <- function(gobject, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - mesh_grid_color = "#1f77b4", - mesh_grid_width = 3, - mesh_grid_style = "dot", - sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", - show_other_cells = FALSE, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - default_save_name = "spat3D_with_cross_section", ...) { +insertCrossSectionSpatPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + mesh_grid_color = "#1f77b4", + mesh_grid_width = 3, + mesh_grid_style = "dot", + sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", + show_other_cells = FALSE, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + default_save_name = "spat3D_with_cross_section", + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + # load cross section object if (!is.null(crossSection_obj)) { crossSection_obj <- crossSection_obj } else { crossSection_obj <- read_crossSection( - gobject, name = name, spatial_network_name = spatial_network_name) + gobject, + spat_unit = spat_unit, + name = name, + spatial_network_name = spatial_network_name + ) } - pl <- spatPlot3D(gobject, + spat_unit = spat_unit, + feat_type = feat_type, sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, @@ -922,28 +1149,40 @@ insertCrossSectionSpatPlot3D <- function(gobject, show_plot = FALSE, return_plot = TRUE, save_plot = FALSE, - default_save_name = default_save_name, ... + default_save_name = default_save_name, + ... ) for (i in seq_len(dim( - crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X)[2])) { + crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X + )[2])) { pl <- pl %>% plotly::add_trace( x = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X[, i], y = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Y[, i], z = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Z[, i], mode = "lines", type = "scatter3d", - line = list(color = mesh_grid_color, - width = mesh_grid_width, dash = mesh_grid_style) + line = list( + color = mesh_grid_color, + width = mesh_grid_width, dash = mesh_grid_style + ) ) } - current_ratio <- plotly_axis_scale_3D(gobject@spatial_locs[[spat_loc_name]], + sl <- getSpatialLocations( + gobject = gobject, spat_unit = spat_unit, name = spat_loc_name, + output = "data.table", copy_obj = TRUE, verbose = FALSE, + set_defaults = TRUE + ) + + current_ratio <- plotly_axis_scale_3D( + cell_locations = sl, sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mode = axis_scale, custom_ratio = custom_ratio ) new_ratio <- adapt_aspect_ratio( - current_ratio, gobject@spatial_locs[[spat_loc_name]], + current_ratio = current_ratio, + cell_locations = sl, sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mesh_obj = crossSection_obj$mesh_obj ) @@ -963,13 +1202,15 @@ insertCrossSectionSpatPlot3D <- function(gobject, return(pl) } #### -#' @title insertCrossSectionGenePlot3D -#' @name insertCrossSectionGenePlot3D -#' @description Visualize cells and gene expression in a virtual cross section +#' @title insertCrossSectionFeatPlot3D +#' @name insertCrossSectionFeatPlot3D +#' @description Visualize cells and gene expression in a virtual cross section #' according to spatial coordinates #' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type #' @param spat_loc_name name of spatial locations -#' @param crossSection_obj cross section object as alternative input. +#' @param crossSection_obj cross section object as alternative input. #' default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use @@ -985,74 +1226,96 @@ insertCrossSectionSpatPlot3D <- function(gobject, #' @param show_plot show plots #' @param return_plot return ggplot object #' @param save_plot logical. directly save the plot -#' @param save_param list of saving parameters from +#' @param save_param list of saving parameters from #' [GiottoVisuals::all_plots_save_function] -#' @param default_save_name default save name for saving, don't change, +#' @param default_save_name default save name for saving, don't change, #' change save_name in save_param #' @param ... parameters for spatGenePlot3D #' @returns ggplot #' @details Description of parameters. #' @md #' @export -insertCrossSectionGenePlot3D <- function( - gobject, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - mesh_grid_color = "#1f77b4", - mesh_grid_width = 3, - mesh_grid_style = "dot", - sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", - show_other_cells = FALSE, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - show_plot = NULL, return_plot = NULL, save_plot = NULL, - save_param = list(), - default_save_name = "spatGenePlot3D_with_cross_section", - ...) { +insertCrossSectionFeatPlot3D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + mesh_grid_color = "#1f77b4", + mesh_grid_width = 3, + mesh_grid_style = "dot", + sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", + show_other_cells = FALSE, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + show_plot = NULL, return_plot = NULL, save_plot = NULL, + save_param = list(), + default_save_name = "spatGenePlot3D_with_cross_section", + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + # load cross section object - if (!is.null(crossSection_obj)) { - crossSection_obj <- crossSection_obj - } else { + if (is.null(crossSection_obj)) { crossSection_obj <- read_crossSection( - gobject, name = name, spatial_network_name = spatial_network_name) + gobject, + spat_unit = spat_unit, + name = name, + spatial_network_name = spatial_network_name + ) } - pl <- spatGenePlot3D(gobject, + pl <- spatFeatPlot3D(gobject, + spat_unit = spat_unit, + feat_type = feat_type, show_other_cells = FALSE, axis_scale = axis_scale, custom_ratio = custom_ratio, show_plot = FALSE, return_plot = TRUE, save_plot = FALSE, - default_save_name = default_save_name, ... + default_save_name = default_save_name, + ... ) + for (i in seq_len(dim( - crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X)[2])) { + crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X + )[2])) { pl <- pl %>% plotly::add_trace( x = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X[, i], y = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Y[, i], z = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Z[, i], mode = "lines+markers", type = "scatter3d", color = mesh_grid_color, marker = list(color = alpha(mesh_grid_color, 0)), - line = list(color = mesh_grid_color, - width = mesh_grid_width, dash = mesh_grid_style) + line = list( + color = mesh_grid_color, + width = mesh_grid_width, dash = mesh_grid_style + ) ) } - current_ratio <- plotly_axis_scale_3D(gobject@spatial_locs[[spat_loc_name]], - sdimx = sdimx, - sdimy = sdimy, - sdimz = sdimz, + sl <- getSpatialLocations( + gobject = gobject, spat_unit = spat_unit, name = spat_loc_name, + output = "data.table", copy_obj = TRUE, verbose = FALSE, + set_defaults = TRUE + ) + + + current_ratio <- plotly_axis_scale_3D( + cell_locations = sl, + sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mode = axis_scale, custom_ratio = custom_ratio ) new_ratio <- adapt_aspect_ratio( - current_ratio, gobject@spatial_locs[[spat_loc_name]], - sdimx = sdimx, - sdimy = sdimy, - sdimz = sdimz, + current_ratio = current_ratio, + cell_locations = sl, + sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mesh_obj = crossSection_obj$mesh_obj ) diff --git a/R/differential_expression.R b/R/differential_expression.R index e476e2e62..5eead79a0 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -35,18 +35,19 @@ #' #' findScranMarkers(g, cluster_column = "leiden_clus") #' @export -findScranMarkers <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - verbose = TRUE, - ...) { +findScranMarkers <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + verbose = TRUE, + ...) { # verify if optional package is installed package_check(pkg_name = "scran", repository = "Bioc") @@ -76,8 +77,11 @@ findScranMarkers <- function(gobject, # expression data values <- match.arg( expression_values, - choices = unique(c("normalized", "scaled", "custom", - expression_values))) + choices = unique(c( + "normalized", "scaled", "custom", + expression_values + )) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -104,20 +108,23 @@ findScranMarkers <- function(gobject, expr_data <- expr_data[, colnames(expr_data) %in% subset_cell_IDs] } else if (!is.null(group_1) & !is.null(group_2)) { cell_metadata <- cell_metadata[ - get(cluster_column) %in% c(group_1, group_2)] + get(cluster_column) %in% c(group_1, group_2) + ] # create new pairwise group if (!is.null(group_1_name)) { - if (!is.character(group_1_name)) + if (!is.character(group_1_name)) { stop("group_1_name needs to be a character") + } group_1_name <- group_1_name } else { group_1_name <- paste0(group_1, collapse = "_") } if (!is.null(group_2_name)) { - if (!is.character(group_2_name)) + if (!is.character(group_2_name)) { stop("group_2_name needs to be a character") + } group_2_name <- group_2_name } else { group_2_name <- paste0(group_2, collapse = "_") @@ -128,7 +135,8 @@ findScranMarkers <- function(gobject, pairwise_select_comp <- NULL cell_metadata[, pairwise_select_comp := ifelse( - get(cluster_column) %in% group_1, group_1_name, group_2_name)] + get(cluster_column) %in% group_1, group_1_name, group_2_name + )] cluster_column <- "pairwise_select_comp" @@ -140,7 +148,8 @@ findScranMarkers <- function(gobject, ## SCRAN ## marker_results <- scran::findMarkers( - x = expr_data, groups = cell_metadata[[cluster_column]], ...) + x = expr_data, groups = cell_metadata[[cluster_column]], ... + ) # data.table variables genes <- cluster <- feats <- NULL @@ -180,18 +189,19 @@ findScranMarkers <- function(gobject, #' #' findScranMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findScranMarkers_one_vs_all <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - pval = 0.01, - logFC = 0.5, - min_feats = 10, - min_genes = NULL, - verbose = TRUE, - ...) { +findScranMarkers_one_vs_all <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + pval = 0.01, + logFC = 0.5, + min_feats = 10, + min_genes = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -203,12 +213,13 @@ findScranMarkers_one_vs_all <- function(gobject, package_check(pkg_name = "scran", repository = "Bioc") # print message with information # - if (verbose) - message("using 'Scran' to detect marker feats. If used in published + if (verbose) { + message("using 'Scran' to detect marker feats. If used in published research, please cite: Lun ATL, McCarthy DJ, Marioni JC (2016). 'A step-by-step workflow for low-level analysis of single-cell RNA-seq data with Bioconductor.' F1000Res., 5, 2122. doi: 10.12688/f1000research.9501.2. ") + } # Set feat_type and spat_unit @@ -225,8 +236,11 @@ findScranMarkers_one_vs_all <- function(gobject, # expression data values <- match.arg( expression_values, - choices = unique(c("normalized", "scaled", "custom", - expression_values))) + choices = unique(c( + "normalized", "scaled", "custom", + expression_values + )) + ) # cluster column cell_metadata <- getCellMetadata(gobject, @@ -294,7 +308,8 @@ findScranMarkers_one_vs_all <- function(gobject, unique(x$cluster) == selected_clus })) selected_table <- data.table::as.data.table( - markers[select_bool]) + markers[select_bool] + ) # remove summary column from scran output if present col_ind_keep <- !grepl("summary", colnames(selected_table)) @@ -302,9 +317,11 @@ findScranMarkers_one_vs_all <- function(gobject, # change logFC.xxx name to logFC data.table::setnames( - selected_table, colnames(selected_table)[4], "logFC") + selected_table, colnames(selected_table)[4], "logFC" + ) data.table::setnames( - selected_table, colnames(selected_table)[5], "feats") + selected_table, colnames(selected_table)[5], "feats" + ) # filter selected table filtered_table <- selected_table[logFC > 0] @@ -314,7 +331,8 @@ findScranMarkers_one_vs_all <- function(gobject, p.value <- ranking <- NULL filtered_table <- filtered_table[ - (p.value <= pval & logFC >= logFC) | (ranking <= min_feats)] + (p.value <= pval & logFC >= logFC) | (ranking <= min_feats) + ] pb(message = c("cluster ", clus_i, "/", length(uniq_clusters))) return(filtered_table) @@ -385,22 +403,23 @@ findScranMarkers_one_vs_all <- function(gobject, #' #' findGiniMarkers(g, cluster_column = "leiden_clus") #' @export -findGiniMarkers <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - min_expr_gini_score = 0.2, - min_det_gini_score = 0.2, - detection_threshold = 0, - rank_score = 1, - min_feats = 5, - min_genes = NULL) { +findGiniMarkers <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + min_expr_gini_score = 0.2, + min_det_gini_score = 0.2, + detection_threshold = 0, + rank_score = 1, + min_feats = 5, + min_genes = NULL) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -422,7 +441,8 @@ findGiniMarkers <- function(gobject, ## select expression values values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # cluster column @@ -441,7 +461,8 @@ findGiniMarkers <- function(gobject, # subset clusters if (!is.null(subset_clusters)) { cell_metadata[] <- cell_metadata[][ - get(cluster_column) %in% subset_clusters] + get(cluster_column) %in% subset_clusters + ] subset_cell_IDs <- cell_metadata[][["cell_ID"]] gobject <- subsetGiotto( gobject = gobject, @@ -451,20 +472,23 @@ findGiniMarkers <- function(gobject, ) } else if (!is.null(group_1) & !is.null(group_2)) { cell_metadata[] <- cell_metadata[][ - get(cluster_column) %in% c(group_1, group_2)] + get(cluster_column) %in% c(group_1, group_2) + ] # create new pairwise group if (!is.null(group_1_name)) { - if (!is.character(group_1_name)) + if (!is.character(group_1_name)) { stop("group_1_name needs to be a character") + } group_1_name <- group_1_name } else { group_1_name <- paste0(group_1, collapse = "_") } if (!is.null(group_2_name)) { - if (!is.character(group_2_name)) + if (!is.character(group_2_name)) { stop("group_2_name needs to be a character") + } group_2_name <- group_2_name } else { group_2_name <- paste0(group_2, collapse = "_") @@ -473,7 +497,8 @@ findGiniMarkers <- function(gobject, pairwise_select_comp <- NULL cell_metadata[][, pairwise_select_comp := ifelse( - get(cluster_column) %in% group_1, group_1_name, group_2_name)] + get(cluster_column) %in% group_1, group_1_name, group_2_name + )] cluster_column <- "pairwise_select_comp" @@ -526,9 +551,11 @@ findGiniMarkers <- function(gobject, detection_threshold = detection_threshold ) aggr_detection_sc_clusters_DT <- data.table::as.data.table( - aggr_detection_sc_clusters) + aggr_detection_sc_clusters + ) aggr_detection_sc_clusters_DT[, feats := rownames( - aggr_detection_sc_clusters)] + aggr_detection_sc_clusters + )] aggr_detection_sc_clusters_DT_melt <- data.table::melt.data.table( aggr_detection_sc_clusters_DT, variable.name = "cluster", @@ -541,15 +568,20 @@ findGiniMarkers <- function(gobject, expression_gini <- detection_gini <- detection <- NULL aggr_sc_clusters_DT_melt[, expression_gini := mygini_fun( - expression), by = feats] + expression + ), by = feats] aggr_detection_sc_clusters_DT_melt[, detection_gini := mygini_fun( - detection), by = feats] + detection + ), by = feats] ## combine - aggr_sc <- cbind(aggr_sc_clusters_DT_melt, - aggr_detection_sc_clusters_DT_melt[ - , .(detection, detection_gini)]) + aggr_sc <- cbind( + aggr_sc_clusters_DT_melt, + aggr_detection_sc_clusters_DT_melt[ + , .(detection, detection_gini) + ] + ) ## create combined rank @@ -561,13 +593,17 @@ findGiniMarkers <- function(gobject, aggr_sc[, expression_rank := rank(-expression), by = feats] aggr_sc[, expression_rank := scales::rescale( - expression_rank, to = c(1, 0.1)), by = cluster] + expression_rank, + to = c(1, 0.1) + ), by = cluster] # detection rank for each feat in all samples # rescale detection rank range between 1 and 0.1 aggr_sc[, detection_rank := rank(-detection), by = feats] aggr_sc[, detection_rank := scales::rescale( - detection_rank, to = c(1, 0.1)), by = cluster] + detection_rank, + to = c(1, 0.1) + ), by = cluster] # create combine score based on rescaled ranks and gini scores @@ -577,7 +613,7 @@ findGiniMarkers <- function(gobject, aggr_sc[, comb_score := (expression_gini * expression_rank) * ( detection_gini * detection_rank)] setorder(aggr_sc, cluster, -comb_score) - aggr_sc[, comb_rank := 1:.N, by = cluster] + aggr_sc[, comb_rank := seq_len(.N), by = cluster] top_feats_scores <- aggr_sc[comb_rank <= min_feats | ( expression_rank <= rank_score & detection_rank <= rank_score)] @@ -590,7 +626,8 @@ findGiniMarkers <- function(gobject, original_uniq_cluster_names <- unique(cell_metadata[][[cluster_column]]) if (sum(grepl("cluster_", original_uniq_cluster_names)) == 0) { top_feats_scores_filtered[, cluster := gsub( - x = cluster, "cluster_", "")] + x = cluster, "cluster_", "" + )] } return(top_feats_scores_filtered) @@ -623,19 +660,20 @@ findGiniMarkers <- function(gobject, #' #' findGiniMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findGiniMarkers_one_vs_all <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - min_feats = 4, - min_genes = NULL, - verbose = TRUE) { +findGiniMarkers_one_vs_all <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + min_feats = 4, + min_genes = NULL, + verbose = TRUE) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -657,7 +695,8 @@ findGiniMarkers_one_vs_all <- function(gobject, ## select expression values values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # cluster column @@ -767,21 +806,24 @@ findGiniMarkers_one_vs_all <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findMastMarkers(gobject = g, cluster_column = "leiden_clus", group_1 = 1, -#' group_2 = 2) +#' findMastMarkers( +#' gobject = g, cluster_column = "leiden_clus", group_1 = 1, +#' group_2 = 2 +#' ) #' @export -findMastMarkers <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - adjust_columns = NULL, - verbose = FALSE, - ...) { +findMastMarkers <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + adjust_columns = NULL, + verbose = FALSE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -797,16 +839,18 @@ findMastMarkers <- function(gobject, package_check(pkg_name = "MAST", repository = "Bioc") # print message with information # - if (verbose) - message("using 'MAST' to detect marker feats. If used in published + if (verbose) { + message("using 'MAST' to detect marker feats. If used in published research, please cite: McDavid A, Finak G, Yajima M (2020). MAST: Model-based Analysis of Single Cell Transcriptomics. R package version 1.14.0, https://github.com/RGLab/MAST/.") + } ## select expression values to use values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) ## cluster column cell_metadata <- getCellMetadata(gobject, @@ -826,7 +870,8 @@ findMastMarkers <- function(gobject, ## subset data based on group_1 and group_2 cell_metadata[] <- cell_metadata[][ - get(cluster_column) %in% c(group_1, group_2)] + get(cluster_column) %in% c(group_1, group_2) + ] if (nrow(cell_metadata[]) == 0) { stop("there are no cells for group_1 or group_2, check cluster column") } @@ -839,7 +884,8 @@ findMastMarkers <- function(gobject, pairwise_select_comp <- NULL cell_metadata[][, pairwise_select_comp := ifelse( - get(cluster_column) %in% group_1, group_1_name, group_2_name)] + get(cluster_column) %in% group_1, group_1_name, group_2_name + )] if (nrow(cell_metadata[][pairwise_select_comp == group_1_name]) == 0) { stop("there are no cells for group_1, check cluster column") @@ -873,8 +919,11 @@ findMastMarkers <- function(gobject, # expression data values <- match.arg( expression_values, - choices = unique(c("normalized", "scaled", "custom", - expression_values))) + choices = unique(c( + "normalized", "scaled", "custom", + expression_values + )) + ) expr_data <- getExpression( gobject = gobject, feat_type = feat_type, @@ -914,7 +963,8 @@ findMastMarkers <- function(gobject, if (!is.null(adjust_columns)) { myformula <- stats::as.formula(paste0( "~ 1 + ", cluster_column, " + ", - paste(adjust_columns, collapse = " + "))) + paste(adjust_columns, collapse = " + ") + )) } else { myformula <- stats::as.formula(paste0("~ 1 + ", cluster_column)) } @@ -929,12 +979,15 @@ findMastMarkers <- function(gobject, sample <- paste0(cluster_column, group_1_name) summaryCond <- MAST::summary(zlmCond, doLRT = sample) summaryDt <- summaryCond$datatable - fcHurdle <- merge(summaryDt[ - contrast == sample & component == "H", - .(primerid, `Pr(>Chisq)`)], # hurdle P values + fcHurdle <- merge( + summaryDt[ + contrast == sample & component == "H", + .(primerid, `Pr(>Chisq)`) + ], # hurdle P values summaryDt[ contrast == sample & component == "logFC", - .(primerid, coef, ci.hi, ci.lo)], + .(primerid, coef, ci.hi, ci.lo) + ], by = "primerid" ) # logFC coefficients fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] @@ -976,19 +1029,20 @@ findMastMarkers <- function(gobject, #' #' findMastMarkers_one_vs_all(gobject = g, cluster_column = "leiden_clus") #' @export -findMastMarkers_one_vs_all <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - adjust_columns = NULL, - pval = 0.001, - logFC = 1, - min_feats = 10, - min_genes = NULL, - verbose = TRUE, - ...) { +findMastMarkers_one_vs_all <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + adjust_columns = NULL, + pval = 0.001, + logFC = 1, + min_feats = 10, + min_genes = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -1011,11 +1065,12 @@ findMastMarkers_one_vs_all <- function(gobject, package_check(pkg_name = "MAST", repository = "Bioc") # print message with information # - if (verbose) + if (verbose) { message("using 'MAST' to detect marker feats. If used in published research, please cite: McDavid A, Finak G, Yajima M (2020). MAST: Model-based Analysis of Single Cell Transcriptomics. R package version 1.14.0, https://github.com/RGLab/MAST/.") + } ## cluster column @@ -1087,7 +1142,8 @@ findMastMarkers_one_vs_all <- function(gobject, result_dt[, ranking := seq_len(.N), by = "cluster"] filtered_result_dt <- result_dt[ - ranking <= min_feats | (fdr < pval & coef > logFC)] + ranking <= min_feats | (fdr < pval & coef > logFC) + ] return(filtered_result_dt) } @@ -1134,25 +1190,26 @@ findMastMarkers_one_vs_all <- function(gobject, #' #' findMarkers(g, cluster_column = "leiden_clus") #' @export -findMarkers <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column = NULL, - method = c("scran", "gini", "mast"), - subset_clusters = NULL, - group_1 = NULL, - group_2 = NULL, - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - min_feats = 4, - min_genes = NULL, - group_1_name = NULL, - group_2_name = NULL, - adjust_columns = NULL, - ...) { +findMarkers <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column = NULL, + method = c("scran", "gini", "mast"), + subset_clusters = NULL, + group_1 = NULL, + group_2 = NULL, + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + min_feats = 4, + min_genes = NULL, + group_1_name = NULL, + group_2_name = NULL, + adjust_columns = NULL, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -1256,27 +1313,28 @@ findMarkers <- function(gobject, #' #' findMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findMarkers_one_vs_all <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - method = c("scran", "gini", "mast"), - # scran & mast - pval = 0.01, - logFC = 0.5, - min_feats = 10, - min_genes = NULL, - # gini - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - # mast specific - adjust_columns = NULL, - verbose = TRUE, - ...) { +findMarkers_one_vs_all <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + method = c("scran", "gini", "mast"), + # scran & mast + pval = 0.01, + logFC = 0.5, + min_feats = 10, + min_genes = NULL, + # gini + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + # mast specific + adjust_columns = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index e384b7f1b..0e4bcbf70 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -19,13 +19,14 @@ #' @param seed_number seed number to use #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_factominer <- function(x, - ncp = 100, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - ...) { +.run_pca_factominer <- function( + x, + ncp = 100, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + ...) { # verify if optional package is installed package_check(pkg_name = "FactoMineR", repository = "CRAN") @@ -47,7 +48,8 @@ } pca_res <- FactoMineR::PCA( - X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ...) + X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ... + ) # exit seed if (isTRUE(set_seed)) { @@ -60,16 +62,19 @@ # PC loading loadings <- pca_res$ind$coord rownames(loadings) <- rownames(x) - colnames(loadings) <- paste0("Dim.", 1:ncol(loadings)) + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates - coords <- sweep(pca_res$var$coord, - 2, sqrt(eigenvalues[1:ncp]), FUN = "/") + coords <- sweep(pca_res$var$coord, + 2, sqrt(eigenvalues[seq_len(ncp)]), + FUN = "/" + ) rownames(coords) <- colnames(x) - colnames(coords) <- paste0("Dim.", 1:ncol(coords)) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } else { if (ncp > ncol(x)) { warning("ncp > ncol(x), will be set to ncol(x)") @@ -82,7 +87,8 @@ } pca_res <- FactoMineR::PCA( - X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ...) + X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ... + ) # exit seed if (isTRUE(set_seed)) { @@ -94,21 +100,26 @@ # PC loading loadings <- sweep( - pca_res$var$coord, 2, sqrt(eigenvalues[1:ncp]), FUN = "/") + pca_res$var$coord, 2, sqrt(eigenvalues[seq_len(ncp)]), + FUN = "/" + ) rownames(loadings) <- colnames(x) - colnames(loadings) <- paste0("Dim.", 1:ncol(loadings)) + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates coords <- pca_res$ind$coord rownames(coords) <- rownames(x) - colnames(coords) <- paste0("Dim.", 1:ncol(coords)) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } - vmsg(.is_debug = TRUE, - "finished .run_pca_factominer, method == factominer") + vmsg( + .is_debug = TRUE, + "finished .run_pca_factominer, method == factominer" + ) return(result) } @@ -128,22 +139,23 @@ #' @param BPPARAM BiocParallelParam object #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_biocsingular <- function(x, - ncp = 100, - center = TRUE, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - BSPARAM = c("irlba", "exact", "random"), - BPPARAM = BiocParallel::SerialParam(), - ...) { +.run_pca_biocsingular <- function( + x, + ncp = 100, + center = TRUE, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + BSPARAM = c("irlba", "exact", "random"), + BPPARAM = BiocParallel::SerialParam(), + ...) { BSPARAM <- match.arg(BSPARAM, choices = c("irlba", "exact", "random")) min_ncp <- min(dim(x)) if (ncp >= min_ncp) { - warning("ncp >= minimum dimension of x, will be set to + warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1") ncp <- min_ncp - 1 } @@ -189,13 +201,14 @@ # PC loading loadings <- pca_res$x rownames(loadings) <- rownames(x) - colnames(loadings) <- paste0("Dim.", 1:ncol(loadings)) + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates coords <- pca_res$rotation rownames(coords) <- colnames(x) - colnames(coords) <- paste0("Dim.", 1:ncol(coords)) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } else { if (BSPARAM == "irlba") { pca_res <- BiocSingular::runPCA( @@ -228,13 +241,14 @@ # PC loading loadings <- pca_res$rotation rownames(loadings) <- colnames(x) - colnames(loadings) <- paste0("Dim.", 1:ncol(loadings)) + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates coords <- pca_res$x rownames(coords) <- rownames(x) - colnames(coords) <- paste0("Dim.", 1:ncol(coords)) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } # exit seed @@ -263,12 +277,13 @@ #' @param verbose verbosity #' @keywords internal #' @returns subsetted matrix based on selected features -.create_feats_to_use_matrix <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - sel_matrix, - feats_to_use, - verbose = FALSE) { +.create_feats_to_use_matrix <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + sel_matrix, + feats_to_use, + verbose = FALSE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -291,11 +306,12 @@ if (feats_to_use %in% colnames(feat_metadata)) { vmsg( .v = verbose, str_double_quote(feats_to_use), - "column was found in the feats metadata information and will be + "column was found in the feats metadata information and will be used to select highly variable features" ) feats_to_use <- feat_metadata[ - get(feats_to_use) == "yes"][["feat_ID"]] + get(feats_to_use) == "yes" + ][["feat_ID"]] sel_matrix <- sel_matrix[rownames(sel_matrix) %in% feats_to_use, ] } else { vmsg( @@ -305,13 +321,17 @@ ) } } else { - vmsg(.v = verbose, - "a custom vector of genes will be used to subset the matrix") + vmsg( + .v = verbose, + "a custom vector of genes will be used to subset the matrix" + ) sel_matrix <- sel_matrix[rownames(sel_matrix) %in% feats_to_use, ] } - vmsg(.v = verbose, .is_debug = TRUE, - "class of selected matrix: ", class(sel_matrix)) + vmsg( + .v = verbose, .is_debug = TRUE, + "class of selected matrix: ", class(sel_matrix) + ) return(sel_matrix) } @@ -349,7 +369,7 @@ #' \item feats_to_use = NULL: will use all features from the selected matrix #' \item feats_to_use = : can be used to select a column name of #' highly variable features, created by (see \code{\link{calculateHVF}}) -#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually #' provided features #' } #' By default the number of principle components that we calculate is 100, which @@ -357,27 +377,28 @@ #' will calculate all the principle components. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runPCA(g) #' @export -runPCA <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - name = NULL, - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba", "exact", "random", "factominer"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCA <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + name = NULL, + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba", "exact", "random", "factominer"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -400,8 +421,9 @@ runPCA <- function(gobject, # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, feat_type = feat_type, @@ -463,7 +485,7 @@ runPCA <- function(gobject, ... ) } else { - stop("only PCA methods from the BiocSingular and factominer + stop("only PCA methods from the BiocSingular and factominer package have been implemented") } } else { @@ -488,7 +510,7 @@ runPCA <- function(gobject, set_seed = set_seed, seed_number = seed_number, ... ) } else { - stop("only PCA methods from the irlba and factominer package have + stop("only PCA methods from the irlba and factominer package have been implemented") } } @@ -497,18 +519,6 @@ runPCA <- function(gobject, if (isTRUE(return_gobject)) { - pca_names <- list_dim_reductions_names( - gobject = gobject, - data_type = reduction, - spat_unit = spat_unit, - feat_type = feat_type, - dim_type = "pca" - ) - - if (name %in% pca_names) { - cat(name, " has already been used, will be overwritten") - } - if (reduction == "cells") { my_row_names <- colnames(expr_values) } else { @@ -531,7 +541,9 @@ runPCA <- function(gobject, ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) + gobject <- set_dimReduction( + gobject = gobject, dimObject = dimObject, verbose = verbose + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -570,23 +582,24 @@ runPCA <- function(gobject, #' @param verbose verbosity level #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_biocsingular_irlba_projection <- function(x, - ncp = 100, - center = TRUE, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - BPPARAM = BiocParallel::SerialParam(), - random_subset = 500, - verbose = TRUE, - ...) { +.run_pca_biocsingular_irlba_projection <- function( + x, + ncp = 100, + center = TRUE, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + BPPARAM = BiocParallel::SerialParam(), + random_subset = 500, + verbose = TRUE, + ...) { x <- scale(x, center = center, scale = scale) min_ncp <- min(dim(x)) if (ncp >= min_ncp) { - warning("ncp >= minimum dimension of x, will be set to minimum + warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1") ncp <- min_ncp - 1 } @@ -607,7 +620,7 @@ runPCA <- function(gobject, cell_ID_order <- rownames(x) # create random selection - random_selection <- sort(sample(1:nrow(x), random_subset)) + random_selection <- sort(sample(seq_len(nrow(x)), random_subset)) subsample_matrix <- x[random_selection, ] @@ -647,21 +660,22 @@ runPCA <- function(gobject, # PC loading loadings <- coords rownames(loadings) <- rownames(x) - colnames(loadings) <- paste0("Dim.", 1:ncol(loadings)) + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates coords <- pca_res$rotation rownames(coords) <- colnames(x) - colnames(coords) <- paste0("Dim.", 1:ncol(coords)) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } else { # store cell ID order information cell_ID_order <- rownames(x) # create random selection - random_selection <- sort(sample(1:nrow(x), random_subset)) + random_selection <- sort(sample(seq_len(nrow(x)), random_subset)) subsample_matrix <- x[random_selection, ] if (verbose) message("pca random subset: start") @@ -699,13 +713,14 @@ runPCA <- function(gobject, # PC loading loadings <- pca_res$rotation rownames(loadings) <- colnames(x) - colnames(loadings) <- paste0("Dim.", 1:ncol(loadings)) + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates - colnames(coords) <- paste0("Dim.", 1:ncol(coords)) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } return(result) @@ -721,7 +736,7 @@ runPCA <- function(gobject, #' @title runPCAprojection #' @name runPCAprojection -#' @description runs a Principal Component Analysis on a random +#' @description runs a Principal Component Analysis on a random #' subset + projection #' @param gobject giotto object #' @param spat_unit spatial unit @@ -743,7 +758,7 @@ runPCA <- function(gobject, #' @param verbose verbosity of the function #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension recuction -#' @details See \code{\link[BiocSingular]{runPCA}} and +#' @details See \code{\link[BiocSingular]{runPCA}} and #' \code{\link[FactoMineR]{PCA}} for more information about other parameters. #' This PCA implementation is similar to \code{\link{runPCA}}, except that it #' performs PCA on a subset of the cells or features, and predict on the others. @@ -752,33 +767,34 @@ runPCA <- function(gobject, #' \item feats_to_use = NULL: will use all features from the selected matrix #' \item feats_to_use = : can be used to select a column name of #' highly variable features, created by (see \code{\link{calculateHVF}}) -#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually #' provided features #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runPCAprojection(g) #' @export -runPCAprojection <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - random_subset = 500, - name = "pca.projection", - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCAprojection <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + name = "pca.projection", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -801,8 +817,9 @@ runPCAprojection <- function(gobject, # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, feat_type = feat_type, @@ -946,7 +963,7 @@ runPCAprojection <- function(gobject, #' @title runPCAprojectionBatch #' @name runPCAprojectionBatch -#' @description runs a Principal Component Analysis on multiple random +#' @description runs a Principal Component Analysis on multiple random #' batches + projection #' @param gobject giotto object #' @param spat_unit spatial unit @@ -969,46 +986,49 @@ runPCAprojection <- function(gobject, #' @param verbose verbosity of the function #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension reduction -#' @details See \code{\link[BiocSingular]{runPCA}} and +#' @details See \code{\link[BiocSingular]{runPCA}} and #' \code{\link[FactoMineR]{PCA}} for more information about other parameters. -#' This PCA implementation is similar to \code{\link{runPCA}} and +#' This PCA implementation is similar to \code{\link{runPCA}} and #' \code{\link{runPCAprojection}}, -#' except that it performs PCA on multiple subsets (batches) of the cells or +#' except that it performs PCA on multiple subsets (batches) of the cells or #' features, -#' and predict on the others. This can significantly increase speed without +#' and predict on the others. This can significantly increase speed without #' sacrificing accuracy too much. #' \itemize{ #' \item feats_to_use = NULL: will use all features from the selected matrix #' \item feats_to_use = : can be used to select a column name of #' highly variable features, created by (see \code{\link{calculateHVF}}) -#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually #' provided features #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' runPCAprojectionBatch(g) +#' +#' # set feats_to_use to NULL since there are not many hvfs +#' # (only 48 in this mini dataset) +#' runPCAprojectionBatch(g, feats_to_use = NULL) #' @export -runPCAprojectionBatch <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - random_subset = 500, - batch_number = 5, - name = "pca.projection.batch", - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCAprojectionBatch <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + batch_number = 5, + name = "pca.projection.batch", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1031,8 +1051,9 @@ runPCAprojectionBatch <- function(gobject, # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, feat_type = feat_type, @@ -1071,7 +1092,6 @@ runPCAprojectionBatch <- function(gobject, } - ## subset matrix if (!is.null(feats_to_use)) { expr_values <- .create_feats_to_use_matrix( @@ -1085,6 +1105,15 @@ runPCAprojectionBatch <- function(gobject, } + if (ncp >= nrow(expr_values)) { + ncp <- nrow(expr_values) - 1L + warning(wrap_txt( + "ncp >= number of available features + ncp will be set to minimum of n features - 1" + )) + } + + # do PCA dimension reduction reduction <- match.arg(reduction, c("cells", "feats")) @@ -1094,8 +1123,8 @@ runPCAprojectionBatch <- function(gobject, if (reduction == "cells") { pca_batch_results <- list() - for (batch in 1:batch_number) { - if (verbose) wrap_msg("start batch ", batch) + for (batch in seq_len(batch_number)) { + vmsg(.v = verbose, "start batch ", batch) if (isTRUE(set_seed)) { seed_batch <- seed_number + batch @@ -1119,15 +1148,16 @@ runPCAprojectionBatch <- function(gobject, ... ) - # adjust the sign of the coordinates and loadings vector relative + # adjust the sign of the coordinates and loadings vector relative # to first batch this is necessary for the next averaging step if (batch == 1) { pca_batch_results[[batch]] <- pca_object } else { - for (dimension in 1:ncol(pca_object[["coords"]])) { + for (dimension in seq_len(ncol(pca_object[["coords"]]))) { sum_evaluation <- sum(sign(pca_batch_results[[1]][[ - "coords"]][1:20, dimension]) * - sign(pca_object[["coords"]][1:20, dimension])) + "coords" + ]][seq_len(20), dimension]) * + sign(pca_object[["coords"]][seq_len(20), dimension])) if (sum_evaluation < 0) { pca_object$coords[, dimension] <- -1 * pca_object$coords[, dimension] pca_object$loadings[, dimension] <- -1 * pca_object$loadings[, dimension] @@ -1137,7 +1167,7 @@ runPCAprojectionBatch <- function(gobject, } } - if (verbose) message("start averaging pca results of batches") + vmsg(.v = verbose, "start averaging pca results of batches") # calculate average eigenvalues, coordinates and loadings # TODO: test out DT approach, might be faster and more efficient for @@ -1145,7 +1175,9 @@ runPCAprojectionBatch <- function(gobject, # eigenvalues eigenvalues_list <- lapply( - pca_batch_results, FUN = function(x) x$eigenvalues) + pca_batch_results, + FUN = function(x) x$eigenvalues + ) eigenvalues_matrix <- do.call("cbind", eigenvalues_list) eigenvalues_mean <- rowMeans_flex(eigenvalues_matrix) @@ -1153,9 +1185,10 @@ runPCAprojectionBatch <- function(gobject, coords_list <- lapply(pca_batch_results, FUN = function(x) x$coords) coords_vector <- do.call("c", coords_list) coords_array <- array( - data = coords_vector, - dim = c(ncol(expr_values), ncp, length(pca_batch_results))) - coords_all <- apply(coords_array, MARGIN = c(1:2), function(arr) { + data = coords_vector, + dim = c(ncol(expr_values), ncp, length(pca_batch_results)) + ) + coords_all <- apply(coords_array, MARGIN = seq_len(2), function(arr) { mean(arr, na.rm = TRUE) }) rownames(coords_all) <- rownames(pca_batch_results[[1]][["coords"]]) @@ -1165,22 +1198,27 @@ runPCAprojectionBatch <- function(gobject, loadings_list <- lapply(pca_batch_results, FUN = function(x) x$loadings) loadings_vector <- do.call("c", loadings_list) loadings_array <- array( - data = loadings_vector, - dim = c(nrow(expr_values), ncp, length(pca_batch_results))) - loadings_all <- apply(loadings_array, MARGIN = c(1:2), function(arr) { - mean(arr, na.rm = TRUE) - }) + data = loadings_vector, + dim = c(nrow(expr_values), ncp, length(pca_batch_results)) + ) + loadings_all <- apply( + loadings_array, + MARGIN = seq_len(2), function(arr) { + mean(arr, na.rm = TRUE) + } + ) rownames(loadings_all) <- rownames(pca_batch_results[[1]][["loadings"]]) colnames(loadings_all) <- colnames(pca_batch_results[[1]][["loadings"]]) pca_object <- list( - eigenvalues = eigenvalues_mean, - loadings = loadings_all, coords = coords_all) + eigenvalues = eigenvalues_mean, + loadings = loadings_all, coords = coords_all + ) } else { pca_batch_results <- list() - for (batch in 1:batch_number) { + for (batch in seq_len(batch_number)) { if (verbose) wrap_msg("start batch ", batch) if (isTRUE(set_seed)) { @@ -1206,15 +1244,16 @@ runPCAprojectionBatch <- function(gobject, ) - # adjust the sign of the coordinates and loadings vector relative + # adjust the sign of the coordinates and loadings vector relative # to first batch this is necessary for the next averaging step if (batch == 1) { pca_batch_results[[batch]] <- pca_object } else { - for (dimension in 1:ncol(pca_object[["coords"]])) { + for (dimension in seq_len(ncol(pca_object[["coords"]]))) { sum_evaluation <- sum(sign(pca_batch_results[[1]][[ - "coords"]][1:20, dimension]) * - sign(pca_object[["coords"]][1:20, dimension])) + "coords" + ]][seq_len(20), dimension]) * + sign(pca_object[["coords"]][seq_len(20), dimension])) if (sum_evaluation < 0) { pca_object$coords[, dimension] <- -1 * pca_object$coords[, dimension] pca_object$loadings[, dimension] <- -1 * pca_object$loadings[, dimension] @@ -1224,7 +1263,7 @@ runPCAprojectionBatch <- function(gobject, } } - if (verbose) wrap_msg("start averaging pca results of batches") + vmsg(.v = verbose, "start averaging pca results of batches") # calculate average eigenvalues, coordinates and loadings # TODO: test out DT approach, might be faster and more efficient for @@ -1232,7 +1271,9 @@ runPCAprojectionBatch <- function(gobject, # eigenvalues eigenvalues_list <- lapply( - pca_batch_results, FUN = function(x) x$eigenvalues) + pca_batch_results, + FUN = function(x) x$eigenvalues + ) eigenvalues_matrix <- do.call("cbind", eigenvalues_list) eigenvalues_mean <- rowMeans_flex(eigenvalues_matrix) @@ -1240,9 +1281,10 @@ runPCAprojectionBatch <- function(gobject, coords_list <- lapply(pca_batch_results, FUN = function(x) x$coords) coords_vector <- do.call("c", coords_list) coords_array <- array( - data = coords_vector, - dim = c(ncol(expr_values), ncp, length(pca_batch_results))) - coords_all <- apply(coords_array, MARGIN = c(1:2), function(arr) { + data = coords_vector, + dim = c(ncol(expr_values), ncp, length(pca_batch_results)) + ) + coords_all <- apply(coords_array, MARGIN = seq_len(2), function(arr) { mean(arr, na.rm = TRUE) }) rownames(coords_all) <- rownames(pca_batch_results[[1]][["coords"]]) @@ -1252,18 +1294,23 @@ runPCAprojectionBatch <- function(gobject, loadings_list <- lapply(pca_batch_results, FUN = function(x) x$loadings) loadings_vector <- do.call("c", loadings_list) loadings_array <- array( - data = loadings_vector, - dim = c(nrow(expr_values), ncp, length(pca_batch_results))) - loadings_all <- apply(loadings_array, MARGIN = c(1:2), function(arr) { - mean(arr, na.rm = TRUE) - }) + data = loadings_vector, + dim = c(nrow(expr_values), ncp, length(pca_batch_results)) + ) + loadings_all <- apply( + loadings_array, + MARGIN = seq_len(2), function(arr) { + mean(arr, na.rm = TRUE) + } + ) rownames(loadings_all) <- rownames(pca_batch_results[[1]][["loadings"]]) colnames(loadings_all) <- colnames(pca_batch_results[[1]][["loadings"]]) pca_object <- list( - eigenvalues = eigenvalues_mean, - loadings = loadings_all, coords = coords_all) + eigenvalues = eigenvalues_mean, + loadings = loadings_all, coords = coords_all + ) } @@ -1324,7 +1371,7 @@ runPCAprojectionBatch <- function(gobject, #' @title screePlot #' @name screePlot -#' @description identify significant principal components (PCs) using an +#' @description identify significant principal components (PCs) using an #' screeplot (a.k.a. elbowplot) #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -1342,35 +1389,36 @@ runPCAprojectionBatch <- function(gobject, #' @returns ggplot object for scree method #' @details #' Screeplot works by plotting the explained variance of each -#' individual PC in a barplot allowing you to identify which PC provides a +#' individual PC in a barplot allowing you to identify which PC provides a #' significant contribution (a.k.a 'elbow method'). \cr -#' Screeplot will use an available pca object, based on the parameter 'name', +#' Screeplot will use an available pca object, based on the parameter 'name', #' or it will create it if it's not available (see \code{\link{runPCA}}) #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' screePlot(g) #' @export -screePlot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - method = c("irlba", "exact", "random", "factominer"), - rev = FALSE, - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, - ncp = 100, - ylim = c(0, 20), - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "screePlot", - ...) { +screePlot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + method = c("irlba", "exact", "random", "factominer"), + rev = FALSE, + feats_to_use = NULL, + center = FALSE, + scale_unit = FALSE, + ncp = 100, + ylim = c(0, 20), + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "screePlot", + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1405,22 +1453,30 @@ screePlot <- function(gobject, # if pca already exists plot if (!is.null(pca_obj)) { - if (isTRUE(verbose)) - wrap_msg("PCA with name: ", name, - " already exists and will be used for the screeplot") + if (isTRUE(verbose)) { + wrap_msg( + "PCA with name: ", name, + " already exists and will be used for the screeplot" + ) + } screeplot <- create_screeplot( - eigs = slot(pca_obj, "misc")$eigenvalues, ncp = ncp, ylim = ylim) + eigs = slot(pca_obj, "misc")$eigenvalues, ncp = ncp, ylim = ylim + ) } else { # if pca doesn't exists, then create pca and then plot - if (isTRUE(verbose)) - wrap_msg("PCA with name: ", name, - " does NOT exist, PCA will be done first") + if (isTRUE(verbose)) { + wrap_msg( + "PCA with name: ", name, + " does NOT exist, PCA will be done first" + ) + } # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1464,10 +1520,11 @@ screePlot <- function(gobject, ) } else if (method == "factominer") { pca_object <- .run_pca_factominer( - x = t_flex(expr_values), - scale = scale_unit, ncp = ncp, rev = rev, ...) + x = t_flex(expr_values), + scale = scale_unit, ncp = ncp, rev = rev, ... + ) } else { - stop("only PCA methods from the irlba and factominer package + stop("only PCA methods from the irlba and factominer package have been implemented") } @@ -1487,8 +1544,9 @@ screePlot <- function(gobject, ) screeplot <- create_screeplot( - eigs = slot(dimObject, "misc")$eigenvalues, - ncp = ncp, ylim = ylim) + eigs = slot(dimObject, "misc")$eigenvalues, + ncp = ncp, ylim = ylim + ) } } @@ -1548,20 +1606,24 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::geom_bar( - data = screeDT[1:ncp], - ggplot2::aes(x = PC, y = var_expl), stat = "identity") + data = screeDT[seq_len(ncp)], + ggplot2::aes(x = PC, y = var_expl), stat = "identity" + ) pl <- pl + ggplot2::coord_cartesian(ylim = ylim) pl <- pl + ggplot2::theme( - axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::labs(x = "", y = "% of variance explained per PC") cpl <- ggplot2::ggplot() cpl <- cpl + ggplot2::theme_bw() cpl <- cpl + ggplot2::geom_bar( - data = screeDT[1:ncp], - ggplot2::aes(x = PC, y = var_expl_cum), stat = "identity") + data = screeDT[seq_len(ncp)], + ggplot2::aes(x = PC, y = var_expl_cum), stat = "identity" + ) cpl <- cpl + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 45, hjust = 1, vjust = 1)) + angle = 45, hjust = 1, vjust = 1 + )) cpl <- cpl + ggplot2::labs(x = "", y = "cumulative % of variance explained") savelist <- list(pl, cpl) @@ -1602,32 +1664,33 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { #' @param verbose show progress of jackstraw method #' @returns ggplot object for jackstraw method #' @details -#' The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} -#' function. By systematically permuting genes it identifies robust, and thus +#' The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} +#' function. By systematically permuting genes it identifies robust, and thus #' significant, PCs. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' jackstrawPlot(gobject = g) #' @export -jackstrawPlot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, - ncp = 20, - ylim = c(0, 1), - iter = 10, - threshold = 0.01, - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "jackstrawPlot") { +jackstrawPlot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + feats_to_use = NULL, + center = FALSE, + scale_unit = FALSE, + ncp = 20, + ylim = c(0, 1), + iter = 10, + threshold = 0.01, + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "jackstrawPlot") { package_check(pkg_name = "jackstraw", repository = "CRAN") # Set feat_type and spat_unit @@ -1642,20 +1705,22 @@ jackstrawPlot <- function(gobject, ) # print message with information # - if (verbose) - message("using 'jackstraw' to identify significant PCs If used in - published research, please cite: + if (verbose) { + message("using 'jackstraw' to identify significant PCs If used in + published research, please cite: Neo Christopher Chung and John D. Storey (2014). - 'Statistical significance of variables driving systematic variation in + 'Statistical significance of variables driving systematic variation in high-dimensional data. Bioinformatics") + } # select direction of reduction reduction <- match.arg(reduction, c("cells", "feats")) # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1681,22 +1746,29 @@ jackstrawPlot <- function(gobject, if (reduction == "cells") { if (scale_unit == TRUE | center == TRUE) { expr_values <- t_flex(scale( - t_flex(expr_values), center = center, scale = scale_unit)) + t_flex(expr_values), + center = center, scale = scale_unit + )) } jtest <- jackstraw::permutationPA( - dat = as.matrix(expr_values), - B = iter, threshold = threshold, verbose = verbose) + dat = as.matrix(expr_values), + B = iter, threshold = threshold, verbose = verbose + ) ## results ## nr_sign_components <- jtest$r - if (verbose) - cat("number of estimated significant components: ", - nr_sign_components) + if (verbose) { + cat( + "number of estimated significant components: ", + nr_sign_components + ) + } final_results <- jtest$p jackplot <- create_jackstrawplot( - jackstraw_data = final_results, - ncp = ncp, ylim = ylim, threshold = threshold) + jackstraw_data = final_results, + ncp = ncp, ylim = ylim, threshold = threshold + ) } return(plot_output_handler( @@ -1723,10 +1795,11 @@ jackstrawPlot <- function(gobject, #' @keywords internal #' @returns ggplot #' @export -create_jackstrawplot <- function(jackstraw_data, - ncp = 20, - ylim = c(0, 1), - threshold = 0.01) { +create_jackstrawplot <- function( + jackstraw_data, + ncp = 20, + ylim = c(0, 1), + threshold = 0.01) { checkmate::assert_numeric(ncp, len = 1L) checkmate::assert_numeric(ylim, len = 2L) checkmate::assert_numeric(threshold, len = 1L) @@ -1744,12 +1817,15 @@ create_jackstrawplot <- function(jackstraw_data, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::geom_point( - data = testDT[1:ncp], - ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21) + data = testDT[seq_len(ncp)], + ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21 + ) pl <- pl + ggplot2::scale_fill_manual( - values = c("n.s." = "lightgrey", "sign" = "darkorange")) + values = c("n.s." = "lightgrey", "sign" = "darkorange") + ) pl <- pl + ggplot2::theme( - axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::coord_cartesian(ylim = ylim) pl <- pl + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank()) pl <- pl + ggplot2::labs(x = "", y = "p-value per PC") @@ -1785,44 +1861,45 @@ create_jackstrawplot <- function(jackstraw_data, #' @param jack_ylim y-axis limits on jackstraw plot #' @param verbose be verbose #' @returns ggplot object for scree method and maxtrix of p-values for jackstraw -#' @details Two different methods can be used to assess the number of relevant +#' @details Two different methods can be used to assess the number of relevant #' or significant prinicipal components (PC's). \cr #' 1. Screeplot works by plotting the explained variance of each -#' individual PC in a barplot allowing you to identify which PC provides a +#' individual PC in a barplot allowing you to identify which PC provides a #' significant #' contribution (a.k.a. 'elbow method'). \cr -#' 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} -#' function. By systematically permuting genes it identifies robust, and thus +#' 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} +#' function. By systematically permuting genes it identifies robust, and thus #' significant, PCs. #' \cr #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' signPCA(g) #' @export -signPCA <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - name = NULL, - method = c("screeplot", "jackstraw"), - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - pca_method = c("irlba", "factominer"), - rev = FALSE, - feats_to_use = NULL, - center = TRUE, - scale_unit = TRUE, - ncp = 50, - scree_ylim = c(0, 10), - jack_iter = 10, - jack_threshold = 0.01, - jack_ylim = c(0, 1), - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "signPCA") { +signPCA <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + name = NULL, + method = c("screeplot", "jackstraw"), + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + pca_method = c("irlba", "factominer"), + rev = FALSE, + feats_to_use = NULL, + center = TRUE, + scale_unit = TRUE, + ncp = 50, + scree_ylim = c(0, 10), + jack_iter = 10, + jack_threshold = 0.01, + jack_ylim = c(0, 1), + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "signPCA") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1942,7 +2019,7 @@ signPCA <- function(gobject, show_plot = show_plot, default_save_name = default_save_name, save_param = save_param, - else_return = jackplot + else_return = jackplot # TODO potentially return all results instead )) } @@ -1985,7 +2062,7 @@ signPCA <- function(gobject, #' @param seed_number seed number to use #' @param verbose verbosity of function #' @param toplevel_params parameters to extract -#' @inheritDotParams uwot::umap -X -n_neighbors -n_components -n_epochs +#' @inheritDotParams uwot::umap -X -n_neighbors -n_components -n_epochs #' -min_dist -n_threads -spread -seed -scale -pca -pca_center -pca_method #' @returns giotto object with updated UMAP dimension reduction #' @details See \code{\link[uwot]{umap}} for more information about these and @@ -2002,31 +2079,32 @@ signPCA <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runUMAP(g) #' @export -runUMAP <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - n_neighbors = 40, - n_components = 2, - n_epochs = 400, - min_dist = 0.01, - n_threads = NA, - spread = 5, - set_seed = TRUE, - seed_number = 1234L, - verbose = TRUE, - toplevel_params = 2L, - ...) { +runUMAP <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234L, + verbose = TRUE, + toplevel_params = 2L, + ...) { # NSE vars cell_ID <- NULL @@ -2097,7 +2175,8 @@ runUMAP <- function(gobject, "Ignoring dimensions_to_use that are outside the range." )) dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq(ncol(matrix_to_use))] + dimensions_to_use %in% seq(ncol(matrix_to_use)) + ] } matrix_to_use <- matrix_to_use[, dimensions_to_use] @@ -2105,8 +2184,9 @@ runUMAP <- function(gobject, ## using original matrix ## # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, @@ -2198,8 +2278,10 @@ runUMAP <- function(gobject, ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, - dimObject = dimObject) + gobject <- set_dimReduction( + gobject = gobject, + dimObject = dimObject + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -2252,7 +2334,7 @@ runUMAP <- function(gobject, #' @param toplevel_params parameters to extract #' @param ... additional UMAP parameters #' @returns giotto object with updated UMAP dimension reduction -#' @details See \code{\link[uwot]{umap}} for more information about these and +#' @details See \code{\link[uwot]{umap}} for more information about these and #' other parameters. #' \itemize{ #' \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') @@ -2263,32 +2345,33 @@ runUMAP <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runUMAPprojection(g) #' @export -runUMAPprojection <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - random_subset = 500, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - n_neighbors = 40, - n_components = 2, - n_epochs = 400, - min_dist = 0.01, - n_threads = NA, - spread = 5, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - toplevel_params = 2, - ...) { +runUMAPprojection <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + random_subset = 500, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel_params = 2, + ...) { # NSE vars cell_ID <- NULL @@ -2397,7 +2480,9 @@ runUMAPprojection <- function(gobject, cell_ID_order <- rownames(matrix_to_use) # create random selection - random_selection <- sort(sample(1:nrow(matrix_to_use), random_subset)) + random_selection <- sort(sample( + seq_len(nrow(matrix_to_use)), random_subset + )) subsample_matrix <- matrix_to_use[random_selection, ] uwot_clus_subset <- uwot::umap( @@ -2423,7 +2508,8 @@ runUMAPprojection <- function(gobject, # combine subset and prediction coords_umap <- rbind(uwot_clus_subset$embedding, uwot_clus_pred) coords_umap <- coords_umap[ - match(cell_ID_order, rownames(coords_umap)), ] + match(cell_ID_order, rownames(coords_umap)), + ] coords_umap_DT <- data.table::as.data.table(coords_umap) coords_umap_DT[, cell_ID := rownames(coords_umap)] @@ -2514,7 +2600,7 @@ runUMAPprojection <- function(gobject, #' @param verbose verbosity of the function #' @param ... additional tSNE parameters #' @returns giotto object with updated tSNE dimension recuction -#' @details See \code{\link[Rtsne]{Rtsne}} for more information about these and +#' @details See \code{\link[Rtsne]{Rtsne}} for more information about these and #' other parameters. \cr #' \itemize{ #' \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') @@ -2525,28 +2611,29 @@ runUMAPprojection <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runtSNE(g) #' @export -runtSNE <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - dims = 2, - perplexity = 30, - theta = 0.5, - do_PCA_first = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runtSNE <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + dims = 2, + perplexity = 30, + theta = 0.5, + do_PCA_first = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2609,8 +2696,9 @@ runtSNE <- function(gobject, ## using original matrix ## # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2690,8 +2778,10 @@ runtSNE <- function(gobject, ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, - dimObject = dimObject) + gobject <- set_dimReduction( + gobject = gobject, + dimObject = dimObject + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## @@ -2737,37 +2827,38 @@ runtSNE <- function(gobject, #' @param verbose be verbose #' @param ... additional \code{\link[harmony]{HarmonyMatrix}} parameters #' @returns giotto object with updated Harmony dimension reduction -#' @details This is a simple wrapper for the HarmonyMatrix function in the +#' @details This is a simple wrapper for the HarmonyMatrix function in the #' Harmony package \doi{10.1038/s41592-019-0619-0}. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runGiottoHarmony(g, vars_use = "leiden_clus") #' @export -runGiottoHarmony <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - vars_use = "list_ID", - do_pca = FALSE, - expression_values = c("normalized", "scaled", "custom"), - reduction = "cells", - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - set_seed = TRUE, - seed_number = 1234, - toplevel_params = 2, - return_gobject = TRUE, - verbose = NULL, - ...) { +runGiottoHarmony <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + vars_use = "list_ID", + do_pca = FALSE, + expression_values = c("normalized", "scaled", "custom"), + reduction = "cells", + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + set_seed = TRUE, + seed_number = 1234, + toplevel_params = 2, + return_gobject = TRUE, + verbose = NULL, + ...) { # verify if optional package is installed package_check(pkg_name = "harmony", repository = "CRAN") # print message with information # - message("using 'Harmony' to integrate different datasets. If used in + message("using 'Harmony' to integrate different datasets. If used in published research, please cite:") wrap_msg("Korsunsky, I., Millard, N., Fan, J. et al. @@ -2841,8 +2932,9 @@ runGiottoHarmony <- function(gobject, ## using original matrix ## # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2889,7 +2981,7 @@ runGiottoHarmony <- function(gobject, ) - colnames(harmony_results) <- paste0("Dim.", 1:ncol(harmony_results)) + colnames(harmony_results) <- paste0("Dim.", seq_len(ncol(harmony_results))) rownames(harmony_results) <- rownames(matrix_to_use) harmdimObject <- create_dim_obj( @@ -2905,7 +2997,6 @@ runGiottoHarmony <- function(gobject, # return giotto object or harmony results if (isTRUE(return_gobject)) { - harmony_names <- list_dim_reductions_names( gobject = gobject, data_type = reduction, @@ -2915,13 +3006,17 @@ runGiottoHarmony <- function(gobject, ) if (name %in% harmony_names) { - cat(name, - " has already been used with harmony, will be overwritten") + cat( + name, + " has already been used with harmony, will be overwritten" + ) } ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, - dimObject = harmdimObject) + gobject <- set_dimReduction( + gobject = gobject, + dimObject = harmdimObject + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### diff --git a/R/feature_set_enrichment.R b/R/feature_set_enrichment.R index c30cb848a..c5bf382ea 100644 --- a/R/feature_set_enrichment.R +++ b/R/feature_set_enrichment.R @@ -4,28 +4,28 @@ #' @param dryrun do a dry run, default TRUE. #' @param path_to_GSEA path to GSEA command line executable, e.g. gsea-XXX.jar. #' See details (1.) for more information. -#' @param GSEA_dataset path to a Human/Mouse collection from GSEA, e.g. +#' @param GSEA_dataset path to a Human/Mouse collection from GSEA, e.g. #' Hallmarks C1. See details (2.) for more information. -#' @param GSEA_ranked_file path to .rnk file for GSEA. See details (3.) for +#' @param GSEA_ranked_file path to .rnk file for GSEA. See details (3.) for #' more information -#' @param output_folder path to which the GSEA results will be saved. Default +#' @param output_folder path to which the GSEA results will be saved. Default #' is current working directory. -#' @param name_analysis_folder default output subdirectory prefix to which +#' @param name_analysis_folder default output subdirectory prefix to which #' results are saved. -#' Will live within output_folder; equivalent of +#' Will live within output_folder; equivalent of #' "Analysis Name" in GSEA Application. -#' @param collapse only 'false' is supported. This will use your dataset as-is, +#' @param collapse only 'false' is supported. This will use your dataset as-is, #' in the original format. -#' @param mode option selected in Advanced Field "Collapsing Mode for +#' @param mode option selected in Advanced Field "Collapsing Mode for #' Probe Sets => 1 gene" #' @param norm normalization mode; only meandiv is supported. #' @param nperm number of permutations, default 1000 -#' @param scoring_scheme Default "weighted", equivalent of +#' @param scoring_scheme Default "weighted", equivalent of #' "enrichment statistic" in GSEA Application #' @param plot_top_x Default 20, number of enrichment plots to produce. -#' @param set_max default 500, equivalent to "max size; exclude larger sets" +#' @param set_max default 500, equivalent to "max size; exclude larger sets" #' in Basic Fields in GSEA Application -#' @param set_min default 15, equivalent to "min size; exclude smaller sets" +#' @param set_min default 15, equivalent to "min size; exclude smaller sets" #' in Basic Fields in GSEA Application #' @returns data.table #' @details @@ -33,11 +33,11 @@ #' 1. download and install the COMMAND line (all platforms) gsea-XXX.jar #' https://www.gsea-msigdb.org/gsea/downloads.jsp #' 1.1. download zip file -#' 1.2. unzip and move to known location +#' 1.2. unzip and move to known location #' (e.g. in path/to/your/applications/gsea/GSEA_4.3.2) #' #' 2. download the Human and Mouse collections -#' https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder +#' https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder #' https://www.gsea-msigdb.org/gsea/downloads.jsp (all downloaded) #' #' 3. create ranked gene lists @@ -50,37 +50,40 @@ #' please reference GSEA's documentation here: #' https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Syntax #' @export -doFeatureSetEnrichment <- function(dryrun = TRUE, - path_to_GSEA = NULL, - GSEA_dataset = NULL, - GSEA_ranked_file = NULL, - output_folder = NULL, - name_analysis_folder = "my_GSEA_analysis", - collapse = "false", - mode = c( - "Abs_max_of_probes", - "Max_probe", - "Median_of_probes", - "Mean_of_probes", - "Sum_of_probes" - ), - norm = "meandiv", - nperm = 1000, - scoring_scheme = "weighted", - plot_top_x = 20, - set_max = 500, - set_min = 15) { +doFeatureSetEnrichment <- function( + dryrun = TRUE, + path_to_GSEA = NULL, + GSEA_dataset = NULL, + GSEA_ranked_file = NULL, + output_folder = NULL, + name_analysis_folder = "my_GSEA_analysis", + collapse = "false", + mode = c( + "Abs_max_of_probes", + "Max_probe", + "Median_of_probes", + "Mean_of_probes", + "Sum_of_probes" + ), + norm = "meandiv", + nperm = 1000, + scoring_scheme = "weighted", + plot_top_x = 20, + set_max = 500, + set_min = 15) { # set don't run to false as a start dont_run <- FALSE # SYSTEM CHECK FOR JAVA java_not_installed <- as.logical(system("java -version")) - # returns 0 if java is installed (i.e., command runs successfully), + # returns 0 if java is installed (i.e., command runs successfully), # 1 otherwise - if (java_not_installed) - stop(wrap_txt("Java must be installed for doFeatureSetEnrichment() to + if (java_not_installed) { + stop(wrap_txt("Java must be installed for doFeatureSetEnrichment() to run. Please install Java: https://www.java.com/en/download/", - errWidth = TRUE)) + errWidth = TRUE + )) + } mode <- match.arg(mode, choices = c( @@ -91,26 +94,33 @@ doFeatureSetEnrichment <- function(dryrun = TRUE, "Sum_of_probes" )) - if (is.null(output_folder)) output_folder <- paste0( - getwd(), "/Feature_set_enrichment_results/") + if (is.null(output_folder)) { + output_folder <- paste0( + getwd(), "/Feature_set_enrichment_results/" + ) + } if (!dir.exists(output_folder)) { - wrap_msg(paste0("Directory does not yet exist. Creating directory at:", - output_folder)) + wrap_msg(paste0( + "Directory does not yet exist. Creating directory at:", + output_folder + )) dir.create(output_folder) } # check for path to GSEA tool - if (is.null(path_to_GSEA)) + if (is.null(path_to_GSEA)) { stop("Path to the GSEA directory needs to be provided") - if (!file.exists(path_to_GSEA)) + } + if (!file.exists(path_to_GSEA)) { stop("Path to the GSEA directory does not exist") + } path_to_GSEA <- paste0('"', path_to_GSEA, '"') # check for path to GSEA dataset .gmt if (is.null(GSEA_dataset)) { - warning("Path to a GSEA dataset needs to be provided, only dryrun will + warning("Path to a GSEA dataset needs to be provided, only dryrun will work for testing") dont_run <- TRUE GSEA_dataset <- "test.gmt" @@ -120,14 +130,15 @@ doFeatureSetEnrichment <- function(dryrun = TRUE, # check for GSRA ranked file (path or data.frame) if (is.null(GSEA_ranked_file)) { - warning("A ranked gene file needs to be provided, only dryrun will work + warning("A ranked gene file needs to be provided, only dryrun will work for testing") dont_run <- TRUE GSEA_ranked_file <- "my_ranked_file.rnk" } else if (inherits(GSEA_ranked_file, "character")) { message("The ranked list looks like a path to a file") - if (!file.exists(GSEA_ranked_file)) + if (!file.exists(GSEA_ranked_file)) { stop("Path to the ranked file does not exist") + } } else if (inherits(GSEA_ranked_file, "data.frame")) { message("The ranked list looks like a data.frame") @@ -155,10 +166,12 @@ doFeatureSetEnrichment <- function(dryrun = TRUE, if (my_os == "windows") { execution_path <- paste0( - path_to_GSEA, "/", "gsea-cli.bat", " ", operation) + path_to_GSEA, "/", "gsea-cli.bat", " ", operation + ) } else { execution_path <- paste0( - path_to_GSEA, "/", "gsea-cli.sh", " ", operation) + path_to_GSEA, "/", "gsea-cli.sh", " ", operation + ) } created_command <- sprintf( diff --git a/R/general_help.R b/R/general_help.R index 279a963e4..67e6e7c13 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -7,8 +7,9 @@ #' @description calculate gini coefficient #' @keywords internal #' @returns gini coefficient -mygini_fun <- function(x, - weights = rep(1, length(x))) { +mygini_fun <- function( + x, + weights = rep(1, length(x))) { # adapted from R package GiniWegNeg dataset <- cbind(x, weights) ord_x <- order(x) @@ -36,9 +37,10 @@ mygini_fun <- function(x, #' @description calculate gini coefficient on a minimum length vector #' @keywords internal #' @returns gini coefficient -extended_gini_fun <- function(x, - weights = rep(1, length = length(x)), - minimum_length = 16) { +extended_gini_fun <- function( + x, + weights = rep(1, length = length(x)), + minimum_length = 16) { if (length(x) < minimum_length) { difference <- minimum_length - length(x) min_value <- min(x) @@ -57,16 +59,19 @@ extended_gini_fun <- function(x, #' @description create binarized scores from a vector using kmeans #' @returns numeric #' @keywords internal -.kmeans_binarize <- function(x, - nstart = 3, - iter.max = 10, - seed = NULL) { +.kmeans_binarize <- function( + x, + nstart = 3, + iter.max = 10, + seed = NULL) { if (!is.null(seed)) { on.exit(random_seed(), add = TRUE) set.seed(seed) } sel_gene_km <- stats::kmeans( - x, centers = 2, nstart = nstart, iter.max = iter.max)$cluster + x, + centers = 2, nstart = nstart, iter.max = iter.max + )$cluster mean_1 <- mean(x[sel_gene_km == 1]) mean_2 <- mean(x[sel_gene_km == 2]) @@ -125,22 +130,25 @@ extended_gini_fun <- function(x, #' @title .kmeans_arma_subset_binarize #' @name .kmeans_arma_subset_binarize -#' @description create binarized scores from a subsetted vector using +#' @description create binarized scores from a subsetted vector using #' kmeans_arma #' @returns numeric #' @keywords internal -.kmeans_arma_subset_binarize <- function(x, - n_iter = 5, - extreme_nr = 20, - sample_nr = 200, - seed = NULL) { +.kmeans_arma_subset_binarize <- function( + x, + n_iter = 5, + extreme_nr = 20, + sample_nr = 200, + seed = NULL) { length_x <- length(x) vector_x <- sort(x) - first_set <- vector_x[1:extreme_nr] + first_set <- vector_x[seq_len(extreme_nr)] last_set <- vector_x[(length_x - (extreme_nr - 1)):length_x] random_set <- sample( - vector_x[(extreme_nr + 1):(length_x - extreme_nr)], size = sample_nr) + vector_x[(extreme_nr + 1):(length_x - extreme_nr)], + size = sample_nr + ) testset <- c(first_set, last_set, random_set) if (!is.null(seed)) { @@ -182,15 +190,14 @@ extended_gini_fun <- function(x, #' @description wrapper for different binarization functions #' @returns matrix #' @keywords internal -kmeans_binarize_wrapper <- function( - expr_values, - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - seed = NULL) { +kmeans_binarize_wrapper <- function(expr_values, + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + seed = NULL) { # expression values if (!is.null(subset_feats)) { expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] @@ -247,9 +254,10 @@ kmeans_binarize_wrapper <- function( #' @description wrapper for rank binarization function #' @returns matrix #' @keywords internal -rank_binarize_wrapper <- function(expr_values, - subset_feats = NULL, - percentage_rank = 30) { +rank_binarize_wrapper <- function( + expr_values, + subset_feats = NULL, + percentage_rank = 30) { # expression values if (!is.null(subset_feats)) { expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] @@ -257,7 +265,8 @@ rank_binarize_wrapper <- function(expr_values, max_rank <- (ncol(expr_values) / 100) * percentage_rank bin_matrix <- t_flex(apply( - X = expr_values, MARGIN = 1, FUN = .rank_binarize, max_rank = max_rank)) + X = expr_values, MARGIN = 1, FUN = .rank_binarize, max_rank = max_rank + )) return(bin_matrix) } @@ -270,15 +279,16 @@ rank_binarize_wrapper <- function(expr_values, #' @title convertEnsemblToGeneSymbol #' @name convertEnsemblToGeneSymbol -#' @description This function convert ensembl gene IDs from a matrix to +#' @description This function convert ensembl gene IDs from a matrix to #' official gene symbols #' @param matrix an expression matrix with ensembl gene IDs as rownames #' @param species species to use for gene symbol conversion #' @returns expression matrix with gene symbols as rownames #' @details This function requires that the biomaRt library is installed #' @export -convertEnsemblToGeneSymbol <- function(matrix, - species = c("mouse", "human")) { +convertEnsemblToGeneSymbol <- function( + matrix, + species = c("mouse", "human")) { # data.table: set global variable dupes <- mgi_symbol <- gene_symbol <- ensembl_gene_id <- hgnc_symbol <- NULL @@ -306,10 +316,14 @@ convertEnsemblToGeneSymbol <- function(matrix, ifelse(mgi_symbol == "", ensembl_gene_id, "temporary") ), by = mgi_symbol] gene_names_DT[, gene_symbol := ifelse( - mgi_symbol == "", ensembl_gene_id, gene_symbol)] + mgi_symbol == "", ensembl_gene_id, gene_symbol + )] gene_names_DT[, gene_symbol := ifelse( - gene_symbol == "temporary", - paste0(mgi_symbol, "--", 1:.N), gene_symbol), by = mgi_symbol] + gene_symbol == "temporary", + paste0(mgi_symbol, "--", seq_len(.N)), gene_symbol + ), + by = mgi_symbol + ] # filter matrix <- matrix[rownames(matrix) %in% gene_names_DT$ensembl_gene_id, ] @@ -345,10 +359,14 @@ convertEnsemblToGeneSymbol <- function(matrix, ifelse(hgnc_symbol == "", ensembl_gene_id, "temporary") ), by = hgnc_symbol] gene_names_DT[, gene_symbol := ifelse( - hgnc_symbol == "", ensembl_gene_id, gene_symbol)] + hgnc_symbol == "", ensembl_gene_id, gene_symbol + )] gene_names_DT[, gene_symbol := ifelse( - gene_symbol == "temporary", - paste0(hgnc_symbol, "--", 1:.N), gene_symbol), by = hgnc_symbol] + gene_symbol == "temporary", + paste0(hgnc_symbol, "--", seq_len(.N)), gene_symbol + ), + by = hgnc_symbol + ] # filter matrix <- matrix[rownames(matrix) %in% gene_names_DT$ensembl_gene_id, ] @@ -383,17 +401,16 @@ convertEnsemblToGeneSymbol <- function(matrix, #' @name gpoly_from_dfr_smoothed_wrapped #' @returns giottoPolygon #' @keywords internal -gpoly_from_dfr_smoothed_wrapped <- function( - segmdfr, - name = "cell", - calc_centroids = FALSE, - smooth_polygons = FALSE, - vertices = 20L, - k = 3L, - set_neg_to_zero = TRUE, - skip_eval_dfr = FALSE, - copy_dt = TRUE, - verbose = TRUE) { +gpoly_from_dfr_smoothed_wrapped <- function(segmdfr, + name = "cell", + calc_centroids = FALSE, + smooth_polygons = FALSE, + vertices = 20L, + k = 3L, + set_neg_to_zero = TRUE, + skip_eval_dfr = FALSE, + copy_dt = TRUE, + verbose = TRUE) { gpoly <- createGiottoPolygonsFromDfr( segmdfr = segmdfr, name = name, @@ -410,13 +427,18 @@ gpoly_from_dfr_smoothed_wrapped <- function( set_neg_to_zero = set_neg_to_zero ) } - if (isTRUE(calc_centroids)) gpoly <- centroids( - gpoly, append_gpolygon = TRUE) + if (isTRUE(calc_centroids)) { + gpoly <- centroids( + gpoly, + append_gpolygon = TRUE + ) + } slot(gpoly, "spatVector") <- terra::wrap(slot(gpoly, "spatVector")) if (isTRUE(calc_centroids)) { slot(gpoly, "spatVectorCentroids") <- terra::wrap( - slot(gpoly, "spatVectorCentroids")) + slot(gpoly, "spatVectorCentroids") + ) } return(gpoly) } @@ -427,33 +449,34 @@ gpoly_from_dfr_smoothed_wrapped <- function( #' @title get10Xmatrix #' @name get10Xmatrix -#' @description This function creates an expression matrix from a 10X +#' @description This function creates an expression matrix from a 10X #' structured folder #' @param path_to_data path to the 10X folder -#' @param gene_column_index which column from the features or genes .tsv file +#' @param gene_column_index which column from the features or genes .tsv file #' to use for row ids #' @param remove_zero_rows removes rows with sum equal to zero -#' @param split_by_type split into multiple matrices based on 3rd column of +#' @param split_by_type split into multiple matrices based on 3rd column of #' features.tsv(.gz) #' @returns sparse expression matrix from 10X -#' @details A typical 10X folder is named raw_feature_bc_matrix or +#' @details A typical 10X folder is named raw_feature_bc_matrix or #' filtered_feature_bc_matrix and it has 3 files: #' \itemize{ #' \item{barcodes.tsv(.gz)} #' \item{features.tsv(.gz) or genes.tsv(.gz)} #' \item{matrix.mtx(.gz)} #' } -#' By default the first column of the features or genes .tsv file will be used, +#' By default the first column of the features or genes .tsv file will be used, #' however if multiple -#' annotations are provided (e.g. ensembl gene ids and gene symbols) the user +#' annotations are provided (e.g. ensembl gene ids and gene symbols) the user #' can select another column. #' @export -get10Xmatrix <- function(path_to_data, - gene_column_index = 1, - remove_zero_rows = TRUE, - split_by_type = TRUE) { +get10Xmatrix <- function( + path_to_data, + gene_column_index = 1, + remove_zero_rows = TRUE, + split_by_type = TRUE) { # data.table variables - total <- gene_symbol <- gene_id <- gene_id_num <- cell_id <- + total <- gene_symbol <- gene_id <- gene_id_num <- cell_id <- cell_id_num <- sort_gene_id_num <- NULL # data directory @@ -462,14 +485,16 @@ get10Xmatrix <- function(path_to_data, # get barcodes and create vector barcodes_file <- grep(files_10X, pattern = "barcodes", value = TRUE) barcodesDT <- data.table::fread( - input = paste0(path_to_data, "/", barcodes_file), header = FALSE) + input = paste0(path_to_data, "/", barcodes_file), header = FALSE + ) barcodes_vec <- barcodesDT$V1 - names(barcodes_vec) <- 1:nrow(barcodesDT) + names(barcodes_vec) <- seq_len(nrow(barcodesDT)) # get features and create vector features_file <- grep(files_10X, pattern = "features|genes", value = TRUE) featuresDT <- data.table::fread( - input = paste0(path_to_data, "/", features_file), header = FALSE) + input = paste0(path_to_data, "/", features_file), header = FALSE + ) g_name <- colnames(featuresDT)[gene_column_index] ## convert ensembl gene id to gene symbol ## @@ -477,10 +502,11 @@ get10Xmatrix <- function(path_to_data, featuresDT[, total := .N, by = get(g_name)] featuresDT[, gene_symbol := ifelse( - total > 1, paste0(get(g_name), "--", 1:.N), - get(g_name)), by = get(g_name)] + total > 1, paste0(get(g_name), "--", seq_len(.N)), + get(g_name) + ), by = get(g_name)] features_vec <- featuresDT$gene_symbol - names(features_vec) <- 1:nrow(featuresDT) + names(features_vec) <- seq_len(nrow(featuresDT)) # get matrix matrix_file <- grep(files_10X, pattern = "matrix", value = TRUE) @@ -524,23 +550,24 @@ get10Xmatrix <- function(path_to_data, #' @title get10Xmatrix_h5 #' @name get10Xmatrix_h5 -#' @description This function creates an expression matrix from a 10X h5 file +#' @description This function creates an expression matrix from a 10X h5 file #' path #' @param path_to_data path to the 10X .h5 file -#' @param gene_ids use gene symbols (default) or ensembl ids for the gene +#' @param gene_ids use gene symbols (default) or ensembl ids for the gene #' expression matrix #' @inheritParams get10Xmatrix #' @returns (list of) sparse expression matrix from 10X -#' @details If the .h5 10x file has multiple classes of features -#' (e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and +#' @details If the .h5 10x file has multiple classes of features +#' (e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and #' \code{split_by_type} param is \code{TRUE}, multiple matrices will be returned #' @export -get10Xmatrix_h5 <- function(path_to_data, - gene_ids = c("symbols", "ensembl"), - remove_zero_rows = TRUE, - split_by_type = TRUE) { +get10Xmatrix_h5 <- function( + path_to_data, + gene_ids = c("symbols", "ensembl"), + remove_zero_rows = TRUE, + split_by_type = TRUE) { ## function inspired by and modified from the VISION package - ## see read_10x_h5_v3 in + ## see read_10x_h5_v3 in ## https://github.com/YosefLab/VISION/blob/master/R/Utilities.R # verify if optional package is installed @@ -593,9 +620,10 @@ get10Xmatrix_h5 <- function(path_to_data, # data.table variables nr_name <- name <- uniq_name <- NULL - features_dt[, nr_name := 1:.N, by = name] + features_dt[, nr_name := seq_len(.N), by = name] features_dt[, uniq_name := ifelse( - nr_name == 1, name, paste0(name, "_", (nr_name - 1)))] + nr_name == 1, name, paste0(name, "_", (nr_name - 1)) + )] # dimension names @@ -615,7 +643,8 @@ get10Xmatrix_h5 <- function(path_to_data, for (fclass in unique(feature_types)) { result_list[[fclass]] <- sparsemat[ - features_dt$feature_type == fclass, ] + features_dt$feature_type == fclass, + ] # change names to gene symbols if it's expression if (fclass == "Gene Expression" & gene_ids == "symbols") { @@ -660,11 +689,11 @@ get10Xmatrix_h5 <- function(path_to_data, #' @title readPolygonFilesVizgenHDF5 #' @name readPolygonFilesVizgenHDF5_old -#' @description Read and create polygons for all cells, or for only selected +#' @description Read and create polygons for all cells, or for only selected #' FOVs. #' @param boundaries_path path to the cell_boundaries folder #' @param fovs subset of fovs to use -#' @param custom_polygon_names a character vector to provide custom polygon +#' @param custom_polygon_names a character vector to provide custom polygon #' names (optional) #' @param polygon_feat_types a vector containing the polygon feature types #' @param flip_x_axis flip x axis of polygon coordinates (multiply by -1) @@ -677,21 +706,22 @@ get10Xmatrix_h5 <- function(path_to_data, #' @param verbose be verbose #' @seealso \code{\link{smoothGiottoPolygons}} #' @returns data.table -#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission +#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission #' issues. #' @export -readPolygonFilesVizgenHDF5_old <- function(boundaries_path, - fovs = NULL, - polygon_feat_types = 0:6, - custom_polygon_names = NULL, - flip_x_axis = FALSE, - flip_y_axis = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60, - set_neg_to_zero = FALSE, - H5Fopen_flags = "H5F_ACC_RDWR", - cores = NA, - verbose = TRUE) { +readPolygonFilesVizgenHDF5_old <- function( + boundaries_path, + fovs = NULL, + polygon_feat_types = 0:6, + custom_polygon_names = NULL, + flip_x_axis = FALSE, + flip_y_axis = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60, + set_neg_to_zero = FALSE, + H5Fopen_flags = "H5F_ACC_RDWR", + cores = NA, + verbose = TRUE) { # necessary pkgs package_check(pkg_name = "rhdf5", repository = "Bioc") @@ -707,12 +737,12 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, # provide your own custom names if (!is.null(custom_polygon_names)) { if (!is.character(custom_polygon_names)) { - stop(wrap_txt("If custom_polygon_names are provided, it needs to + stop(wrap_txt("If custom_polygon_names are provided, it needs to be a character vector")) } if (length(custom_polygon_names) != length(poly_feat_names)) { - stop(wrap_txt("length of custom names need to be same as + stop(wrap_txt("length of custom names need to be same as polygon_feat_types")) } else { poly_feat_names <- custom_polygon_names @@ -727,14 +757,17 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, selected_hdf5s <- paste0("feature_data_", fovs, ".hdf5") selected_hdf5s_concatenated <- paste0(selected_hdf5s, collapse = "|") hdf5_boundary_selected_list <- grep( - selected_hdf5s_concatenated, x = hdf5_boundary_list, value = TRUE) + selected_hdf5s_concatenated, + x = hdf5_boundary_list, value = TRUE + ) } else { hdf5_boundary_selected_list <- hdf5_boundary_list } - if (isTRUE(verbose)) - wrap_msg("finished listing .hdf5 files start extracting .hdf5 + if (isTRUE(verbose)) { + wrap_msg("finished listing .hdf5 files start extracting .hdf5 information") + } # open selected polygon files hdf5_list_length <- length(hdf5_boundary_selected_list) @@ -749,18 +782,21 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, function(bound_i) { # get feature data read_file <- rhdf5::H5Fopen( - hdf5_boundary_selected_list[[bound_i]][[1]], - flags = H5Fopen_flags) + hdf5_boundary_selected_list[[bound_i]][[1]], + flags = H5Fopen_flags + ) fov_info <- read_file$featuredata # update progress - if (verbose) + if (verbose) { print(basename(hdf5_boundary_selected_list[[bound_i]])) + } elapsed <- (proc.time() - init)[[3L]] step_time <- elapsed / bound_i est <- (hdf5_list_length * step_time) - elapsed pb(message = c( - "// E:", time_format(elapsed), "| R:", time_format(est))) + "// E:", time_format(elapsed), "| R:", time_format(est) + )) rhdf5::H5Fclose(read_file) return(fov_info) } @@ -774,57 +810,67 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, # extract values for each z index and cell from read_list result_list <- lapply_flex( - seq_along(poly_feat_indexes), cores = cores, function(z_i) { - lapply_flex(seq_along(read_list), cores = cores, function(cell_i) { - singlearray <- read_list[[cell_i]][[ - poly_feat_indexes[z_i]]]$p_0$coordinates - cell_name <- cell_names[[cell_i]] - if (!is.null(singlearray)) { - singlearraydt <- data.table::as.data.table(t_flex( - as.matrix(singlearray[, , 1]))) - data.table::setnames( - singlearraydt, old = c("V1", "V2"), new = c("x", "y")) - if (flip_x_axis) singlearraydt[, x := -1 * x] - if (flip_y_axis) singlearraydt[, y := -1 * y] - - singlearraydt[, cell_id := cell_name] - } - }) - }) + seq_along(poly_feat_indexes), + cores = cores, function(z_i) { + lapply_flex(seq_along(read_list), cores = cores, function(cell_i) { + singlearray <- read_list[[cell_i]][[ + poly_feat_indexes[z_i] + ]]$p_0$coordinates + cell_name <- cell_names[[cell_i]] + if (!is.null(singlearray)) { + singlearraydt <- data.table::as.data.table(t_flex( + as.matrix(singlearray[, , 1]) + )) + data.table::setnames( + singlearraydt, + old = c("V1", "V2"), new = c("x", "y") + ) + if (flip_x_axis) singlearraydt[, x := -1 * x] + if (flip_y_axis) singlearraydt[, y := -1 * y] + + singlearraydt[, cell_id := cell_name] + } + }) + } + ) result_list_rbind <- lapply_flex( - seq_along(result_list), cores = cores, function(z_i) { - data.table::rbindlist(result_list[[z_i]]) - }) + seq_along(result_list), + cores = cores, function(z_i) { + data.table::rbindlist(result_list[[z_i]]) + } + ) - if (isTRUE(verbose)) + if (isTRUE(verbose)) { wrap_msg("finished extracting .hdf5 files start creating polygons") + } # create Giotto polygons and add them to gobject progressr::with_progress({ pb <- progressr::progressor(along = result_list_rbind) - smooth_cell_polygons_list <- lapply_flex(seq_along(result_list_rbind), - cores = cores, function(i) { - dfr_subset <- result_list_rbind[[i]][, .(x, y, cell_id)] - cell_polygons <- createGiottoPolygonsFromDfr( - segmdfr = dfr_subset, - name = poly_feat_names[i], - verbose = verbose - ) + smooth_cell_polygons_list <- lapply_flex(seq_along(result_list_rbind), + cores = cores, function(i) { + dfr_subset <- result_list_rbind[[i]][, .(x, y, cell_id)] + cell_polygons <- createGiottoPolygonsFromDfr( + segmdfr = dfr_subset, + name = poly_feat_names[i], + verbose = verbose + ) - pb(message = poly_feat_names[i]) + pb(message = poly_feat_names[i]) - if (smooth_polygons == TRUE) { - return(smoothGiottoPolygons(cell_polygons, - vertices = smooth_vertices, - set_neg_to_zero = set_neg_to_zero - )) - } else { - return(cell_polygons) + if (smooth_polygons == TRUE) { + return(smoothGiottoPolygons(cell_polygons, + vertices = smooth_vertices, + set_neg_to_zero = set_neg_to_zero + )) + } else { + return(cell_polygons) + } } - }) + ) }) @@ -840,14 +886,14 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, #' @title readPolygonFilesVizgenHDF5 #' @name readPolygonFilesVizgenHDF5 #' @description Read polygon info for all cells or for only selected FOVs from -#' Vizgen HDF5 files. Data is returned as a list of giottoPolygons or +#' Vizgen HDF5 files. Data is returned as a list of giottoPolygons or #' data.tables of the requested z indices. #' @param boundaries_path path to the cell_boundaries folder #' @param fovs subset of fovs to use #' @param z_indices z indices of polygons to use #' @param segm_to_use segmentation results to use (usually = 1. Depends on if #' alternative segmentations were generated) -#' @param custom_polygon_names a character vector to provide custom polygon +#' @param custom_polygon_names a character vector to provide custom polygon #' names (optional) #' @param polygon_feat_types deprecated. Use \code{z_indices} #' @param flip_x_axis flip x axis of polygon coordinates (multiply by -1) @@ -858,36 +904,37 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, #' @param calc_centroids calculate centroids (default = FALSE) #' @param H5Fopen_flags see \code{\link[rhdf5]{H5Fopen}} for more details #' @param cores cores to use -#' @param create_gpoly_parallel (default = TRUE) Whether to run gpoly creation +#' @param create_gpoly_parallel (default = TRUE) Whether to run gpoly creation #' in parallel #' @param create_gpoly_bin (Optional, default = FALSE) Parallelization option. -#' Accepts integer values as an binning size when generating giottoPolygon +#' Accepts integer values as an binning size when generating giottoPolygon #' objects #' @param verbose be verbose #' @param output whether to return as list of giottoPolygon or data.table #' @seealso \code{\link{smoothGiottoPolygons}} #' @returns list of giottoPolygon or data.table -#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission +#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission #' issues. #' @export -readPolygonFilesVizgenHDF5 <- function(boundaries_path, - fovs = NULL, - z_indices = 1L:7L, - segm_to_use = 1L, - custom_polygon_names = NULL, - flip_x_axis = FALSE, - flip_y_axis = TRUE, - calc_centroids = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60L, - set_neg_to_zero = FALSE, - H5Fopen_flags = "H5F_ACC_RDWR", - cores = determine_cores(), - create_gpoly_parallel = TRUE, - create_gpoly_bin = FALSE, - verbose = TRUE, - output = c("giottoPolygon", "data.table"), - polygon_feat_types = NULL) { +readPolygonFilesVizgenHDF5 <- function( + boundaries_path, + fovs = NULL, + z_indices = 1L:7L, + segm_to_use = 1L, + custom_polygon_names = NULL, + flip_x_axis = FALSE, + flip_y_axis = TRUE, + calc_centroids = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60L, + set_neg_to_zero = FALSE, + H5Fopen_flags = "H5F_ACC_RDWR", + cores = determine_cores(), + create_gpoly_parallel = TRUE, + create_gpoly_bin = FALSE, + verbose = TRUE, + output = c("giottoPolygon", "data.table"), + polygon_feat_types = NULL) { # necessary pkgs package_check(pkg_name = "rhdf5", repository = "Bioc") @@ -907,13 +954,14 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, # provide your own custom names if (!is.null(custom_polygon_names)) { if (!is.character(custom_polygon_names)) { - stop(wrap_txt("If custom_polygon_names are provided, it needs to + stop(wrap_txt("If custom_polygon_names are provided, it needs to be a character vector")) } if (length(custom_polygon_names) != length(z_indices)) { stop(wrap_txt( - "length of custom names need to be same as z_indices")) + "length of custom names need to be same as z_indices" + )) } } @@ -925,14 +973,17 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, selected_hdf5s <- paste0("feature_data_", fovs, ".hdf5") selected_hdf5s_concatenated <- paste0(selected_hdf5s, collapse = "|") hdf5_boundary_selected_list <- grep( - selected_hdf5s_concatenated, x = hdf5_boundary_list, value = TRUE) + selected_hdf5s_concatenated, + x = hdf5_boundary_list, value = TRUE + ) } else { hdf5_boundary_selected_list <- hdf5_boundary_list } - if (isTRUE(verbose)) - message("finished listing .hdf5 files start extracting .hdf5 + if (isTRUE(verbose)) { + message("finished listing .hdf5 files start extracting .hdf5 information") + } # open selected polygon files @@ -951,8 +1002,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) # update progress - if (verbose) + if (verbose) { print(basename(hdf5_boundary_selected_list[[bound_i]])) + } if (bound_i %% 5 == 0) { pb() } @@ -1010,15 +1062,16 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, #' @keywords internal #' @noRd -.create_giotto_polygons_vizgen <- function(z_read_DT, - poly_names = names(z_read_DT), - set_neg_to_zero = FALSE, - calc_centroids = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60L, - create_gpoly_parallel = TRUE, - create_gpoly_bin = FALSE, - verbose = TRUE) { +.create_giotto_polygons_vizgen <- function( + z_read_DT, + poly_names = names(z_read_DT), + set_neg_to_zero = FALSE, + calc_centroids = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60L, + create_gpoly_parallel = TRUE, + create_gpoly_bin = FALSE, + verbose = TRUE) { checkmate::assert_list(z_read_DT) checkmate::assert_numeric(smooth_vertices) @@ -1033,34 +1086,40 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, pb <- progressr::progressor(along = z_read_DT) smooth_cell_polygons_list <- lapply( seq_along(z_read_DT), function(i) { - dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] - data.table::setnames( - dfr_subset, old = "cell_id", new = "poly_ID") - cell_polygons <- createGiottoPolygonsFromDfr( - segmdfr = dfr_subset, - name = poly_names[i], - calc_centroids = FALSE, - skip_eval_dfr = TRUE, - copy_dt = FALSE, - verbose = verbose - ) - if (isTRUE(smooth_polygons)) { - cell_polygons <- smoothGiottoPolygons( - gpolygon = cell_polygons, - vertices = smooth_vertices, - k = 3L, - set_neg_to_zero = set_neg_to_zero + dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] + data.table::setnames( + dfr_subset, + old = "cell_id", new = "poly_ID" ) + cell_polygons <- createGiottoPolygonsFromDfr( + segmdfr = dfr_subset, + name = poly_names[i], + calc_centroids = FALSE, + skip_eval_dfr = TRUE, + copy_dt = FALSE, + verbose = verbose + ) + if (isTRUE(smooth_polygons)) { + cell_polygons <- smoothGiottoPolygons( + gpolygon = cell_polygons, + vertices = smooth_vertices, + k = 3L, + set_neg_to_zero = set_neg_to_zero + ) + } + if (isTRUE(calc_centroids)) { + # NOTE: won't recalculate if centroids are already attached + cell_polygons <- centroids( + cell_polygons, + append_gpolygon = TRUE + ) + } + pb(message = c( + poly_names[i], " (", i, "/", length(z_read_DT), ")" + )) + return(cell_polygons) } - if (isTRUE(calc_centroids)) { - # NOTE: won't recalculate if centroids are already attached - cell_polygons <- centroids( - cell_polygons, append_gpolygon = TRUE) - } - pb(message = c( - poly_names[i], " (", i, "/", length(z_read_DT), ")")) - return(cell_polygons) - }) + ) }) return(smooth_cell_polygons_list) } @@ -1077,7 +1136,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, function(i) { dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] data.table::setnames( - dfr_subset, old = "cell_id", new = "poly_ID") + dfr_subset, + old = "cell_id", new = "poly_ID" + ) cell_polygons <- gpoly_from_dfr_smoothed_wrapped( segmdfr = dfr_subset, name = poly_names[i], @@ -1091,7 +1152,8 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) pb(message = c( - poly_names[i], " (", i, "/", length(z_read_DT), ")")) + poly_names[i], " (", i, "/", length(z_read_DT), ")" + )) return(cell_polygons) } ) @@ -1100,13 +1162,15 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, # unwrap results smooth_cell_polygons_list <- lapply( smooth_cell_polygons_list, function(x) { - slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) - if (isTRUE(calc_centroids)) { - slot(x, "spatVectorCentroids") <- terra::vect( - slot(x, "spatVectorCentroids")) + slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) + if (isTRUE(calc_centroids)) { + slot(x, "spatVectorCentroids") <- terra::vect( + slot(x, "spatVectorCentroids") + ) + } + return(x) } - return(x) - }) + ) } else { # with binning @@ -1125,7 +1189,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) ) DT <- data.table::merge.data.table( - DT, bin_pid, by = "poly_ID", all.x = TRUE) + DT, bin_pid, + by = "poly_ID", all.x = TRUE + ) DT <- split(DT, DT$bin_ID) }, bin = create_gpoly_bin) @@ -1153,8 +1219,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) pb(message = c( - poly_names[i], " (", i, "/", - length(dfr_subset), ")")) + poly_names[i], " (", i, "/", + length(dfr_subset), ")" + )) return(cell_polygons) } ) @@ -1165,18 +1232,20 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, # unwrap results smooth_cell_polygons_list <- lapply( seq_along(smooth_cell_polygons_list), function(i) { - p_list <- lapply(smooth_cell_polygons_list[[i]], function(x) { - slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) - if (isTRUE(calc_centroids)) { - slot(x, "spatVectorCentroids") <- terra::vect( - slot(x, "spatVectorCentroids")) - } - return(x) - }) - # rbind results - names(p_list) <- NULL - return(do.call("rbind", p_list)) - }) + p_list <- lapply(smooth_cell_polygons_list[[i]], function(x) { + slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) + if (isTRUE(calc_centroids)) { + slot(x, "spatVectorCentroids") <- terra::vect( + slot(x, "spatVectorCentroids") + ) + } + return(x) + }) + # rbind results + names(p_list) <- NULL + return(do.call("rbind", p_list)) + } + ) } @@ -1192,20 +1261,19 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, #' @title Read MERSCOPE polygons from parquet #' @name readPolygonVizgenParquet #' @description -#' Read Vizgen exported cell boundary parquet files as giottoPolyons. The z +#' Read Vizgen exported cell boundary parquet files as giottoPolyons. The z #' level can be selected. #' @param file parquet file to load -#' @param z_index either 'all' or a numeric vector of z_indices to get polygons +#' @param z_index either 'all' or a numeric vector of z_indices to get polygons #' for #' @param calc_centroids calculate centroids for the polygons (default = TRUE) #' @param verbose be verbose #' @returns giottoPolygons #' @export -readPolygonVizgenParquet <- function( - file, - z_index = "all", - calc_centroids = TRUE, - verbose = TRUE) { +readPolygonVizgenParquet <- function(file, + z_index = "all", + calc_centroids = TRUE, + verbose = TRUE) { # package checks package_check("arrow") package_check("sf") @@ -1226,7 +1294,7 @@ readPolygonVizgenParquet <- function( avail_z_idx <- arrow::open_dataset(file) %>% dplyr::distinct(ZIndex) %>% dplyr::pull() %>% - # dplyr::pull(as_vector = TRUE) %>% # switch to this in future and add + # dplyr::pull(as_vector = TRUE) %>% # switch to this in future and add # arrow version requirement sort() @@ -1235,13 +1303,14 @@ readPolygonVizgenParquet <- function( } else if (is.numeric(z_index)) { z_index <- as.integer(z_index) if (!all(z_index %in% avail_z_idx)) { - stop(paste("Not all z indices found in cell boundaries.\n + stop(paste("Not all z indices found in cell boundaries.\n Existing indices are:", paste(avail_z_idx, collapse = " "))) } z_index } - if (isTRUE(verbose)) + if (isTRUE(verbose)) { message("loading poly z_indices: ", paste(get_z_idx, collapse = " ")) + } # 2. collect by z index filter and convert WKB to multipolygon @@ -1261,7 +1330,8 @@ readPolygonVizgenParquet <- function( future.seed = TRUE ) names(multipolygons) <- lapply( - multipolygons, function(x) paste0("z", unique(x$ZIndex))) + multipolygons, function(x) paste0("z", unique(x$ZIndex)) + ) # 3. convert to giottoPolygons and append meta @@ -1313,17 +1383,18 @@ readPolygonVizgenParquet <- function( #' @returns giotto object or cell polygons list #' @seealso \code{\link{smoothGiottoPolygons}} #' @export -readPolygonFilesVizgen <- function(gobject, - boundaries_path, - fovs = NULL, - polygon_feat_types = 0:6, - flip_x_axis = FALSE, - flip_y_axis = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60, - set_neg_to_zero = FALSE, - return_gobject = TRUE, - verbose = TRUE) { +readPolygonFilesVizgen <- function( + gobject, + boundaries_path, + fovs = NULL, + polygon_feat_types = 0:6, + flip_x_axis = FALSE, + flip_y_axis = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60, + set_neg_to_zero = FALSE, + return_gobject = TRUE, + verbose = TRUE) { # define names poly_feat_names <- paste0("z", polygon_feat_types) poly_feat_indexes <- paste0("zIndex_", polygon_feat_types) @@ -1365,18 +1436,20 @@ readPolygonFilesVizgen <- function(gobject, -#' @describeIn readPolygonFilesVizgen (internal) Optimized .hdf5 reading for +#' @describeIn readPolygonFilesVizgen (internal) Optimized .hdf5 reading for #' vizgen merscope output. Returns a data.table of xyz coords and cell_id #' @keywords internal -.h5_read_vizgen <- function(h5File, - z_indices = 1L:7L, - segm_to_use = "p_0", - H5Fopen_flags = "H5F_ACC_RDWR") { +.h5_read_vizgen <- function( + h5File, + z_indices = 1L:7L, + segm_to_use = "p_0", + H5Fopen_flags = "H5F_ACC_RDWR") { # data.table vars group <- name <- cell <- z_name <- otype <- d_name <- cell_id <- NULL h5_ls <- data.table::setDT( - rhdf5::h5ls(h5File, recursive = 5, datasetinfo = FALSE)) + rhdf5::h5ls(h5File, recursive = 5, datasetinfo = FALSE) + ) cell_names <- as.character(h5_ls[group == "/featuredata", name]) z_names <- h5_ls[grep("zIndex", name), unique(name)] @@ -1385,10 +1458,12 @@ readPolygonFilesVizgen <- function(gobject, dset_names <- dset_names[grep(segm_to_use, group), ] # tag cellnames dset_names[, cell := gsub( - pattern = "/featuredata/|/zIndex.*$", replacement = "", x = group)] + pattern = "/featuredata/|/zIndex.*$", replacement = "", x = group + )] # tag z_names dset_names[, z_name := gsub( - pattern = "^.*/(zIndex_\\d*).*$", replacement = "\\1", x = group)] + pattern = "^.*/(zIndex_\\d*).*$", replacement = "\\1", x = group + )] # subset by z_indices dset_names <- dset_names[z_name %in% z_names[z_indices], ] # create full file location @@ -1401,7 +1476,9 @@ readPolygonFilesVizgen <- function(gobject, zvals <- .h5_read_bare( file = fid, name = paste0( - c("/featuredata", cell_name, "z_coordinates"), collapse = "/"), + c("/featuredata", cell_name, "z_coordinates"), + collapse = "/" + ), dapl = dapl ) names(zvals) <- z_names @@ -1411,13 +1488,16 @@ readPolygonFilesVizgen <- function(gobject, cell_data <- lapply( seq(nrow(cell_dsets)), function(fid, dapl, zvals, d_i) { - res <- .h5_read_bare( - file = fid, name = cell_dsets[d_i, d_name], dapl = dapl) - res <- t_flex(res[, , 1L]) - res <- cbind(res, zvals[cell_dsets[d_i, z_name]]) - colnames(res) <- c("x", "y", "z") - res - }, fid = fid, dapl = dapl, zvals = zvals) + res <- .h5_read_bare( + file = fid, name = cell_dsets[d_i, d_name], dapl = dapl + ) + res <- t_flex(res[, , 1L]) + res <- cbind(res, zvals[cell_dsets[d_i, z_name]]) + colnames(res) <- c("x", "y", "z") + res + }, + fid = fid, dapl = dapl, zvals = zvals + ) cell_data <- data.table::as.data.table(do.call("rbind", cell_data)) cell_data[, cell_id := cell_name] cell_data @@ -1444,7 +1524,7 @@ readPolygonFilesVizgen <- function(gobject, PACKAGE = "rhdf5" ) invisible(.Call("_H5Dclose", did, PACKAGE = "rhdf5")) - + res } @@ -1464,8 +1544,9 @@ readPolygonFilesVizgen <- function(gobject, #' @param bin_size bin size to select from .gef file #' @returns transcript with coordinates #' @export -getGEFtxCoords <- function(gef_file, - bin_size = "bin100") { +getGEFtxCoords <- function( + gef_file, + bin_size = "bin100") { # data.table vars genes <- NULL @@ -1487,9 +1568,9 @@ getGEFtxCoords <- function(gef_file, ) setDT(geneDT) - # Step 3: Combine read expression and gene data by repeating count + # Step 3: Combine read expression and gene data by repeating count # (match offset index) - # See STOMICS file format manual for more information about exprDT and + # See STOMICS file format manual for more information about exprDT and # geneDT exprDT[, genes := rep(x = geneDT$gene, geneDT$count)] diff --git a/R/giotto_viewer.R b/R/giotto_viewer.R index 2517e5084..c21f66f75 100644 --- a/R/giotto_viewer.R +++ b/R/giotto_viewer.R @@ -1,19 +1,21 @@ #' @title write_giotto_viewer_annotation -#' @description write out factor-like annotation data from a giotto object for +#' @description write out factor-like annotation data from a giotto object for #' the Viewer #' @param annotation annotation from the data.table from giotto object #' @param annot_name name of the annotation #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_annotation <- function(annotation, - annot_name = "test", - output_directory = getwd()) { +write_giotto_viewer_annotation <- function( + annotation, + annot_name = "test", + output_directory = getwd()) { if (is.numeric(annotation) == TRUE) { # annotation information and mapping sorted_unique_numbers <- sort(unique(annotation)) annot_map <- data.table::data.table( - num = sorted_unique_numbers, fac = sorted_unique_numbers) + num = sorted_unique_numbers, fac = sorted_unique_numbers + ) annot_information <- annotation } else { # factors to numerics @@ -54,9 +56,10 @@ write_giotto_viewer_annotation <- function(annotation, #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_numeric_annotation <- function(annotation, - annot_name = "test", - output_directory = getwd()) { +write_giotto_viewer_numeric_annotation <- function( + annotation, + annot_name = "test", + output_directory = getwd()) { # write to output directory annot_inf_map <- paste0(annot_name, "_num_annot_information", ".txt") write.table(annotation, @@ -79,13 +82,16 @@ write_giotto_viewer_numeric_annotation <- function(annotation, #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, - dim_red = NULL, - dim_red_name = NULL, - dim_red_rounding = NULL, - dim_red_rescale = c(-20, 20), - output_directory = getwd()) { - dim_red_coord <- dim_reduction_cell[[dim_red]][[dim_red_name]]$coordinates[, 1:2] +write_giotto_viewer_dim_reduction <- function( + dim_reduction_cell, + dim_red = NULL, + dim_red_name = NULL, + dim_red_rounding = NULL, + dim_red_rescale = c(-20, 20), + output_directory = getwd()) { + dim_red_coord <- dim_reduction_cell[[dim_red]][[ + dim_red_name + ]]$coordinates[, seq_len(2)] if (is.null(dim_red_coord)) { cat("\n combination of ", dim_red, " and ", dim_red_name, " does not exist \n") @@ -98,7 +104,8 @@ write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, # rescale dimension reduction coordinates if (!is.null(dim_red_rescale) & length(dim_red_rescale) == 2) { dim_red_coord <- scales::rescale( - x = dim_red_coord, to = dim_red_rescale) + x = dim_red_coord, to = dim_red_rescale + ) } dim_red_name <- paste0(dim_red, "_", dim_red_name, "_dim_coord.txt") @@ -136,33 +143,34 @@ write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, #' include the provided spatial enrichment name (default PAGE or rank) #' and add the gene signature names (.e.g cell types) to the numeric annotations parameter. #' @export -exportGiottoViewer <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - output_directory = NULL, - spat_enr_names = NULL, - factor_annotations = NULL, - numeric_annotations = NULL, - dim_reductions, - dim_reduction_names, - expression_values = c("scaled", "normalized", "custom"), - dim_red_rounding = NULL, - dim_red_rescale = c(-20, 20), - expression_rounding = 2, - overwrite_dir = TRUE, - verbose = TRUE) { +exportGiottoViewer <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + output_directory = NULL, + spat_enr_names = NULL, + factor_annotations = NULL, + numeric_annotations = NULL, + dim_reductions, + dim_reduction_names, + expression_values = c("scaled", "normalized", "custom"), + dim_red_rounding = NULL, + dim_red_rescale = c(-20, 20), + expression_rounding = 2, + overwrite_dir = TRUE, + verbose = TRUE) { ## output directory ## if (file.exists(output_directory)) { if (overwrite_dir == TRUE) { - message("output directory already exists, files will be + message("output directory already exists, files will be overwritten") } else { - stop("output directory already exists, change overwrite_dir = TRUE + stop("output directory already exists, change overwrite_dir = TRUE to overwrite files \n") } } else if (is.null(output_directory)) { - message("no output directory is provided, defaults to current + message("no output directory is provided, defaults to current directory: ", getwd(), "\n") output_directory <- getwd() } else { @@ -264,10 +272,13 @@ exportGiottoViewer <- function(gobject, } annot_list <- data.table( - txtfiles = unlist(text_file_names), names = unlist(annot_names)) + txtfiles = unlist(text_file_names), names = unlist(annot_names) + ) write.table(annot_list, - file = paste0(output_directory, "/", - "annotation_list", "_", feat, ".txt"), + file = paste0( + output_directory, "/", + "annotation_list", "_", feat, ".txt" + ), quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " " ) } @@ -301,10 +312,13 @@ exportGiottoViewer <- function(gobject, } annot_list <- data.table( - txtfiles = unlist(text_file_names), names = unlist(annot_names)) + txtfiles = unlist(text_file_names), names = unlist(annot_names) + ) write.table(annot_list, - file = paste0(output_directory, "/", - "annotation_num_list", "_", feat, ".txt"), + file = paste0( + output_directory, "/", + "annotation_num_list", "_", feat, ".txt" + ), quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " " ) } @@ -356,45 +370,48 @@ exportGiottoViewer <- function(gobject, expr_values <- as.matrix(expr_values) # swap cell_IDs for numerical values - colnames(expr_values) <- 1:ncol(expr_values) + colnames(expr_values) <- seq_len(ncol(expr_values)) # round values if (!is.null(expression_rounding)) { expr_values <- round(x = expr_values, digits = expression_rounding) } output_directory_norm <- normalizePath(output_directory) fileWrite_directory <- paste0( - output_directory_norm, "/", "giotto_expression.csv") + output_directory_norm, "/", "giotto_expression.csv" + ) data.table::fwrite( - data.table::as.data.table(expr_values, keep.rownames = "gene"), - file = fileWrite_directory, sep = ",", - quote = FALSE, row.names = FALSE, col.names = TRUE) + data.table::as.data.table(expr_values, keep.rownames = "gene"), + file = fileWrite_directory, sep = ",", + quote = FALSE, row.names = FALSE, col.names = TRUE + ) - if (verbose == TRUE) + if (verbose == TRUE) { cat("finished writing giotto viewer files to", output_directory) + } if (verbose == TRUE) { message("=========================================================") - message("Next steps. Please manually run the following in a SHELL + message("Next steps. Please manually run the following in a SHELL terminal:") message("=========================================================") message(paste("cd ", output_directory)) - message("giotto_setup_image --require-stitch=n --image=n - --image-multi-channel=n --segmentation=n --multi-fov=n + message("giotto_setup_image --require-stitch=n --image=n + --image-multi-channel=n --segmentation=n --multi-fov=n --output-json=step1.json") message("smfish_step1_setup -c step1.json") - message("giotto_setup_viewer --num-panel=2 - --input-preprocess-json=step1.json - --panel-1=PanelPhysicalSimple --panel-2=PanelTsne - --output-json=step2.json + message("giotto_setup_viewer --num-panel=2 + --input-preprocess-json=step1.json + --panel-1=PanelPhysicalSimple --panel-2=PanelTsne + --output-json=step2.json --input-annotation-list=annotation_list.txt") - message("smfish_read_config -c step2.json -o test.dec6.js + message("smfish_read_config -c step2.json -o test.dec6.js -p test.dec6.html -q test.dec6.css") message("giotto_copy_js_css --output .") message("python3 -m http.server") message("=========================================================") - message("Finally, open your browser, navigate to - http://localhost:8000/. Then click on the file + message("Finally, open your browser, navigate to + http://localhost:8000/. Then click on the file test.dec6.html to see the viewer.") message("For more information, http://spatialgiotto.rc.fas.harvard.edu/giotto.viewer.setup3.html", "\n") } diff --git a/R/gstop.R b/R/gstop.R index be2a805e1..d83ad98d8 100644 --- a/R/gstop.R +++ b/R/gstop.R @@ -2,14 +2,15 @@ # .n should be increased when called from a nested location if capturing the # original call is desired. # .n should be increased to 2L when within a generic method -.gstop <- function(..., - sep = " ", - strWidth = 100, - errWidth = FALSE, - .prefix = " ", - .initial = "", - .n = 1L, - .call = FALSE) { +.gstop <- function( + ..., + sep = " ", + strWidth = 100, + errWidth = FALSE, + .prefix = " ", + .initial = "", + .n = 1L, + .call = FALSE) { GiottoUtils::gstop( ..., sep = sep, diff --git a/R/image_registration.R b/R/image_registration.R index 677e66246..e671ae779 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -5,7 +5,7 @@ #' @name .trakem2_rigid_transforms #' @title Read trakem2 rigid transforms -#' @description Extract rigid registration transformation values from FIJI +#' @description Extract rigid registration transformation values from FIJI #' TrakEM2 xml file. Generated through register_virtual_stack_slices. #' @param inputstring string read in from TrakeEM2 xml file #' @returns rigid registration transformation values @@ -56,18 +56,20 @@ out <- c(out, 0, 0) out <- data.table::data.table(t(matrix(out))) - colnames(out) <- c("Theta", "Xtransform", "Ytransform", "itx", "ity", - "XFinalTransform", "YFinalTransform") + colnames(out) <- c( + "Theta", "Xtransform", "Ytransform", "itx", "ity", + "XFinalTransform", "YFinalTransform" + ) - # itx and ity are additional values in the trakem2 xml files that must be - # added to Xtransform and Ytransform in order to get the final + # itx and ity are additional values in the trakem2 xml files that must be + # added to Xtransform and Ytransform in order to get the final # transformation values. - # only relevant for sampleset with more than 1 slice away from the + # only relevant for sampleset with more than 1 slice away from the # reference image out$XFinalTransform <- out$Xtransform + out$itx out$YFinalTransform <- out$Ytransform + out$ity - # Multiply theta by -1 due to differences in R and image plotting + # Multiply theta by -1 due to differences in R and image plotting # coordinates out$Theta <- -out$Theta @@ -78,7 +80,7 @@ #' @title Rigid transform spatial locations #' @name .rigid_transform_spatial_locations -#' @description Performs appropriate transforms to align spatial locations +#' @description Performs appropriate transforms to align spatial locations #' with registered images. #' @param spatlocs input spatial locations #' @param transform_values transformation values to use @@ -86,16 +88,18 @@ #' @returns spatlocs #' @keywords internal # Rotation is performed first, followed by XY transform. -.rigid_transform_spatial_locations <- function(spatlocs, - transform_values, - method) { +.rigid_transform_spatial_locations <- function( + spatlocs, + transform_values, + method) { if (method == "fiji") { spatlocsXY <- spatlocs[, c("sdimx", "sdimy")] # These functions must be performed in positive y values spatlocsXY$sdimy <- -1 * spatlocsXY$sdimy spatlocsXY <- spin(spatlocsXY, GiottoUtils::degrees( - transform_values$Theta)) %>% + transform_values$Theta + )) %>% spatShift( dx = transform_values$XFinalTransform, dy = transform_values$YFinalTransform @@ -118,7 +122,7 @@ return(spatlocs) } else { - stop('Image registration method must be provided. Only "fiji" and + stop('Image registration method must be provided. Only "fiji" and "rvision" methods currently supported.') } } @@ -135,34 +139,37 @@ #' @returns list #' @keywords internal # Automatically account for changes in image size due to alignment -.reg_img_minmax_finder <- function(gobject_list, - image_unreg = NULL, - largeImage_unreg = NULL, # TODO Currently unused - scale_factor, - transform_values, - method) { +.reg_img_minmax_finder <- function( + gobject_list, + image_unreg = NULL, + largeImage_unreg = NULL, # TODO Currently unused + scale_factor, + transform_values, + method) { # Find image spatial info from original image if possible - # Check to make sure that image_unreg finds an existing image in each + # Check to make sure that image_unreg finds an existing image in each # gobject to be registered imgPresent <- function(gobject, image, img_type) { image %in% list_images_names(gobject = gobject, img_type = img_type) } if (!is.null(image_unreg)) img_type <- "image" # TODO needs reworking - if (!is.null(largeImage_unreg)) img_type <- "largeImage" # TODO needs - # reworking - currently only pays attention to 'image' and not + if (!is.null(largeImage_unreg)) img_type <- "largeImage" # TODO needs + # reworking - currently only pays attention to 'image' and not # 'largeImage' types if (all(as.logical(lapply( - X = gobject_list, FUN = imgPresent, image = image_unreg, - img_type = img_type)))) { + X = gobject_list, FUN = imgPresent, image = image_unreg, + img_type = img_type + )))) { giottoImage_list <- lapply( - X = gobject_list, FUN = get_giottoImage, name = image_unreg, - image_type = img_type) + X = gobject_list, FUN = get_giottoImage, name = image_unreg, + image_type = img_type + ) image_corners <- lapply(giottoImage_list, .get_img_corners) # Infer image corners of registered images PRIOR TO REGISTRATION - # scale unreg_image corners to registered image (use + # scale unreg_image corners to registered image (use # reg_scalefactor/unreg_scalefactor as scale factor) image_corners <- lapply_flex( seq_along(gobject_list), @@ -175,7 +182,7 @@ } ) - # register corners based on transform values (only possible at + # register corners based on transform values (only possible at # reg_image scaling) image_corners_reg <- lapply( seq_along(image_corners), @@ -193,7 +200,9 @@ seq_along(image_corners_reg), function(x) { rescale( - image_corners_reg[[x]], (1 / scale_factor[[x]]), x0 = 0, y0 = 0) + image_corners_reg[[x]], (1 / scale_factor[[x]]), + x0 = 0, y0 = 0 + ) } ) @@ -209,7 +218,7 @@ # return the minmax values - already scaled to spatlocs return(minmaxRegVals) } else { - warning("Original images must be supplied for registered images to be + warning("Original images must be supplied for registered images to be aligned.") } } @@ -217,7 +226,7 @@ #' @title Get image corners #' @name .get_img_corners -#' @description finds four corner spatial coords of giottoImages or +#' @description finds four corner spatial coords of giottoImages or #' magick-images #' @param img_object giottoImage or magick-image to use #' @returns data.frame @@ -253,46 +262,47 @@ #' @title registerGiottoObjectList #' @name registerGiottoObjectList -#' @description Wrapper function for registerGiottoObjectListFiji and +#' @description Wrapper function for registerGiottoObjectListFiji and #' registerGiottoObjectListRvision #' @param gobject_list List of gobjects to register #' @param spat_unit spatial unit -#' @param method Method used to align gobjects. Current options are either +#' @param method Method used to align gobjects. Current options are either #' using FIJI register_virtual_stack_slices output or rvision #' @param image_unreg Gobject image slot to use. Defaults to 'image' (optional) -#' @param image_reg_name Arbitrary image slot name for registered images to +#' @param image_reg_name Arbitrary image slot name for registered images to #' occupy. Defaults to replacement of 'image' slot (optional) #' @param image_list RVISION - under construction #' @param save_dir RVISION - under construction -#' @param spatloc_unreg Unregistered spatial locations to align. Defaults to +#' @param spatloc_unreg Unregistered spatial locations to align. Defaults to #' 'raw' slot (optional) -#' @param spatloc_reg_name Arbitrary name for registered spatial locations. +#' @param spatloc_reg_name Arbitrary name for registered spatial locations. #' Defaults to replacement of 'raw' slot (optional) #' @param fiji_xml_files Filepaths to FIJI registration XML outputs -#' @param fiji_registered_images Registered images output by FIJI +#' @param fiji_registered_images Registered images output by FIJI #' register_virtual_stack_slices #' @param scale_factor Scaling to be applied to spatial coordinates -#' @param allow_rvision_autoscale Whether or not to allow rvision to +#' @param allow_rvision_autoscale Whether or not to allow rvision to #' automatically scale the images when performing image registration #' @param verbose Be verbose -#' @returns List of registered giotto objects where the registered images and +#' @returns List of registered giotto objects where the registered images and #' spatial locations #' @export -registerGiottoObjectList <- function(gobject_list, - spat_unit = NULL, - method = c("fiji", "rvision"), - image_unreg = "image", - image_reg_name = "image", - image_list = NULL, # Rvision - save_dir = NULL, # Rvision - spatloc_unreg = "raw", - spatloc_reg_name = "raw", - fiji_xml_files, - fiji_registered_images, - scale_factor = NULL, - allow_rvision_autoscale = TRUE, # Rvision - # auto_comp_reg_border = TRUE, - verbose = TRUE) { +registerGiottoObjectList <- function( + gobject_list, + spat_unit = NULL, + method = c("fiji", "rvision"), + image_unreg = "image", + image_reg_name = "image", + image_list = NULL, # Rvision + save_dir = NULL, # Rvision + spatloc_unreg = "raw", + spatloc_reg_name = "raw", + fiji_xml_files, + fiji_registered_images, + scale_factor = NULL, + allow_rvision_autoscale = TRUE, # Rvision + # auto_comp_reg_border = TRUE, + verbose = TRUE) { method <- match.arg(method, choices = c("fiji", "rvision")) if (method == "fiji") { @@ -318,7 +328,7 @@ registerGiottoObjectList <- function(gobject_list, verbose = verbose ) } else { - stop("Invalid method input\n Only fiji and rvision methods are + stop("Invalid method input\n Only fiji and rvision methods are currently supported.") } @@ -328,43 +338,44 @@ registerGiottoObjectList <- function(gobject_list, #' @title registerGiottoObjectListFiji #' @name registerGiottoObjectListFiji -#' @description Function to spatially align gobject data based on FIJI image +#' @description Function to spatially align gobject data based on FIJI image #' registration. #' @param gobject_list list of gobjects to register #' @param spat_unit spatial unit -#' @param image_unreg name of original unregistered images. Defaults to +#' @param image_unreg name of original unregistered images. Defaults to #' 'image' (optional) -#' @param image_reg_name arbitrary name for registered images to occupy. +#' @param image_reg_name arbitrary name for registered images to occupy. #' Defaults to replacement of 'image' (optional) -#' @param image_replace_name arbitrary name for any images replaced due to +#' @param image_replace_name arbitrary name for any images replaced due to #' image_reg_name argument (optional) -#' @param registered_images registered images output by FIJI +#' @param registered_images registered images output by FIJI #' register_virtual_stack_slices #' @param spatloc_unreg spatial locations to use. Defaults to 'raw' (optional) -#' @param spatloc_reg_name name for registered spatial locations. Defaults to +#' @param spatloc_reg_name name for registered spatial locations. Defaults to #' replacement of 'raw' (optional) -#' @param spatloc_replace_name arbitrary name for any spatial locations +#' @param spatloc_replace_name arbitrary name for any spatial locations #' replaced due to spatloc_reg_name argument (optional) -#' @param xml_files atomic vector of filepaths to xml outputs from FIJI +#' @param xml_files atomic vector of filepaths to xml outputs from FIJI #' register_virtual_stack_slices -#' @param scale_factor vector of scaling factors of images used in registration +#' @param scale_factor vector of scaling factors of images used in registration #' vs spatlocs #' @param verbose be verbose -#' @returns list of registered giotto objects where the registered images and +#' @returns list of registered giotto objects where the registered images and #' spatial locations #' @export -registerGiottoObjectListFiji <- function(gobject_list, - spat_unit = NULL, - image_unreg = "image", - image_reg_name = "image", - image_replace_name = "unregistered", - registered_images = NULL, - spatloc_unreg = "raw", - spatloc_reg_name = "raw", - spatloc_replace_name = "unregistered", - xml_files, - scale_factor = NULL, - verbose = TRUE) { +registerGiottoObjectListFiji <- function( + gobject_list, + spat_unit = NULL, + image_unreg = "image", + image_reg_name = "image", + image_replace_name = "unregistered", + registered_images = NULL, + spatloc_unreg = "raw", + spatloc_reg_name = "raw", + spatloc_replace_name = "unregistered", + xml_files, + scale_factor = NULL, + verbose = TRUE) { # set spat_unit based on first gobject spat_unit <- set_default_spat_unit( gobject = gobject_list[[1]], @@ -373,20 +384,22 @@ registerGiottoObjectListFiji <- function(gobject_list, ## 0. Check Params ## if (length(gobject_list) != length(xml_files)) { - stop("xml spatial transforms must be supplied for every gobject to be + stop("xml spatial transforms must be supplied for every gobject to be registered.") } if (is.null(registered_images) == FALSE) { - # If there are not the same number of registered images as gobjects, + # If there are not the same number of registered images as gobjects, # stop if (length(registered_images) != length(gobject_list)) { - stop("A registered image should be supplied for every gobject to + stop("A registered image should be supplied for every gobject to align") } if (sum(as.logical(lapply( - registered_images, methods::is, class2 = "giottoImage"))) > 0) { - stop("Registered images should be supplied as either magick-objects + registered_images, methods::is, + class2 = "giottoImage" + ))) > 0) { + stop("Registered images should be supplied as either magick-objects or filepaths") } } @@ -395,15 +408,15 @@ registerGiottoObjectListFiji <- function(gobject_list, if (!is.numeric(scale_factor)) { stop("scale_factor only accepts numerics") } - if ((length(scale_factor) != length(gobject_list)) && + if ((length(scale_factor) != length(gobject_list)) && (length(scale_factor) != 1)) { - stop("If more than one scale_factor is given, there must be one for + stop("If more than one scale_factor is given, there must be one for each gobject to be registered.") } } - # scale_factors will always be given externally. Registered images do not + # scale_factors will always be given externally. Registered images do not # have gobjects yet. # expand scale_factor if given as a single value scale_list <- c() @@ -435,7 +448,9 @@ registerGiottoObjectListFiji <- function(gobject_list, t_file <- xml_files[[file_i]] #------ Put all transform files together transf_list[[file_i]] <- paste( - readLines(t_file, warn = FALSE), collapse = "\n") + readLines(t_file, warn = FALSE), + collapse = "\n" + ) } # Select useful info out of the TrakEM2 files @@ -492,19 +507,23 @@ registerGiottoObjectListFiji <- function(gobject_list, # Params check for conflicting names if (verbose == TRUE) { if (image_unreg == image_reg_name) { - cat("Registered image name already used. Previous image named ", - image_reg_name, " renamed to ", image_replace_name) + cat( + "Registered image name already used. Previous image named ", + image_reg_name, " renamed to ", image_replace_name + ) } if (spatloc_unreg == spatloc_reg_name) { - cat("Registered spatloc name already used. - Previous spatloc named ", spatloc_reg_name, - " renamed to ", spatloc_replace_name) + cat( + "Registered spatloc name already used. + Previous spatloc named ", spatloc_reg_name, + " renamed to ", spatloc_replace_name + ) } } # Update Spatial - # Rename original spatial locations to 'unregistered' if conflicting + # Rename original spatial locations to 'unregistered' if conflicting # with output if (spatloc_unreg == spatloc_reg_name) { gobj <- set_spatial_locations( @@ -531,7 +550,7 @@ registerGiottoObjectListFiji <- function(gobject_list, # Update images - # If there is an existing image with the image_reg_name, rename it + # If there is an existing image with the image_reg_name, rename it # "unregistered" # Move the original image to 'unregistered' if (image_unreg == image_reg_name) { @@ -571,7 +590,8 @@ registerGiottoObjectListFiji <- function(gobject_list, )) names(boundaries) <- c( - "xmax_adj", "xmin_adj", "ymax_adj", "ymin_adj") + "xmax_adj", "xmin_adj", "ymax_adj", "ymin_adj" + ) gobj@images[[image_reg_name]]@boundaries <- boundaries } @@ -581,30 +601,31 @@ registerGiottoObjectListFiji <- function(gobject_list, return(gobject_list) } -# TODO check if spatloc is actually provided in createGiottoImage() and ignore +# TODO check if spatloc is actually provided in createGiottoImage() and ignore # auto align if not. #' @title registerGiottoObjectListRvision #' @name registerGiottoObjectListRvision -#' @description Function to spatially align gobject data based on Rvision image +#' @description Function to spatially align gobject data based on Rvision image #' registration. #' @param gobject_list list of gobjects to register #' @param image_list Filepaths to unregistered images #' @param save_dir (Optional) If given, save registered images to this directory #' @param spatloc_unreg spatial locations to use -#' @param spatloc_reg_name name for registered spatial locations to. Defaults +#' @param spatloc_reg_name name for registered spatial locations to. Defaults #' to replacement of spat_unreg (optional) #' @param verbose be verbose -#' @returns list of registered giotto objects where the registered images and +#' @returns list of registered giotto objects where the registered images and #' spatial locations #' @export # Register giotto objects when given raw images and spatial locations -registerGiottoObjectListRvision <- function(gobject_list = gobject_list, - image_list = NULL, - save_dir = NULL, - spatloc_unreg = NULL, - spatloc_reg_name = "raw", - verbose = TRUE) { # Not used +registerGiottoObjectListRvision <- function( + gobject_list = gobject_list, + image_list = NULL, + save_dir = NULL, + spatloc_unreg = NULL, + spatloc_reg_name = "raw", + verbose = TRUE) { # Not used package_check( pkg_name = "Rvision", @@ -635,11 +656,13 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, color_images <- c() for (path in image_list) { unreg_images <- append( - unreg_images, Rvision::image(filename = path), - after = length(unreg_images)) + unreg_images, Rvision::image(filename = path), + after = length(unreg_images) + ) color_images <- append( - color_images, Rvision::image(filename = path), - after = length(color_images)) + color_images, Rvision::image(filename = path), + after = length(color_images) + ) } ## 3. Perform preprocessing @@ -648,7 +671,9 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, for (image_i in seq_along(unreg_images)) { # Make images grayscale Rvision::changeColorSpace( - unreg_images[[image_i]], colorspace = "GRAY", target = "self") + unreg_images[[image_i]], + colorspace = "GRAY", target = "self" + ) # Retrieve image dimensions dims <- dim(unreg_images[[image_i]]) rows <- append(rows, dims[[1]], after = length(rows)) @@ -662,16 +687,24 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, for (i in seq_along(unreg_images)) { # Add border so all images have same square dimensions Rvision::border( - unreg_images[[i]], squmax - rows[[i]], 0, - squmax - cols[[i]], 0, border_color = "white", target = "self") + unreg_images[[i]], squmax - rows[[i]], 0, + squmax - cols[[i]], 0, + border_color = "white", target = "self" + ) Rvision::border( - color_images[[i]], squmax - rows[[i]], 0, - squmax - cols[[i]], 0, border_color = "white", target = "self") + color_images[[i]], squmax - rows[[i]], 0, + squmax - cols[[i]], 0, + border_color = "white", target = "self" + ) # Apply scaling so all images of reasonable size for processing unreg_images[[i]] <- Rvision::resize( - unreg_images[[i]], height = enddim, width = enddim, target = "new") + unreg_images[[i]], + height = enddim, width = enddim, target = "new" + ) color_images[[i]] <- Rvision::resize( - color_images[[i]], height = enddim, width = enddim, target = "new") + color_images[[i]], + height = enddim, width = enddim, target = "new" + ) } rm(cols, rows) @@ -683,8 +716,10 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, transfs <- base::vector(mode = "list", length = length(unreg_images)) for (i in seq_along(unreg_images)) { transfs[[i]] <- Rvision::findTransformECC( - refImage, unreg_images[[i]], warp_mode = "euclidean", - filt_size = 101) + refImage, unreg_images[[i]], + warp_mode = "euclidean", + filt_size = 101 + ) } rm(refImage) @@ -693,10 +728,14 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, for (i in seq_along(unreg_images)) { # Apply scaling spatloc_list[[i]][] <- rescale( - spatloc_list[[i]][], enddim / squmax, x0 = 0, y0 = 0) + spatloc_list[[i]][], enddim / squmax, + x0 = 0, y0 = 0 + ) # Apply transform to spatlocs spatloc_list[[i]][] <- .rigid_transform_spatial_locations( - spatloc_list[[i]][], transfs[[i]], method = "rvision") + spatloc_list[[i]][], transfs[[i]], + method = "rvision" + ) } rm(squmax, enddim) @@ -733,9 +772,13 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, # Apply transform to image transf_images <- c() for (i in seq_along(unreg_images)) { - transf_images <- append(transf_images, Rvision::warpAffine( - color_images[[i]], transfs[[i]], target = "new"), - length(transf_images)) + transf_images <- append( + transf_images, Rvision::warpAffine( + color_images[[i]], transfs[[i]], + target = "new" + ), + length(transf_images) + ) } # Save images to save directory for (image_i in seq_along(transf_images)) { @@ -777,8 +820,10 @@ fiji <- function(fijiPath = NULL) { fijiPath <- getOption("giotto.fiji") if (!is.null(fijiPath)) { if (!file.exists(fijiPath)) { - stop("fiji is not at: ", fijiPath, - " as specified by options('giotto.fiji')!") + stop( + "fiji is not at: ", fijiPath, + " as specified by options('giotto.fiji')!" + ) } } else { # look for it in sensible places @@ -789,7 +834,7 @@ fiji <- function(fijiPath = NULL) { } else { stop( "Unable to find fiji! ", - "Set options('giotto.fiji') to point to the fiji + "Set options('giotto.fiji') to point to the fiji command line executable!" ) } @@ -804,13 +849,13 @@ fiji <- function(fijiPath = NULL) { #' @title registerImagesFIJI #' @name registerImagesFIJI -#' @description Wrapper function for Register Virtual Stack Slices plugin in +#' @description Wrapper function for Register Virtual Stack Slices plugin in #' FIJI #' @param source_img_dir Folder containing images to be registered #' @param output_img_dir Folder to save registered images to -#' @param transforms_save_dir (jython implementation only) Folder to save +#' @param transforms_save_dir (jython implementation only) Folder to save #' transforms to -#' @param ref_img_name (jython implementation only) File name of reference +#' @param ref_img_name (jython implementation only) File name of reference #' image for the registration #' @param init_gauss_blur Point detector option: initial image blurring #' @param steps_per_scale_octave Point detector option @@ -834,41 +879,42 @@ fiji <- function(fijiPath = NULL) { #' \code{options(giotto.fiji="/some/path")}) #' @param DryRun Whether to return the command to be run rather than actually #' executing it. -#' @returns list of registered giotto objects where the registered images and +#' @returns list of registered giotto objects where the registered images and #' spatial locations -#' @details This function was adapted from runFijiMacro function in +#' @details This function was adapted from runFijiMacro function in #' jimpipeline by jefferislab #' #' @export -registerImagesFIJI <- function(source_img_dir, - output_img_dir, - transforms_save_dir, - ref_img_name, - # Scale Invariant Interest Point Detector Options - init_gauss_blur = 1.6, - steps_per_scale_octave = 3, - min_img_size = 64, - max_img_size = 1024, - # Feature Descriptor Options - feat_desc_size = 8, - feat_desc_orient_bins = 8, - closest_next_closest_ratio = 0.92, - # Geometric Consensus Filter Options - max_align_err = 25, - inlier_ratio = 0.05, - # FIJI Options - headless = FALSE, - batch = TRUE, - MinMem = MaxMem, - MaxMem = 2500, - IncrementalGC = TRUE, - Threads = NULL, - fijiArgs = NULL, - javaArgs = NULL, - ijArgs = NULL, - jython = FALSE, - fijiPath = fiji(), - DryRun = FALSE) { +registerImagesFIJI <- function( + source_img_dir, + output_img_dir, + transforms_save_dir, + ref_img_name, + # Scale Invariant Interest Point Detector Options + init_gauss_blur = 1.6, + steps_per_scale_octave = 3, + min_img_size = 64, + max_img_size = 1024, + # Feature Descriptor Options + feat_desc_size = 8, + feat_desc_orient_bins = 8, + closest_next_closest_ratio = 0.92, + # Geometric Consensus Filter Options + max_align_err = 25, + inlier_ratio = 0.05, + # FIJI Options + headless = FALSE, + batch = TRUE, + MinMem = MaxMem, + MaxMem = 2500, + IncrementalGC = TRUE, + Threads = NULL, + fijiArgs = NULL, + javaArgs = NULL, + ijArgs = NULL, + jython = FALSE, + fijiPath = fiji(), + DryRun = FALSE) { # Check if output directory exists. If not, create the directory if (!file.exists(output_img_dir)) { dir.create(output_img_dir) @@ -882,20 +928,24 @@ registerImagesFIJI <- function(source_img_dir, if (headless) fijiArgs <- c(fijiArgs, "--headless") fijiArgs <- paste(fijiArgs, collapse = " ") - javaArgs <- c(paste("-Xms", MinMem, "m", sep = ""), - paste("-Xmx", MaxMem, "m", sep = ""), javaArgs) + javaArgs <- c( + paste("-Xms", MinMem, "m", sep = ""), + paste("-Xmx", MaxMem, "m", sep = ""), javaArgs + ) if (IncrementalGC) javaArgs <- c(javaArgs, "-Xincgc") javaArgs <- paste(javaArgs, collapse = " ") threadAdjust <- ifelse( - is.null(Threads), "", - paste("run(\"Memory & Threads...\", \"parallel=", Threads, "\");", - sep = "")) + is.null(Threads), "", + paste("run(\"Memory & Threads...\", \"parallel=", Threads, "\");", + sep = "" + ) + ) if (jython == TRUE) { # TODO Add check to see if jython script is installed. - message('jython implementation requires Headless_RVSS.py in - "/Giotto/inst/fiji/" to be copied to + message('jython implementation requires Headless_RVSS.py in + "/Giotto/inst/fiji/" to be copied to "/Applications/Fiji.app/plugins/Scripts/MyScripts/Headless_RVSS.py"') macroCall <- paste(" -eval '", @@ -1009,15 +1059,15 @@ parse_affine <- function(x) { # install_FIJI_scripts = function(fiji = fiji()) {} # TODO These things require a correct set of boundary values -# - Subset images in Giotto using Magick and followup reassignment as the +# - Subset images in Giotto using Magick and followup reassignment as the # default 'image' # - Follow this up with potential registration -# - Need a way to determine the pixel distances between spots to get an idea of +# - Need a way to determine the pixel distances between spots to get an idea of # which regions of image 'belong' to a spot -# - Would be nice to be able to put together an image mask even in magick and -# apply it to the image to aid with img_reg and take care of jagged lines after +# - Would be nice to be able to put together an image mask even in magick and +# apply it to the image to aid with img_reg and take care of jagged lines after # image subsetting # - A shiny app to subset tissue regions would be nice # The shiny app should be able to select spots in a 2d plane by default -# If given the ability, it should also select spots of a single plane or within +# If given the ability, it should also select spots of a single plane or within # a certain range of z values and plot them as a 2D for selection purposes diff --git a/R/interactivity.R b/R/interactivity.R index 4446a0704..16e61af35 100644 --- a/R/interactivity.R +++ b/R/interactivity.R @@ -2,7 +2,7 @@ #' Select image regions by plotting interactive polygons #' -#' @description Plot interactive polygons on an image and retrieve the polygons +#' @description Plot interactive polygons on an image and retrieve the polygons #' coordinates. #' @param x A `ggplot` or `rast` plot object to draw polygons on #' @param width,height An integer, defining the width/height in pixels. @@ -11,10 +11,11 @@ #' @returns A `data.table` containing x,y coordinates from the plotted polygons. #' #' @export -plotInteractivePolygons <- function(x, - width = "auto", - height = "auto", - ...) { +plotInteractivePolygons <- function( + x, + width = "auto", + height = "auto", + ...) { package_check(pkg_name = "miniUI", repository = "CRAN") package_check(pkg_name = "shiny", repository = "CRAN") @@ -29,8 +30,10 @@ plotInteractivePolygons <- function(x, miniUI::gadgetTitleBar("Plot Interactive Polygons"), miniUI::miniContentPanel( shiny::textInput( - "polygon_name", label = "Polygon name", - value = "polygon 1"), + "polygon_name", + label = "Polygon name", + value = "polygon 1" + ), shiny::sliderInput("xrange", label = "x coordinates", min = min(terra::ext(x))[1], @@ -57,8 +60,10 @@ plotInteractivePolygons <- function(x, miniUI::gadgetTitleBar("Plot Interactive Polygons"), miniUI::miniContentPanel( shiny::textInput( - "polygon_name", label = "Polygon name", - value = "polygon 1"), + "polygon_name", + label = "Polygon name", + value = "polygon 1" + ), shiny::sliderInput("xrange", label = "x coordinates", min = min(x[["layers"]][[1]]$data$sdimx), @@ -89,9 +94,11 @@ plotInteractivePolygons <- function(x, x$coordinates$default <- TRUE x + geom_polygon( - data = clicklist(), - aes(x, y, color = name, fill = name), - alpha = 0, ... + data = clicklist(), + aes(x, y, color = name), + alpha = 0, + show.legend = FALSE, + ... ) + coord_fixed( xlim = c(input$xrange[1], input$xrange[2]), @@ -100,8 +107,10 @@ plotInteractivePolygons <- function(x, theme(legend.position = "none") } else { terra::plot(x) - lapply(split(clicklist(), by = "name"), - function(x) graphics::polygon(x$x, x$y, ...)) + lapply( + split(clicklist(), by = "name"), + function(x) graphics::polygon(x$x, x$y, ...) + ) } }, res = 96, @@ -110,14 +119,16 @@ plotInteractivePolygons <- function(x, ) clicklist <- shiny::reactiveVal(data.table::data.table( - x = numeric(), y = numeric(), name = character())) # empty table + x = numeric(), y = numeric(), name = character() + )) # empty table shiny::observeEvent(input$plot_click, { click_x <- input$plot_click$x click_y <- input$plot_click$y polygon_name <- input$polygon_name temp <- clicklist() # get the table of past clicks temp <- rbind(temp, data.table::data.table( - x = click_x, y = click_y, name = polygon_name)) + x = click_x, y = click_y, name = polygon_name + )) clicklist(temp) }) @@ -139,20 +150,23 @@ plotInteractivePolygons <- function(x, #' @param polygon_name name of polygon selections #' @param spat_unit spatial unit, default = 'cell' #' @param spat_loc_name name of spatial locations to use, default = 'raw' -#' @param polygons character. A vector with polygon names to extract cells +#' @param polygons character. A vector with polygon names to extract cells #' from. If NULL, cells from all polygons are retrieved #' -#' @returns A terra 'SpatVector' with cell ID, x y coordinates, and polygon ID +#' @returns A terra 'SpatVector' with cell ID, x y coordinates, and polygon ID #' where each cell is located in. #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object -#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -165,33 +179,35 @@ plotInteractivePolygons <- function(x, #' getCellsFromPolygon(g) #' #' @export -getCellsFromPolygon <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - spat_loc_name = "raw", - polygons = NULL) { +getCellsFromPolygon <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + spat_loc_name = "raw", + polygons = NULL) { if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") } ## get polygons spatial info - polygon_spatVector <- get_polygon_info( + polygon_spatVector <- getPolygonInfo( gobject = gobject, polygon_name = polygon_name, return_giottoPolygon = FALSE ) ## get cell spatial locations - spatial_locs <- get_spatial_locations( + spatial_locs <- getSpatialLocations( gobject = gobject, spat_unit = spat_unit, - spat_loc_name = spat_loc_name, + name = spat_loc_name, output = "data.table", copy_obj = TRUE ) ## convert cell spatial locations to spatVector - cells_spatVector <- terra::vect(as.matrix(spatial_locs[, 1:2]), + cells_spatVector <- terra::vect( + as.matrix(spatial_locs[, c("sdimx", "sdimy")]), type = "points", atts = spatial_locs ) @@ -200,7 +216,8 @@ getCellsFromPolygon <- function(gobject, if (!is.null(polygons)) { polygonCells <- terra::subset( - polygonCells, polygonCells$poly_ID %in% polygons) + polygonCells, polygonCells$poly_ID %in% polygons + ) } return(polygonCells) @@ -214,51 +231,60 @@ getCellsFromPolygon <- function(gobject, #' @param feat_type feature name where metadata will be added #' @param spat_unit spatial unit #' @param spat_loc_name name of spatial locations to use -#' @param polygons polygon names to plot (e.g. 'polygon_1'). If NULL, plots +#' @param polygons polygon names to plot (e.g. 'polygon_1'). If NULL, plots #' all available polygons -#' @param na.label polygon label for cells located outside of polygons area. +#' @param na.label polygon label for cells located outside of polygons area. #' Default = "no_polygon" #' #' @returns A Giotto object with a modified cell_metadata slot that includes the -#' polygon name where each cell is located or no_polygon label if the cell is +#' polygon name where each cell is located or no_polygon label if the cell is #' not located within a polygon area #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), +#' sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object -#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' my_giotto_polygons <- createGiottoPolygon( +#' my_polygon_coords, +#' name = "selections" +#' ) +#' #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) #' ) #' #' ## Add polygon IDs to cell metadata -#' addPolygonCells(g) +#' g <- addPolygonCells(g) +#' pDataDT(g) #' @export -addPolygonCells <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - spat_loc_name = "raw", - feat_type = "rna", - polygons = NULL, - na.label = "no_polygon") { +addPolygonCells <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + spat_loc_name = "raw", + feat_type = "rna", + polygons = NULL, + na.label = "no_polygon") { ## verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") } ## get cells within each polygon - polygon_cells <- data.table::as.data.table(getCellsFromPolygon( + polygon_cells <- getCellsFromPolygon( gobject = gobject, polygon_name = polygon_name, spat_unit = spat_unit, spat_loc_name = spat_loc_name, polygons = polygons - )) + ) + polygon_cells <- data.table::as.data.table(polygon_cells) data.table::setnames(polygon_cells, old = "poly_ID", new = polygon_name) ## get original cell metadata @@ -282,7 +308,8 @@ addPolygonCells <- function(gobject, ## assign a default ID to cells outside of polygons selection_values <- new_cell_metadata[[polygon_name]] selection_values <- ifelse( - is.na(selection_values), na.label, selection_values) + is.na(selection_values), na.label, selection_values + ) new_cell_metadata[, c(polygon_name) := selection_values] ## keep original order of cells @@ -309,9 +336,9 @@ addPolygonCells <- function(gobject, #' @param spat_unit spatial unit (e.g. "cell") #' @param feat_type feature type (e.g. "rna", "dna", "protein") #' @param selected_feats vector of selected features to plot -#' @param expression_values gene expression values to use +#' @param expression_values gene expression values to use #' ("normalized", "scaled", "custom") -#' @param method method to use to detect differentially expressed feats +#' @param method method to use to detect differentially expressed feats #' ("scran", "gini", "mast") #' @param \dots Arguments passed to \link[ComplexHeatmap]{Heatmap} #' @@ -319,12 +346,15 @@ addPolygonCells <- function(gobject, #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object -#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -332,17 +362,18 @@ addPolygonCells <- function(gobject, #' #' ## Add polygon cells #' g <- addPolygonCells(g) -#' +#' #' comparePolygonExpression(g) #' @export -comparePolygonExpression <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - feat_type = "rna", - selected_feats = "top_genes", - expression_values = "normalized", - method = "scran", - ...) { +comparePolygonExpression <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + feat_type = "rna", + selected_feats = "top_genes", + expression_values = "normalized", + method = "scran", + ...) { # verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -394,7 +425,7 @@ comparePolygonExpression <- function(gobject, sd_expression_gene <- stats::sd(my_expression[gene, ]) for (cell in colnames(my_expression)) { my_zscores[gene, cell] <- ( - my_expression[gene, cell] - mean_expression_gene) / + my_expression[gene, cell] - mean_expression_gene) / sd_expression_gene } } @@ -437,12 +468,15 @@ comparePolygonExpression <- function(gobject, #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object -#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -450,15 +484,16 @@ comparePolygonExpression <- function(gobject, #' #' ## Add polygon cells #' g <- addPolygonCells(g) -#' +#' #' compareCellAbundance(g) #' @export -compareCellAbundance <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - feat_type = "rna", - cell_type_column = "leiden_clus", - ...) { +compareCellAbundance <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + feat_type = "rna", + cell_type_column = "leiden_clus", + ...) { # verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -503,21 +538,24 @@ compareCellAbundance <- function(gobject, #' @param polygon_name name of polygon selections #' @param x A ggplot2, spatPlot or terra::rast object #' @param spat_unit spatial unit -#' @param polygons character. Vector of polygon names to plot. If NULL, all +#' @param polygons character. Vector of polygon names to plot. If NULL, all #' polygons are plotted -#' @param ... Additional parameters passed to ggplot2::geom_polygon() or +#' @param ... Additional parameters passed to ggplot2::geom_polygon() or #' graphics::polygon #' #' @returns A ggplot2 image #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object -#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -525,18 +563,19 @@ compareCellAbundance <- function(gobject, #' #' ## Add polygon cells #' g <- addPolygonCells(g) -#' +#' #' ## Create spatplot #' x <- spatPlot2D(g, return_plot = TRUE) -#' +#' #' plotPolygons(g, x = x) #' @export -plotPolygons <- function(gobject, - polygon_name = "selections", - x, - spat_unit = "cell", - polygons = NULL, - ...) { +plotPolygons <- function( + gobject, + polygon_name = "selections", + x, + spat_unit = "cell", + polygons = NULL, + ...) { ## verify gobject if (!inherits(gobject, "giotto")) { stop("gobject must be a Giotto object") @@ -594,21 +633,22 @@ plotPolygons <- function(gobject, #' @param gobject giotto object #' @param spat_unit spatial unit (e.g. "cell") #' @param feat_type feature type (e.g. "rna", "dna", "protein") -#' @param cell_color character. What to color cells by +#' @param cell_color character. What to color cells by #' (e.g. metadata col or spatial enrichment col) -#' @param cell_color_code character. discrete colors to use. Palette to use or +#' @param cell_color_code character. discrete colors to use. Palette to use or #' named vector of colors #' @param point_size size of point (cell) #' @param width plot width #' @param height plot height #' -#' @returns data.table with selected cell_IDs, spatial coordinates, and +#' @returns data.table with selected cell_IDs, spatial coordinates, and #' cluster_ID. #' @export -plotInteractive3D <- function(gobject, spat_unit = "cell", feat_type = "rna", - cell_color = "leiden_clus", - cell_color_code = NULL, point_size = 0.5, - width = "100%", height = "400px") { +plotInteractive3D <- function( + gobject, spat_unit = "cell", feat_type = "rna", + cell_color = "leiden_clus", + cell_color_code = NULL, point_size = 0.5, + width = "100%", height = "400px") { # NSE vars sdimx <- sdimy <- sdimz <- cell_ID <- NULL @@ -675,8 +715,9 @@ plotInteractive3D <- function(gobject, spat_unit = "cell", feat_type = "rna", data[data[[cell_color]] %in% input$clusters, ] %>% plotly::filter( sdimx >= input$xrange[1] & sdimx <= input$xrange[2] & - sdimy >= input$yrange[1] & sdimy <= input$yrange[2] & - sdimz >= input$zrange[1] & sdimz <= input$zrange[2]) %>% + sdimy >= input$yrange[1] & sdimy <= input$yrange[2] & + sdimz >= input$zrange[1] & sdimz <= input$zrange[2] + ) %>% plotly::select(cell_ID, sdimx, sdimy, sdimz, cell_color) }) diff --git a/R/kriging.R b/R/kriging.R index 681419cd6..ff3da4505 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -43,19 +43,22 @@ NULL #' @export setMethod( "interpolateFeature", signature(x = "giotto", y = "missing"), - function(x, - spat_unit = NULL, - feat_type = NULL, - feats, - spatvalues_params = list(), - spat_loc_name = "raw", - ext = NULL, - buffer = 50, - name_fmt = "%s", - savedir = file.path(getwd(), "interp_rasters"), - overwrite = FALSE, - verbose = NULL, - ...) { + function( + x, + spat_unit = NULL, + feat_type = NULL, + feats, + spatvalues_params = list(), + spat_loc_name = "raw", + ext = NULL, + buffer = 50, + name_fmt = "%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + verbose = NULL, + ...) { + sl <- NULL + # This method prepares the data from the giotto object to pass # downstream where the actual interpolation happens @@ -130,9 +133,6 @@ setMethod( #' @rdname interpolateFeature #' @param rastersize numeric. Length of major axis in px of interpolation #' raster to create. -#' @param name name of interpolation `giottoLargeImage` to generate -#' @param filename character. Output filename. Default is \[`name`\].tif within -#' the working directory. #' @param overwrite logical. Whether raster outputs should be overwritten if #' the same `filename` is provided. #' @details @@ -142,15 +142,16 @@ setMethod( setMethod( "interpolateFeature", signature(x = "spatLocsObj", y = "data.frame"), - function(x, y, - ext = NULL, - buffer = 50, - rastersize = 500, - name_fmt = "%s", - savedir = file.path(getwd(), "interp_rasters"), - overwrite = FALSE, - # cores = GiottoUtils::determine_cores(), - ...) { + function( + x, y, + ext = NULL, + buffer = 50, + rastersize = 500, + name_fmt = "%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + # cores = GiottoUtils::determine_cores(), + ...) { checkmate::assert_character(savedir) checkmate::assert_character(name_fmt) checkmate::assert_logical(overwrite) diff --git a/R/poly_influence.R b/R/poly_influence.R index 18fefd9b8..cf5a2a031 100644 --- a/R/poly_influence.R +++ b/R/poly_influence.R @@ -2,10 +2,10 @@ #' @name showPolygonSizeInfluence #' @param gobject giotto object #' @param spat_unit spatial unit -#' @param alt_spat_unit alternaitve spatial unit which represents resized +#' @param alt_spat_unit alternaitve spatial unit which represents resized #' polygon data #' @param feat_type feature type -#' @param clus_name name of cluster column in cell_metadata for given spat_unit +#' @param clus_name name of cluster column in cell_metadata for given spat_unit #' and alt_spat_unit, i.e. "kmeans" #' @param return_plot logical. whether to return the plot object #' @param verbose be verbose @@ -16,29 +16,31 @@ #' New columns, resize_switch and cluster_interaction, will be created within #' cell_metadata for spat_unit-feat_type. #' -#' These new columns will describe if a given cell switched cluster number when +#' These new columns will describe if a given cell switched cluster number when #' resized. #' If the same amount of clusters exist for spat_unit-feat_type and #' alt_spat_unit-feat_type, then clusters are determined to be #' corresponding based on % overlap in cell_IDs in each cluster. #' -#' Otherwise, multiple clusters from the spatial unit feature type pair are +#' Otherwise, multiple clusters from the spatial unit feature type pair are #' condensed to align with the smaller number of clusters and ensure overlap. #' #' @export -showPolygonSizeInfluence <- function(gobject = NULL, - spat_unit = NULL, - alt_spat_unit = NULL, - feat_type = NULL, - clus_name = "kmeans", - return_plot = FALSE, - verbose = FALSE) { +showPolygonSizeInfluence <- function( + gobject = NULL, + spat_unit = NULL, + alt_spat_unit = NULL, + feat_type = NULL, + clus_name = "kmeans", + return_plot = FALSE, + verbose = FALSE) { # NSE vars cell_ID <- total_expr <- cluster_interactions <- N <- resize_switch <- NULL # Guards - if (!c("giotto") %in% class(gobject)) + if (!c("giotto") %in% class(gobject)) { stop(wrap_txt("Please provide a valid Giotto Object.", errWidth = TRUE)) + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -52,28 +54,35 @@ showPolygonSizeInfluence <- function(gobject = NULL, if (!alt_spat_unit %in% names(gobject@expression)) { stop(wrap_txt(paste0( - "Alternative spatial unit ", alt_spat_unit, - " not found. Please ensure it exists."), errWidth = TRUE)) + "Alternative spatial unit ", alt_spat_unit, + " not found. Please ensure it exists." + ), errWidth = TRUE)) } meta_cols <- names(getCellMetadata( - gobject, - spat_unit = spat_unit, - feat_type = feat_type, - output = "data.table")) + gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + )) if (!clus_name %in% meta_cols) { - stop(wrap_txt(paste0( - "Cluster name ", clus_name, - " not found within cell metadata. Please ensure it exists."), - errWidth = TRUE)) + stop(wrap_txt( + paste0( + "Cluster name ", clus_name, + " not found within cell metadata. Please ensure it exists." + ), + errWidth = TRUE + )) } if (c("cluster_interactions") %in% meta_cols) { - warning((wrap_txt(paste0("Switch interactions already found within - cell_metadata for spat_unit feat_type pair:`", spat_unit, "-", - feat_type, "`. They will be overwritten."), errWidth = TRUE))) + warning((wrap_txt(paste0( + "Switch interactions already found within + cell_metadata for spat_unit feat_type pair:`", spat_unit, "-", + feat_type, "`. They will be overwritten." + ), errWidth = TRUE))) } ## Compare clustering results between cell and smallcell data ####### # ----------------------------------------------------------------- # @@ -86,7 +95,8 @@ showPolygonSizeInfluence <- function(gobject = NULL, cell_meta <- merge.data.table(cell_meta, new_clus_table, by = "cell_ID") cell_meta[, cluster_interactions := paste0(cell_meta[[ - paste0(clus_name, ".x")]], "-", cell_meta[[paste0(clus_name, ".y")]])] + paste0(clus_name, ".x") + ]], "-", cell_meta[[paste0(clus_name, ".y")]])] switches2 <- cell_meta[, .N, by = "cluster_interactions"] setorder(switches2, N) @@ -116,13 +126,15 @@ showPolygonSizeInfluence <- function(gobject = NULL, } cell_meta[, resize_switch := ifelse( - cluster_interactions %in% switch_strs, "same", "switch")] + cluster_interactions %in% switch_strs, "same", "switch" + )] gobject <- addCellMetadata( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, new_metadata = cell_meta[ - , .(cell_ID, resize_switch, cluster_interactions)], + , .(cell_ID, resize_switch, cluster_interactions) + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -144,11 +156,13 @@ showPolygonSizeInfluence <- function(gobject = NULL, ) num_cells_switched <- sum( - getCellMetadata(gobject)$resize_switch == "switch") + getCellMetadata(gobject)$resize_switch == "switch" + ) num_cells_same <- sum(getCellMetadata(gobject)$resize_switch == "same") if (verbose) print(paste0(num_cells_switched, " cells switched clusters.")) - if (verbose) + if (verbose) { print(paste0(num_cells_same, " cells remained in the same cluster.")) + } if (return_plot) { return(poly_plot) @@ -169,22 +183,23 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' y_m is a cluster number from the resized spatial unit #' n is the number of clusters #' -#' Clusters are determined to be corresponding based on % overlap in cell_IDs +#' Clusters are determined to be corresponding based on % overlap in cell_IDs #' in each cluster. #' #' @keywords internal -.determine_switch_string_equal <- function(cell_meta = NULL, - cell_meta_new = NULL, - clus_name = NULL) { +.determine_switch_string_equal <- function( + cell_meta = NULL, + cell_meta_new = NULL, + clus_name = NULL) { k_clusters <- sort(unique(cell_meta[[clus_name]])) num_clusters <- k_clusters[length(k_clusters)] - k_match_clusters <- 1:num_clusters + k_match_clusters <- seq_len(num_clusters) switch_strs <- c() - for (i in 1:num_clusters) { + for (i in seq_len(num_clusters)) { thresh <- 0 clus_match <- NULL - for (j in 1:num_clusters) { + for (j in seq_len(num_clusters)) { c_df <- cell_meta[cell_meta[[clus_name]] == i]$cell_ID nc_df <- cell_meta_new[cell_meta_new[[clus_name]] == j]$cell_ID @@ -198,7 +213,7 @@ showPolygonSizeInfluence <- function(gobject = NULL, k_match_clusters[i] <- clus_match } - for (idx in 1:num_clusters) { + for (idx in seq_len(num_clusters)) { p1 <- k_clusters[[idx]] p2 <- k_match_clusters[[idx]] switch_strs <- c(switch_strs, paste0(p1, "-", p2)) @@ -212,7 +227,7 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' @param num_orig sorted vector of cluster numbers in the original metadata #' @param num_new sorted vector of cluster numbers in the new, resized metadata #' @returns switch_str, a vector of corresponding cluster numbers in strings -#' @details determines how to create a string in the format +#' @details determines how to create a string in the format #' c("x_1-y_1", "x_2-y_2"..."x_n, y_m") #' Where: #' x_n is a cluster number from the original spatial unit @@ -223,8 +238,9 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' Essentially determines iteration order for .create_switch_string_unequal() #' #' @keywords internal -.determine_switch_string_unequal <- function(num_orig = NULL, - num_new = NULL) { +.determine_switch_string_unequal <- function( + num_orig = NULL, + num_new = NULL) { switch_strs <- c() orig_first <- TRUE @@ -261,15 +277,22 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' n is the number of clusters in the original spatial unit #' m is the number of clusters in the new spatial unit #' @keywords internal -.create_switch_string_unequal <- function(num_first = NULL, - num_second = NULL, - switch_strs = NULL) { +.create_switch_string_unequal <- function( + num_first = NULL, + num_second = NULL, + switch_strs = NULL) { for (o in num_first) { for (n in num_second) { - if (as.integer(o) == as.integer(n)) switch_strs <- c( - switch_strs, paste0(as.character(o), "-", as.character(n))) - if (o > n && n == num_second[length(num_second)]) switch_strs <- c( - switch_strs, paste0(as.character(o), "-", as.character(n))) + if (as.integer(o) == as.integer(n)) { + switch_strs <- c( + switch_strs, paste0(as.character(o), "-", as.character(n)) + ) + } + if (o > n && n == num_second[length(num_second)]) { + switch_strs <- c( + switch_strs, paste0(as.character(o), "-", as.character(n)) + ) + } } } @@ -284,20 +307,22 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' @param spat_unit spatial unit #' @param feat_type feature type #' @returns ggplot -#' @details Creates a pie chart showing how many cells switched clusters after +#' @details Creates a pie chart showing how many cells switched clusters after #' annotation resizing. -#' The function showPolygonSizeInfluence() must have been run on the Giotto +#' The function showPolygonSizeInfluence() must have been run on the Giotto #' Object for this function to run. #' @export -showCellProportionSwitchedPie <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL) { +showCellProportionSwitchedPie <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL) { # NSE vars cluster_status <- num_cells <- resize_switch <- perc <- ypos <- NULL # Guards - if (!"giotto" %in% class(gobject)) + if (!"giotto" %in% class(gobject)) { stop(wrap_txt("Please provide a valid Giotto Object.", errWidth = TRUE)) + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -319,14 +344,15 @@ showCellProportionSwitchedPie <- function(gobject = NULL, ) if (!c("resize_switch") %in% names(cmeta)) { - stop(wrap_txt("Column 'resize_switch' not found in cell metadata. + stop(wrap_txt("Column 'resize_switch' not found in cell metadata. Ensure showPolygonSizeInfluence() has been run.", errWidth = TRUE)) } plotdf <- data.table::data.table() plotdf[, cluster_status := c("switch", "same")] plotdf[, num_cells := c(sum(cmeta[ - , resize_switch == "switch"]), sum(cmeta[, resize_switch == "same"]))] + , resize_switch == "switch" + ]), sum(cmeta[, resize_switch == "same"]))] per_switch <- plotdf$num_cells[[1]] / sum(plotdf$num_cells) * 100 per_same <- plotdf$num_cells[[2]] / sum(plotdf$num_cells) * 100 @@ -341,7 +367,8 @@ showCellProportionSwitchedPie <- function(gobject = NULL, print(plotdf) ggplot( - as.data.frame(plotdf), aes(x = "", y = perc, fill = cluster_status)) + + as.data.frame(plotdf), aes(x = "", y = perc, fill = cluster_status) + ) + coord_polar("y", start = 0) + geom_bar(stat = "identity", width = 1) + theme_void() + @@ -350,26 +377,28 @@ showCellProportionSwitchedPie <- function(gobject = NULL, #' @title showCellProportionSwitchedSanKey #' @name showCellProportionSwitchedSanKey -#' @param gobject giotto object which contains metadata for both spat_unit and +#' @param gobject giotto object which contains metadata for both spat_unit and #' alt_spat_unit #' @param spat_unit spatial unit -#' @param alt_spat_unit alternative spatial unit which stores data after +#' @param alt_spat_unit alternative spatial unit which stores data after #' resizing annotations #' @param feat_type feature type #' @returns D3 JavaScript Sankey diagram #' @details Creates a Sankey Diagram to illustrate cluster switching behavior. #' Currently only supports displaying cluster switching for kmeans clusters. #' @export -showCellProportionSwitchedSanKey <- function(gobject = NULL, - spat_unit = NULL, - alt_spat_unit = NULL, - feat_type = NULL) { +showCellProportionSwitchedSanKey <- function( + gobject = NULL, + spat_unit = NULL, + alt_spat_unit = NULL, + feat_type = NULL) { # NSE vars kmeans_small <- cell_ID <- NULL # Guards - if (!"giotto" %in% class(gobject)) + if (!"giotto" %in% class(gobject)) { stop(wrap_txt("Please provide a valid Giotto Object.", errWidth = TRUE)) + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -382,8 +411,9 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, ) if (!alt_spat_unit %in% names(gobject@expression)) { stop(wrap_txt(paste0( - "Alternative spatial unit ", alt_spat_unit, - " not found. Please ensure it exists."), errWidth = TRUE)) + "Alternative spatial unit ", alt_spat_unit, + " not found. Please ensure it exists." + ), errWidth = TRUE)) } package_check("networkD3") @@ -397,7 +427,7 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, ) if (!c("resize_switch") %in% names(cmeta)) { - stop(wrap_txt("Column 'resize_switch' not found in cell metadata. + stop(wrap_txt("Column 'resize_switch' not found in cell metadata. Ensure showPolygonSizeInfluence() has been run.", errWidth = TRUE)) } @@ -422,7 +452,9 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, small_cmeta_clus$kmeans <- NULL merged_cmeta <- data.table::merge.data.table( - cmeta, small_cmeta_clus, by.x = "cell_ID", by.y = "cell_ID") + cmeta, small_cmeta_clus, + by.x = "cell_ID", by.y = "cell_ID" + ) k1 <- unique(merged_cmeta$kmeans) @@ -436,7 +468,7 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, idx1 <- 1 idx2 <- 1 - for (i in 1:flen) { + for (i in seq_len(flen)) { c_k1[i] <- k1[idx1] - 1 # java zero-index c_k2[i] <- k2[idx2] - 1 # java zero-index @@ -447,9 +479,10 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, num_occ <- c() - for (i in 1:flen) { + for (i in seq_len(flen)) { num_occ[i] <- dim(na.omit(merged_cmeta[kmeans == (c_k1[i] + 1)][ - merged_cmeta[kmeans_small == (c_k2[i] + 1)]]))[[1]] + merged_cmeta[kmeans_small == (c_k2[i] + 1)] + ]))[[1]] } fdt[, "k1"] <- c_k1 @@ -459,7 +492,8 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, label_dt <- data.table::data.table() label_dt[, "name"] <- c(paste0("original_", as.character(sort(k1))), paste0( - "resized_", as.character(sort(k2)))) + "resized_", as.character(sort(k2)) + )) label_dt master <- list(fdt, label_dt) diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 49f09175b..76bb4a503 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -8,16 +8,16 @@ #' @param spatial_network_name name of spatial network to use for HMRF #' @param spat_loc_name name of spatial locations #' @param spatial_genes spatial genes to use for HMRF -#' @param spatial_dimensions select spatial dimensions to use, default is all +#' @param spatial_dimensions select spatial dimensions to use, default is all #' possible dimensions #' @param dim_reduction_to_use use another dimension reduction set as input #' @param dim_reduction_name name of dimension reduction set to use #' @param dimensions_to_use number of dimensions to use as input #' @param name name of HMRF run #' @param k number of HMRF domains -#' @param seed seed to fix random number generator +#' @param seed seed to fix random number generator #' (for creating initialization of HMRF) (-1 if no fixing) -#' @param betas betas to test for. three numbers: start_beta, beta_increment, +#' @param betas betas to test for. three numbers: start_beta, beta_increment, #' num_betas e.g. c(0, 2.0, 50) #' @param tolerance tolerance #' @param zscore zscore @@ -25,15 +25,17 @@ #' @param python_path python path to use #' @param output_folder output folder to save results #' @param overwrite_output overwrite output folder -#' @returns Creates a directory with results that can be viewed with +#' @returns Creates a directory with results that can be viewed with #' viewHMRFresults #' @details Description of HMRF parameters ... #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' spat_genes <- binSpect(g) -#' -#' doHMRF(g, spatial_genes = spat_genes[seq_len(10)]$feats, -#' output_folder = tempdir()) +#' +#' doHMRF(g, +#' spatial_genes = spat_genes[seq_len(10)]$feats, +#' output_folder = tempdir() +#' ) #' @export doHMRF <- function(gobject, spat_unit = NULL, @@ -56,15 +58,8 @@ doHMRF <- function(gobject, python_path = NULL, output_folder = NULL, overwrite_output = TRUE) { - if (!requireNamespace("smfishHmrf", quietly = TRUE)) { - stop("package ", "smfishHmrf", " is not yet installed \n", - "To install: \n", - "remotes::install_bitbucket(repo = 'qzhudfci/smfishhmrf-r', ref='master')", - "see http://spatial.rc.fas.harvard.edu/install.html for more information", - call. = FALSE - ) - } + package_check("smfishHmrf", repository = "pip") # data.table set global variable to <- from <- NULL @@ -95,7 +90,8 @@ doHMRF <- function(gobject, output_folder <- paste0(getwd(), "/", "HMRF_output") if (!file.exists(output_folder)) { dir.create( - path = paste0(getwd(), "/", "HMRF_output"), recursive = TRUE) + path = paste0(getwd(), "/", "HMRF_output"), recursive = TRUE + ) } } # folder path specified @@ -108,12 +104,11 @@ doHMRF <- function(gobject, ## first write necessary txt files to output folder ## - # cell location / spatial network / expression data and selected spatial + # cell location / spatial network / expression data and selected spatial # genes ## 1. expression values if (!is.null(dim_reduction_to_use)) { - expr_values <- getDimReduction( gobject = gobject, spat_unit = spat_unit, @@ -128,7 +123,8 @@ doHMRF <- function(gobject, } else { values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -139,7 +135,7 @@ doHMRF <- function(gobject, } if (!"matrix" %in% class(expr_values)) { - warning("this matrix will be converted to a dense and memory intensive + warning("this matrix will be converted to a dense and memory intensive base matrix ...") expr_values <- as.matrix(expr_values) } @@ -149,21 +145,22 @@ doHMRF <- function(gobject, # overwrite if exists if (file.exists(expression_file) & overwrite_output == TRUE) { - message("expression_matrix.txt already exists at this location, will be + message("expression_matrix.txt already exists at this location, will be overwritten") data.table::fwrite( - data.table::as.data.table(expr_values, keep.rownames = "gene"), - file = expression_file, quote = FALSE, col.names = TRUE, - row.names = FALSE, sep = " ") - + data.table::as.data.table(expr_values, keep.rownames = "gene"), + file = expression_file, quote = FALSE, col.names = TRUE, + row.names = FALSE, sep = " " + ) } else if (file.exists(expression_file) & overwrite_output == FALSE) { message("expression_matrix.txt already exists at this location, will be used again") } else { data.table::fwrite( - data.table::as.data.table(expr_values, keep.rownames = "gene"), - file = expression_file, quote = FALSE, col.names = TRUE, - row.names = FALSE, sep = " ") + data.table::as.data.table(expr_values, keep.rownames = "gene"), + file = expression_file, quote = FALSE, col.names = TRUE, + row.names = FALSE, sep = " " + ) } @@ -176,26 +173,28 @@ doHMRF <- function(gobject, dimred_rownames <- rownames(expr_values) spatial_genes_detected <- dimred_rownames[dimensions_to_use] spatial_genes_detected <- spatial_genes_detected[ - !is.na(spatial_genes_detected)] + !is.na(spatial_genes_detected) + ] } else { if (is.null(spatial_genes)) { stop("you need to provide a vector of spatial genes (~500)") } spatial_genes_detected <- spatial_genes[ - spatial_genes %in% rownames(expr_values)] + spatial_genes %in% rownames(expr_values) + ] } spatial_genes_file <- paste0(output_folder, "/", "spatial_genes.txt") # overwrite if exists if (file.exists(spatial_genes_file) & overwrite_output == TRUE) { - message("spatial_genes.txt already exists at this location, will be + message("spatial_genes.txt already exists at this location, will be overwritten") write.table(spatial_genes_detected, file = spatial_genes_file, quote = FALSE, col.names = FALSE, row.names = FALSE ) } else if (file.exists(spatial_genes_file) & overwrite_output == FALSE) { - message("spatial_genes.txt already exists at this location, will be + message("spatial_genes.txt already exists at this location, will be used again") } else { write.table(spatial_genes_detected, @@ -218,14 +217,14 @@ doHMRF <- function(gobject, spatial_network_file <- paste0(output_folder, "/", "spatial_network.txt") if (file.exists(spatial_network_file) & overwrite_output == TRUE) { - message("spatial_network.txt already exists at this location, will be + message("spatial_network.txt already exists at this location, will be overwritten") write.table(spatial_network, file = spatial_network_file, row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t" ) } else if (file.exists(spatial_network_file) & overwrite_output == FALSE) { - message("spatial_network.txt already exists at this location, will be + message("spatial_network.txt already exists at this location, will be used again") } else { write.table(spatial_network, @@ -248,21 +247,25 @@ doHMRF <- function(gobject, # select spatial dimensions that are available # spatial_dimensions <- spatial_dimensions[ - spatial_dimensions %in% colnames(spatial_location)] + spatial_dimensions %in% colnames(spatial_location) + ] spatial_location <- spatial_location[ - , c(spatial_dimensions, "cell_ID"), with = FALSE] + , c(spatial_dimensions, "cell_ID"), + with = FALSE + ] spatial_location_file <- paste0( - output_folder, "/", "spatial_cell_locations.txt") + output_folder, "/", "spatial_cell_locations.txt" + ) if (file.exists(spatial_location_file) & overwrite_output == TRUE) { - message("spatial_cell_locations.txt already exists at this location, + message("spatial_cell_locations.txt already exists at this location, will be overwritten") write.table(spatial_location, file = spatial_location_file, row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t" ) } else if (file.exists(spatial_location_file)) { - message("spatial_cell_locations.txt already exists at this location, + message("spatial_cell_locations.txt already exists at this location, will be used again") } else { write.table(spatial_location, @@ -348,18 +351,23 @@ doHMRF <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- tempdir() -#' doHMRF(g, spatial_genes = c("Gna12", "Ccnd2"), output_folder = x, -#' betas = c(0, 2, 50)) -#' -#' loadHMRF(output_folder_used = x, betas_used = c(0, 2, 50), -#' python_path_used = NULL) -#' +#' doHMRF(g, +#' spatial_genes = c("Gna12", "Ccnd2"), output_folder = x, +#' betas = c(0, 2, 50) +#' ) +#' +#' loadHMRF( +#' output_folder_used = x, betas_used = c(0, 2, 50), +#' python_path_used = NULL +#' ) +#' #' @export -loadHMRF <- function(name_used = "test", - output_folder_used, - k_used = 10, - betas_used, - python_path_used) { +loadHMRF <- function( + name_used = "test", + output_folder_used, + k_used = 10, + betas_used, + python_path_used) { output_data <- paste0(output_folder_used, "/", "result.spatial.zscore") if (!file.exists(output_data)) { stop("doHMRF was not run in this output directory") @@ -395,12 +403,13 @@ loadHMRF <- function(name_used = "test", #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot2D}} and \code{\link{spatPlot3D}} #' @export -viewHMRFresults <- function(gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - third_dim = FALSE, - ...) { +viewHMRFresults <- function( + gobject, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + third_dim = FALSE, + ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -408,7 +417,9 @@ viewHMRFresults <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -423,8 +434,10 @@ viewHMRFresults <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -447,18 +460,20 @@ viewHMRFresults <- function(gobject, title_name <- paste0("k = ", k, " b = ", b) spatPlot2D( - gobject = gobject, - cell_color = output, - show_plot = TRUE, - title = title_name, - ...) + gobject = gobject, + cell_color = output, + show_plot = TRUE, + title = title_name, + ... + ) if (third_dim == TRUE) { spatPlot3D( - gobject = gobject, - cell_color = output, - show_plot = TRUE, - ...) + gobject = gobject, + cell_color = output, + show_plot = TRUE, + ... + ) } } } @@ -475,11 +490,12 @@ viewHMRFresults <- function(gobject, #' @param print_command see the python command #' @returns data.table with HMRF results for each b and the selected k #' @export -writeHMRFresults <- function(gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - print_command = FALSE) { +writeHMRFresults <- function( + gobject, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + print_command = FALSE) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -487,7 +503,9 @@ writeHMRFresults <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -502,8 +520,10 @@ writeHMRFresults <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -533,7 +553,8 @@ writeHMRFresults <- function(gobject, result_DT <- data.table::as.data.table(do.call("cbind", result_list)) result_DT <- cbind(data.table::data.table( - "cell_ID" = gobject@cell_ID), result_DT) + "cell_ID" = gobject@cell_ID + ), result_DT) return(result_DT) } @@ -551,14 +572,42 @@ writeHMRFresults <- function(gobject, #' @param betas_to_add results from different betas that you want to add #' @param hmrf_name specify a custom name #' @returns giotto object +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' spat_genes <- binSpect(g) +#' +#' output_folder <- file.path(tempdir(), "HMRF") +#' if (!file.exists(output_folder)) dir.create(output_folder, recursive = TRUE) +#' +#' out <- doHMRF( +#' g, +#' spatial_genes = spat_genes[seq_len(20)]$feats, +#' expression_values = "scaled", +#' spatial_network_name = "Delaunay_network", +#' k = 6, betas = c(0, 10, 5), +#' output_folder = output_folder +#' ) +#' +#' g <- addHMRF( +#' gobject = g, +#' HMRFoutput = out, +#' k = 6, +#' betas_to_add = 20, +#' hmrf_name = "HMRF" +#' ) +#' +#' spatPlot( +#' gobject = g, cell_color = "HMRF_k6_b.20", +#' ) #' @export -addHMRF <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_add = NULL, - hmrf_name = NULL) { +addHMRF <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_add = NULL, + hmrf_name = NULL) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -580,7 +629,9 @@ addHMRF <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -595,8 +646,10 @@ addHMRF <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_add_detected <- betas_to_add[betas_to_add %in% possible_betas] @@ -665,6 +718,8 @@ addHMRF <- function(gobject, #' @name viewHMRFresults2D #' @description View results from doHMRF. #' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type #' @param HMRFoutput HMRF output from doHMRF #' @param k number of HMRF domains #' @param betas_to_view results from different betas that you want to view @@ -672,11 +727,21 @@ addHMRF <- function(gobject, #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot2D}} #' @export -viewHMRFresults2D <- function(gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - ...) { +viewHMRFresults2D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ...) { + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -684,7 +749,9 @@ viewHMRFresults2D <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -699,8 +766,10 @@ viewHMRFresults2D <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -716,19 +785,48 @@ viewHMRFresults2D <- function(gobject, ) print(result_command) - output <- system(command = result_command, intern = TRUE) + # get cell_IDs + cids <- gsub(basename(output_data), "", output_data) %>% + gsub(pattern = "\"", replacement = "") %>% + list.files( + pattern = "expression_matrix", + full.names = TRUE + ) %>% + data.table::fread(nrows = 0L, header = TRUE) %>% + colnames() + cids <- cids[-1] # gene colname is also included + + # create unique name + annot_DT <- data.table::data.table( + cell_ID = cids, + temp_name = output + ) + + annot_name <- paste0("hmrf_k.", k, "_b.", b) + data.table::setnames(annot_DT, old = "temp_name", new = annot_name) + + + gobject <- addCellMetadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + column_cell_ID = "cell_ID", + new_metadata = annot_DT, + by_column = TRUE + ) title_name <- paste0("k = ", k, " b = ", b) spatPlot2D( - gobject = gobject, - cell_color = as.factor(output), - show_plot = TRUE, - save_plot = FALSE, - title = title_name, - ...) + gobject = gobject, + cell_color = annot_name, + show_plot = TRUE, + save_plot = FALSE, + title = title_name, + ... + ) } } @@ -737,6 +835,8 @@ viewHMRFresults2D <- function(gobject, #' @name viewHMRFresults3D #' @description View results from doHMRF. #' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type #' @param HMRFoutput HMRF output from doHMRF #' @param k number of HMRF domains #' @param betas_to_view results from different betas that you want to view @@ -744,19 +844,31 @@ viewHMRFresults2D <- function(gobject, #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot3D}} #' @export -viewHMRFresults3D <- function(gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - ...) { +viewHMRFresults3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } + spat_unit <- set_default_spat_unit( + gobject = gobject, spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) + ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -771,8 +883,10 @@ viewHMRFresults3D <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -788,19 +902,48 @@ viewHMRFresults3D <- function(gobject, ) print(result_command) - output <- system(command = result_command, intern = TRUE) + # get cell_IDs + cids <- gsub(basename(output_data), "", output_data) %>% + gsub(pattern = "\"", replacement = "") %>% + list.files( + pattern = "expression_matrix", + full.names = TRUE + ) %>% + data.table::fread(nrows = 0L, header = TRUE) %>% + colnames() + cids <- cids[-1] # gene colname is also included + + # create unique name + annot_DT <- data.table::data.table( + cell_ID = cids, + temp_name = output + ) + + annot_name <- paste0("hmrf_k.", k, "_b.", b) + data.table::setnames(annot_DT, old = "temp_name", new = annot_name) + + + gobject <- addCellMetadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + column_cell_ID = "cell_ID", + new_metadata = annot_DT, + by_column = TRUE + ) title_name <- paste0("k = ", k, " b = ", b) spatPlot3D( - gobject = gobject, - cell_color = output, - show_plot = TRUE, - save_plot = FALSE, - title = title_name, - ...) + gobject = gobject, + cell_color = annot_name, + show_plot = TRUE, + save_plot = FALSE, + title = title_name, + ... + ) } } @@ -818,17 +961,18 @@ viewHMRFresults3D <- function(gobject, #' @param seed random seed #' @returns list #' @details -#' This function samples a subset of spatial genes among different clusters, +#' This function samples a subset of spatial genes among different clusters, #' with size n = target. -#' Number of samples from each cluster denpends on the relative proportion of +#' Number of samples from each cluster denpends on the relative proportion of #' each cluster. -#' Changing from equal size by setting sample_rate = 1 to with exact proportion +#' Changing from equal size by setting sample_rate = 1 to with exact proportion #' of each cluster by setting sample_rate = +Inf #' @keywords internal -sampling_sp_genes <- function(clust, - sample_rate = 2, - target = 500, - seed = 10) { +sampling_sp_genes <- function( + clust, + sample_rate = 2, + target = 500, + seed = 10) { tot <- 0 num_cluster <- length(unique(clust)) gene_list <- list() @@ -862,28 +1006,30 @@ sampling_sp_genes <- function(clust, union_genes <- unique(union_genes) return(list( - union_genes = union_genes, num_sample = num_sample, - num_gene = genes, gene_list = gene_list)) + union_genes = union_genes, num_sample = num_sample, + num_gene = genes, gene_list = gene_list + )) } #' @title numPts_below_line #' @name numPts_below_line -#' @description function to calculate the number of data points below a given +#' @description function to calculate the number of data points below a given #' line -#' @param myVector input sequence of sorted positive values from smallest to +#' @param myVector input sequence of sorted positive values from smallest to #' greatest #' @param slope slope to compare -#' @param x location point of the line to compare, integer from 1 to length of +#' @param x location point of the line to compare, integer from 1 to length of #' myVector #' @returns numeric #' @details -#' This function calculates the number of data points in a sorted sequence +#' This function calculates the number of data points in a sorted sequence #' below a line with given slope through a certain point on this sequence. #' @keywords internal -numPts_below_line <- function(myVector, - slope, - x) { +numPts_below_line <- function( + myVector, + slope, + x) { yPt <- myVector[x] b <- yPt - (slope * x) xPts <- seq_along(myVector) @@ -903,27 +1049,27 @@ numPts_below_line <- function(myVector, #' @param method method of spatial gene selection #' @returns list #' @details -#' This function filters given gene list with the gene sets of selected +#' This function filters given gene list with the gene sets of selected #' spatial gene test in Giotto, #' also controls the total size of the gene set with given max number. #' @keywords external #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' filterSpatialGenes(g, spatial_genes = "Gm19935") #' @export -filterSpatialGenes <- function( - gobject, spat_unit = NULL, feat_type = NULL, spatial_genes, max = 2500, - name = c("binSpect", "silhouetteRank", "silhouetteRankTest"), - method = c("none", "elbow")) { +filterSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL, spatial_genes, max = 2500, + name = c("binSpect", "silhouetteRank", "silhouetteRankTest"), + method = c("none", "elbow")) { name <- match.arg( - name, - unique(c("binSpect", "silhouetteRank", "silhouetteRankTest", name))) + name, + unique(c("binSpect", "silhouetteRank", "silhouetteRankTest", name)) + ) method <- match.arg(method, unique(c("none", "elbow", method))) # NSE vars - binSpect.pval <- silhouetteRank.score <- silhouetteRankTest.pval <- + binSpect.pval <- silhouetteRank.score <- silhouetteRankTest.pval <- feat_ID <- NULL # first determine how many spatial genes in this dataset @@ -962,17 +1108,21 @@ filterSpatialGenes <- function( y0s <- sort(y0) y0s[y0s < 0] <- 0 # strictly positive # plot(x0, y0) - slope <- (max(y0s) - min(y0s)) / length(y0s) # This is the slope of the + slope <- (max(y0s) - min(y0s)) / length(y0s) # This is the slope of the # line we want to slide. This is the diagonal. xPt <- floor(optimize( - numPts_below_line, lower = 1, upper = length(y0s), - myVector = y0s, slope = slope)$minimum) + numPts_below_line, + lower = 1, upper = length(y0s), + myVector = y0s, slope = slope + )$minimum) xPt <- length(y0s) - xPt y_cutoff <- y0[xPt] # The y-value at this x point. This is our y_cutoff. gx_sorted <- head(gx_sorted, n = xPt) message("Elbow method chosen to determine number of spatial genes.") - cat(paste0("Elbow point determined to be at x=", xPt, " genes", - " y=", y_cutoff)) + cat(paste0( + "Elbow point determined to be at x=", xPt, " genes", + " y=", y_cutoff + )) } # filter user's gene list (spatial_genes) @@ -981,28 +1131,28 @@ filterSpatialGenes <- function( num_genes_removed <- length(spatial_genes) - nrow(gx_sorted) return(list( - genes = gx_sorted$feat_ID, num_genes_removed = num_genes_removed)) + genes = gx_sorted$feat_ID, num_genes_removed = num_genes_removed + )) } #' @title chooseAvailableSpatialGenes #' @name chooseAvailableSpatialGenes -#' @description function to find the test name for existing spatial gene sets +#' @description function to find the test name for existing spatial gene sets #' in Giotto #' @param gobject Giotto object #' @param spat_unit spatial unit #' @param feat_type feature type #' @returns character #' @details -#' This function outputs the available test name for existing spatial gene sets +#' This function outputs the available test name for existing spatial gene sets #' in Giotto, #' which could be used in parameter ‘name’ in `filterSpatialGenes`. -#' Priorities for showing the spatial gene test names are ‘binSpect’ > +#' Priorities for showing the spatial gene test names are ‘binSpect’ > #' ‘silhouetteRankTest’ > ‘silhouetteRank’. #' @keywords internal -chooseAvailableSpatialGenes <- function( - gobject, spat_unit = NULL, feat_type = NULL) { +chooseAvailableSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL) { gx <- fDataDT(gobject, spat_unit = NULL, feat_type = NULL) eval1 <- "binSpect.pval" %in% names(gx) eval2 <- "silhouetteRankTest.pval" %in% names(gx) @@ -1014,7 +1164,7 @@ chooseAvailableSpatialGenes <- function( } else if (eval3 == TRUE) { return("silhouetteRank") } else { - stop(paste0("No available spatial genes. Please run binSpect or + stop(paste0("No available spatial genes. Please run binSpect or silhouetteRank\n"), call. = FALSE) } } @@ -1022,7 +1172,7 @@ chooseAvailableSpatialGenes <- function( #' @title checkAndFixSpatialGenes #' @name checkAndFixSpatialGenes -#' @description function to check the selected test name for spatial gene set +#' @description function to check the selected test name for spatial gene set #' in Giotto object #' @param gobject Giotto object #' @param spat_unit spatial unit @@ -1031,16 +1181,17 @@ chooseAvailableSpatialGenes <- function( #' @param use_score logical variable to select silhouetteRank score #' @returns character #' @details -#' This function checks the user specified test name of spatial gene set in +#' This function checks the user specified test name of spatial gene set in #' Giotto object. -#' SilhouetteRank works only with score, and SilhouetteRankTest works only +#' SilhouetteRank works only with score, and SilhouetteRankTest works only #' with pval. Use parameter use_score to specify. #' @keywords internal -checkAndFixSpatialGenes <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - use_spatial_genes, - use_score = FALSE) { +checkAndFixSpatialGenes <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + use_spatial_genes, + use_score = FALSE) { gx <- fDataDT(gobject, spat_unit = NULL, feat_type = NULL) if (use_spatial_genes == "silhouetteRank") { @@ -1051,7 +1202,7 @@ checkAndFixSpatialGenes <- function(gobject, eval2 <- "silhouetteRankTest.pval" %in% names(gx) if (eval1 == TRUE && eval2 == TRUE) { # if both evaluate to true, then decide by use_score. - # silhouetteRank works only with score, silhouetteRankTest + # silhouetteRank works only with score, silhouetteRankTest # works only with pval if (use_score == TRUE) { use_spatial_genes <- "silhouetteRank" @@ -1063,8 +1214,8 @@ checkAndFixSpatialGenes <- function(gobject, } else if (eval2 == TRUE) { use_spatial_genes <- "silhouetteRankTest" } else { - stop(paste0("\n use_spatial_genes is set to silhouetteRank, - but it has not been run yet. Run silhouetteRank + stop(paste0("\n use_spatial_genes is set to silhouetteRank, + but it has not been run yet. Run silhouetteRank first.\n"), call. = FALSE) } } @@ -1072,14 +1223,16 @@ checkAndFixSpatialGenes <- function(gobject, } else if (use_spatial_genes == "binSpect") { eval1 <- "binSpect.pval" %in% names(gx) if (eval1 == FALSE) { - stop(paste0("use_spatial_genes is set to binSpect, but it has - not been run yet. Run binSpect first."), - call. = FALSE) + stop(paste0("use_spatial_genes is set to binSpect, but it has + not been run yet. Run binSpect first."), + call. = FALSE + ) } return(use_spatial_genes) } else { - stop(paste0("use_spatial_genes is set to one that is not supported."), - call. = FALSE) + stop(paste0("use_spatial_genes is set to one that is not supported."), + call. = FALSE + ) } } @@ -1088,120 +1241,126 @@ checkAndFixSpatialGenes <- function(gobject, #' @title initHMRF_V2 #' @name initHMRF_V2 -#' @description Run initialzation for HMRF model +#' @description Run initialization for HMRF model #' @param gobject giotto object #' @param spat_unit spatial unit #' @param feat_type feature type #' @param expression_values expression values to use #' @param spatial_network_name name of spatial network to use for HMRF #' @param use_spatial_genes which of Giotto's spatial genes to use -#' @param use_score use score as gene selection criterion +#' @param use_score use score as gene selection criterion #' (applies when use_spatial_genes=silhouetteRank) #' @param gene_list_from_top total spatial genes before sampling -#' @param filter_method filter genes by top or by elbow method, prior to +#' @param filter_method filter genes by top or by elbow method, prior to #' sampling #' @param user_gene_list user-specified genes (optional) -#' @param use_pca if PCA is used on the spatial gene expression value for +#' @param use_pca if PCA is used on the spatial gene expression value for #' clustering #' @param use_pca_dim dimensions of the PCs of the selected expression #' @param gene_samples number of spatial gene subset to use for HMRF -#' @param gene_sampling_rate parameter (1-50) controlling proportion of gene -#' samples from different module when sampling, 1 corresponding to equal gene -#' samples between different modules; 50 corresponding to gene samples +#' @param gene_sampling_rate parameter (1-50) controlling proportion of gene +#' samples from different module when sampling, 1 corresponding to equal gene +#' samples between different modules; 50 corresponding to gene samples #' proportional to module size. #' @param gene_sampling_seed random number seed to sample spatial genes #' @param use_metagene if metagene expression is used for clustering #' @param cluster_metagene number of metagenes to use -#' @param top_metagene = number of genes in each cluster for the metagene +#' @param top_metagene = number of genes in each cluster for the metagene #' calculation -#' @param existing_spatial_enrichm_to_use name of existing spatial enrichment +#' @param existing_spatial_enrichm_to_use name of existing spatial enrichment #' result to use -#' @param use_neighborhood_composition if neighborhood composition is used for +#' @param use_neighborhood_composition if neighborhood composition is used for #' hmrf -#' @param spatial_network_name_for_neighborhood spatial network used to +#' @param spatial_network_name_for_neighborhood spatial network used to #' calculate neighborhood composition #' @param metadata_to_use metadata used to calculate neighborhood composition -#' @param hmrf_seed random number seed to generate initial mean vector of HMRF +#' @param hmrf_seed random number seed to generate initial mean vector of HMRF #' model -#' @param cl.method clustering method to calculate the initial mean vector, +#' @param cl.method clustering method to calculate the initial mean vector, #' selecting from 'km', 'leiden', or 'louvain' #' @param resolution.cl resolution of Leiden or Louvain clustering #' @param k number of HMRF domains #' @param tolerance error tolerance threshold #' @param zscore type of zscore to use -#' @param nstart number of Kmeans initializations from which to select the +#' @param nstart number of Kmeans initializations from which to select the #' best initialization #' @param factor_step dampened factor step #' @param python_path python_path #' @returns initialized HMRF #' @details -#' This function is the initialization step of HMRF domain clustering. First, +#' This function is the initialization step of HMRF domain clustering. First, #' user specify which of Giotto's spatial genes to run, -#' through use_spatial_genes. Spatial genes have been stored in the gene +#' through use_spatial_genes. Spatial genes have been stored in the gene #' metadata table. A first pass of genes will filter genes that -#' are not significantly spatial, as determined by filter_method. If +#' are not significantly spatial, as determined by filter_method. If #' filter_method is none, then top 2500 (gene_list_from_top) genes -#' ranked by pvalue are considered spatial. If filter_method is elbow, then the +#' ranked by pvalue are considered spatial. If filter_method is elbow, then the #' exact cutoff is determined by the elbow in -#' the -log10 P-value vs. gene rank plot. Second, users have a few options to +#' the -log10 P-value vs. gene rank plot. Second, users have a few options to #' decrease the dimension of the spatial genes for #' clustering, listed with selection priority: #' 1. use PCA of the spatial gene expressions (selected by use_pca) #' 2. use metagene expressions (selected by use_metagene) #' 3. sampling to select 500 spatial genes (controlled by gene_samples). -#' Third, once spatial genes are finalized, we are using clustering method to +#' Third, once spatial genes are finalized, we are using clustering method to #' initialize HMRF. -#' Instead of select spatial genes for domain clustering, HMRF method could -#' also applied on unit neighbohood composition of any group -#' membership(such as cell types), specified by parameter: +#' Instead of select spatial genes for domain clustering, HMRF method could +#' also applied on unit neighborhood composition of any group +#' membership(such as cell types), specified by parameter: #' use_neighborhood_composition, spatial_network_name_for_neighborhood and -#' metadata_to_use. Also HMRF provides the oppertunity for user to do +#' metadata_to_use. Also HMRF provides the opportunity for user to do #' clustering by any customized spatial enrichment matrix #' (existing_spatial_enrichm_to_use). -#' There are 3 clustering algorithm: K-means, Leiden, and Louvain to determine +#' There are 3 clustering algorithm: K-means, Leiden, and Louvain to determine #' initial centroids of HMRF. The initialization is -#' then finished. This function returns a list containing y (expression), +#' then finished. This function returns a list containing y (expression), #' nei (neighborhood structure), numnei (number of neighbors), -#' blocks (graph colors), damp (dampened factor), mu (mean), +#' blocks (graph colors), damp (dampened factor), mu (mean), #' sigma (covariance), k, genes, edgelist, init.cl (initial clusters), #' spat_unit, feat_type. This information is needed for the second step, doHMRF. +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' g <- binSpect(g, return_gobject = TRUE) +#' +#' initHMRF_V2(gobject = g, cl.method = "km") #' @export initHMRF_V2 <- - function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("scaled", "normalized", "custom"), - spatial_network_name = "Delaunay_network", - use_spatial_genes = c("binSpect", "silhouetteRank"), - use_score = FALSE, - gene_list_from_top = 2500, - filter_method = c("none", "elbow"), - user_gene_list = NULL, - use_pca = FALSE, - use_pca_dim = 1:20, - gene_samples = 500, - gene_sampling_rate = 2, - gene_sampling_seed = 10, - use_metagene = FALSE, - cluster_metagene = 50, - top_metagene = 20, - existing_spatial_enrichm_to_use = NULL, - use_neighborhood_composition = FALSE, - spatial_network_name_for_neighborhood = NULL, - metadata_to_use = NULL, - hmrf_seed = 100, - cl.method = c("km", "leiden", "louvain"), - resolution.cl = 1, - k = 10, - tolerance = 1e-05, - zscore = c("none", "rowcol", "colrow"), - nstart = 1000, - factor_step = 1.05, - python_path = NULL) { + function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("scaled", "normalized", "custom"), + spatial_network_name = "Delaunay_network", + use_spatial_genes = c("binSpect", "silhouetteRank"), + use_score = FALSE, + gene_list_from_top = 2500, + filter_method = c("none", "elbow"), + user_gene_list = NULL, + use_pca = FALSE, + use_pca_dim = 1:20, + gene_samples = 500, + gene_sampling_rate = 2, + gene_sampling_seed = 10, + use_metagene = FALSE, + cluster_metagene = 50, + top_metagene = 20, + existing_spatial_enrichm_to_use = NULL, + use_neighborhood_composition = FALSE, + spatial_network_name_for_neighborhood = NULL, + metadata_to_use = NULL, + hmrf_seed = 100, + cl.method = c("km", "leiden", "louvain"), + resolution.cl = 1, + k = 10, + tolerance = 1e-05, + zscore = c("none", "rowcol", "colrow"), + nstart = 1000, + factor_step = 1.05, + python_path = NULL) { wrap_msg( - "If used in published research, please cite: + "If used in published research, please cite: Q Zhu, S Shah, R Dries, L Cai, GC Yuan. - 'Identification of spatially associated subpopulations by combining + 'Identification of spatially associated subpopulations by combining scRNAseq and sequential fluorescence in situ hybridization data' Nature biotechnology 36 (12), 1183-1190. 2018\n" ) @@ -1237,51 +1396,60 @@ initHMRF_V2 <- cx <- pDataDT(gobject, spat_unit = spat_unit, feat_type = feat_type) spatial_network <- getSpatialNetwork( - gobject, - spat_unit = spat_unit, - name = spatial_network_name, - output = "networkDT", - copy_obj = FALSE) + gobject, + spat_unit = spat_unit, + name = spatial_network_name, + output = "networkDT", + copy_obj = FALSE + ) spatial_network <- spatial_network[, .(to, from)] if (use_neighborhood_composition) { if (is.null(spatial_network_name_for_neighborhood)) { - stop("spatial network is required to define neighborhood, + stop("spatial network is required to define neighborhood, set with \'spatial_network_name_for_neighborhood\' \n", call. = FALSE ) } else if (is.null(metadata_to_use)) { - stop("please specify the cluster in meta data, set with + stop("please specify the cluster in meta data, set with \'metadata_to_use\' \n", call. = FALSE ) } else if (is.null(cx[[metadata_to_use]])) { - stop("please provide a valid index in meta data, set with + stop("please provide a valid index in meta data, set with \'metadata_to_use\'", call. = FALSE ) } - cat(paste0("use spatial network composition of \'", - metadata_to_use, "\' for domain clustering")) + cat(paste0( + "use spatial network composition of \'", + metadata_to_use, "\' for domain clustering" + )) name.cl <- as.character(sort(unique(cx[[metadata_to_use]]))) spatial_network_for_neighborhood <- getSpatialNetwork( gobject, spat_unit = spat_unit, - name = spatial_network_name_for_neighborhood, - output = "networkDT", + name = spatial_network_name_for_neighborhood, + output = "networkDT", copy_obj = FALSE ) - from.all <- c(spatial_network_for_neighborhood$from, - spatial_network_for_neighborhood$to) - to.all <- c(spatial_network_for_neighborhood$to, - spatial_network_for_neighborhood$from) + from.all <- c( + spatial_network_for_neighborhood$from, + spatial_network_for_neighborhood$to + ) + to.all <- c( + spatial_network_for_neighborhood$to, + spatial_network_for_neighborhood$from + ) - ct.tab <- aggregate(cx[[metadata_to_use]][match( - to.all, cx[["cell_ID"]])], + ct.tab <- aggregate( + cx[[metadata_to_use]][match( + to.all, cx[["cell_ID"]] + )], by = list(cell_ID = from.all), function(y) { table(y)[name.cl] } @@ -1291,20 +1459,21 @@ initHMRF_V2 <- y0[is.na(y0)] <- 0 rownames(y0) <- ct.tab$cell_ID y0 <- y0 / rowSums(y0) - } else if (!is.null(existing_spatial_enrichm_to_use)) { y0 <- getSpatialEnrichment( gobject, spat_unit = spat_unit, feat_type = feat_type, - name = existing_spatial_enrichm_to_use, + name = existing_spatial_enrichm_to_use, output = "data.table" ) cell_ID_enrich <- y0$cell_ID y0 <- as.data.frame(y0[, -"cell_ID"]) rownames(y0) <- cell_ID_enrich - cat(paste0("Spatial enrichment result: \'", - existing_spatial_enrichm_to_use, "\' is used.")) + cat(paste0( + "Spatial enrichment result: \'", + existing_spatial_enrichm_to_use, "\' is used." + )) if (sum(!rownames(y0) %in% cx$cell_ID) > 0) { stop("Rownames of selected spatial enrichment result do not @@ -1330,18 +1499,18 @@ initHMRF_V2 <- "normalized", "custom", expression_values ))) expr_values <- get_expression_values( - gobject = gobject, - spat_unit = spat_unit, + gobject = gobject, + spat_unit = spat_unit, feat_type = feat_type, values = values, output = "matrix" ) if (zscore != "none") { zscore <- match.arg(zscore, c("none", "colrow", "rowcol")) expr_values <- get_expression_values( - gobject = gobject, - spat_unit = spat_unit, + gobject = gobject, + spat_unit = spat_unit, feat_type = feat_type, - values = "normalized", + values = "normalized", output = "matrix" ) if (zscore == "colrow") { @@ -1357,8 +1526,8 @@ initHMRF_V2 <- if (!"binSpect.pval" %in% names(gx) && !"silhouetteRank.score" %in% names(gx) && !"silhouetteRankTest.pval" %in% names(gx)) { - stop(paste0("Giotto spatial gene detection has not been run. - Please run spatial gene detection first: binSpect, + stop(paste0("Giotto spatial gene detection has not been run. + Please run spatial gene detection first: binSpect, silhouetteRank."), call. = FALSE ) @@ -1367,30 +1536,32 @@ initHMRF_V2 <- if (!is.null(user_gene_list)) { message("User supplied gene list detected.") message("Checking user gene list is spatial...") - + use_spatial_genes <- chooseAvailableSpatialGenes(gobject) filtered <- filterSpatialGenes( gobject, - spat_unit = spat_unit, - feat_type = feat_type, + spat_unit = spat_unit, + feat_type = feat_type, spatial_genes = user_gene_list, - max = gene_list_from_top, + max = gene_list_from_top, name = use_spatial_genes, method = filter_method ) if (filtered$num_genes_removed > 0) { cat(paste0( "Removed ", filtered$num_genes_removed, - " from user's input gene list due to being absent or + " from user's input gene list due to being absent or non-spatial genes." )) - cat(paste0("Kept ", length(filtered$genes), - " spatial genes for next step")) + cat(paste0( + "Kept ", length(filtered$genes), + " spatial genes for next step" + )) } spatial_genes <- filtered$genes if (length(spatial_genes) == 0) { - stop("No genes are remaining to do HMRF. Please give a + stop("No genes are remaining to do HMRF. Please give a larger gene list.", call. = FALSE ) @@ -1402,41 +1573,45 @@ initHMRF_V2 <- )) use_spatial_genes <- checkAndFixSpatialGenes( gobject, - spat_unit = spat_unit, + spat_unit = spat_unit, feat_type = feat_type, - use_spatial_genes = use_spatial_genes, + use_spatial_genes = use_spatial_genes, use_score = use_score ) all_genes <- gx$feat_ID filtered <- filterSpatialGenes( gobject, - spat_unit = spat_unit, + spat_unit = spat_unit, feat_type = feat_type, - spatial_genes = all_genes, + spatial_genes = all_genes, max = gene_list_from_top, name = use_spatial_genes, method = filter_method ) - cat(paste0("Kept ", length(filtered$genes), - " top spatial genes for next step")) + cat(paste0( + "Kept ", length(filtered$genes), + " top spatial genes for next step" + )) spatial_genes <- filtered$genes } if (use_pca == TRUE) { expr_values <- expr_values[spatial_genes, ] pc.expr <- prcomp(expr_values)[[2]] - use_pca_dim <- use_pca_dim[use_pca_dim %in% 1:ncol(pc.expr)] + use_pca_dim <- use_pca_dim[ + use_pca_dim %in% seq_len(ncol(pc.expr)) + ] y0 <- (pc.expr[, use_pca_dim]) } else { message("Computing spatial coexpression modules...") spat_cor_netw_DT <- detectSpatialCorFeats( gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, + feat_type = feat_type, + spat_unit = spat_unit, expression_values = values, method = "network", spatial_network_name = spatial_network_name, - subset_feats = spatial_genes, + subset_feats = spatial_genes, network_smoothing = 0 ) @@ -1445,15 +1620,15 @@ initHMRF_V2 <- if (n < length(spatial_genes)) { spat_cor_netw_DT <- clusterSpatialCorFeats( spat_cor_netw_DT, - name = "spat_netw_clus", + name = "spat_netw_clus", k = 20 ) - message("Sampling spatial genes from coexpression + message("Sampling spatial genes from coexpression modules...") sample_genes <- sampling_sp_genes( spat_cor_netw_DT$cor_clusters$spat_netw_clus, - sample_rate = gene_sampling_rate, - target = n, + sample_rate = gene_sampling_rate, + target = n, seed = gene_sampling_seed ) spatial_genes_selected <- sample_genes$union_genes @@ -1471,11 +1646,13 @@ initHMRF_V2 <- expr_values <- expr_values[spatial_genes_selected, ] } else { k.sp <- min( - ceiling(length(spatial_genes) / 20), cluster_metagene) + ceiling(length(spatial_genes) / 20), cluster_metagene + ) if (k.sp < cluster_metagene) { cat(paste0( - "construct ", k.sp, - " coexpression modules due to limited gene size...")) + "construct ", k.sp, + " coexpression modules due to limited gene size..." + )) } spat_cor_netw_DT <- clusterSpatialCorFeats(spat_cor_netw_DT, name = "spat_netw_clus", k = k.sp @@ -1486,28 +1663,32 @@ initHMRF_V2 <- show_top_feats = 1 ) - cat(paste0("Collecting top spatial genes and calculating + cat(paste0("Collecting top spatial genes and calculating metagenes from ", k.sp, " coexpression modules...")) top_per_module <- cluster_genes_DT[ - , head(.SD, top_metagene), by = clus] + , head(.SD, top_metagene), + by = clus + ] cluster_genes <- top_per_module$clus names(cluster_genes) <- top_per_module$feat_ID meta.genes <- createMetafeats( gobject, - spat_unit = spat_unit, - feat_type = feat_type, + spat_unit = spat_unit, + feat_type = feat_type, expression_values = values, - feat_clusters = cluster_genes, + feat_clusters = cluster_genes, return_gobject = FALSE ) - expr_values <- t(meta.genes@enrichDT[, 1:k.sp]) + expr_values <- t(meta.genes@enrichDT[, seq_len(k.sp)]) colnames(expr_values) <- unlist( - meta.genes@enrichDT[, "cell_ID"]) + meta.genes@enrichDT[, "cell_ID"] + ) rownames(expr_values) <- paste0( - "metagene_", rownames(expr_values)) + "metagene_", rownames(expr_values) + ) } y0 <- t(as.matrix(expr_values)) @@ -1532,13 +1713,13 @@ initHMRF_V2 <- y <- y0 } - + numcell <- dim(y)[1] m <- dim(y)[2] ncol.nei <- max(table(c(spatial_network$to, spatial_network$from))) nei <- matrix(-1, ncol = ncol.nei, nrow = numcell) rownames(nei) <- rownames(y) - for (i in 1:numcell) { + for (i in seq_len(numcell)) { nei.i <- c(spatial_network$from[spatial_network$to == rownames(nei)[i]], spatial_network$to[spatial_network$from == rownames(nei)[i]]) @@ -1549,12 +1730,12 @@ initHMRF_V2 <- numnei <- as.integer(rowSums(nei != (-1))) nn <- nei numedge <- 0 - for (i in 1:numcell) { + for (i in seq_len(numcell)) { numedge <- numedge + length(nn[i, nn[i, ] != -1]) } edgelist <- matrix(0, nrow = numedge, ncol = 2) edge_ind <- 1 - for (i in 1:numcell) { + for (i in seq_len(numcell)) { neighbors <- nn[i, nn[i, ] != -1] for (j in seq_along(neighbors)) { edgelist[edge_ind, ] <- c(i, neighbors[j]) @@ -1563,9 +1744,11 @@ initHMRF_V2 <- } message("Parsing neighborhood graph...") pp <- tidygraph::tbl_graph( - edges = as.data.frame(edgelist), directed = FALSE) + edges = as.data.frame(edgelist), directed = FALSE + ) yy <- pp %>% dplyr::mutate( - color = as.factor(graphcoloring::color_dsatur())) + color = as.factor(graphcoloring::color_dsatur()) + ) colors <- as.list(yy)$nodes$color cl_color <- sort(unique(colors)) blocks <- lapply(cl_color, function(cl) { @@ -1585,7 +1768,7 @@ initHMRF_V2 <- nstart = nstart ) mu <- t(kk$centers) - lclust <- lapply(1:k, function(x) which(kk$cluster == x)) + lclust <- lapply(seq_len(k), function(x) which(kk$cluster == x)) } else { ##### need to double check leiden and louvain cluster functions gobject@dimension_reduction$cells$spatial <- NULL @@ -1596,8 +1779,8 @@ initHMRF_V2 <- gobject <- createNearestNetwork( gobject = gobject, - dim_reduction_to_use = "spatial", - dim_reduction_name = "spatial_feat", + dim_reduction_to_use = "spatial", + dim_reduction_name = "spatial_feat", dimensions_to_use = seq_len(ncol(y)), name = "sNN.initHMRF" ) @@ -1605,30 +1788,32 @@ initHMRF_V2 <- if (cl.method == "leiden") { message("Leiden clustering initialization...") leiden.cl <- doLeidenCluster( - gobject = gobject, - nn_network_to_use = "sNN", - network_name = "sNN.initHMRF", - set_seed = hmrf_seed, + gobject = gobject, + nn_network_to_use = "sNN", + network_name = "sNN.initHMRF", + set_seed = hmrf_seed, return_gobject = FALSE, - python_path = python_path, + python_path = python_path, resolution = resolution.cl ) cl.match <- leiden.cl$leiden_clus[ - match(rownames(y), leiden.cl$cell_ID)] + match(rownames(y), leiden.cl$cell_ID) + ] mu <- aggregate(y, by = list(cl.match), FUN = mean) } else if (cl.method == "louvain") { message("Louvain clustering initialization...") louvain.cl <- doLouvainCluster( - gobject = gobject, - nn_network_to_use = "sNN", - network_name = "sNN.initHMRF", - set_seed = hmrf_seed, + gobject = gobject, + nn_network_to_use = "sNN", + network_name = "sNN.initHMRF", + set_seed = hmrf_seed, return_gobject = FALSE, python_path = python_path, resolution = resolution.cl ) cl.match <- louvain.cl$louvain_clus[ - match(rownames(y), louvain.cl$cell_ID)] + match(rownames(y), louvain.cl$cell_ID) + ] mu <- aggregate(y, by = list(cl.match), FUN = mean) } @@ -1642,7 +1827,7 @@ initHMRF_V2 <- damp <- array(0, c(k)) sigma <- array(0, c(m, m, k)) - for (i in 1:k) { + for (i in seq_len(k)) { sigma[, , i] <- cov(y[lclust[[i]], ]) di <- smfishHmrf::findDampFactor( sigma[, , i], @@ -1655,7 +1840,7 @@ initHMRF_V2 <- list( y = y, nei = nei, numnei = numnei, blocks = blocks, damp = damp, mu = mu, sigma = sigma, k = k, genes = colnames(y), - edgelist = edgelist, init.cl = lclust, spat_unit = spat_unit, + edgelist = edgelist, init.cl = lclust, spat_unit = spat_unit, feat_type = feat_type ) } @@ -1667,41 +1852,47 @@ initHMRF_V2 <- #' @title doHMRF_V2 #' @name doHMRF_V2 #' @description function to run HMRF model -#' @param HMRF_init_obj initialization object list returned from initHMRF() +#' @param HMRF_init_obj initialization object list returned from initHMRF() #' function -#' @param betas beta value of the HMRF model, controlling the smoothness of -#' clustering. NULL value of beta will provide default values based on feature -#' numbers, otherwise, a vector of three values: initial beta, beta increment, +#' @param betas beta value of the HMRF model, controlling the smoothness of +#' clustering. NULL value of beta will provide default values based on feature +#' numbers, otherwise, a vector of three values: initial beta, beta increment, #' and number of betas #' @returns HMRF model #' @details -#' This function will run a HMRF model after initialization of HMRF. Of note +#' This function will run a HMRF model after initialization of HMRF. Of note #' is the beta parameter, the smoothing parameter. -#' If the users are interested in selecting results from different smoothness, +#' If the users are interested in selecting results from different smoothness, #' we recommend running a range of betas, -#' hence betas specify what this range is. For example, betas=c(0,10,5) will +#' hence betas specify what this range is. For example, betas=c(0,10,5) will #' run for the following betas: 0, 10, 20, 30, 40. -#' betas=c(0,5,2) will run for betas: 0, 5, 10. Setting the beta can use the +#' betas=c(0,5,2) will run for betas: 0, 5, 10. Setting the beta can use the #' following guideline: #' If number of features N is 10 0)) { - stop("please provide betas as a vector of 3 non-negative numbers - (initial value, nicrement, total iteration number)") + } else if (length(betas) != 3 || (sum(betas[seq_len(3)] < 0) > 0)) { + stop(wrap_txt( + "please provide betas as a vector of 3 non-negative numbers + (initial value, increment, total iteration number)", + errWidth = TRUE + )) } else { beta_init <- betas[1] beta_increment <- betas[2] beta_num_iter <- betas[3] - beta_seq <- (1:beta_num_iter - 1) * beta_increment + beta_init + beta_seq <- (seq_len(beta_num_iter) - 1) * beta_increment + beta_init beta_seq <- sort(unique(c(0, beta_seq))) } @@ -1765,23 +1959,23 @@ doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { print(sprintf("Doing beta=%.3f", beta_current)) tc.hmrfem <- smfishHmrf::smfishHmrf.hmrfem.multi( y = y, neighbors = nei, - beta = beta_current, - numnei = numnei, + beta = beta_current, + numnei = numnei, blocks = blocks, - mu = mu, - sigma = sigma, - verbose = TRUE, + mu = mu, + sigma = sigma, + verbose = TRUE, err = 1e-07, - maxit = 50, + maxit = 50, dampFactor = damp ) - + t_key <- sprintf("k=%d b=%.2f", k, beta_current) tc.hmrfem$sigma <- NULL tc.hmrfem$mu <- NULL rownames(tc.hmrfem$prob) <- rownames(y) rownames(tc.hmrfem$unnormprob) <- rownames(y) - names(tc.hmrfem$class) <- rownames(y) + # names(tc.hmrfem$class) <- rownames(y) res[[t_key]] <- tc.hmrfem } result.hmrf <- res @@ -1800,10 +1994,17 @@ doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { #' @param name name of HMRF models #' @returns giotto object #' @details -#' This function appends HMRF domain clusters to corresponding cell meta data -#' for all the beta values, with the given HMRF model names. For example, if -#' name = ‘hmrf1’ and name of result in HMRFoutput is ‘k=8 b=0.00’, the +#' This function appends HMRF domain clusters to corresponding cell meta data +#' for all the beta values, with the given HMRF model names. For example, if +#' name = ‘hmrf1’ and name of result in HMRFoutput is ‘k=8 b=0.00’, the #' appended cell meta data column will be named with ‘hmrf1 k=8 b=0.00’ +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' g <- binSpect(g, return_gobject = TRUE) +#' HMRF_init_obj <- initHMRF_V2(gobject = g, cl.method = "km") +#' HMRFoutput <- doHMRF_V2(HMRF_init_obj = HMRF_init_obj, betas = c(0, 5, 2)) +#' +#' addHMRF_V2(gobject = g, HMRFoutput = HMRFoutput) #' @export addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { if (!"HMRFoutput" %in% class(HMRFoutput)) { @@ -1833,10 +2034,11 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { spat_unit = spat_unit, feat_type = feat_type, column_cell_ID = "cell_ID", - new_metadata = HMRFoutput[[i]]$class[match( - ordered_cell_IDs, names(HMRFoutput[[i]]$class))], - vector_name = paste(name, names(HMRFoutput)[i]), - by_column = TRUE + # new_metadata = HMRFoutput[[i]]$class[match( + # ordered_cell_IDs, names(HMRFoutput[[i]]$class))], + new_metadata = HMRFoutput[[i]]$prob[ordered_cell_IDs, ], + vector_name = paste(name, names(HMRFoutput)[i]) + # by_column = TRUE ) } return(gobject) @@ -1866,40 +2068,45 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { #' @param \dots additional params to pass to plotting #' @returns spatial plots with HMRF domains #' @details -#' This function plots spatial map of HMRF domain clusters for multiple beta +#' This function plots spatial map of HMRF domain clusters for multiple beta #' with the name (hmrf_name), -#' matching the first part of the cell meta column names with HMRF clusters +#' matching the first part of the cell meta column names with HMRF clusters #' (for example name of ‘hmrf1 k=8 b=0.00’ is ‘hmrf1’) #' @export viewHMRFresults_V2 <- - function(gobject, k, betas, - hmrf_name, - spat_unit = NULL, - feat_type = NULL, - third_dim = FALSE, - cow_n_col = 2, - cow_rel_h = 1, - cow_rel_w = 1, - cow_align = "h", - show_plot = TRUE, - save_plot = TRUE, - return_plot = TRUE, - default_save_name = "HMRF_result", - save_param = list(), - ...) { + function( + gobject, k, betas, + hmrf_name, + spat_unit = NULL, + feat_type = NULL, + third_dim = FALSE, + cow_n_col = 2, + cow_rel_h = 1, + cow_rel_w = 1, + cow_align = "h", + show_plot = TRUE, + save_plot = TRUE, + return_plot = TRUE, + default_save_name = "HMRF_result", + save_param = list(), + ...) { # beta_seq = round(betas,digits = 2) # t_key = paste0(hmrf_name,'_k', k, '_b.',beta_seq) t_key <- paste(hmrf_name, sprintf("k=%d b=%.2f", k, betas)) meta_names <- colnames(combineMetadata( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type)) + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + )) if (length(setdiff(t_key, meta_names)) > 0) { - beta_null <- paste(betas[which(!t_key %in% meta_names)], - collapse = ",") - stop(paste0('\n HMRF result "', hmrf_name, '" of k = ', k, - ", beta = ", beta_null, - " was not found in the Giotto object.")) + beta_null <- paste(betas[which(!t_key %in% meta_names)], + collapse = "," + ) + stop(paste0( + '\n HMRF result "', hmrf_name, '" of k = ', k, + ", beta = ", beta_null, + " was not found in the Giotto object." + )) } savelist <- list() @@ -1907,32 +2114,32 @@ viewHMRFresults_V2 <- { if (third_dim == TRUE) { pl <- spatPlot3D( - gobject = gobject, - spat_unit = spat_unit, + gobject = gobject, + spat_unit = spat_unit, feat_type = feat_type, - cell_color = t_key[kk], - show_plot = FALSE, - save_plot = FALSE, + cell_color = t_key[kk], + show_plot = FALSE, + save_plot = FALSE, title = t_key[kk], - default_save_name = "HMRF_result", - return_plot = TRUE, + default_save_name = "HMRF_result", + return_plot = TRUE, ... ) } else { pl <- spatPlot2D( - gobject = gobject, - spat_unit = spat_unit, + gobject = gobject, + spat_unit = spat_unit, feat_type = feat_type, - cell_color = t_key[kk], - show_plot = FALSE, - save_plot = FALSE, + cell_color = t_key[kk], + show_plot = FALSE, + save_plot = FALSE, title = t_key[kk], - cow_n_col = 1, - cow_rel_h = 1, - cow_rel_w = 1, + cow_n_col = 1, + cow_rel_h = 1, + cow_rel_w = 1, cow_align = "h", - default_save_name = "HMRF_result", - return_plot = TRUE, + default_save_name = "HMRF_result", + return_plot = TRUE, ... ) } diff --git a/R/python_scrublet.R b/R/python_scrublet.R index 200305ec1..0f422ceae 100644 --- a/R/python_scrublet.R +++ b/R/python_scrublet.R @@ -1,6 +1,7 @@ #' @title doScrubletDetect #' @name doScrubletDetect -#' @description run *scrublet* doublet detection for raw expression. +#' @description Run *scrublet* doublet detection for raw expression. Intended +#' for single cell data #' @param gobject giotto object containing expression data #' @param feat_type feature type #' @param spat_unit spatial unit @@ -21,26 +22,32 @@ #' @seealso This function wraps the python package scrublet #' \doi{10.1016/j.cels.2018.11.005} #' @returns if `return_gobject = FALSE`, a `data.table` cell_ID, doublet scores, -#' and classifications are returned. If `TRUE`, that information is appended -#' into the input `giotto` object's metadata and the `giotto` object is +#' and classifications are returned. If `TRUE`, that information is appended +#' into the input `giotto` object's metadata and the `giotto` object is #' returned. #' @md #' @examples +#' # Should only be done with single cell data, but this is just a +#' # convenient example. #' g <- GiottoData::loadGiottoMini("visium") -#' -#' doScrubletDetect(g) +#' +#' g <- doScrubletDetect(g) +#' +#' pDataDT(g) # doublet_scores and doublet cols are added +#' dimPlot2D(g, cell_color = "doublet_scores", color_as_factor = FALSE) #' @export -doScrubletDetect <- function(gobject, - feat_type = NULL, - spat_unit = "cell", - expression_values = "raw", - expected_doublet_rate = 0.06, - min_counts = 1, - min_cells = 1, - min_gene_variability_pctl = 85, - n_prin_comps = 30, - return_gobject = TRUE, - seed = 1234) { +doScrubletDetect <- function( + gobject, + feat_type = NULL, + spat_unit = "cell", + expression_values = "raw", + expected_doublet_rate = 0.06, + min_counts = 1, + min_cells = 1, + min_gene_variability_pctl = 85, + n_prin_comps = 30, + return_gobject = TRUE, + seed = 1234) { # verify if optional package is installed package_check( pkg_name = "scrublet", @@ -48,10 +55,10 @@ doScrubletDetect <- function(gobject, ) # print message with information # - message("using 'scrublet' to detect doublets. If used in published + message("using 'scrublet' to detect doublets. If used in published research, please cite: \n Wolock, S. L., Lopez, R. & Klein, A. M. - Scrublet: Computational Identification of Cell Doublets in Single-Cell + Scrublet: Computational Identification of Cell Doublets in Single-Cell Transcriptomic Data. Cell Syst. 8, 281-291.e9 (2019). https://doi.org/10.1016/j.cels.2018.11.005") @@ -59,7 +66,9 @@ doScrubletDetect <- function(gobject, python_path <- readGiottoInstructions(gobject, param = "python_path") reticulate::use_python(required = TRUE, python = python_path) python_scrublet_function <- system.file( - "python", "python_scrublet.py", package = "Giotto") + "python", "python_scrublet.py", + package = "Giotto" + ) reticulate::source_python(file = python_scrublet_function, convert = TRUE) # set seed diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index b9fef3f44..ca7fbd6cb 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -77,12 +77,13 @@ #' # don't show legend since there are too many categories generated #' spatPlot2D(g, cell_color = "new", show_legend = FALSE) #' @export -spatialSplitCluster <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_col, - split_clus_name = paste0(cluster_col, "_split")) { +spatialSplitCluster <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_col, + split_clus_name = paste0(cluster_col, "_split")) { # NSE vars cell_ID <- NULL @@ -113,14 +114,15 @@ spatialSplitCluster <- function(gobject, verbose = FALSE, ) - clus_info <- cell_meta[, c("cell_ID", cluster_col), with = FALSE] + clus_info <- cell_meta[, c("cell_ID", cluster_col), with = FALSE] # subset to needed cols - g <- GiottoClass::spat_net_to_igraph(sn) + g <- GiottoClass::spat_net_to_igraph(sn) # convert spatialNetworkObject to igraph # assign cluster info to igraph nodes clus_values <- clus_info[ - match(igraph::V(g)$name, cell_ID), get(cluster_col)] + match(igraph::V(g)$name, cell_ID), get(cluster_col) + ] igraph::V(g)$cluster <- clus_values # split cluster by spatial igraph diff --git a/R/spatial_enrichment.R b/R/spatial_enrichment.R index f3082a796..1f95d9a53 100644 --- a/R/spatial_enrichment.R +++ b/R/spatial_enrichment.R @@ -14,18 +14,29 @@ #' @returns matrix #' @seealso \code{\link{PAGEEnrich}} #' @examples -#' sign_list <- list(cell_type1 = c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", -#' "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd"), -#' cell_type2 = c("Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", -#' "Carhsp1", "Tmem88b", "Ugt8a"), -#' cell_type2 = c("Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", -#' "Cygb", "Ttc9b","Ipcef1")) +#' sign_list <- list( +#' cell_type1 = c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", +#' "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd" +#' ), +#' cell_type2 = c( +#' "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", +#' "Carhsp1", "Tmem88b", "Ugt8a" +#' ), +#' cell_type2 = c( +#' "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", +#' "Cygb", "Ttc9b", "Ipcef1" +#' ) +#' ) #' -#' makeSignMatrixPAGE(sign_names = c("cell_type1", "cell_type2", "cell_type3"), -#' sign_list = sign_list) +#' makeSignMatrixPAGE( +#' sign_names = c("cell_type1", "cell_type2", "cell_type3"), +#' sign_list = sign_list +#' ) #' @export -makeSignMatrixPAGE <- function(sign_names, - sign_list) { +makeSignMatrixPAGE <- function( + sign_names, + sign_list) { ## check input if (!inherits(sign_list, "list")) { stop("sign_list needs to be a list of signatures for each cell type / @@ -45,11 +56,14 @@ makeSignMatrixPAGE <- function(sign_names, res <- rep(x = name_subset, length(subset)) }) mydt <- data.table::data.table( - genes = genes, types = unlist(types), value = 1) + genes = genes, types = unlist(types), value = 1 + ) # convert data.table to signature matrix dtmatrix <- data.table::dcast.data.table( - mydt, formula = genes ~ types, value.var = "value", fill = 0) + mydt, + formula = genes ~ types, value.var = "value", fill = 0 + ) final_sig_matrix <- Matrix::as.matrix(dtmatrix[, -1]) rownames(final_sig_matrix) <- dtmatrix$genes @@ -70,21 +84,26 @@ makeSignMatrixPAGE <- function(sign_names, #' @returns matrix #' @seealso \code{\link{runDWLSDeconv}} #' @examples -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*3), nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3), nrow = length(sign_gene)) #' rownames(sign_matrix) <- sign_gene #' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") #' -#' makeSignMatrixDWLSfromMatrix(matrix = sign_matrix, sign_gene = sign_gene, -#' cell_type_vector = c("cell_type1", "cell_type2", "cell_type3")) +#' makeSignMatrixDWLSfromMatrix( +#' matrix = sign_matrix, sign_gene = sign_gene, +#' cell_type_vector = c("cell_type1", "cell_type2", "cell_type3") +#' ) #' @export -makeSignMatrixDWLSfromMatrix <- function(matrix, - sign_gene, - cell_type_vector) { +makeSignMatrixDWLSfromMatrix <- function( + matrix, + sign_gene, + cell_type_vector) { # 1. check if cell_type_vector and matrix are compatible if (ncol(matrix) != length(cell_type_vector)) { stop("ncol(matrix) needs to be the same as length(cell_type_vector)") @@ -114,7 +133,8 @@ makeSignMatrixDWLSfromMatrix <- function(matrix, cell_type <- unique(cell_type_vector)[cell_type_i] selected_cells <- colnames(matrix_subset)[cell_type_vector == cell_type] mean_expr_in_selected_cells <- rowMeans_flex(matrix_subset[ - , selected_cells]) + , selected_cells + ]) signMatrix[, cell_type_i] <- mean_expr_in_selected_cells } @@ -145,23 +165,28 @@ makeSignMatrixDWLSfromMatrix <- function(matrix, #' @seealso \code{\link{runDWLSDeconv}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) #' -#' makeSignMatrixDWLS(gobject = g, sign_gene = sign_gene, -#' cell_type_vector = pDataDT(g)[["leiden_clus"]]) +#' makeSignMatrixDWLS( +#' gobject = g, sign_gene = sign_gene, +#' cell_type_vector = pDataDT(g)[["leiden_clus"]] +#' ) #' @export -makeSignMatrixDWLS <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reverse_log = TRUE, - log_base = 2, - sign_gene, - cell_type_vector, - cell_type = NULL) { +makeSignMatrixDWLS <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reverse_log = TRUE, + log_base = 2, + sign_gene, + cell_type_vector, + cell_type = NULL) { ## deprecated arguments if (!is.null(cell_type)) { warning("cell_type is deprecated, use cell_type_vector in the future") @@ -182,8 +207,9 @@ makeSignMatrixDWLS <- function(gobject, ## 1. expression matrix values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- get_expression_values( gobject = gobject, spat_unit = spat_unit, @@ -223,22 +249,27 @@ makeSignMatrixDWLS <- function(gobject, #' @returns matrix #' @seealso \code{\link{rankEnrich}} #' @examples -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*3), nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3), nrow = length(sign_gene)) #' rownames(sign_matrix) <- sign_gene #' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") #' -#' makeSignMatrixRank(sc_matrix = sign_matrix, -#' sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3")) +#' makeSignMatrixRank( +#' sc_matrix = sign_matrix, +#' sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3") +#' ) #' @export -makeSignMatrixRank <- function(sc_matrix, - sc_cluster_ids, - ties_method = c("random", "max"), - gobject = NULL) { +makeSignMatrixRank <- function( + sc_matrix, + sc_cluster_ids, + ties_method = c("random", "max"), + gobject = NULL) { if (inherits(sc_matrix, "exprObj")) { sc_matrix <- sc_matrix[] } @@ -293,14 +324,18 @@ makeSignMatrixRank <- function(sc_matrix, # calculate fold change and rank of fold-change comb_dt[, fold := log2(mean_expr + 1) - log2(av_expr + 1)] comb_dt[, rankFold := data.table::frank( - -fold, ties.method = ties_method), by = clusters] + -fold, + ties.method = ties_method + ), by = clusters] # create matrix comb_rank_mat <- data.table::dcast.data.table( - data = comb_dt, genes ~ clusters, value.var = "rankFold") + data = comb_dt, genes ~ clusters, value.var = "rankFold" + ) comb_rank_matrix <- dt_to_matrix(comb_rank_mat) comb_rank_matrix <- comb_rank_matrix[ - rownames(sc_matrix), unique(sc_cluster_ids)] + rownames(sc_matrix), unique(sc_cluster_ids) + ] return(comb_rank_matrix) } @@ -315,19 +350,22 @@ makeSignMatrixRank <- function(sc_matrix, #' @description creates permutation for the PAGEEnrich test #' @returns PAGEEnrich test #' @keywords internal -.do_page_permutation <- function(gobject, - sig_gene, - ntimes) { +.do_page_permutation <- function( + gobject, + sig_gene, + ntimes) { # check available gene available_ct <- c() for (i in colnames(sig_gene)) { gene_i <- rownames(sig_gene)[which(sig_gene[, i] == 1)] overlap_i <- intersect( - gene_i, rownames(gobject@expression$rna$normalized)) + gene_i, rownames(gobject@expression$rna$normalized) + ) if (length(overlap_i) <= 5) { output <- paste0( "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. Will remove it.") + " overlapped genes. Will remove it." + ) } else { available_ct <- c(available_ct, i) } @@ -337,34 +375,38 @@ makeSignMatrixRank <- function(sc_matrix, } # only continue with genes present in both datasets interGene <- intersect( - rownames(sig_gene), rownames(gobject@expression$rna$normalized)) + rownames(sig_gene), rownames(gobject@expression$rna$normalized) + ) sign_matrix <- sig_gene[interGene, available_ct] ct_gene_counts <- NULL - for (i in 1:dim(sign_matrix)[2]) { + for (i in seq_len(dim(sign_matrix)[2])) { a <- length(which(sign_matrix[, i] == 1)) ct_gene_counts <- c(ct_gene_counts, a) } uniq_ct_gene_counts <- unique(ct_gene_counts) background_mean_sd <- matrix( - data = NA, nrow = length(uniq_ct_gene_counts) + 1, ncol = 3) + data = NA, nrow = length(uniq_ct_gene_counts) + 1, ncol = 3 + ) for (i in seq_along(uniq_ct_gene_counts)) { gene_num <- uniq_ct_gene_counts[i] all_sample_names <- NULL all_sample_list <- NULL - for (j in 1:ntimes) { + for (j in seq_len(ntimes)) { set.seed(j) random_gene <- sample(rownames( - gobject@expression$rna$normalized), gene_num, replace = FALSE) + gobject@expression$rna$normalized + ), gene_num, replace = FALSE) ct_name <- paste("ct", j, sep = "") all_sample_names <- c(all_sample_names, ct_name) all_sample_list <- c(all_sample_list, list(random_gene)) } random_sig <- makeSignMatrixPAGE(all_sample_names, all_sample_list) random_DT <- runPAGEEnrich( - gobject, - sign_matrix = random_sig, - p_value = FALSE) + gobject, + sign_matrix = random_sig, + p_value = FALSE + ) background <- unlist(random_DT[, 2:dim(random_DT)[2]]) df_row_name <- paste("gene_num_", uniq_ct_gene_counts[i], sep = "") list_back_i <- c(df_row_name, mean(background), stats::sd(background)) @@ -407,16 +449,17 @@ makeSignMatrixRank <- function(sc_matrix, #' gene set. #' @seealso \code{\link{makeSignMatrixPAGE}} #' @export -runPAGEEnrich_OLD <- function(gobject, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - n_times = 1000, - name = NULL, - return_gobject = TRUE) { +runPAGEEnrich_OLD <- function( + gobject, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + n_times = 1000, + name = NULL, + return_gobject = TRUE) { # expression values to be used values <- match.arg(expression_values, c("normalized", "scaled", "custom")) expr_values <- get_expression_values(gobject = gobject, values = values) @@ -432,7 +475,8 @@ runPAGEEnrich_OLD <- function(gobject, if (length(overlap_i) <= 5) { output <- paste0( "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. Will remove it.") + " overlapped genes. Will remove it." + ) } else { available_ct <- c(available_ct, i) } @@ -444,7 +488,9 @@ runPAGEEnrich_OLD <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) # only continue with genes present in both datasets interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) @@ -465,8 +511,9 @@ runPAGEEnrich_OLD <- function(gobject, # get enrichment scores enrichment <- matrix( - data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean)) - for (i in (1:dim(filterSig)[2])) { + data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean) + ) + for (i in seq_len(dim(filterSig)[2])) { signames <- rownames(filterSig)[which(filterSig[, i] == 1)] sigColMean <- apply(geneFold[signames, ], 2, mean) m <- length(signames) @@ -501,12 +548,14 @@ runPAGEEnrich_OLD <- function(gobject, for (i in colnames(sign_matrix)) { gene_i <- rownames(sign_matrix)[which(sign_matrix[, i] == 1)] overlap_i <- intersect( - gene_i, rownames(gobject@expression$rna$normalized)) + gene_i, rownames(gobject@expression$rna$normalized) + ) if (length(overlap_i) <= 5) { output <- paste0( "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. It will be removed.") + " overlapped genes. It will be removed." + ) } else { available_ct <- c(available_ct, i) } @@ -518,7 +567,8 @@ runPAGEEnrich_OLD <- function(gobject, # only continue with genes present in both datasets interGene <- intersect( - rownames(sign_matrix), rownames(gobject@expression$rna$normalized)) + rownames(sign_matrix), rownames(gobject@expression$rna$normalized) + ) filter_sign_matrix <- sign_matrix[interGene, available_ct] background_mean_sd <- .do_page_permutation( @@ -527,17 +577,21 @@ runPAGEEnrich_OLD <- function(gobject, ntimes = n_times ) - for (i in 1:dim(filter_sign_matrix)[2]) { + for (i in seq_len(dim(filter_sign_matrix)[2])) { length_gene <- length(which(filter_sign_matrix[, i] == 1)) join_gene_with_length <- paste("gene_num_", length_gene, sep = "") mean_i <- as.numeric(as.character( - background_mean_sd[join_gene_with_length, ][[1]])) + background_mean_sd[join_gene_with_length, ][[1]] + )) sd_i <- as.numeric(as.character( - background_mean_sd[join_gene_with_length, ][[2]])) + background_mean_sd[join_gene_with_length, ][[2]] + )) j <- i + 1 enrichmentDT[[j]] <- stats::pnorm( - enrichmentDT[[j]], mean = mean_i, sd = sd_i, - lower.tail = FALSE, log.p = FALSE) + enrichmentDT[[j]], + mean = mean_i, sd = sd_i, + lower.tail = FALSE, log.p = FALSE + ) } } @@ -584,17 +638,18 @@ runPAGEEnrich_OLD <- function(gobject, #' @param expr_values matrix of expression values #' @returns data.table #' @keywords internal -.page_dt_method <- function(sign_matrix, - expr_values, - min_overlap_genes = 5, - logbase = 2, - reverse_log_scale = TRUE, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - include_depletion = FALSE, - n_times = 1000, - max_block = 20e6, - verbose = TRUE) { +.page_dt_method <- function( + sign_matrix, + expr_values, + min_overlap_genes = 5, + logbase = 2, + reverse_log_scale = TRUE, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 20e6, + verbose = TRUE) { # data.table variables Var1 <- value <- Var2 <- V1 <- marker <- nr_markers <- fc <- cell_ID <- zscore <- colmean <- colSd <- pval <- NULL @@ -602,7 +657,9 @@ runPAGEEnrich_OLD <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) ## identify available cell types all_genes <- rownames(expr_values) @@ -613,11 +670,12 @@ runPAGEEnrich_OLD <- function(gobject, lost_cell_types_DT <- detected_DT[V1 <= min_overlap_genes] if (nrow(lost_cell_types_DT) > 0) { - for (row in 1:nrow(lost_cell_types_DT)) { + for (row in seq_len(nrow(lost_cell_types_DT))) { output <- paste0( - "Warning, ", lost_cell_types_DT[row][["Var2"]], " only has ", - lost_cell_types_DT[row][["V1"]], - " overlapping genes. Will be removed.") + "Warning, ", lost_cell_types_DT[row][["Var2"]], " only has ", + lost_cell_types_DT[row][["V1"]], + " overlapping genes. Will be removed." + ) if (verbose) print(output) } } @@ -659,13 +717,17 @@ runPAGEEnrich_OLD <- function(gobject, colnames(geneFold_DT) <- c("gene", "cell_ID", "fc") mergetest <- data.table::merge.data.table( - sub_ct_DT, geneFold_DT, by = "gene") + sub_ct_DT, geneFold_DT, + by = "gene" + ) mergetest <- mergetest[, mean(fc), by = .(cell_type, cell_ID, nr_markers)] if (is.integer(mergetest$cell_ID) && is.character(cellColMeanSd$cell_ID)) { mergetest$cell_ID <- as.character(mergetest$cell_ID) } mergetest <- data.table::merge.data.table( - mergetest, cellColMeanSd, by = "cell_ID") + mergetest, cellColMeanSd, + by = "cell_ID" + ) mergetest[, zscore := ((V1 - colmean) * nr_markers^(1 / 2)) / colSd] if (output_enrichment == "zscore") { @@ -684,13 +746,13 @@ runPAGEEnrich_OLD <- function(gobject, ## 2. first create the random samples all together ## cell_type_list <- list() perm_type_list <- list() - for (row in 1:nrow(sample_intrs)) { + for (row in seq_len(nrow(sample_intrs))) { cell_type <- sample_intrs[row][["cell_type"]] nr_genes <- as.numeric(sample_intrs[row][["nr_markers"]]) gene_list <- list() perm_list <- list() - for (i in 1:n_times) { + for (i in seq_len(n_times)) { sampled_genes <- sample(rownames(expr_values), size = nr_genes) gene_list[[i]] <- sampled_genes perm_list[[i]] <- rep(paste0("p_", i), nr_genes) @@ -728,9 +790,11 @@ runPAGEEnrich_OLD <- function(gobject, all_perms <- unique(perm_round) all_perms_num <- seq_along(all_perms) names(all_perms_num) <- all_perms - group_labels <- paste0("group_", 1:nr_groups) + group_labels <- paste0("group_", seq_len(nr_groups)) groups_vec <- cut( - all_perms_num, breaks = nr_groups, labels = group_labels) + all_perms_num, + breaks = nr_groups, labels = group_labels + ) names(all_perms) <- groups_vec @@ -742,16 +806,24 @@ runPAGEEnrich_OLD <- function(gobject, cell_type_perm_DT_sub <- cell_type_perm_DT[round %in% sub_perms] mergetest_perm_sub <- data.table::merge.data.table( - cell_type_perm_DT_sub, geneFold_DT, allow.cartesian = TRUE) + cell_type_perm_DT_sub, geneFold_DT, + allow.cartesian = TRUE + ) mergetest_perm_sub <- mergetest_perm_sub[ - , mean(fc), by = .(cell_type, cell_ID, nr_markers, round)] + , mean(fc), + by = .(cell_type, cell_ID, nr_markers, round) + ] if (is.integer(mergetest_perm_sub$cell_ID) && is.character( - cellColMeanSd$cell_ID)) { + cellColMeanSd$cell_ID + )) { mergetest_perm_sub$cell_ID <- as.character( - mergetest_perm_sub$cell_ID) + mergetest_perm_sub$cell_ID + ) } mergetest_perm_sub <- data.table::merge.data.table( - mergetest_perm_sub, cellColMeanSd, by = "cell_ID") + mergetest_perm_sub, cellColMeanSd, + by = "cell_ID" + ) mergetest_perm_sub[, zscore := (( V1 - colmean) * nr_markers^(1 / 2)) / colSd] @@ -761,19 +833,26 @@ runPAGEEnrich_OLD <- function(gobject, res_list_comb <- do.call("rbind", res_list) res_list_comb_average <- res_list_comb[ , .(mean_zscore = mean(zscore), sd_zscore = stats::sd(zscore)), - by = c("cell_ID", "cell_type")] + by = c("cell_ID", "cell_type") + ] mergetest_final <- data.table::merge.data.table( - mergetest, res_list_comb_average, by = c("cell_ID", "cell_type")) + mergetest, res_list_comb_average, + by = c("cell_ID", "cell_type") + ) ## calculate p.values based on normal distribution if (include_depletion == TRUE) { mergetest_final[, pval := stats::pnorm( - abs(zscore), mean = mean_zscore, sd = sd_zscore, - lower.tail = FALSE, log.p = FALSE)] + abs(zscore), + mean = mean_zscore, sd = sd_zscore, + lower.tail = FALSE, log.p = FALSE + )] } else { mergetest_final[, pval := stats::pnorm( - zscore, mean = mean_zscore, sd = sd_zscore, - lower.tail = FALSE, log.p = FALSE)] + zscore, + mean = mean_zscore, sd = sd_zscore, + lower.tail = FALSE, log.p = FALSE + )] } data.table::setorder(mergetest_final, pval) @@ -787,12 +866,16 @@ runPAGEEnrich_OLD <- function(gobject, resultmatrix <- data.table::dcast( - mergetest_final, formula = cell_ID ~ cell_type, - value.var = "pval_score") + mergetest_final, + formula = cell_ID ~ cell_type, + value.var = "pval_score" + ) return(list(DT = mergetest_final, matrix = resultmatrix)) } else { resultmatrix <- data.table::dcast( - mergetest, formula = cell_ID ~ cell_type, value.var = "zscore") + mergetest, + formula = cell_ID ~ cell_type, value.var = "zscore" + ) return(list(DT = mergetest, matrix = resultmatrix)) } } @@ -836,34 +919,38 @@ runPAGEEnrich_OLD <- function(gobject, #' @seealso \code{\link{makeSignMatrixPAGE}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*3, mean = 10), -#' nrow = length(sign_gene)) +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene #' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") -#' +#' #' runPAGEEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runPAGEEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - min_overlap_genes = 5, - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - include_depletion = FALSE, - n_times = 1000, - max_block = 20e6, - name = NULL, - verbose = TRUE, - return_gobject = TRUE) { +runPAGEEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 20e6, + name = NULL, + verbose = TRUE, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -878,7 +965,8 @@ runPAGEEnrich <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom"), expression_values)) + unique(c("normalized", "scaled", "custom"), expression_values) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -999,10 +1087,12 @@ PAGEEnrich <- function(...) { #' @keywords internal .do_rank_permutation <- function(sc_gene, n) { random_df <- data.frame(matrix(ncol = n, nrow = length(sc_gene))) - for (i in 1:n) { + for (i in seq_len(n)) { set.seed(i) random_rank <- sample( - seq_along(sc_gene), length(sc_gene), replace = FALSE) + seq_along(sc_gene), length(sc_gene), + replace = FALSE + ) random_df[, i] <- random_rank } rownames(random_df) <- sc_gene @@ -1044,33 +1134,38 @@ PAGEEnrich <- function(...) { #' @seealso \code{\link{makeSignMatrixRank}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' -#' runRankEnrich(gobject = g, sign_matrix = sign_matrix, -#' expression_values = "normalized") +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' +#' runRankEnrich( +#' gobject = g, sign_matrix = sign_matrix, +#' expression_values = "normalized" +#' ) #' @export -runRankEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "raw", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - ties_method = c("average", "max"), - p_value = FALSE, - n_times = 1000, - rbp_p = 0.99, - num_agg = 100, - name = NULL, - return_gobject = TRUE) { +runRankEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "raw", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + ties_method = c("average", "max"), + p_value = FALSE, + n_times = 1000, + rbp_p = 0.99, + num_agg = 100, + name = NULL, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1088,7 +1183,8 @@ runRankEnrich <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1113,7 +1209,9 @@ runRankEnrich <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) enrichment <- matrix( data = NA, @@ -1124,7 +1222,9 @@ runRankEnrich <- function(gobject, # calculate mean gene expression if (reverse_log_scale == TRUE) { mean_gene_expr <- log(Matrix::rowMeans( - logbase^expr_values[] - 1, dims = 1) + 1) + logbase^expr_values[] - 1, + dims = 1 + ) + 1) } else { mean_gene_expr <- Matrix::rowMeans(expr_values[]) } @@ -1145,7 +1245,7 @@ runRankEnrich <- function(gobject, rownames(rankFold) <- rownames(expr_values[]) colnames(rankFold) <- colnames(expr_values[]) - for (i in (1:dim(sign_matrix)[2])) { + for (i in seq_len(dim(sign_matrix)[2])) { signames <- rownames(sign_matrix)[which(sign_matrix[, i] > 0)] interGene <- intersect(signames, rownames(rankFold)) filterSig <- sign_matrix[interGene, ] @@ -1156,9 +1256,9 @@ runRankEnrich <- function(gobject, vectorX <- rep(NA, dim(filterRankFold)[2]) - for (j in (1:dim(filterRankFold)[2])) { + for (j in seq_len(dim(filterRankFold)[2])) { toprpb <- sort(rpb[, j], decreasing = TRUE) - zscore <- sum(toprpb[1:num_agg]) + zscore <- sum(toprpb[seq_len(num_agg)]) vectorX[j] <- zscore } enrichment[i, ] <- vectorX @@ -1199,14 +1299,19 @@ runRankEnrich <- function(gobject, background <- unlist(random_DT[, 2:dim(random_DT)[2]]) fit.gamma <- fitdistrplus::fitdist( - background, distr = "gamma", method = "mle") + background, + distr = "gamma", method = "mle" + ) pvalue_DT <- enrichmentDT enrichmentDT[, 2:dim(enrichmentDT)[2]] <- lapply( enrichmentDT[, 2:dim(enrichmentDT)[2]], function(x) { - stats::pgamma( - x, fit.gamma$estimate[1], rate = fit.gamma$estimate[2], - lower.tail = FALSE, log.p = FALSE) - }) + stats::pgamma( + x, fit.gamma$estimate[1], + rate = fit.gamma$estimate[2], + lower.tail = FALSE, log.p = FALSE + ) + } + ) } # create spatial enrichment object @@ -1230,7 +1335,8 @@ runRankEnrich <- function(gobject, ## return object or results ## if (return_gobject == TRUE) { spenr_names <- list_spatial_enrichments_names( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type) + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) if (name %in% spenr_names) { @@ -1308,28 +1414,31 @@ rankEnrich <- function(...) { #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' runHyperGeometricEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runHyperGeometricEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - top_percentage = 5, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - name = NULL, - return_gobject = TRUE) { +runHyperGeometricEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + top_percentage = 5, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + name = NULL, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1343,7 +1452,8 @@ runHyperGeometricEnrich <- function(gobject, values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1358,7 +1468,9 @@ runHyperGeometricEnrich <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) # calculate mean gene expression if (reverse_log_scale == TRUE) { @@ -1375,7 +1487,9 @@ runHyperGeometricEnrich <- function(gobject, top_q <- 1 - top_percentage / 100 quantilecut <- apply( - foldChange, 2, stats::quantile, probs = top_q, na.rm = TRUE) + foldChange, 2, stats::quantile, + probs = top_q, na.rm = TRUE + ) expbinary <- t_flex(1 * t_flex(foldChange > quantilecut)) markerGenes <- rownames(inter_sign_matrix) @@ -1387,21 +1501,26 @@ runHyperGeometricEnrich <- function(gobject, ncol = dim(expbinaryOverlap)[2] ) - for (i in (1:dim(inter_sign_matrix)[2])) { + for (i in seq_len(dim(inter_sign_matrix)[2])) { signames <- rownames(inter_sign_matrix)[ - which(inter_sign_matrix[, i] == 1)] + which(inter_sign_matrix[, i] == 1) + ] vectorX <- NULL - for (j in (1:dim(expbinaryOverlap)[2])) { + for (j in seq_len(dim(expbinaryOverlap)[2])) { cellsiggene <- names(expbinaryOverlap[ - which(expbinaryOverlap[, j] == 1), j]) + which(expbinaryOverlap[, j] == 1), j + ]) x <- length(intersect(cellsiggene, signames)) m <- length(rownames(inter_sign_matrix)[which( - inter_sign_matrix[, i] == 1)]) + inter_sign_matrix[, i] == 1 + )]) n <- total - m k <- length(intersect(cellsiggene, markerGenes)) enrich <- (0 - log10(stats::phyper( - x, m, n, k, log.p = FALSE, lower.tail = FALSE))) + x, m, n, k, + log.p = FALSE, lower.tail = FALSE + ))) vectorX <- append(vectorX, enrich) } enrichment[i, ] <- vectorX @@ -1424,8 +1543,9 @@ runHyperGeometricEnrich <- function(gobject, if (p_value == TRUE) { enrichmentDT[, 2:dim(enrichmentDT)[2]] <- lapply( enrichmentDT[, 2:dim(enrichmentDT)[2]], function(x) { - 10^(-x) - }) + 10^(-x) + } + ) } # create spatial enrichment object @@ -1449,7 +1569,8 @@ runHyperGeometricEnrich <- function(gobject, ## return object or results ## if (return_gobject == TRUE) { spenr_names <- list_spatial_enrichments_names( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type) + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) if (name %in% spenr_names) { cat(name, " has already been used, will be overwritten") @@ -1537,41 +1658,48 @@ hyperGeometricEnrich <- function(...) { #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' #' runSpatialEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runSpatialEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - enrich_method = c("PAGE", "rank", "hypergeometric"), - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - min_overlap_genes = 5, - reverse_log_scale = TRUE, - logbase = 2, - p_value = FALSE, - n_times = 1000, - rbp_p = 0.99, - num_agg = 100, - max_block = 20e6, - top_percentage = 5, - output_enrichment = c("original", "zscore"), - name = NULL, - verbose = TRUE, - return_gobject = TRUE) { +runSpatialEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + enrich_method = c("PAGE", "rank", "hypergeometric"), + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + p_value = FALSE, + n_times = 1000, + rbp_p = 0.99, + num_agg = 100, + max_block = 20e6, + top_percentage = 5, + output_enrichment = c("original", "zscore"), + name = NULL, + verbose = TRUE, + return_gobject = TRUE) { enrich_method <- match.arg( - enrich_method, choices = c("PAGE", "rank", "hypergeometric")) + enrich_method, + choices = c("PAGE", "rank", "hypergeometric") + ) output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) if (enrich_method == "PAGE") { @@ -1710,29 +1838,32 @@ NULL #' \item{\emph{Geary's C} 'geary'} #' } #' @export -spatialAutoCorGlobal <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - feats = NULL, - method = c("moran", "geary"), - data_to_use = c("expression", "cell_meta"), - expression_values = c("normalized", "scaled", "custom"), - meta_cols = NULL, - spatial_network_to_use = "kNN_network", - wm_method = c("distance", "adjacency"), - wm_name = "spat_weights", - node_values = NULL, - weight_matrix = NULL, - test_method = c("none", "monte_carlo"), - mc_nsim = 99, - cor_name = NULL, - return_gobject = FALSE, - verbose = TRUE) { +spatialAutoCorGlobal <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + feats = NULL, + method = c("moran", "geary"), + data_to_use = c("expression", "cell_meta"), + expression_values = c("normalized", "scaled", "custom"), + meta_cols = NULL, + spatial_network_to_use = "kNN_network", + wm_method = c("distance", "adjacency"), + wm_name = "spat_weights", + node_values = NULL, + weight_matrix = NULL, + test_method = c("none", "monte_carlo"), + mc_nsim = 99, + cor_name = NULL, + return_gobject = FALSE, + verbose = TRUE) { # 0. determine inputs method <- match.arg(method, choices = c("moran", "geary")) test_method <- match.arg(test_method, choices = c("none", "monte_carlo")) data_to_use <- match.arg( - data_to_use, choices = c("expression", "cell_meta")) + data_to_use, + choices = c("expression", "cell_meta") + ) if (is.null(cor_name)) cor_name <- method if (!is.null(node_values)) { if (is.numeric(node_values)) { @@ -1743,9 +1874,13 @@ spatialAutoCorGlobal <- function(gobject = NULL, } use_ext_vals <- data.table::fifelse( - !is.null(node_values), yes = TRUE, no = FALSE) + !is.null(node_values), + yes = TRUE, no = FALSE + ) use_sn <- data.table::fifelse( - !is.null(weight_matrix), yes = FALSE, no = TRUE) + !is.null(weight_matrix), + yes = FALSE, no = TRUE + ) use_expr <- data.table::fcase( isTRUE(use_ext_vals), FALSE, @@ -1837,9 +1972,12 @@ spatialAutoCorGlobal <- function(gobject = NULL, # return info if (isTRUE(return_gobject)) { - if (isTRUE(verbose)) - wrap_msg("Appending", method, - "results to feature metadata: fDataDT()") + if (isTRUE(verbose)) { + wrap_msg( + "Appending", method, + "results to feature metadata: fDataDT()" + ) + } gobject <- addFeatMetadata( gobject = gobject, spat_unit = spat_unit, @@ -1873,30 +2011,35 @@ spatialAutoCorGlobal <- function(gobject = NULL, #' \item{\emph{Local mean} 'mean'} #' } #' @export -spatialAutoCorLocal <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - feats = NULL, - method = c("moran", "gi", "gi*", "mean"), - data_to_use = c("expression", "cell_meta"), - expression_values = c("normalized", "scaled", "custom"), - meta_cols = NULL, - spatial_network_to_use = "kNN_network", - wm_method = c("distance", "adjacency"), - wm_name = "spat_weights", - node_values = NULL, - weight_matrix = NULL, - test_method = c("none"), - # cor_name = NULL, - enrich_name = NULL, - return_gobject = TRUE, - output = c("spatEnrObj", "data.table"), - verbose = TRUE) { +spatialAutoCorLocal <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + feats = NULL, + method = c("moran", "gi", "gi*", "mean"), + data_to_use = c("expression", "cell_meta"), + expression_values = c("normalized", "scaled", "custom"), + meta_cols = NULL, + spatial_network_to_use = "kNN_network", + wm_method = c("distance", "adjacency"), + wm_name = "spat_weights", + node_values = NULL, + weight_matrix = NULL, + test_method = c("none"), + # cor_name = NULL, + enrich_name = NULL, + return_gobject = TRUE, + output = c("spatEnrObj", "data.table"), + verbose = TRUE) { # 0. determine inputs method_select <- match.arg( - method, choices = c("moran", "gi", "gi*", "mean")) + method, + choices = c("moran", "gi", "gi*", "mean") + ) data_to_use <- match.arg( - data_to_use, choices = c("expression", "cell_meta")) + data_to_use, + choices = c("expression", "cell_meta") + ) output <- match.arg(output, choices = c("spatEnrObj", "data.table")) # if(is.null(cor_name)) cor_name = method @@ -1916,9 +2059,13 @@ spatialAutoCorLocal <- function(gobject = NULL, use_ext_vals <- data.table::fifelse( - !is.null(node_values), yes = TRUE, no = FALSE) + !is.null(node_values), + yes = TRUE, no = FALSE + ) use_sn <- data.table::fifelse( - !is.null(weight_matrix), yes = FALSE, no = TRUE) + !is.null(weight_matrix), + yes = FALSE, no = TRUE + ) use_expr <- data.table::fcase( isTRUE(use_ext_vals), FALSE, @@ -2021,7 +2168,7 @@ spatialAutoCorLocal <- function(gobject = NULL, if (isTRUE(return_gobject)) { if (isTRUE(verbose)) { wrap_msg("Attaching ", method_select, - ' results as spatial enrichment: "', + ' results as spatial enrichment: "', enrich_name, '"', sep = "" ) @@ -2053,13 +2200,14 @@ spatialAutoCorLocal <- function(gobject = NULL, #' .run_spat_autocor_global #' @returns data.table #' @keywords internal -.run_spat_autocor_global <- function(use_values, - feats, - weight_matrix, - method, - test_method, - mc_nsim, - cor_name) { +.run_spat_autocor_global <- function( + use_values, + feats, + weight_matrix, + method, + test_method, + mc_nsim, + cor_name) { # data.table vars cell_ID <- nsim <- NULL @@ -2071,8 +2219,11 @@ spatialAutoCorLocal <- function(gobject = NULL, } progressr::with_progress({ - if (step_size > 1) pb <- progressr::progressor( - steps = nfeats / step_size) + if (step_size > 1) { + pb <- progressr::progressor( + steps = nfeats / step_size + ) + } res_list <- lapply_flex( seq_along(feats), # future.packages = c('terra', 'data.table'), @@ -2125,7 +2276,8 @@ spatialAutoCorLocal <- function(gobject = NULL, colnames(res_dt) <- c("feat_ID", cor_name) } else { colnames(res_dt) <- c("feat_ID", cor_name, paste0( - cor_name, "_", test_method)) + cor_name, "_", test_method + )) } return(res_dt) } @@ -2133,12 +2285,13 @@ spatialAutoCorLocal <- function(gobject = NULL, #' .run_spat_autocor_local #' @returns data.table #' @keywords internal -.run_spat_autocor_local <- function(use_values, - feats, - weight_matrix, - method, - test_method, - IDs) { +.run_spat_autocor_local <- function( + use_values, + feats, + weight_matrix, + method, + test_method, + IDs) { cell_ID <- NULL nfeats <- length(feats) @@ -2149,8 +2302,11 @@ spatialAutoCorLocal <- function(gobject = NULL, } progressr::with_progress({ - if (step_size > 1) pb <- progressr::progressor( - steps = nfeats / step_size) + if (step_size > 1) { + pb <- progressr::progressor( + steps = nfeats / step_size + ) + } res_list <- lapply_flex( seq_along(feats), # future.packages = c('terra', 'data.table'), @@ -2216,23 +2372,24 @@ spatialAutoCorLocal <- function(gobject = NULL, # 4, IDs - cell_IDs if available # Some additional information about information used in specific workflows are # also returned -.evaluate_autocor_input <- function(gobject, - use_ext_vals, - use_sn, - use_expr, - use_meta, - spat_unit, - feat_type, - feats, - data_to_use, - expression_values, - meta_cols, - spatial_network_to_use, - wm_method, - wm_name, - node_values, - weight_matrix, - verbose = TRUE) { +.evaluate_autocor_input <- function( + gobject, + use_ext_vals, + use_sn, + use_expr, + use_meta, + spat_unit, + feat_type, + feats, + data_to_use, + expression_values, + meta_cols, + spatial_network_to_use, + wm_method, + wm_name, + node_values, + weight_matrix, + verbose = TRUE) { cell_ID <- NULL # 1. Get spatial network to either get or generate a spatial weight matrix @@ -2250,7 +2407,9 @@ spatialAutoCorLocal <- function(gobject = NULL, # if no weight_matrix already generated... if (is.null(weight_matrix)) { wm_method <- match.arg( - wm_method, choices = c("distance", "adjacency")) + wm_method, + choices = c("distance", "adjacency") + ) if (isTRUE(verbose)) { wrap_msg( "No spatial weight matrix found in selected spatial network @@ -2294,7 +2453,8 @@ spatialAutoCorLocal <- function(gobject = NULL, # EXPR=================================================================# values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) use_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2358,7 +2518,8 @@ spatialAutoCorLocal <- function(gobject = NULL, (nrow(use_values) != nrow(weight_matrix))) { stop(wrap_txt("Number of values to correlate do not match number of weight matrix entries", - errWidth = TRUE)) + errWidth = TRUE + )) } @@ -2391,11 +2552,12 @@ spatialAutoCorLocal <- function(gobject = NULL, #' @description Rui to fill in #' @returns matrix #' @keywords internal -enrich_deconvolution <- function(expr, - log_expr, - cluster_info, - ct_exp, - cutoff) { +enrich_deconvolution <- function( + expr, + log_expr, + cluster_info, + ct_exp, + cutoff) { ##### generate enrich 0/1 matrix based on expression matrix ct_exp <- ct_exp[rowSums(ct_exp) > 0, ] enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) @@ -2423,7 +2585,8 @@ enrich_deconvolution <- function(expr, cluster_info <- cluster_info for (i in seq_along(cluster_sort)) { cluster_i_enrich <- enrich_result[ - , which(cluster_info == cluster_sort[i])] + , which(cluster_info == cluster_sort[i]) + ] row_i_max <- Rfast::rowMaxs(cluster_i_enrich, value = TRUE) ct <- rownames(enrich_result)[which(row_i_max > cutoff)] if (length(ct) < 2) { @@ -2433,7 +2596,8 @@ enrich_deconvolution <- function(expr, ct_gene <- c() for (j in seq_along(ct)) { sig_gene_j <- rownames(enrich_matrix)[ - which(enrich_matrix[, ct[j]] == 1)] + which(enrich_matrix[, ct[j]] == 1) + ] ct_gene <- c(ct_gene, sig_gene_j) } uniq_ct_gene <- intersect(rownames(expr), unique(ct_gene)) @@ -2442,7 +2606,8 @@ enrich_deconvolution <- function(expr, cluster_cell_exp <- expr[uniq_ct_gene, cluster_i_cell] cluster_i_dwls <- optimize_deconvolute_dwls( - cluster_cell_exp, select_sig_exp) + cluster_cell_exp, select_sig_exp + ) dwls_results[ct, cluster_i_cell] <- cluster_i_dwls } ##### remove negative values @@ -2459,10 +2624,11 @@ enrich_deconvolution <- function(expr, #' @description Rui to fill in #' @returns matrix #' @keywords internal -spot_deconvolution <- function(expr, - cluster_info, - ct_exp, - binary_matrix) { +spot_deconvolution <- function( + expr, + cluster_info, + ct_exp, + binary_matrix) { ##### generate enrich 0/1 matrix based on expression matrix enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) rowmax_col <- Rfast::rowMaxs(ct_exp) @@ -2480,7 +2646,8 @@ spot_deconvolution <- function(expr, for (i in seq_along(cluster_sort)) { cluster_i_matrix <- binary_matrix[ - , which(cluster_info == cluster_sort[i])] + , which(cluster_info == cluster_sort[i]) + ] row_i_max <- Rfast::rowMaxs(cluster_i_matrix, value = TRUE) ct_i <- rownames(cluster_i_matrix)[which(row_i_max == 1)] ######## calculate proportion based on binarized deconvolution @@ -2491,7 +2658,8 @@ spot_deconvolution <- function(expr, ct_gene <- c() for (j in seq_along(ct_i)) { sig_gene_j <- rownames(enrich_matrix)[ - which(enrich_matrix[, ct_i[j]] == 1)] + which(enrich_matrix[, ct_i[j]] == 1) + ] ct_gene <- c(ct_gene, sig_gene_j) } uniq_ct_gene <- intersect(rownames(expr), unique(ct_gene)) @@ -2503,12 +2671,14 @@ spot_deconvolution <- function(expr, all_exp <- Matrix::rowMeans(cluster_cell_exp) solution_all_exp <- solve_OLS_internal(select_sig_exp, all_exp) constant_J <- find_dampening_constant( - select_sig_exp, all_exp, solution_all_exp) + select_sig_exp, all_exp, solution_all_exp + ) ###### deconvolution for each spot - for (k in 1:(dim(cluster_cell_exp)[2])) { + for (k in seq_len(dim(cluster_cell_exp)[2])) { B <- Matrix::as.matrix(cluster_cell_exp[, k]) ct_spot_k <- rownames(cluster_i_matrix)[ - which(cluster_i_matrix[, k] == 1)] + which(cluster_i_matrix[, k] == 1) + ] if (sum(B) == 0 || length(ct_spot_k) == 0) { ####* must include the case where all genes are 0 dwls_results[, colnames(cluster_cell_exp)[k]] <- NA @@ -2517,16 +2687,19 @@ spot_deconvolution <- function(expr, } if (length(ct_spot_k) == 1) { dwls_results[ - ct_spot_k[1], colnames(cluster_cell_exp)[k]] <- 1 + ct_spot_k[1], colnames(cluster_cell_exp)[k] + ] <- 1 } else { ct_k_gene <- c() for (m in seq_along(ct_spot_k)) { sig_gene_k <- rownames(enrich_matrix)[which( - enrich_matrix[, ct_spot_k[m]] == 1)] + enrich_matrix[, ct_spot_k[m]] == 1 + )] ct_k_gene <- c(ct_k_gene, sig_gene_k) } uniq_ct_k_gene <- intersect( - rownames(ct_exp), unique(ct_k_gene)) + rownames(ct_exp), unique(ct_k_gene) + ) S_k <- Matrix::as.matrix(ct_exp[uniq_ct_k_gene, ct_spot_k]) if (sum(B[uniq_ct_k_gene, ]) == 0) { ####* must include the case all genes are 0 @@ -2534,7 +2707,8 @@ spot_deconvolution <- function(expr, ####* will produce NAs for some spots in the output } else { solDWLS <- optimize_solveDampenedWLS(S_k, B[ - uniq_ct_k_gene, ], constant_J) + uniq_ct_k_gene, + ], constant_J) dwls_results[names(solDWLS), colnames(cluster_cell_exp)[k]] <- solDWLS } } @@ -2555,9 +2729,10 @@ spot_deconvolution <- function(expr, #' @description Rui to fill in #' @returns enrichment values #' @keywords internal -cluster_enrich_analysis <- function(exp_matrix, - cluster_info, - enrich_sig_matrix) { +cluster_enrich_analysis <- function( + exp_matrix, + cluster_info, + enrich_sig_matrix) { uniq_cluster <- mixedsort(unique(cluster_info)) if (length(uniq_cluster) == 1) { stop("Only one cluster identified, need at least two.") @@ -2566,8 +2741,11 @@ cluster_enrich_analysis <- function(exp_matrix, for (i in uniq_cluster) { cluster_exp <- cbind( cluster_exp, - (apply(exp_matrix, 1, - function(y) mean(y[which(cluster_info == i)])))) + (apply( + exp_matrix, 1, + function(y) mean(y[which(cluster_info == i)]) + )) + ) } log_cluster_exp <- log2(cluster_exp + 1) colnames(log_cluster_exp) <- uniq_cluster @@ -2580,8 +2758,9 @@ cluster_enrich_analysis <- function(exp_matrix, #' @description Rui to fill in #' @returns enrichment matrix #' @keywords internal -enrich_analysis <- function(expr_values, - sign_matrix) { +enrich_analysis <- function( + expr_values, + sign_matrix) { # output enrichment # only continue with genes present in both datasets interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) @@ -2597,8 +2776,9 @@ enrich_analysis <- function(expr_values, # get enrichment scores enrichment <- matrix( - data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean)) - for (i in (1:dim(filterSig)[2])) { + data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean) + ) + for (i in seq_len(dim(filterSig)[2])) { signames <- rownames(filterSig)[which(filterSig[, i] == 1)] sigColMean <- apply(geneFold[signames, ], 2, mean) m <- length(signames) @@ -2623,8 +2803,9 @@ enrich_analysis <- function(expr_values, #' @description Rui to fill in #' @returns matrix #' @keywords internal -optimize_deconvolute_dwls <- function(exp, - Signature) { +optimize_deconvolute_dwls <- function( + exp, + Signature) { ###### overlap signature with spatial genes Genes <- intersect(rownames(Signature), rownames(exp)) S <- Signature[Genes, ] @@ -2637,7 +2818,7 @@ optimize_deconvolute_dwls <- function(exp, solution_all_exp <- solve_OLS_internal(S, all_exp[Genes]) constant_J <- find_dampening_constant(S, all_exp[Genes], solution_all_exp) - for (j in 1:(dim(subBulk)[2])) { + for (j in seq_len(dim(subBulk)[2])) { B <- subBulk[, j] if (sum(B) > 0) { solDWLS <- optimize_solveDampenedWLS(S, B, constant_J) @@ -2658,9 +2839,10 @@ optimize_deconvolute_dwls <- function(exp, #' @title optimize_solveDampenedWLS #' @returns numeric #' @keywords internal -optimize_solveDampenedWLS <- function(S, - B, - constant_J) { +optimize_solveDampenedWLS <- function( + S, + B, + constant_J) { # first solve OLS, use this solution to find a starting point for the # weights solution <- solve_OLS_internal(S, B) @@ -2696,9 +2878,10 @@ optimize_solveDampenedWLS <- function(S, #' @description find a dampening constant for the weights using cross-validation #' @returns numeric #' @keywords internal -find_dampening_constant <- function(S, - B, - goldStandard) { +find_dampening_constant <- function( + S, + B, + goldStandard) { solutionsSd <- NULL # goldStandard is used to define the weights @@ -2715,20 +2898,22 @@ find_dampening_constant <- function(S, # try multiple values of the dampening constant (multiplier) # for each, calculate the variance of the dampened weighted solution for # a subset of genes - for (j in 1:ceiling(log2(max(wsScaledMinusInf)))) { + for (j in seq_len(ceiling(log2(max(wsScaledMinusInf))))) { multiplier <- 1 * 2^(j - 1) wsDampened <- wsScaled wsDampened[which(wsScaled > multiplier)] <- multiplier solutions <- NULL - seeds <- c(1:100) - for (i in 1:100) { + seeds <- seq_len(100) + for (i in seq_len(100)) { set.seed(seeds[i]) # make nondeterministic subset <- sample(length(ws), size = length(ws) * 0.5) # randomly select half of gene set # solve dampened weighted least squares for subset fit <- stats::lm( - B[subset] ~ -1 + S[subset, ], weights = wsDampened[subset]) + B[subset] ~ -1 + S[subset, , drop = FALSE], + weights = wsDampened[subset] + ) sol <- fit$coef * sum(goldStandard) / sum(fit$coef) solutions <- cbind(solutions, sol) } @@ -2745,8 +2930,9 @@ find_dampening_constant <- function(S, #' @description basic functions for dwls #' @returns numeric #' @keywords internal -solve_OLS_internal <- function(S, - B) { +solve_OLS_internal <- function( + S, + B) { D <- t(S) %*% S d <- t(S) %*% B A <- cbind(diag(dim(S)[2])) @@ -2811,10 +2997,11 @@ solve_OLS_internal <- function(S, #' @description solve WLS given a dampening constant #' @returns matrix #' @keywords internal -solve_dampened_WLSj <- function(S, - B, - goldStandard, - j) { +solve_dampened_WLSj <- function( + S, + B, + goldStandard, + j) { multiplier <- 1 * 2^(j - 1) sol <- goldStandard ws <- as.vector((1 / (S %*% sol))^2) @@ -2863,27 +3050,30 @@ solve_dampened_WLSj <- function(S, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' @export -runDWLSDeconv <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized"), - logbase = 2, - cluster_column = "leiden_clus", - sign_matrix, - n_cell = 50, - cutoff = 2, - name = NULL, - return_gobject = TRUE) { +runDWLSDeconv <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized"), + logbase = 2, + cluster_column = "leiden_clus", + sign_matrix, + n_cell = 50, + cutoff = 2, + name = NULL, + return_gobject = TRUE) { # verify if optional package is installed package_check(pkg_name = "quadprog", repository = "CRAN") package_check(pkg_name = "Rfast", repository = "CRAN") @@ -2905,7 +3095,8 @@ runDWLSDeconv <- function(gobject, values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2954,9 +3145,11 @@ runDWLSDeconv <- function(gobject, binary_matrix = binarize_proportion ) deconvolutionDT <- data.table::data.table( - cell_ID = colnames(spot_proportion)) + cell_ID = colnames(spot_proportion) + ) deconvolutionDT <- cbind( - deconvolutionDT, data.table::as.data.table(t(spot_proportion))) + deconvolutionDT, data.table::as.data.table(t(spot_proportion)) + ) # create spatial enrichment object enrObj <- create_spat_enr_obj( @@ -3039,29 +3232,32 @@ runDWLSDeconv <- function(gobject, #' @seealso \code{\link{runDWLSDeconv}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' #' runSpatialDeconv(gobject = g, sign_matrix = sign_matrix) #' @export -runSpatialDeconv <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - deconv_method = c("DWLS"), - expression_values = c("normalized"), - logbase = 2, - cluster_column = "leiden_clus", - sign_matrix, - n_cell = 50, - cutoff = 2, - name = NULL, - return_gobject = TRUE) { +runSpatialDeconv <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + deconv_method = c("DWLS"), + expression_values = c("normalized"), + logbase = 2, + cluster_column = "leiden_clus", + sign_matrix, + n_cell = 50, + cutoff = 2, + name = NULL, + return_gobject = TRUE) { deconv_method <- match.arg(deconv_method, choices = c("DWLS")) diff --git a/R/spatial_enrichment_visuals.R b/R/spatial_enrichment_visuals.R index e898eb5fc..fec02713c 100644 --- a/R/spatial_enrichment_visuals.R +++ b/R/spatial_enrichment_visuals.R @@ -2,7 +2,7 @@ #' @name findCellTypesFromEnrichment #' @param gobject Giotto Object #' @param spat_unit spatial unit in which the enrichment information is stored -#' @param feat_type feature type for which the enrichment information was +#' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment #' i.e. output from GiottoClass::list_spatial_enrichment_names() @@ -24,15 +24,17 @@ #' the associated cell types from the enrichment. #' #' @export -findCellTypesFromEnrichment <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - return_frequency_table = FALSE) { +findCellTypesFromEnrichment <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + return_frequency_table = FALSE) { # guard clauses - if (!inherits(gobject, "giotto")) + if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -69,17 +71,20 @@ findCellTypesFromEnrichment <- function(gobject = NULL, # new column, mapping a cell to it's most likely type if (enrich_is_p_value) { pz_enrich[, probable_cell_type := names( - .SD)[max.col(-.SD)], .SDcols = 2:n_c] + .SD + )[max.col(-.SD)], .SDcols = 2:n_c] } else { pz_enrich[, probable_cell_type := names( - .SD)[max.col(.SD)], .SDcols = 2:n_c] + .SD + )[max.col(.SD)], .SDcols = 2:n_c] } cell_ID_and_types_pz_enrich <- pz_enrich[, .(cell_ID, probable_cell_type)] if (return_frequency_table) { pz_enrich_cell_type_frequencies <- table( - cell_ID_and_types_pz_enrich$probable_cell_type) + cell_ID_and_types_pz_enrich$probable_cell_type + ) return(pz_enrich_cell_type_frequencies) } @@ -90,7 +95,7 @@ findCellTypesFromEnrichment <- function(gobject = NULL, #' @name plotCellTypesFromEnrichment #' @param gobject Giotto Object #' @param spat_unit spatial unit in which the enrichment information is stored -#' @param feat_type feature type for which the enrichment information was +#' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment #' i.e. output from GiottoClass::list_spatial_enrichment_names() @@ -104,20 +109,21 @@ findCellTypesFromEnrichment <- function(gobject = NULL, #' This function generates a bar plot of cell types vs the frequency #' of that cell type in the data. These cell type results are #' based on the provided `enrichment_name`, and will be determined -#' by the maximum value of the z-score or p-value for a given cell or +#' by the maximum value of the z-score or p-value for a given cell or #' annotation. #' #' @export -plotCellTypesFromEnrichment <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - title = NULL, - save_param = list(), - default_save_name = "cell_types_from_enrichment", - save_plot = NULL, - show_plot = NULL, - return_plot = NULL) { +plotCellTypesFromEnrichment <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + title = NULL, + save_param = list(), + default_save_name = "cell_types_from_enrichment", + save_plot = NULL, + show_plot = NULL, + return_plot = NULL) { # guard clauses handled at first step downstream # therefore, omitting here. id_and_types <- findCellTypesFromEnrichment( @@ -131,8 +137,11 @@ plotCellTypesFromEnrichment <- function(gobject = NULL, # data.table column probable_cell_type <- NULL - if (is.null(title)) title <- paste0( - spat_unit, "cell types (maximum", enrichment_name, ")") + if (is.null(title)) { + title <- paste0( + spat_unit, "cell types (maximum", enrichment_name, ")" + ) + } pl <- ggplot2::ggplot(id_and_types, aes(x = probable_cell_type)) + ggplot2::geom_bar() + @@ -163,7 +172,7 @@ plotCellTypesFromEnrichment <- function(gobject = NULL, #' @name pieCellTypesFromEnrichment #' @param gobject Giotto Object #' @param spat_unit spatial unit in which the enrichment information is stored -#' @param feat_type feature type for which the enrichment information was +#' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment #' i.e. output from GiottoClass::list_spatial_enrichment_names() @@ -179,16 +188,17 @@ plotCellTypesFromEnrichment <- function(gobject = NULL, #' and will be determined by the maximum value of the z-score #' or p-value for a given cell or annotation. #' @export -pieCellTypesFromEnrichment <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - title = NULL, - save_param = list(), - default_save_name = "cell_types_from_enrichment_pie", - save_plot = NULL, - show_plot = NULL, - return_plot = NULL) { +pieCellTypesFromEnrichment <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + title = NULL, + save_param = list(), + default_save_name = "cell_types_from_enrichment_pie", + save_plot = NULL, + show_plot = NULL, + return_plot = NULL) { # guard clauses handled one step downstream freq_table <- findCellTypesFromEnrichment( @@ -211,7 +221,8 @@ pieCellTypesFromEnrichment <- function(gobject = NULL, for (i in cell_types) { # hackish, admittedly nullvar <- freq_dt[cell_type == i, perc := num_cells / sum( - freq_dt$num_cells) * 100] + freq_dt$num_cells + ) * 100] } rm(nullvar) # saves memory diff --git a/R/spatial_genes.R b/R/spatial_genes.R index 68fd2d3fa..53bc927c5 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -8,11 +8,12 @@ NULL #' @rdname spat_fisher_exact #' @keywords internal -.spat_fish_func <- function(feat, - bin_matrix, - spat_mat, - calc_hub = FALSE, - hub_min_int = 3) { +.spat_fish_func <- function( + feat, + bin_matrix, + spat_mat, + calc_hub = FALSE, + hub_min_int = 3) { feat_vector <- bin_matrix[rownames(bin_matrix) == feat, ] feat_vectorA <- feat_vector[names(feat_vector) %in% rownames(spat_mat)] @@ -47,19 +48,22 @@ NULL high_cells <- names(feat_vector[feat_vector == 1]) subset_spat_mat <- spat_mat[ rownames(spat_mat) %in% high_cells, colnames(spat_mat) %in% - high_cells] + high_cells + ] if (length(subset_spat_mat) == 1) { hub_nr <- 0 } else { subset_spat_mat <- spat_mat[ rownames(spat_mat) %in% high_cells, colnames(spat_mat) %in% - high_cells] + high_cells + ] rowhubs <- rowSums_flex(subset_spat_mat) colhubs <- colSums_flex(subset_spat_mat) hub_nr <- length(unique(c( names(colhubs[colhubs > hub_min_int]), - names(rowhubs[colhubs > hub_min_int])))) + names(rowhubs[colhubs > hub_min_int]) + ))) } fish_res <- stats::fisher.test(table_matrix)[c("p.value", "estimate")] @@ -72,11 +76,12 @@ NULL #' @describeIn spat_fisher_exact data.table implementation #' @keywords internal -.spat_fish_func_dt <- function(bin_matrix_DTm, - spat_netw_min, - calc_hub = FALSE, - hub_min_int = 3, - cores = NA) { +.spat_fish_func_dt <- function( + bin_matrix_DTm, + spat_netw_min, + calc_hub = FALSE, + hub_min_int = 3, + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -91,12 +96,15 @@ NULL bin_matrix_DTm, by.x = "from", by.y = "variable", - allow.cartesian = TRUE) + allow.cartesian = TRUE + ) data.table::setnames(spatial_network_min_ext, "value", "from_value") spatial_network_min_ext <- data.table::merge.data.table( - spatial_network_min_ext, by.x = c("to", "feat_ID"), - bin_matrix_DTm, by.y = c("variable", "feat_ID")) + spatial_network_min_ext, + by.x = c("to", "feat_ID"), + bin_matrix_DTm, by.y = c("variable", "feat_ID") + ) data.table::setnames(spatial_network_min_ext, "value", "to_value") @@ -121,7 +129,8 @@ NULL # sort the combinations and run fisher test data.table::setorder(freq_summary2, feat_ID, combn, -N) fish_results <- freq_summary2[, stats::fisher.test( - matrix(N, nrow = 2))[c(1, 3)], by = feat_ID] + matrix(N, nrow = 2) + )[c(1, 3)], by = feat_ID] ## hubs ## @@ -140,14 +149,17 @@ NULL # get hubs and add 0's hub_DT <- double_pos_both[V1 > hub_min_int, .N, by = feat_ID] hub_DT_zeroes <- data.table::data.table(feat_ID = unique( - spatial_network_min_ext$feat_ID), N = 0) + spatial_network_min_ext$feat_ID + ), N = 0) hub_DT2 <- rbind(hub_DT, hub_DT_zeroes) hub_DT2 <- hub_DT2[, sum(N), by = feat_ID] data.table::setnames(hub_DT2, "V1", "hub_nr") fish_results <- data.table::merge.data.table( - fish_results, hub_DT2, by = "feat_ID") + fish_results, hub_DT2, + by = "feat_ID" + ) } return(fish_results) @@ -164,11 +176,12 @@ NULL #' @rdname spat_odds_ratio #' @keywords internal -.spat_or_func <- function(feat, - bin_matrix, - spat_mat, - calc_hub = FALSE, - hub_min_int = 3) { +.spat_or_func <- function( + feat, + bin_matrix, + spat_mat, + calc_hub = FALSE, + hub_min_int = 3) { feat_vector <- bin_matrix[rownames(bin_matrix) == feat, ] feat_vectorA <- feat_vector[names(feat_vector) %in% rownames(spat_mat)] @@ -204,7 +217,8 @@ NULL high_cells <- names(feat_vector[feat_vector == 1]) subset_spat_mat <- spat_mat[ rownames(spat_mat) %in% high_cells, colnames(spat_mat) %in% - high_cells] + high_cells + ] if (length(subset_spat_mat) == 1) { hub_nr <- 0 @@ -212,14 +226,16 @@ NULL rowhubs <- rowSums_flex(subset_spat_mat) colhubs <- colSums_flex(subset_spat_mat) hub_nr <- length(unique(c(names( - colhubs[colhubs > hub_min_int]), names( - rowhubs[colhubs > hub_min_int])))) + colhubs[colhubs > hub_min_int] + ), names( + rowhubs[colhubs > hub_min_int] + )))) } fish_matrix <- table_matrix fish_matrix <- fish_matrix / 1000 OR <- ((fish_matrix[1] * fish_matrix[4]) / - (fish_matrix[2] * fish_matrix[3])) + (fish_matrix[2] * fish_matrix[3])) return(c(feats = list(feat), OR, hubs = list(hub_nr))) } @@ -234,11 +250,12 @@ NULL #' @describeIn spat_odds_ratio data.table implementation #' @keywords internal -.spat_or_func_dt <- function(bin_matrix_DTm, - spat_netw_min, - calc_hub = FALSE, - hub_min_int = 3, - cores = NA) { +.spat_or_func_dt <- function( + bin_matrix_DTm, + spat_netw_min, + calc_hub = FALSE, + hub_min_int = 3, + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -251,12 +268,15 @@ NULL spatial_network_min_ext <- data.table::merge.data.table( spat_netw_min, bin_matrix_DTm, by.x = "from", by.y = "variable", - allow.cartesian = TRUE) + allow.cartesian = TRUE + ) data.table::setnames(spatial_network_min_ext, "value", "from_value") spatial_network_min_ext <- data.table::merge.data.table( - spatial_network_min_ext, by.x = c("to", "feat_ID"), - bin_matrix_DTm, by.y = c("variable", "feat_ID")) + spatial_network_min_ext, + by.x = c("to", "feat_ID"), + bin_matrix_DTm, by.y = c("variable", "feat_ID") + ) data.table::setnames(spatial_network_min_ext, "value", "to_value") @@ -281,7 +301,9 @@ NULL # sort the combinations and run fisher test setorder(freq_summary2, feat_ID, combn, -N) or_results <- freq_summary2[ - , .or_test_func(matrix(N, nrow = 2)), by = feat_ID] + , .or_test_func(matrix(N, nrow = 2)), + by = feat_ID + ] ## hubs ## @@ -300,14 +322,17 @@ NULL # get hubs and add 0's hub_DT <- double_pos_both[V1 > hub_min_int, .N, by = feat_ID] hub_DT_zeroes <- data.table::data.table( - feat_ID = unique(spatial_network_min_ext$feat_ID), N = 0) + feat_ID = unique(spatial_network_min_ext$feat_ID), N = 0 + ) hub_DT2 <- rbind(hub_DT, hub_DT_zeroes) hub_DT2 <- hub_DT2[, sum(N), by = feat_ID] data.table::setnames(hub_DT2, "V1", "hub_nr") or_results <- data.table::merge.data.table( - or_results, hub_DT2, by = "feat_ID") + or_results, hub_DT2, + by = "feat_ID" + ) } return(or_results) @@ -336,17 +361,18 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using a 'simple' and #' efficient for loop #' @keywords internal -.calc_spatial_enrichment_minimum <- function(spatial_network, - bin_matrix, - adjust_method = "fdr", - do_fisher_test = TRUE) { +.calc_spatial_enrichment_minimum <- function( + spatial_network, + bin_matrix, + adjust_method = "fdr", + do_fisher_test = TRUE) { # data.table variables from <- to <- feats <- variable <- value <- p.value <- adj.p.value <- score <- estimate <- NULL spatial_network_min <- spatial_network[, .(from, to)] - all_colindex <- 1:ncol(bin_matrix) + all_colindex <- seq_len(ncol(bin_matrix)) names(all_colindex) <- colnames(bin_matrix) # code for possible combinations @@ -355,15 +381,17 @@ NULL # preallocate final matrix for results matrix_res <- matrix( - data = NA, nrow = nrow(bin_matrix), ncol = nrow(spatial_network_min)) + data = NA, nrow = nrow(bin_matrix), ncol = nrow(spatial_network_min) + ) ## 1. summarize results for each edge in the network - for (row_i in 1:nrow(spatial_network_min)) { + for (row_i in seq_len(nrow(spatial_network_min))) { from_id <- spatial_network_min[row_i][["from"]] to_id <- spatial_network_min[row_i][["to"]] sumres <- data.table::as.data.table(bin_matrix[ - , all_colindex[c(from_id, to_id)]]) + , all_colindex[c(from_id, to_id)] + ]) sumres[, combn := paste0(get(from_id), "-", get(to_id))] code_res <- convert_code[sumres$combn] @@ -378,7 +406,7 @@ NULL ## 2. calculate the frequencies of possible combinations ## # '0-0' = 1, '0-1' = 2, '1-0' = 3 and '1-1' = 4 - for (row_i in 1:nrow(matrix_res)) { + for (row_i in seq_len(nrow(matrix_res))) { x <- matrix_res[row_i, ] x <- factor(x, levels = c(1, 2, 3, 4)) tabres <- as.vector(table(x)) @@ -387,7 +415,7 @@ NULL } rownames(table_res) <- rownames(matrix_res) - colnames(table_res) <- 1:4 + colnames(table_res) <- seq_len(4) rable_resDT <- data.table::as.data.table(table_res) rable_resDT[, feats := rownames(table_res)] @@ -398,20 +426,26 @@ NULL ## run fisher test ## if (do_fisher_test == TRUE) { results <- rable_resDTm[, stats::fisher.test(matrix( - value, nrow = 2))[c(1, 3)], by = feats] + value, + nrow = 2 + ))[c(1, 3)], by = feats] # replace zero p-values with lowest p-value min_pvalue <- min(results$p.value[results$p.value > 0]) results[, p.value := ifelse(p.value == 0, min_pvalue, p.value)] results[, adj.p.value := stats::p.adjust( - p.value, method = adjust_method)] + p.value, + method = adjust_method + )] # sort feats based on p-value and estimate results[, score := -log(p.value) * estimate] data.table::setorder(results, -score) } else { results <- rable_resDTm[, .or_test_func(matrix( - value, nrow = 2)), by = feats] + value, + nrow = 2 + )), by = feats] data.table::setorder(results, -estimate) } @@ -421,21 +455,24 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using 'matrix' #' implementation #' @keywords internal -.calc_spatial_enrichment_matrix <- function(spatial_network, - bin_matrix, - adjust_method = "fdr", - do_fisher_test = TRUE, - do_parallel = TRUE, - cores = NA, - calc_hub = FALSE, - hub_min_int = 3, - verbose = TRUE) { +.calc_spatial_enrichment_matrix <- function( + spatial_network, + bin_matrix, + adjust_method = "fdr", + do_fisher_test = TRUE, + do_parallel = TRUE, + cores = NA, + calc_hub = FALSE, + hub_min_int = 3, + verbose = TRUE) { # data.table variables verbose <- feats <- p.value <- estimate <- adj.p.value <- score <- NULL # convert spatial network data.table to spatial matrix dc_spat_network <- data.table::dcast.data.table( - spatial_network, formula = to ~ from, value.var = "distance", fill = 0) + spatial_network, + formula = to ~ from, value.var = "distance", fill = 0 + ) spat_mat <- dt_to_matrix(dc_spat_network) spat_mat[spat_mat > 0] <- 1 @@ -486,13 +523,16 @@ NULL if (do_fisher_test == TRUE) { result[, c("p.value", "estimate") := list( - as.numeric(p.value), as.numeric(estimate))] + as.numeric(p.value), as.numeric(estimate) + )] # convert p.value = 0 to lowest p-value min_pvalue <- min(result$p.value[result$p.value > 0]) result[, p.value := ifelse(p.value == 0, min_pvalue, p.value)] result[, adj.p.value := stats::p.adjust( - p.value, method = adjust_method)] + p.value, + method = adjust_method + )] result[, score := -log(p.value) * estimate] data.table::setorder(result, -score) @@ -508,15 +548,14 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using 'data.table' #' implementation #' @keywords internal -.calc_spatial_enrichment_dt <- function( - bin_matrix, - spatial_network, - calc_hub = FALSE, - hub_min_int = 3, - group_size = "automatic", - do_fisher_test = TRUE, - adjust_method = "fdr", - cores = NA) { +.calc_spatial_enrichment_dt <- function(bin_matrix, + spatial_network, + calc_hub = FALSE, + hub_min_int = 3, + group_size = "automatic", + do_fisher_test = TRUE, + adjust_method = "fdr", + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -542,13 +581,18 @@ NULL } groups <- ceiling(nrow(bin_matrix) / group_size) - cut_groups <- cut(1:nrow(bin_matrix), breaks = groups, labels = 1:groups) + cut_groups <- cut(seq_len(nrow(bin_matrix)), + breaks = groups, + labels = seq_len(groups) + ) if (any(table(cut_groups) == 1)) { - stop("With group size = ", group_size, + stop( + "With group size = ", group_size, " you have a single gene in a group. Manually pick another group - size") + size" + ) } - indexes <- 1:nrow(bin_matrix) + indexes <- seq_len(nrow(bin_matrix)) names(indexes) <- cut_groups @@ -559,7 +603,9 @@ NULL bin_matrix_DT <- data.table::as.data.table(bin_matrix[sel_indices, ]) bin_matrix_DT[, feat_ID := rownames(bin_matrix[sel_indices, ])] bin_matrix_DTm <- data.table::melt.data.table( - bin_matrix_DT, id.vars = "feat_ID") + bin_matrix_DT, + id.vars = "feat_ID" + ) if (do_fisher_test == TRUE) { test <- .spat_fish_func_dt( @@ -589,7 +635,9 @@ NULL min_pvalue <- min(result$p.value[result$p.value > 0]) result[, p.value := ifelse(p.value == 0, min_pvalue, p.value)] result[, adj.p.value := stats::p.adjust( - p.value, method = adjust_method)] + p.value, + method = adjust_method + )] result[, score := -log(p.value) * estimate] data.table::setorder(result, -score) @@ -622,7 +670,6 @@ NULL #' @param bin_method method to binarize gene expression #' @param expression_values expression values to use #' @param subset_feats only select a subset of features to test -#' @param subset_genes deprecated, use subset_feats #' @param spatial_network_name name of spatial network to use #' (default = 'spatial_network') #' @param spatial_network_k different k's for a spatial kNN to evaluate @@ -715,40 +762,38 @@ NULL #' @rdname binSpect #' @export -binSpect <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - spatial_network_k = NULL, - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - bin_matrix = NULL, - summarize = c("p.value", "adj.p.value"), - return_gobject = FALSE -) { +binSpect <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + spatial_network_k = NULL, + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + bin_matrix = NULL, + summarize = c("p.value", "adj.p.value"), + return_gobject = FALSE) { # TODO align set.seed, set_seed, seed_number naming and usage across # packages # use only param seed. If NULL, set no seed. If !NULL set value as seed @@ -769,7 +814,7 @@ binSpect <- function( "subset_feats", "reduce_network", "kmeans_algo", "nstart", "iter_max", "extreme_nr", "sample_nr", "percentage_rank", "do_fisher_test", "adjust_method", - "calc_hub" , "hub_min_int", "get_av_expr", "get_high_expr", + "calc_hub", "hub_min_int", "get_av_expr", "get_high_expr", "implementation", "group_size", "do_parallel", "cores", "seed", "verbose" )) @@ -779,20 +824,20 @@ binSpect <- function( gobject = gobject, spatial_network_k = spatial_network_k, knn_params = knn_params, - summarize = summarize, + summarize = summarize )) } else { output <- do.call(binSpectSingle, args = c(a, gobject = gobject, spatial_network_name = spatial_network_name, - bin_matrix = bin_matrix, + bin_matrix = bin_matrix )) } if (isTRUE(return_gobject)) { - result_dt <- data.table::data.table( - feats = output$feats, pval = output$adj.p.value) + feats = output$feats, pval = output$adj.p.value + ) data.table::setnames(result_dt, old = "pval", new = "binSpect.pval") gobject <- addFeatMetadata( gobject, @@ -817,30 +862,31 @@ binSpect <- function( #' @param expression_matrix expression matrix #' @param spatial_network spatial network in data.table format #' @export -binSpectSingleMatrix <- function(expression_matrix, - spatial_network = NULL, - bin_matrix = NULL, - bin_method = c("kmeans", "rank"), - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = FALSE, - set.seed = deprecated(), - seed = 1234) { +binSpectSingleMatrix <- function( + expression_matrix, + spatial_network = NULL, + bin_matrix = NULL, + bin_method = c("kmeans", "rank"), + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = FALSE, + set.seed = deprecated(), + seed = 1234) { if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( when = "4.0.3", @@ -874,11 +920,14 @@ binSpectSingleMatrix <- function(expression_matrix, # kmeans algorithm kmeans_algo <- match.arg( kmeans_algo, - choices = c("kmeans", "kmeans_arma", "kmeans_arma_subset")) + choices = c("kmeans", "kmeans_arma", "kmeans_arma_subset") + ) # implementation implementation <- match.arg( - implementation, choices = c("data.table", "simple", "matrix")) + implementation, + choices = c("data.table", "simple", "matrix") + ) # spatial network @@ -974,7 +1023,8 @@ binSpectSingleMatrix <- function(expression_matrix, # expression if (!is.null(subset_feats)) { expr_values <- expression_matrix[ - rownames(expression_matrix) %in% subset_feats, ] + rownames(expression_matrix) %in% subset_feats, + ] } else { expr_values <- expression_matrix } @@ -984,7 +1034,8 @@ binSpectSingleMatrix <- function(expression_matrix, mean(x[x > 0]) }) av_expr_DT <- data.table::data.table( - feats = names(av_expr), av_expr = av_expr) + feats = names(av_expr), av_expr = av_expr + ) result <- merge(result, av_expr_DT, by = "feats") vmsg(.v = verbose, "\n 3. (optional) average expression of high @@ -999,7 +1050,8 @@ binSpectSingleMatrix <- function(expression_matrix, if (get_high_expr) { high_expr <- rowSums(bin_matrix) high_expr_DT <- data.table::data.table( - feats = names(high_expr), high_expr = high_expr) + feats = names(high_expr), high_expr = high_expr + ) result <- merge(result, high_expr_DT, by = "feats") vmsg(.v = verbose, "\n 4. (optional) number of high expressing cells @@ -1021,34 +1073,35 @@ binSpectSingleMatrix <- function(expression_matrix, #' @describeIn binSpect binSpect for a single spatial network #' @export -binSpectSingle <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - set.seed = deprecated(), - seed = 1234, - bin_matrix = NULL) { +binSpectSingle <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + set.seed = deprecated(), + seed = 1234, + bin_matrix = NULL) { ## deprecated arguments if (is_present(set.seed) && !is.function(set.seed)) { @@ -1076,7 +1129,8 @@ binSpectSingle <- function(gobject, ## 1. expression matrix values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1094,15 +1148,18 @@ binSpectSingle <- function(gobject, output = "networkDT" ) if (is.null(spatial_network)) { - stop("spatial_network_name: ", spatial_network_name, - " does not exist, create a spatial network first") + stop( + "spatial_network_name: ", spatial_network_name, + " does not exist, create a spatial network first" + ) } # convert to full network if (reduce_network == FALSE) { spatial_network <- convert_to_full_spatial_network(spatial_network) data.table::setnames( - spatial_network, c("source", "target"), c("from", "to")) + spatial_network, c("source", "target"), c("from", "to") + ) } @@ -1139,35 +1196,36 @@ binSpectSingle <- function(gobject, #' @describeIn binSpect binSpect for multiple spatial kNN networks #' @export -binSpectMulti <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_k = c(5, 10, 20), - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = c(10, 30), - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - summarize = c("adj.p.value", "p.value")) { +binSpectMulti <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_k = c(5, 10, 20), + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = c(10, 30), + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + summarize = c("adj.p.value", "p.value")) { ## deprecated arguments if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( @@ -1191,8 +1249,9 @@ binSpectMulti <- function(gobject, feat_type = feat_type ) - if (verbose == TRUE) + if (verbose == TRUE) { message("This is the multi parameter version of binSpect") + } # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) @@ -1223,8 +1282,9 @@ binSpectMulti <- function(gobject, )) for (rank_i in percentage_rank) { - if (verbose == TRUE) + if (verbose == TRUE) { cat("Run for k = ", k, " and rank % = ", rank_i) + } result <- binSpectSingle( gobject = temp_gobject, @@ -1266,7 +1326,8 @@ binSpectMulti <- function(gobject, ## expression matrix values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1347,10 +1408,12 @@ binSpectMulti <- function(gobject, simple_result <- combined_result[, sum(log(get(summarize))), by = feats] simple_result[, V1 := V1 * -2] simple_result[, p.val := stats::pchisq( - q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE)] + q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE + )] return(list( - combined = combined_result, simple = simple_result[, .(feats, p.val)])) + combined = combined_result, simple = simple_result[, .(feats, p.val)] + )) } @@ -1392,31 +1455,32 @@ binSpectMulti <- function(gobject, #' is set. #' @param summarize summarize the p-values or adjusted p-values #' @returns data.table with results -binSpectMultiMatrix <- function(expression_matrix, - spatial_networks, - bin_method = c("kmeans", "rank"), - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = c(10, 30), - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - summarize = c("adj.p.value", "p.value")) { +binSpectMultiMatrix <- function( + expression_matrix, + spatial_networks, + bin_method = c("kmeans", "rank"), + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = c(10, 30), + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + summarize = c("adj.p.value", "p.value")) { if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( when = "4.0.3", @@ -1429,8 +1493,9 @@ binSpectMultiMatrix <- function(expression_matrix, } - if (verbose == TRUE) + if (verbose == TRUE) { message("This is the multi parameter version of binSpect") + } # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) @@ -1451,8 +1516,9 @@ binSpectMultiMatrix <- function(expression_matrix, for (k in seq_along(spatial_networks)) { for (rank_i in percentage_rank) { - if (verbose == TRUE) + if (verbose == TRUE) { cat("Run for spatial network ", k, " and rank % = ", rank_i) + } result <- binSpectSingleMatrix( expression_matrix = expression_matrix, @@ -1545,10 +1611,12 @@ binSpectMultiMatrix <- function(expression_matrix, simple_result <- combined_result[, sum(log(get(summarize))), by = feats] simple_result[, V1 := V1 * -2] simple_result[, p.val := stats::pchisq( - q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE)] + q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE + )] return(list( - combined = combined_result, simple = simple_result[, .(feats, p.val)])) + combined = combined_result, simple = simple_result[, .(feats, p.val)] + )) } @@ -1576,16 +1644,17 @@ binSpectMultiMatrix <- function(expression_matrix, #' @returns data.table with spatial scores #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' silhouetteRank(g) #' @export -silhouetteRank <- function(gobject, - expression_values = c("normalized", "scaled", "custom"), - metric = "euclidean", - subset_genes = NULL, - rbp_p = 0.95, - examine_top = 0.3, - python_path = NULL) { +silhouetteRank <- function( + gobject, + expression_values = c("normalized", "scaled", "custom"), + metric = "euclidean", + subset_genes = NULL, + rbp_p = 0.95, + examine_top = 0.3, + python_path = NULL) { # expression values values <- match.arg(expression_values, c("normalized", "scaled", "custom")) expr_values <- getExpression( @@ -1621,7 +1690,9 @@ silhouetteRank <- function(gobject, ## prepare python path and louvain script reticulate::use_python(required = TRUE, python = python_path) python_silh_function <- system.file( - "python", "python_spatial_genes.py", package = "Giotto") + "python", "python_spatial_genes.py", + package = "Giotto" + ) reticulate::source_python(file = python_silh_function) output_python <- python_spatial_genes( @@ -1667,21 +1738,22 @@ silhouetteRank <- function(gobject, #' @returns data.table with spatial scores #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' silhouetteRankTest(g) #' @export -silhouetteRankTest <- function(gobject, - expression_values = c("normalized", "scaled", "custom"), - subset_genes = NULL, - overwrite_input_bin = TRUE, - rbp_ps = c(0.95, 0.99), - examine_tops = c(0.005, 0.010, 0.050, 0.100, 0.300), - matrix_type = "dissim", - num_core = 4, - parallel_path = "/usr/bin", - output = NULL, - query_sizes = 10L, - verbose = FALSE) { +silhouetteRankTest <- function( + gobject, + expression_values = c("normalized", "scaled", "custom"), + subset_genes = NULL, + overwrite_input_bin = TRUE, + rbp_ps = c(0.95, 0.99), + examine_tops = c(0.005, 0.010, 0.050, 0.100, 0.300), + matrix_type = "dissim", + num_core = 4, + parallel_path = "/usr/bin", + output = NULL, + query_sizes = 10L, + verbose = FALSE) { # data.table variables cell_ID <- sdimx <- sdimy <- sdimz <- NULL @@ -1696,7 +1768,6 @@ silhouetteRankTest <- function(gobject, "To install: \n", "install.packages('eva')" ) - } ## test if python package is installed @@ -1747,16 +1818,28 @@ silhouetteRankTest <- function(gobject, if (is.null(output)) { save_dir <- readGiottoInstructions(gobject, param = "save_dir") silh_output_dir <- paste0(save_dir, "/", "silhouetteRank_output/") - if (!file.exists(silh_output_dir)) dir.create( - silh_output_dir, recursive = TRUE) + if (!file.exists(silh_output_dir)) { + dir.create( + silh_output_dir, + recursive = TRUE + ) + } } else if (file.exists(output)) { silh_output_dir <- paste0(output, "/", "silhouetteRank_output/") - if (!file.exists(silh_output_dir)) dir.create( - silh_output_dir, recursive = TRUE) + if (!file.exists(silh_output_dir)) { + dir.create( + silh_output_dir, + recursive = TRUE + ) + } } else { silh_output_dir <- paste0(output, "/", "silhouetteRank_output/") - if (!file.exists(silh_output_dir)) dir.create( - silh_output_dir, recursive = TRUE) + if (!file.exists(silh_output_dir)) { + dir.create( + silh_output_dir, + recursive = TRUE + ) + } } # log directory @@ -1785,8 +1868,11 @@ silhouetteRankTest <- function(gobject, silh_output_dir_norm <- normalizePath(silh_output_dir) expr_values_path_norm <- paste0(silh_output_dir_norm, "/", "expression.txt") - data.table::fwrite(data.table::as.data.table( - expr_values, keep.rownames = "gene"), + data.table::fwrite( + data.table::as.data.table( + expr_values, + keep.rownames = "gene" + ), file = expr_values_path_norm, quote = FALSE, sep = "\t", @@ -1800,7 +1886,9 @@ silhouetteRankTest <- function(gobject, python_path <- readGiottoInstructions(gobject, param = "python_path") reticulate::use_python(required = TRUE, python = python_path) python_silh_function <- system.file( - "python", "silhouette_rank_wrapper.py", package = "Giotto") + "python", "silhouette_rank_wrapper.py", + package = "Giotto" + ) reticulate::source_python(file = python_silh_function) @@ -1847,29 +1935,30 @@ silhouetteRankTest <- function(gobject, #' @param default_save_name default save name for saving, don't change, #' change save_name in save_param #' @returns a list of data.frames with results and plot (optional) -#' @details This function is a wrapper for the SpatialDE method originally +#' @details This function is a wrapper for the SpatialDE method originally #' implemented #' in python. See publication \doi{10.1038/nmeth.4636} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' spatialDE(g) #' @export -spatialDE <- function(gobject = NULL, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - expression_values = c("raw", "normalized", "scaled", "custom"), - size = c(4, 2, 1), - color = c("blue", "green", "red"), - sig_alpha = 0.5, - unsig_alpha = 0.5, - python_path = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "SpatialDE") { +spatialDE <- function( + gobject = NULL, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + expression_values = c("raw", "normalized", "scaled", "custom"), + size = c(4, 2, 1), + color = c("blue", "green", "red"), + sig_alpha = 0.5, + unsig_alpha = 0.5, + python_path = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "SpatialDE") { # test if SPARK is installed ## module_test <- reticulate::py_module_available("SpatialDE") @@ -1919,7 +2008,8 @@ spatialDE <- function(gobject = NULL, # expression values <- match.arg( - expression_values, c("raw", "normalized", "scaled", "custom")) + expression_values, c("raw", "normalized", "scaled", "custom") + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1936,7 +2026,9 @@ spatialDE <- function(gobject = NULL, ## source python file reticulate::use_python(required = TRUE, python = python_path) reader_path <- system.file( - "python", "SpatialDE_wrapper.py", package = "Giotto") + "python", "SpatialDE_wrapper.py", + package = "Giotto" + ) reticulate::source_python(file = reader_path) ## get spatial locations @@ -1951,13 +2043,15 @@ spatialDE <- function(gobject = NULL, ## run spatialDE Spatial_DE_results <- Spatial_DE( - as.data.frame(t(as.matrix(expr_values))), spatial_locs) + as.data.frame(t(as.matrix(expr_values))), spatial_locs + ) results <- as.data.frame(reticulate::py_to_r(Spatial_DE_results[[1]])) if (length(Spatial_DE_results) == 2) { ms_results <- as.data.frame( - reticulate::py_to_r(Spatial_DE_results[[2]])) + reticulate::py_to_r(Spatial_DE_results[[2]]) + ) spatial_genes_results <- list(results, ms_results) names(spatial_genes_results) <- c("results", "ms_results") } else { @@ -1968,11 +2062,17 @@ spatialDE <- function(gobject = NULL, # print, return and save parameters show_plot <- ifelse(is.na(show_plot), readGiottoInstructions( - gobject, param = "show_plot"), show_plot) + gobject, + param = "show_plot" + ), show_plot) save_plot <- ifelse(is.na(save_plot), readGiottoInstructions( - gobject, param = "save_plot"), save_plot) + gobject, + param = "save_plot" + ), save_plot) return_plot <- ifelse(is.na(return_plot), readGiottoInstructions( - gobject, param = "return_plot"), return_plot) + gobject, + param = "return_plot" + ), return_plot) ## create plot if (isTRUE(show_plot) || @@ -1997,8 +2097,11 @@ spatialDE <- function(gobject = NULL, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = FSV_plot, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = FSV_plot, + default_save_name = default_save_name + ), save_param) + ) } ## return results and plot (optional) @@ -2025,24 +2128,25 @@ spatialDE <- function(gobject = NULL, #' @param python_path specify specific path to python if required #' @param return_gobject show plot #' @returns An updated giotto object -#' @details This function is a wrapper for the SpatialAEH method +#' @details This function is a wrapper for the SpatialAEH method #' implemented in the ... #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' spatialAEH(g) #' @export -spatialAEH <- function(gobject = NULL, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - SpatialDE_results = NULL, - name_pattern = "AEH_patterns", - expression_values = c("raw", "normalized", "scaled", "custom"), - pattern_num = 6, - l = 1.05, - python_path = NULL, - return_gobject = TRUE) { +spatialAEH <- function( + gobject = NULL, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + SpatialDE_results = NULL, + name_pattern = "AEH_patterns", + expression_values = c("raw", "normalized", "scaled", "custom"), + pattern_num = 6, + l = 1.05, + python_path = NULL, + return_gobject = TRUE) { # data.table variables cell_ID <- NULL @@ -2059,7 +2163,8 @@ spatialAEH <- function(gobject = NULL, # expression values <- match.arg( - expression_values, c("raw", "normalized", "scaled", "custom")) + expression_values, c("raw", "normalized", "scaled", "custom") + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2075,7 +2180,9 @@ spatialAEH <- function(gobject = NULL, ## source python file reticulate::use_python(required = TRUE, python = python_path) reader_path <- system.file( - "python", "SpatialDE_wrapper.py", package = "Giotto") + "python", "SpatialDE_wrapper.py", + package = "Giotto" + ) reticulate::source_python(file = reader_path) @@ -2104,14 +2211,17 @@ spatialAEH <- function(gobject = NULL, spatial_pattern_results <- list(histology_results, cell_pattern_score) names(spatial_pattern_results) <- c( - "histology_results", "cell_pattern_score") + "histology_results", "cell_pattern_score" + ) if (return_gobject == TRUE) { dt_res <- data.table::as.data.table( - spatial_pattern_results[["cell_pattern_score"]]) + spatial_pattern_results[["cell_pattern_score"]] + ) dt_res[["cell_ID"]] <- rownames( - spatial_pattern_results[["cell_pattern_score"]]) + spatial_pattern_results[["cell_pattern_score"]] + ) gobject@spatial_enrichment[[name_pattern]] <- dt_res return(gobject) } else { @@ -2131,15 +2241,18 @@ spatialAEH <- function(gobject = NULL, #' @param unsig_alpha transparency of unsignificant genes #' @returns ggplot object #' @keywords internal -FSV_show <- function(results, - ms_results = NULL, - size = c(4, 2, 1), - color = c("blue", "green", "red"), - sig_alpha = 0.5, - unsig_alpha = 0.5) { +FSV_show <- function( + results, + ms_results = NULL, + size = c(4, 2, 1), + color = c("blue", "green", "red"), + sig_alpha = 0.5, + unsig_alpha = 0.5) { results$FSV95conf <- 2 * sqrt(results$s2_FSV) results$intervals <- cut( - results$FSV95conf, c(0, 1e-1, 1e0, Inf), label = FALSE) + results$FSV95conf, c(0, 1e-1, 1e0, Inf), + label = FALSE + ) results$log_pval <- log10(results$pval) if (is.null(ms_results)) { @@ -2160,7 +2273,8 @@ FSV_show <- function(results, pl <- pl + ggplot2::geom_point( data = results[results$qval < 0.05, ], ggplot2::aes_string( - x = "FSV", y = "log_pval", fill = "model_bic", size = "intervals"), + x = "FSV", y = "log_pval", fill = "model_bic", size = "intervals" + ), show.legend = TRUE, shape = 21, alpha = sig_alpha, stroke = 0.1, color = "black" ) + @@ -2179,10 +2293,12 @@ FSV_show <- function(results, labels = c("linear", "periodical", "general") ) + ggplot2::geom_hline(yintercept = max(results[ - results$qval < 0.05, ]$log_pval), linetype = "dashed") + + results$qval < 0.05, + ]$log_pval), linetype = "dashed") + ggplot2::geom_text(ggplot2::aes(0.9, max(results[ - results$qval < 0.05, ]$log_pval), - label = "FDR = 0.05", vjust = -1 + results$qval < 0.05, + ]$log_pval), + label = "FDR = 0.05", vjust = -1 )) + ggplot2::scale_y_reverse() @@ -2208,23 +2324,24 @@ FSV_show <- function(results, #' @param \dots Additional parameters to the #' \code{\link[trendsceek]{trendsceek_test}} function #' @returns data.frame with trendsceek spatial genes results -#' @details This function is a wrapper for the trendsceek_test method +#' @details This function is a wrapper for the trendsceek_test method #' implemented in the trendsceek package #' Publication: \doi{10.1038/nmeth.4634} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' trendSceek(g) #' @export -trendSceek <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - expression_values = c("normalized", "raw"), - subset_genes = NULL, - nrand = 100, - ncores = 8, - ...) { +trendSceek <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + expression_values = c("normalized", "raw"), + subset_genes = NULL, + nrand = 100, + ncores = 8, + ...) { # verify if optional package is installed package_check( pkg_name = "trendsceek", @@ -2294,7 +2411,9 @@ trendSceek <- function(gobject, ## run trendsceek trendsceektest <- trendsceek::trendsceek_test( - pp, nrand = nrand, ncores = ncores, ...) + pp, + nrand = nrand, ncores = ncores, ... + ) ## get final results trendsceektest <- trendsceektest$supstats_wide @@ -2322,7 +2441,7 @@ trendSceek <- function(gobject, #' @param \dots Additional parameters to the \code{\link[SPARK]{spark.vc}} #' function #' @returns data.table with SPARK spatial genes results or the SPARK object -#' @details This function is a wrapper for the method implemented in the +#' @details This function is a wrapper for the method implemented in the #' SPARK package: #' \pkg{SPARK} package: #' 1. **CreateSPARKObject** create a SPARK object from a giotto object @@ -2334,20 +2453,21 @@ trendSceek <- function(gobject, #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' spark(g) #' @export -spark <- function(gobject, - spat_loc_name = "raw", - feat_type = NULL, - spat_unit = NULL, - percentage = 0.1, - min_count = 10, - expression_values = "raw", - num_core = 5, - covariates = NULL, - return_object = c("data.table", "spark"), - ...) { +spark <- function( + gobject, + spat_loc_name = "raw", + feat_type = NULL, + spat_unit = NULL, + percentage = 0.1, + min_count = 10, + expression_values = "raw", + num_core = 5, + covariates = NULL, + return_object = c("data.table", "spark"), + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2400,7 +2520,7 @@ spark <- function(gobject, ## create SPARK object for analysis and filter out lowly expressed genes sobject <- SPARK::CreateSPARKObject( counts = expr, - location = locs[, 1:2], + location = locs[, seq_len(2)], percentage = percentage, min_total_counts = min_count ) @@ -2483,26 +2603,28 @@ spark <- function(gobject, #' @details #' Steps to identify spatial patterns: #' \itemize{ -#' \item{1. average gene expression for cells within a grid, see createSpatialGrid} -#' \item{2. perform PCA on the average grid expression profiles} -#' \item{3. convert variance of principlal components (PCs) to z-scores and select PCs based on a z-score threshold} +#' * 1. average gene expression for cells within a grid, see createSpatialGrid +#' * 2. perform PCA on the average grid expression profiles +#' * 3. convert variance of principal components (PCs) to z-scores and +#' select PCs based on a z-score threshold #' } #' @export -detectSpatialPatterns <- function(gobject, - expression_values = c("normalized", "scaled", "custom"), - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - scale_unit = FALSE, - ncp = 100, - show_plot = TRUE, - PC_zscore = 1.5) { +detectSpatialPatterns <- function( + gobject, + expression_values = c("normalized", "scaled", "custom"), + spatial_grid_name = "spatial_grid", + min_cells_per_grid = 4, + scale_unit = FALSE, + ncp = 100, + show_plot = TRUE, + PC_zscore = 1.5) { ############################################################################ stop(wrap_txt( - "This function has not been updated for use with the current version + "This function has not been updated for use with the current version of Giotto. See details: https://github.com/drieslab/Giotto/issues/666#issuecomment-1540447537", - errWidth = TRUE + errWidth = TRUE )) ############################################################################ # expression values to be used @@ -2535,10 +2657,12 @@ detectSpatialPatterns <- function(gobject, if (all(c("sdimx", "sdimy", "sdimz") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_3D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } else if (all(c("sdimx", "sdimy") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_2D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } @@ -2573,7 +2697,8 @@ detectSpatialPatterns <- function(gobject, X = t(loc_av_expr_matrix), scale.unit = scale_unit, ncp = ncp, - graph = FALSE) + graph = FALSE + ) # screeplot screeplot <- factoextra::fviz_eig(mypca, addlabels = TRUE, ylim = c(0, 50)) @@ -2605,7 +2730,8 @@ detectSpatialPatterns <- function(gobject, data.table::setnames(pca_matrix_DT, old = "dimkeep", dims_to_keep) } else { pca_matrix_DT <- data.table::as.data.table(pca_matrix[ - , seq_along(dims_to_keep)]) + , seq_along(dims_to_keep) + ]) pca_matrix_DT[, loc_ID := colnames(loc_av_expr_matrix)] } @@ -2620,7 +2746,8 @@ detectSpatialPatterns <- function(gobject, data.table::setnames(feat_matrix_DT, old = "featkeep", dims_to_keep) } else { feat_matrix_DT <- data.table::as.data.table(feat_matrix[ - , seq_along(dims_to_keep)]) + , seq_along(dims_to_keep) + ]) feat_matrix_DT[, gene_ID := rownames(loc_av_expr_matrix)] } @@ -2658,19 +2785,20 @@ detectSpatialPatterns <- function(gobject, #' change save_name in save_param #' @returns ggplot #' @export -showPattern2D <- function(gobject, - spatPatObj, - dimension = 1, - trim = c(0.02, 0.98), - background_color = "white", - grid_border_color = "grey", - show_legend = TRUE, - point_size = 1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPattern2D") { +showPattern2D <- function( + gobject, + spatPatObj, + dimension = 1, + trim = c(0.02, 0.98), + background_color = "white", + grid_border_color = "grey", + show_legend = TRUE, + point_size = 1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPattern2D") { if (!"spatPatObj" %in% class(spatPatObj)) { stop("spatPatObj needs to be the output from detectSpatialPatterns") } @@ -2685,16 +2813,21 @@ showPattern2D <- function(gobject, # annotate grid with PC values annotated_grid <- merge( - spatPatObj$spatial_grid, by.x = "gr_name", PC_DT, by.y = "loc_ID") + spatPatObj$spatial_grid, + by.x = "gr_name", PC_DT, by.y = "loc_ID" + ) # trim PC values if (!is.null(trim)) { boundaries <- stats::quantile(annotated_grid[[ - selected_PC]], probs = trim) + selected_PC + ]], probs = trim) annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] < boundaries[1]] <- boundaries[1] + selected_PC + ]] < boundaries[1]] <- boundaries[1] annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] > boundaries[2]] <- boundaries[2] + selected_PC + ]] > boundaries[2]] <- boundaries[2] } # 2D-plot @@ -2772,24 +2905,25 @@ showPattern <- function(gobject, spatPatObj, ...) { #' change save_name in save_param #' @returns plotly #' @export -showPattern3D <- function(gobject, - spatPatObj, - dimension = 1, - trim = c(0.02, 0.98), - background_color = "white", - grid_border_color = "grey", - show_legend = TRUE, - point_size = 1, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPattern3D") { +showPattern3D <- function( + gobject, + spatPatObj, + dimension = 1, + trim = c(0.02, 0.98), + background_color = "white", + grid_border_color = "grey", + show_legend = TRUE, + point_size = 1, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPattern3D") { # data.table variables center_x <- x_start <- x_end <- center_y <- y_start <- y_end <- center_z <- z_start <- z_end <- NULL @@ -2808,16 +2942,21 @@ showPattern3D <- function(gobject, # annotate grid with PC values annotated_grid <- merge( - spatPatObj$spatial_grid, by.x = "gr_name", PC_DT, by.y = "loc_ID") + spatPatObj$spatial_grid, + by.x = "gr_name", PC_DT, by.y = "loc_ID" + ) # trim PC values if (!is.null(trim)) { boundaries <- stats::quantile(annotated_grid[[ - selected_PC]], probs = trim) + selected_PC + ]], probs = trim) annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] < boundaries[1]] <- boundaries[1] + selected_PC + ]] < boundaries[1]] <- boundaries[1] annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] > boundaries[2]] <- boundaries[2] + selected_PC + ]] > boundaries[2]] <- boundaries[2] } @@ -2852,7 +2991,8 @@ showPattern3D <- function(gobject, ) )) dpl <- dpl %>% plotly::colorbar( - title = paste(paste("dim.", dimension, sep = ""), "genes", sep = " ")) + title = paste(paste("dim.", dimension, sep = ""), "genes", sep = " ") + ) # output plot return(GiottoVisuals::plot_output_handler( @@ -2890,18 +3030,19 @@ showPattern3D <- function(gobject, #' change save_name in save_param #' @returns ggplot #' @export -showPatternGenes <- function(gobject, - spatPatObj, - dimension = 1, - top_pos_genes = 5, - top_neg_genes = 5, - point_size = 1, - return_DT = FALSE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPatternGenes") { +showPatternGenes <- function( + gobject, + spatPatObj, + dimension = 1, + top_pos_genes = 5, + top_neg_genes = 5, + point_size = 1, + return_DT = FALSE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPatternGenes") { # data.table variables gene_ID <- NULL @@ -2921,11 +3062,14 @@ showPatternGenes <- function(gobject, # order and subset gene_cor_DT <- gene_cor_DT[ - !is.na(get(selected_PC))][order(get(selected_PC))] + !is.na(get(selected_PC)) + ][order(get(selected_PC))] subset <- gene_cor_DT[ - c(1:top_neg_genes, (nrow( - gene_cor_DT) - top_pos_genes):nrow(gene_cor_DT))] + c(seq_len(top_neg_genes), (nrow( + gene_cor_DT + ) - top_pos_genes):nrow(gene_cor_DT)) + ] subset[, gene_ID := factor(gene_ID, gene_ID)] ## return DT and make not plot ## @@ -2937,7 +3081,8 @@ showPatternGenes <- function(gobject, pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( data = subset, - aes_string(x = selected_PC, y = "gene_ID"), size = point_size) + aes_string(x = selected_PC, y = "gene_ID"), size = point_size + ) pl <- pl + ggplot2::geom_vline(xintercept = 0, linetype = 2) pl <- pl + ggplot2::labs(x = "correlation", y = "", title = selected_PC) pl <- pl + ggplot2::theme(plot.title = element_text(hjust = 0.5)) @@ -2970,13 +3115,14 @@ showPatternGenes <- function(gobject, #' @returns Data.table with genes associated with selected dimension (PC). #' @details Description. #' @export -selectPatternGenes <- function(spatPatObj, - dimensions = 1:5, - top_pos_genes = 10, - top_neg_genes = 10, - min_pos_cor = 0.5, - min_neg_cor = -0.5, - return_top_selection = FALSE) { +selectPatternGenes <- function( + spatPatObj, + dimensions = 1:5, + top_pos_genes = 10, + top_neg_genes = 10, + min_pos_cor = 0.5, + min_neg_cor = -0.5, + return_top_selection = FALSE) { if (!"spatPatObj" %in% class(spatPatObj)) { stop("spatPatObj needs to be the output from detectSpatialPatterns") } @@ -2996,11 +3142,15 @@ selectPatternGenes <- function(spatPatObj, # melt and select gene_cor_DT_m <- data.table::melt.data.table( - gene_cor_DT, id.vars = "gene_ID") + gene_cor_DT, + id.vars = "gene_ID" + ) gene_cor_DT_m[, top_pos_rank := rank(value), by = "variable"] gene_cor_DT_m[, top_neg_rank := rank(-value), by = "variable"] selection <- gene_cor_DT_m[ - top_pos_rank %in% 1:top_pos_genes | top_neg_rank %in% 1:top_neg_genes] + top_pos_rank %in% seq_len(top_pos_genes) | + top_neg_rank %in% seq_len(top_neg_genes) + ] # filter on min correlation selection <- selection[value > min_pos_cor | value < min_neg_cor] @@ -3017,9 +3167,11 @@ selectPatternGenes <- function(spatPatObj, # add other genes back output_selection <- uniq_selection[, .(gene_ID, variable)] other_genes <- gene_cor_DT[!gene_ID %in% output_selection$gene_ID][[ - "gene_ID"]] + "gene_ID" + ]] other_genes_DT <- data.table::data.table( - gene_ID = other_genes, variable = "noDim") + gene_ID = other_genes, variable = "noDim" + ) comb_output_genes <- rbind(output_selection, other_genes_DT) setnames(comb_output_genes, "variable", "patDim") @@ -3040,10 +3192,9 @@ selectPatternGenes <- function(spatPatObj, #' @title do_spatial_knn_smoothing #' @name do_spatial_knn_smoothing #' @description smooth gene expression over a kNN spatial network -#' @param gobject giotto object -#' @param expression_values gene expression values to use +#' @param expression_matrix gene expression values to use #' @param subset_feats subset of features to use -#' @param spatial_network_name name of spatial network to use +#' @param spatial_network spatial network to use #' @param b smoothing factor beteen 0 and 1 (default: automatic) #' @returns matrix with smoothened gene expression values based on kNN #' spatial network @@ -3053,10 +3204,11 @@ selectPatternGenes <- function(spatPatObj, #' number of k-neighbors in the selected spatial network. Setting b = 0 means #' no smoothing and b = 1 means no contribution from its own expression. #' @keywords internal -do_spatial_knn_smoothing <- function(expression_matrix, - spatial_network, - subset_feats = NULL, - b = NULL) { +do_spatial_knn_smoothing <- function( + expression_matrix, + spatial_network, + subset_feats = NULL, + b = NULL) { # checks if (!is.null(b)) { if (b > 1 | b < 0) { @@ -3084,7 +3236,9 @@ do_spatial_knn_smoothing <- function(expression_matrix, expr_values_dt <- data.table::as.data.table(as.matrix(expr_values)) expr_values_dt[, feat_ID := rownames(expr_values)] expr_values_dt_m <- data.table::melt.data.table( - expr_values_dt, id.vars = "feat_ID", variable.name = "cell_ID") + expr_values_dt, + id.vars = "feat_ID", variable.name = "cell_ID" + ) # merge spatial network and matrix @@ -3098,13 +3252,16 @@ do_spatial_knn_smoothing <- function(expression_matrix, # exclude 0's? # trimmed mean? spatial_network_ext_smooth <- spatial_network_ext[ - , mean(value), by = c("to", "feat_ID")] + , mean(value), + by = c("to", "feat_ID") + ] # convert back to matrix spatial_smooth_dc <- data.table::dcast.data.table( data = spatial_network_ext_smooth, formula = feat_ID ~ to, - value.var = "V1") + value.var = "V1" + ) spatial_smooth_matrix <- dt_to_matrix(spatial_smooth_dc) # if network was not fully connected, some cells might be missing and @@ -3117,11 +3274,13 @@ do_spatial_knn_smoothing <- function(expression_matrix, if (length(missing_cells) > 0) { missing_matrix <- expr_values[, missing_cells] spatial_smooth_matrix <- cbind(spatial_smooth_matrix[ - rownames(expr_values), ], missing_matrix) + rownames(expr_values), + ], missing_matrix) } spatial_smooth_matrix <- spatial_smooth_matrix[ - rownames(expr_values), colnames(expr_values)] + rownames(expr_values), colnames(expr_values) + ] # combine original and smoothed values according to smoothening b # create best guess for b if not given @@ -3169,11 +3328,12 @@ evaluate_provided_spatial_locations <- function(spatial_locs) { #' @description smooth gene expression over a defined spatial grid #' @returns matrix with smoothened gene expression values based on spatial grid #' @keywords internal -do_spatial_grid_averaging <- function(expression_matrix, - spatial_grid, - spatial_locs, - subset_feats = NULL, - min_cells_per_grid = 4) { +do_spatial_grid_averaging <- function( + expression_matrix, + spatial_grid, + spatial_locs, + subset_feats = NULL, + min_cells_per_grid = 4) { # matrix expr_values <- expression_matrix if (!is.null(subset_feats)) { @@ -3191,10 +3351,12 @@ do_spatial_grid_averaging <- function(expression_matrix, # annoate spatial locations with spatial grid if (all(c("sdimx", "sdimy", "sdimz") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_3D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } else if (all(c("sdimx", "sdimy") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_2D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } @@ -3210,210 +3372,84 @@ do_spatial_grid_averaging <- function(expression_matrix, loc_av_expr_list <- list() for (loc_name in loc_names) { loc_cell_IDs <- spatial_locs[gr_loc == loc_name]$cell_ID - subset_expr <- expr_values[, colnames(expr_values) %in% loc_cell_IDs] - if (is.vector(subset_expr) == TRUE) { - loc_av_expr <- subset_expr - } else { - loc_av_expr <- rowMeans(subset_expr) - } - loc_av_expr_list[[loc_name]] <- loc_av_expr - } - loc_av_expr_matrix <- do.call("cbind", loc_av_expr_list) - loc_av_expr_matrix <- as.matrix(loc_av_expr_matrix) - - return(loc_av_expr_matrix) -} - - - -#' @title detectSpatialCorFeatsMatrix -#' @name detectSpatialCorFeatsMatrix -#' @description Detect genes that are spatially correlated -#' @param expression_matrix provided expression matrix -#' @param method method to use for spatial averaging -#' @param spatial_network provided spatial network -#' @param spatial_grid provided spatial grid -#' @param spatial_locs provided spatial locations -#' @param subset_feats subset of features to use -#' @param network_smoothing smoothing factor beteen 0 and 1 -#' (has automatic default, see details) -#' @param min_cells_per_grid minimum number of cells to consider a grid -#' @param cor_method correlation method -#' @returns returns a spatial correlation object: `spatCorObject` -#' @details -#' For `method = network`, it expects a fully connected spatial network. -#' You can make sure to create a -#' fully connected network by setting minimal_k > 0 in the -#' \code{\link{createSpatialNetwork}} function. -#' 1. **grid-averaging:** average gene expression values within a predefined -#' spatial grid -#' 2. **network-averaging:** smoothens the gene expression matrix by -#' averaging the expression within one cell by using the neighbours within -#' the predefined spatial network. \eqn{b} is a smoothening factor passed by -#' `network_smoothing` param that defaults to \eqn{1 - 1/k}, where \eqn{k} -#' is the median number of k-neighbors in the selected spatial network. -#' Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no -#' contribution from its own expression. -#' -#' The `spatCorObject` can be further explored with `showSpatialCorGenes()` -#' @seealso \code{\link{showSpatialCorFeats}} -#' @md -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' detectSpatialCorFeatsMatrix(expression_matrix = getExpression( -#' g, output = "matrix"), method = "network", -#' spatial_network = getSpatialNetwork(g, output = "networkDT")) -#' @export -detectSpatialCorFeatsMatrix <- function(expression_matrix, - method = c("grid", "network"), - spatial_network, - spatial_grid, - spatial_locs, - subset_feats = NULL, - network_smoothing = NULL, - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { - ## correlation method to be used - cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) - - ## method to be used - method <- match.arg(method, choices = c("grid", "network")) - - ## spatial averaging or smoothing - if (method == "grid") { - loc_av_expr_matrix <- do_spatial_grid_averaging( - expression_matrix = as.matrix(expression_matrix), - spatial_grid = spatial_grid, - spatial_locs = spatial_locs, - subset_feats = subset_feats, - min_cells_per_grid = min_cells_per_grid - ) - - # data.table variables - feat_ID <- variable <- NULL - - cor_spat_matrix <- cor_flex(t_flex( - as.matrix(loc_av_expr_matrix)), method = cor_method) - cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) - cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] - cor_spat_DT <- data.table::melt.data.table( - data = cor_spat_matrixDT, - id.vars = "feat_ID", value.name = "spat_cor" - ) - } - - if (method == "network") { - knn_av_expr_matrix <- do_spatial_knn_smoothing( - expression_matrix = as.matrix(expression_matrix), - spatial_network = spatial_network, - subset_feats = subset_feats, - b = network_smoothing - ) - - - - cor_spat_matrix <- cor_flex(t_flex(as.matrix( - knn_av_expr_matrix)), method = cor_method) - cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) - cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] - cor_spat_DT <- data.table::melt.data.table( - data = cor_spat_matrixDT, - id.vars = "feat_ID", value.name = "spat_cor" - ) - } - - - - # data.table variables - cordiff <- spat_cor <- expr_cor <- spatrank <- exprrank <- rankdiff <- NULL - - ## 2. perform expression correlation at single-cell level without - ## spatial information - - # matrix - expr_values <- expression_matrix - if (!is.null(subset_feats)) { - expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] - } - - cor_matrix <- cor_flex(t_flex(expr_values), method = cor_method) - cor_matrixDT <- data.table::as.data.table(cor_matrix) - cor_matrixDT[, feat_ID := rownames(cor_matrix)] - cor_DT <- data.table::melt.data.table( - data = cor_matrixDT, - id.vars = "feat_ID", value.name = "expr_cor" - ) - - ## 3. merge spatial and expression correlation - data.table::setorder(cor_spat_DT, feat_ID, variable) - data.table::setorder(cor_DT, feat_ID, variable) - doubleDT <- cbind(cor_spat_DT, expr_cor = cor_DT[["expr_cor"]]) - - # difference in correlation scores - doubleDT[, cordiff := spat_cor - expr_cor] - - # difference in rank scores - doubleDT[, spatrank := data.table::frank( - -spat_cor, ties.method = "first"), by = feat_ID] - doubleDT[, exprrank := data.table::frank( - -expr_cor, ties.method = "first"), by = feat_ID] - doubleDT[, rankdiff := spatrank - exprrank] - - # sort data - data.table::setorder(doubleDT, feat_ID, -spat_cor) - - spatCorObject <- list( - cor_DT = doubleDT, - feat_order = rownames(cor_spat_matrix), - cor_hclust = list(), - cor_clusters = list() - ) - - class(spatCorObject) <- append(class(spatCorObject), "spatCorObject") + subset_expr <- expr_values[, colnames(expr_values) %in% loc_cell_IDs] + if (is.vector(subset_expr) == TRUE) { + loc_av_expr <- subset_expr + } else { + loc_av_expr <- rowMeans(subset_expr) + } + loc_av_expr_list[[loc_name]] <- loc_av_expr + } + loc_av_expr_matrix <- do.call("cbind", loc_av_expr_list) + loc_av_expr_matrix <- as.matrix(loc_av_expr_matrix) - return(spatCorObject) + return(loc_av_expr_matrix) } -#' @title detectSpatialCorFeats +#' @title Detect spatially correlated features #' @name detectSpatialCorFeats -#' @description Detect features that are spatially correlated +#' @description Detect features that are spatially correlated. Functions for +#' starting from either a gobject (`detectSpatialCorFeats()`) or individual +#' pieces of data (`detectSpatialCorFeatsMatrix()`) are provided. #' @param gobject giotto object #' @param spat_unit spatial unit #' @param feat_type feature type -#' @param spat_loc_name name for spatial locations -#' @param method method to use for spatial averaging #' @param expression_values gene expression values to use -#' @param subset_feats subset of feats to use +#' @param expression_matrix provided expression matrix +#' @param spat_loc_name name for spatial locations +#' @param spatial_locs provided spatial locations #' @param spatial_network_name name of spatial network to use -#' @param network_smoothing smoothing factor beteen 0 and 1 -#' (default: automatic) +#' @param spatial_network provided spatial network #' @param spatial_grid_name name of spatial grid to use +#' @param spatial_grid provided spatial grid +#' @param method method to use for spatial averaging +#' @param subset_feats subset of features to use +#' @param network_smoothing smoothing factor between 0 and 1 +#' (has automatic default, see details) #' @param min_cells_per_grid minimum number of cells to consider a grid #' @param cor_method correlation method -#' @returns returns a spatial correlation object: "spatCorObject" +#' @returns returns a spatial correlation object: `spatCorObject` #' @details -#' For method = network, it expects a fully connected spatial network. You -#' can make sure to create a +#' For `method = network`, it expects a fully connected spatial network. +#' You can make sure to create a #' fully connected network by setting minimal_k > 0 in the -#' \code{\link{createSpatialNetwork}} function. -#' \itemize{ -#' \item{1. grid-averaging: }{average gene expression values within a predefined spatial grid} -#' \item{2. network-averaging: }{smoothens the gene expression matrix by averaging the expression within one cell -#' by using the neighbours within the predefined spatial network. b is a smoothening factor -#' that defaults to 1 - 1/k, where k is the median number of k-neighbors in the -#' selected spatial network. Setting b = 0 means no smoothing and b = 1 means no contribution -#' from its own expression.} -#' } -#' The spatCorObject can be further explored with showSpatialCorFeats() +#' \code{\link{createSpatialNetwork}} function. +#' 1. **grid-averaging:** average gene expression values within a predefined +#' spatial grid +#' 2. **network-averaging:** smoothens the gene expression matrix by +#' averaging the expression within one cell by using the neighbours within +#' the predefined spatial network. \eqn{b} is a smoothening factor passed by +#' `network_smoothing` param that defaults to \eqn{1 - 1/k}, where \eqn{k} +#' is the median number of k-neighbors in the selected spatial network. +#' Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no +#' contribution from its own expression. +#' +#' The `spatCorObject` can be further explored with `showSpatialCorFeats()` #' @seealso \code{\link{showSpatialCorFeats}} +#' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' +#' # Perform with data in a gobject #' detectSpatialCorFeats(g, method = "network") +#' +#' # This analysis can also be performed with data outside of the gobject +#' detectSpatialCorFeatsMatrix( +#' expression_matrix = getExpression( +#' g, +#' output = "matrix" +#' ), +#' method = "network", +#' spatial_network = getSpatialNetwork(g, output = "networkDT") +#' ) +#' +NULL + + + +#' @rdname detectSpatialCorFeats #' @export detectSpatialCorFeats <- function(gobject, spat_unit = NULL, @@ -3440,7 +3476,9 @@ detectSpatialCorFeats <- function(gobject, ## correlation method to be used cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) ## method to be used method <- match.arg(method, choices = c("grid", "network")) @@ -3448,7 +3486,8 @@ detectSpatialCorFeats <- function(gobject, # get expression matrix values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -3464,7 +3503,8 @@ detectSpatialCorFeats <- function(gobject, # get spatial locations - spatial_locs <- getSpatialLocations(gobject, + spatial_locs <- getSpatialLocations( + gobject, spat_unit = spat_unit, name = spat_loc_name, output = "data.table", @@ -3494,7 +3534,8 @@ detectSpatialCorFeats <- function(gobject, feat_ID <- variable <- NULL cor_spat_matrix <- cor_flex(t_flex(as.matrix( - loc_av_expr_matrix)), method = cor_method) + loc_av_expr_matrix + )), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3523,7 +3564,8 @@ detectSpatialCorFeats <- function(gobject, cor_spat_matrix <- cor_flex(t_flex(as.matrix( - knn_av_expr_matrix)), method = cor_method) + knn_av_expr_matrix + )), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3557,9 +3599,13 @@ detectSpatialCorFeats <- function(gobject, # difference in rank scores doubleDT[, spatrank := frank( - -spat_cor, ties.method = "first"), by = feat_ID] + -spat_cor, + ties.method = "first" + ), by = feat_ID] doubleDT[, exprrank := frank( - -expr_cor, ties.method = "first"), by = feat_ID] + -expr_cor, + ties.method = "first" + ), by = feat_ID] doubleDT[, rankdiff := spatrank - exprrank] # sort data @@ -3578,82 +3624,133 @@ detectSpatialCorFeats <- function(gobject, } - -#' @title detectSpatialCorGenes -#' @name detectSpatialCorGenes -#' @description Detect genes that are spatially correlated -#' @param gobject giotto object -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param method method to use for spatial averaging -#' @param expression_values gene expression values to use -#' @param subset_feats subset of feats to use -#' @param subset_genes deprecated, use \code{subset_feats} -#' @param spatial_network_name name of spatial network to use -#' @param network_smoothing smoothing factor beteen 0 and 1 -#' (default: automatic) -#' @param spatial_grid_name name of spatial grid to use -#' @param min_cells_per_grid minimum number of cells to consider a grid -#' @param cor_method correlation method -#' @returns returns a spatial correlation object: "spatCorObject" -#' @details -#' For method = network, it expects a fully connected spatial network. You -#' can make sure to create a -#' fully connected network by setting minimal_k > 0 in the -#' \code{\link{createSpatialNetwork}} function. -#' \itemize{ -#' \item{1. grid-averaging: }{average gene expression values within a -#' predefined spatial grid} -#' \item{2. network-averaging: }{smoothens the gene expression matrix by -#' averaging the expression within one cell -#' by using the neighbours within the predefined spatial network. b is a -#' smoothening factor that defaults to 1 - 1/k, where k is the median -#' number of k-neighbors in the selected spatial network. Setting b = 0 -#' means no smoothing and b = 1 means no contribution -#' from its own expression.} -#' } -#' The spatCorObject can be further explored with showSpatialCorGenes() -#' @seealso \code{\link{showSpatialCorGenes}} +#' @rdname detectSpatialCorFeats #' @export -detectSpatialCorGenes <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - subset_genes = NULL, - spatial_network_name = "Delaunay_network", - network_smoothing = NULL, - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { - ## deprecated arguments - if (!is.null(subset_genes)) { - subset_feats <- subset_genes - warning("subset_genes is deprecated, use subset_feats in the future") +detectSpatialCorFeatsMatrix <- function( + expression_matrix, + method = c("grid", "network"), + spatial_network, + spatial_grid, + spatial_locs, + subset_feats = NULL, + network_smoothing = NULL, + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman")) { + ## correlation method to be used + cor_method <- match.arg( + cor_method, + choices = c("pearson", "kendall", "spearman") + ) + + ## method to be used + method <- match.arg(method, choices = c("grid", "network")) + + ## spatial averaging or smoothing + if (method == "grid") { + loc_av_expr_matrix <- do_spatial_grid_averaging( + expression_matrix = as.matrix(expression_matrix), + spatial_grid = spatial_grid, + spatial_locs = spatial_locs, + subset_feats = subset_feats, + min_cells_per_grid = min_cells_per_grid + ) + + # data.table variables + feat_ID <- variable <- NULL + + cor_spat_matrix <- cor_flex(t_flex( + as.matrix(loc_av_expr_matrix) + ), method = cor_method) + cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) + cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] + cor_spat_DT <- data.table::melt.data.table( + data = cor_spat_matrixDT, + id.vars = "feat_ID", value.name = "spat_cor" + ) } - warning("Deprecated and replaced by detectSpatialCorFeats") + if (method == "network") { + knn_av_expr_matrix <- do_spatial_knn_smoothing( + expression_matrix = as.matrix(expression_matrix), + spatial_network = spatial_network, + subset_feats = subset_feats, + b = network_smoothing + ) + - detectSpatialCorFeats( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - method = method, - expression_values = expression_values, - subset_feats = subset_feats, - spatial_network_name = spatial_network_name, - network_smoothing = network_smoothing, - spatial_grid_name = spatial_grid_name, - min_cells_per_grid = min_cells_per_grid, - cor_method = cor_method + + cor_spat_matrix <- cor_flex(t_flex(as.matrix( + knn_av_expr_matrix + )), method = cor_method) + cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) + cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] + cor_spat_DT <- data.table::melt.data.table( + data = cor_spat_matrixDT, + id.vars = "feat_ID", value.name = "spat_cor" + ) + } + + + + # data.table variables + cordiff <- spat_cor <- expr_cor <- spatrank <- exprrank <- rankdiff <- NULL + + ## 2. perform expression correlation at single-cell level without + ## spatial information + + # matrix + expr_values <- expression_matrix + if (!is.null(subset_feats)) { + expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] + } + + cor_matrix <- cor_flex(t_flex(expr_values), method = cor_method) + cor_matrixDT <- data.table::as.data.table(cor_matrix) + cor_matrixDT[, feat_ID := rownames(cor_matrix)] + cor_DT <- data.table::melt.data.table( + data = cor_matrixDT, + id.vars = "feat_ID", value.name = "expr_cor" + ) + + ## 3. merge spatial and expression correlation + data.table::setorder(cor_spat_DT, feat_ID, variable) + data.table::setorder(cor_DT, feat_ID, variable) + doubleDT <- cbind(cor_spat_DT, expr_cor = cor_DT[["expr_cor"]]) + + # difference in correlation scores + doubleDT[, cordiff := spat_cor - expr_cor] + + # difference in rank scores + doubleDT[, spatrank := data.table::frank( + -spat_cor, + ties.method = "first" + ), by = feat_ID] + doubleDT[, exprrank := data.table::frank( + -expr_cor, + ties.method = "first" + ), by = feat_ID] + doubleDT[, rankdiff := spatrank - exprrank] + + # sort data + data.table::setorder(doubleDT, feat_ID, -spat_cor) + + spatCorObject <- list( + cor_DT = doubleDT, + feat_order = rownames(cor_spat_matrix), + cor_hclust = list(), + cor_clusters = list() ) + + class(spatCorObject) <- append(class(spatCorObject), "spatCorObject") + + return(spatCorObject) } + #' @title showSpatialCorFeats #' @name showSpatialCorFeats #' @description Shows and filters spatially correlated features @@ -3670,15 +3767,16 @@ detectSpatialCorGenes <- function(gobject, #' @param show_top_feats show top features per gene #' @returns data.table with filtered information #' @export -showSpatialCorFeats <- function(spatCorObject, - use_clus_name = NULL, - selected_clusters = NULL, - feats = NULL, - min_spat_cor = 0.5, - min_expr_cor = NULL, - min_cor_diff = NULL, - min_rank_diff = NULL, - show_top_feats = NULL) { +showSpatialCorFeats <- function( + spatCorObject, + use_clus_name = NULL, + selected_clusters = NULL, + feats = NULL, + min_spat_cor = 0.5, + min_expr_cor = NULL, + min_cor_diff = NULL, + min_rank_diff = NULL, + show_top_feats = NULL) { # data.table variables clus <- feat_ID <- spat_cor <- cor_diff <- rankdiff <- NULL @@ -3696,9 +3794,12 @@ showSpatialCorFeats <- function(spatCorObject, clusters <- clusters_part names_clusters <- names(clusters_part) clusters_DT <- data.table::data.table( - "feat_ID" = names_clusters, "clus" = clusters) + "feat_ID" = names_clusters, "clus" = clusters + ) filter_DT <- data.table::merge.data.table( - filter_DT, clusters_DT, by = "feat_ID") + filter_DT, clusters_DT, + by = "feat_ID" + ) } ## 0. subset clusters @@ -3758,15 +3859,16 @@ showSpatialCorFeats <- function(spatCorObject, #' @param show_top_genes show top genes per gene #' @returns data.table with filtered information #' @export -showSpatialCorGenes <- function(spatCorObject, - use_clus_name = NULL, - selected_clusters = NULL, - genes = NULL, - min_spat_cor = 0.5, - min_expr_cor = NULL, - min_cor_diff = NULL, - min_rank_diff = NULL, - show_top_genes = NULL) { +showSpatialCorGenes <- function( + spatCorObject, + use_clus_name = NULL, + selected_clusters = NULL, + genes = NULL, + min_spat_cor = 0.5, + min_expr_cor = NULL, + min_cor_diff = NULL, + min_rank_diff = NULL, + show_top_genes = NULL) { warning("Deprecated and replaced by showSpatialCorFeats") showSpatialCorFeats( @@ -3801,13 +3903,16 @@ showSpatialCorGenes <- function(spatCorObject, #' g <- GiottoData::loadGiottoMini("visium") #' #' clusterSpatialCorFeats(spatCorObject = detectSpatialCorFeats( -#' g, method = "network")) +#' g, +#' method = "network" +#' )) #' @export -clusterSpatialCorFeats <- function(spatCorObject, - name = "spat_clus", - hclust_method = "ward.D", - k = 10, - return_obj = TRUE) { +clusterSpatialCorFeats <- function( + spatCorObject, + name = "spat_clus", + hclust_method = "ward.D", + k = 10, + return_obj = TRUE) { # check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -3817,7 +3922,9 @@ clusterSpatialCorFeats <- function(spatCorObject, # create correlation matrix cor_DT <- spatCorObject[["cor_DT"]] cor_DT_dc <- data.table::dcast.data.table( - cor_DT, formula = feat_ID ~ variable, value.var = "spat_cor") + cor_DT, + formula = feat_ID ~ variable, value.var = "spat_cor" + ) cor_matrix <- dt_to_matrix(cor_DT_dc) # re-ordering matrix @@ -3853,11 +3960,12 @@ clusterSpatialCorFeats <- function(spatCorObject, #' @param return_obj return spatial correlation object (spatCorObject) #' @returns spatCorObject or cluster results #' @export -clusterSpatialCorGenes <- function(spatCorObject, - name = "spat_clus", - hclust_method = "ward.D", - k = 10, - return_obj = TRUE) { +clusterSpatialCorGenes <- function( + spatCorObject, + name = "spat_clus", + hclust_method = "ward.D", + k = 10, + return_obj = TRUE) { warning("Deprecated and replaced by clusterSpatialCorFeats") clusterSpatialCorFeats( @@ -3896,20 +4004,21 @@ clusterSpatialCorGenes <- function(spatCorObject, #' \code{\link[ComplexHeatmap]{Heatmap}} function from ComplexHeatmap #' @returns Heatmap generated by ComplexHeatmap #' @export -heatmSpatialCorFeats <- function(gobject, - spatCorObject, - use_clus_name = NULL, - show_cluster_annot = TRUE, - show_row_dend = TRUE, - show_column_dend = FALSE, - show_row_names = FALSE, - show_column_names = FALSE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "heatmSpatialCorFeats", - ...) { +heatmSpatialCorFeats <- function( + gobject, + spatCorObject, + use_clus_name = NULL, + show_cluster_annot = TRUE, + show_row_dend = TRUE, + show_column_dend = FALSE, + show_row_names = FALSE, + show_column_names = FALSE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "heatmSpatialCorFeats", + ...) { ## check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -3922,7 +4031,9 @@ heatmSpatialCorFeats <- function(gobject, ## create correlation matrix cor_DT <- spatCorObject[["cor_DT"]] cor_DT_dc <- data.table::dcast.data.table( - cor_DT, formula = feat_ID ~ variable, value.var = "spat_cor") + cor_DT, + formula = feat_ID ~ variable, value.var = "spat_cor" + ) cor_matrix <- dt_to_matrix(cor_DT_dc) # re-ordering matrix @@ -4030,18 +4141,21 @@ heatmSpatialCorGenes <- function(...) { #' spatCorObject <- detectSpatialCorFeats(g, method = "network") #' clusters <- clusterSpatialCorFeats(spatCorObject = spatCorObject) #' -#' rankSpatialCorGroups(gobject = g, spatCorObject = clusters, -#' use_clus_name = "spat_clus") +#' rankSpatialCorGroups( +#' gobject = g, spatCorObject = clusters, +#' use_clus_name = "spat_clus" +#' ) #' @md #' @export -rankSpatialCorGroups <- function(gobject, - spatCorObject, - use_clus_name = NULL, - show_plot = NULL, - return_plot = FALSE, - save_plot = NULL, - save_param = list(), - default_save_name = "rankSpatialCorGroups") { +rankSpatialCorGroups <- function( + gobject, + spatCorObject, + use_clus_name = NULL, + show_plot = NULL, + return_plot = FALSE, + save_plot = NULL, + save_param = list(), + default_save_name = "rankSpatialCorGroups") { ## check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -4061,7 +4175,9 @@ rankSpatialCorGroups <- function(gobject, ## create correlation matrix cor_DT <- spatCorObject[["cor_DT"]] cor_DT_dc <- data.table::dcast.data.table( - cor_DT, formula = feat_ID ~ variable, value.var = "spat_cor") + cor_DT, + formula = feat_ID ~ variable, value.var = "spat_cor" + ) cor_matrix <- dt_to_matrix(cor_DT_dc) # re-ordering matrix @@ -4081,13 +4197,15 @@ rankSpatialCorGroups <- function(gobject, sub_cor_matrix <- cor_matrix[ rownames(cor_matrix) %in% selected_feats, - colnames(cor_matrix) %in% selected_feats] + colnames(cor_matrix) %in% selected_feats + ] mean_score <- mean_flex(sub_cor_matrix) res_cor_list[[id]] <- mean_score sub_neg_cor_matrix <- cor_matrix[ rownames(cor_matrix) %in% selected_feats, - !colnames(cor_matrix) %in% selected_feats] + !colnames(cor_matrix) %in% selected_feats + ] mean_neg_score <- mean_flex(sub_neg_cor_matrix) res_neg_cor_list[[id]] <- mean_neg_score } @@ -4134,7 +4252,7 @@ rankSpatialCorGroups <- function(gobject, -#' @title getBalancedSpatCoexpressionFeats +#' @title Get balanced spatial coexpression features #' @name getBalancedSpatCoexpressionFeats #' @description Extract features from spatial co-expression modules in a #' balanced manner @@ -4148,18 +4266,21 @@ rankSpatialCorGroups <- function(gobject, #' @returns balanced vector with features for each co-expression module #' @details There are 3 different ways of selecting features from the spatial #' co-expression modules -#' \itemize{ -#' \item{1. weighted: }{Features are ranked based on summarized pairwise co-expression scores} -#' \item{2. random: }{A random selection of features, set seed for reproducibility} -#' \item{3. informed: }{Features are selected based on prior information/ranking} -#' } +#' 1. **weighted:** Features are ranked based on summarized pairwise +#' co-expression scores +#' 2. **random:** A random selection of features, set seed for +#' reproducibility +#' 3. **informed:** Features are selected based on prior information/ranking +#' +#' @md #' @export -getBalancedSpatCoexpressionFeats <- function(spatCorObject, - maximum = 50, - rank = c("weighted", "random", "informed"), - informed_ranking = NULL, - seed = NA, - verbose = TRUE) { +getBalancedSpatCoexpressionFeats <- function( + spatCorObject, + maximum = 50, + rank = c("weighted", "random", "informed"), + informed_ranking = NULL, + seed = NA, + verbose = TRUE) { # data.table vars feat_ID <- variable <- combo <- spat_cor <- rnk <- feat_id <- V1 <- NULL @@ -4188,7 +4309,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } @@ -4219,7 +4341,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, selected_cluster_features <- names(clusters[clusters == clus]) subset_cor_data <- cor_data[ feat_ID %in% selected_cluster_features & - variable %in% selected_cluster_features] + variable %in% selected_cluster_features + ] subset_cor_data <- subset_cor_data[feat_ID != variable] subset_cor_data <- dt_sort_combine_two_columns( DT = subset_cor_data, @@ -4232,10 +4355,12 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, # create a ranked data.table rnk1DT <- data.table::data.table( feat_id = subset_cor_data$feat_ID, - rnk = seq_along(subset_cor_data$feat_ID)) + rnk = seq_along(subset_cor_data$feat_ID) + ) rnk2DT <- data.table::data.table( feat_id = subset_cor_data$variable, - rnk = seq_along(subset_cor_data$variable)) + rnk = seq_along(subset_cor_data$variable) + ) rnkDT <- data.table::rbindlist(list(rnk1DT, rnk2DT)) data.table::setorder(rnkDT, rnk) @@ -4249,12 +4374,13 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } - selected_feats <- rnkcombined[1:maximum_to_use][["feat_id"]] + selected_feats <- rnkcombined[seq_len(maximum_to_use)][["feat_id"]] clus_id <- rep(clus, length(selected_feats)) names(clus_id) <- selected_feats @@ -4287,7 +4413,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } @@ -4295,7 +4422,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, informed_subset <- informed_ranking_numerical[ names(informed_ranking_numerical) %in% - selected_cluster_features] + selected_cluster_features + ] informed_subset <- sort(informed_subset) feat_length <- length(informed_subset) @@ -4304,12 +4432,13 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } - selected_feats <- names(informed_subset[1:maximum_to_use]) + selected_feats <- names(informed_subset[seq_len(maximum_to_use)]) clus_id <- rep(clus, length(selected_feats)) names(clus_id) <- selected_feats @@ -4345,25 +4474,30 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, #' @param show_pattern show the discrete spatial pattern #' @param pattern_colors 2 color vector for the spatial pattern #' @param normalization_params additional parameters for (re-)normalizing -#' @returns Reprocessed Giotto object for which one gene has a forced +#' @returns Reprocessed Giotto object for which one gene has a forced #' spatial pattern #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' simulateOneGenePatternGiottoObject(gobject = g, -#' pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", -#' "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -#' gene_name = "Gna12") +#' +#' simulateOneGenePatternGiottoObject( +#' gobject = g, +#' pattern_cell_ids = c( +#' "AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", +#' "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" +#' ), +#' gene_name = "Gna12" +#' ) #' @export -simulateOneGenePatternGiottoObject <- function(gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_prob = 0.95, - gradient_direction = NULL, - show_pattern = TRUE, - pattern_colors = c("in" = "green", "out" = "red"), - normalization_params = list()) { +simulateOneGenePatternGiottoObject <- function( + gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_prob = 0.95, + gradient_direction = NULL, + show_pattern = TRUE, + pattern_colors = c("in" = "green", "out" = "red"), + normalization_params = list()) { # data.table variables cell_ID <- sdimx_y <- sdimx <- sdimy <- NULL @@ -4374,7 +4508,8 @@ simulateOneGenePatternGiottoObject <- function(gobject, ## create and add annotation for pattern cell_meta <- pDataDT(gobject) cell_meta[, (pattern_name) := ifelse( - cell_ID %in% pattern_cell_ids, "in", "out")] + cell_ID %in% pattern_cell_ids, "in", "out" + )] newgobject <- addCellMetadata( gobject, @@ -4403,24 +4538,30 @@ simulateOneGenePatternGiottoObject <- function(gobject, copy_obj = TRUE ) cell_meta <- data.table::merge.data.table( - cell_meta, cell_coord, by = "cell_ID") + cell_meta, cell_coord, + by = "cell_ID" + ) ## get number of cells within pattern cell_number <- nrow(cell_meta[get(pattern_name) == "in"]) ## normalized expression - #expr_data <- newgobject@norm_expr - expr_data <- getExpression(gobject = newgobject, - values = "normalized", - output = "matrix") + # expr_data <- newgobject@norm_expr + expr_data <- getExpression( + gobject = newgobject, + values = "normalized", + output = "matrix" + ) result_list <- list() ## raw expression - #raw_expr_data <- newgobject@raw_exprs - raw_expr_data <- getExpression(gobject = newgobject, - values = "raw", - output = "matrix") + # raw_expr_data <- newgobject@raw_exprs + raw_expr_data <- getExpression( + gobject = newgobject, + values = "raw", + output = "matrix" + ) raw_result_list <- list() @@ -4448,15 +4589,19 @@ simulateOneGenePatternGiottoObject <- function(gobject, outside_prob <- 1 - spatial_prob prob_vector <- c( rep(spatial_prob, cell_number), - rep(outside_prob, remaining_cell_number)) + rep(outside_prob, remaining_cell_number) + ) # first get the 'in' pattern sample values randomly sample_values <- sample( - sort_expr_gene, replace = FALSE, size = cell_number, prob = prob_vector) + sort_expr_gene, + replace = FALSE, size = cell_number, prob = prob_vector + ) # then take the remaining 'out' pattern values randomly remain_values <- sort_expr_gene[ - !names(sort_expr_gene) %in% names(sample_values)] + !names(sort_expr_gene) %in% names(sample_values) + ] remain_values <- sample(remain_values, size = length(remain_values)) @@ -4506,19 +4651,23 @@ simulateOneGenePatternGiottoObject <- function(gobject, # change the original matrices raw_expr_data[rownames(raw_expr_data) == gene_name, ] <- new_sim_raw_values - #newgobject@raw_exprs <- raw_expr_data - newgobject <- setExpression(gobject = newgobject, - x = createExprObj( - expression_data = raw_expr_data, - name = "raw"), - name = "raw", - provenance = prov(getCellMetadata(newgobject))) + # newgobject@raw_exprs <- raw_expr_data + newgobject <- setExpression( + gobject = newgobject, + x = createExprObj( + expression_data = raw_expr_data, + name = "raw" + ), + name = "raw", + provenance = prov(getCellMetadata(newgobject)) + ) # recalculate normalized values newgobject <- do.call( - "normalizeGiotto", - args = c(gobject = newgobject, normalization_params)) - + "normalizeGiotto", + args = c(gobject = newgobject, normalization_params) + ) + newgobject <- addStatistics(gobject = newgobject) return(newgobject) @@ -4534,28 +4683,33 @@ simulateOneGenePatternGiottoObject <- function(gobject, #' @description runs all spatial tests for 1 probability and 1 rep #' @returns data.table #' @keywords internal -run_spatial_sim_tests_one_rep <- function(gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_prob = 0.95, - show_pattern = FALSE, - spatial_network_name = "kNN_network", - spat_methods = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - save_plot = FALSE, - save_raw = FALSE, - save_norm = FALSE, - save_dir = "~", - save_name = "plot", - run_simulations = TRUE, - ...) { +run_spatial_sim_tests_one_rep <- function( + gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_prob = 0.95, + show_pattern = FALSE, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + save_plot = FALSE, + save_raw = FALSE, + save_norm = FALSE, + save_dir = "~", + save_name = "plot", + run_simulations = TRUE, + ...) { # data.table variables genes <- prob <- time <- adj.p.value <- method <- p.val <- sd <- - qval <- pval <- g <- adjusted_pvalue <- NULL + qval <- pval <- g <- adjusted_pvalue <- feats <- NULL ## test if spat_methods, params and names have the same length if (length(spat_methods) != length(spat_methods_params)) { @@ -4582,7 +4736,7 @@ run_spatial_sim_tests_one_rep <- function(gobject, # save plot if (save_plot == TRUE) { spatFeatPlot2D(simulate_patch, - expression_values = "normalized", + expression_values = "normalized", feats = gene_name, point_shape = "border", point_border_stroke = 0.1, @@ -4604,9 +4758,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, write.table( x = as.matrix(getExpression( - gobject = simulate_patch, values = "raw", output = "matrix")), + gobject = simulate_patch, values = "raw", output = "matrix" + )), file = paste0( - save_dir, "/", pattern_name, "/", save_name, "_raw_data.txt"), + save_dir, "/", pattern_name, "/", save_name, "_raw_data.txt" + ), sep = "\t" ) } @@ -4618,11 +4774,13 @@ run_spatial_sim_tests_one_rep <- function(gobject, write.table( x = as.matrix(getExpression( - gobject = simulate_patch, - values = "normalized", - output = "matrix")), + gobject = simulate_patch, + values = "normalized", + output = "matrix" + )), file = paste0( - save_dir, "/", pattern_name, "/", save_name, "_norm_data.txt"), + save_dir, "/", pattern_name, "/", save_name, "_norm_data.txt" + ), sep = "\t" ) } @@ -4636,8 +4794,10 @@ run_spatial_sim_tests_one_rep <- function(gobject, # method selected_method <- spat_methods[test] if (!selected_method %in% - c("binSpect_single", "binSpect_multi", "spatialDE", "spark", - "silhouetteRank")) { + c( + "binSpect_single", "binSpect_multi", "spatialDE", "spark", + "silhouetteRank" + )) { stop(selected_method, " is not a know spatial method") } @@ -4718,9 +4878,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, spatial_gene_results[, time := total_time[["elapsed"]]] spatial_gene_results <- spatial_gene_results[ - , .(feats, adj.p.value, prob, time)] + , .(feats, adj.p.value, prob, time) + ] colnames(spatial_gene_results) <- c( - "feats", "adj.p.value", "prob", "time") + "feats", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := selected_name] } else if (selected_method == "binSpect_multi") { @@ -4738,20 +4900,25 @@ run_spatial_sim_tests_one_rep <- function(gobject, spatial_gene_results[, time := total_time[["elapsed"]]] spatial_gene_results <- spatial_gene_results[ - , .(feats, p.val, prob, time)] + , .(feats, p.val, prob, time) + ] colnames(spatial_gene_results) <- c( - "feats", "adj.p.value", "prob", "time") + "feats", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := selected_name] } else if (selected_method == "spatialDE") { start <- proc.time() new_raw_sim_matrix <- getExpression(simulate_patch, - values = "raw", - output = "matrix") + values = "raw", + output = "matrix" + ) sd_cells <- apply(new_raw_sim_matrix, 2, sd) sd_non_zero_cells <- names(sd_cells[sd_cells != 0]) simulate_patch_fix <- subsetGiotto( - simulate_patch, cell_ids = sd_non_zero_cells) + simulate_patch, + cell_ids = sd_non_zero_cells + ) spatial_gene_results <- do.call("spatialDE", c( gobject = simulate_patch_fix, @@ -4759,14 +4926,17 @@ run_spatial_sim_tests_one_rep <- function(gobject, )) spatialDE_spatialgenes_sim_res <- spatial_gene_results$results$results - if (is.null(spatialDE_spatialgenes_sim_res)) + if (is.null(spatialDE_spatialgenes_sim_res)) { spatialDE_spatialgenes_sim_res <- spatial_gene_results$results + } spatialDE_spatialgenes_sim_res <- data.table::as.data.table( - spatialDE_spatialgenes_sim_res) + spatialDE_spatialgenes_sim_res + ) data.table::setorder(spatialDE_spatialgenes_sim_res, qval, pval) spatialDE_result <- spatialDE_spatialgenes_sim_res[ - g == gene_name] + g == gene_name + ] spatialDE_time <- proc.time() - start @@ -4774,9 +4944,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, spatialDE_result[, time := spatialDE_time[["elapsed"]]] spatial_gene_results <- spatialDE_result[ - , .(g, qval, prob, time)] + , .(g, qval, prob, time) + ] colnames(spatial_gene_results) <- c( - "feats", "adj.p.value", "prob", "time") + "feats", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := "spatialDE"] } else if (selected_method == "spark") { ## spark @@ -4793,9 +4965,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, spark_result[, time := spark_time[["elapsed"]]] spatial_gene_results <- spark_result[ - , .(genes, adjusted_pvalue, prob, time)] + , .(genes, adjusted_pvalue, prob, time) + ] colnames(spatial_gene_results) <- c( - "genes", "adj.p.value", "prob", "time") + "genes", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := "spark"] } else if (selected_method == "silhouetteRank") { ## silhouetterank @@ -4807,7 +4981,9 @@ run_spatial_sim_tests_one_rep <- function(gobject, )) data.table::setnames( - spatial_gene_results, old = "gene", new = "genes") + spatial_gene_results, + old = "gene", new = "genes" + ) spatial_gene_results <- spatial_gene_results[genes == gene_name] silh_time <- proc.time() - start @@ -4816,9 +4992,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, # silhrank uses qval by default spatial_gene_results <- spatial_gene_results[ - , .(genes, qval, prob, time)] + , .(genes, qval, prob, time) + ] colnames(spatial_gene_results) <- c( - "genes", "adj.p.value", "prob", "time") + "genes", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := "silhouette"] } @@ -4842,25 +5020,30 @@ run_spatial_sim_tests_one_rep <- function(gobject, #' repetitions #' @returns data.table #' @keywords internal -run_spatial_sim_tests_multi <- function(gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_probs = c(0.5, 1), - reps = 2, - spatial_network_name = "kNN_network", - spat_methods = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - save_plot = FALSE, - save_raw = FALSE, - save_norm = FALSE, - save_dir = "~", - verbose = TRUE, - run_simulations = TRUE, - ...) { +run_spatial_sim_tests_multi <- function( + gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_probs = c(0.5, 1), + reps = 2, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + save_plot = FALSE, + save_raw = FALSE, + save_norm = FALSE, + save_dir = "~", + verbose = TRUE, + run_simulations = TRUE, + ...) { prob_list <- list() for (prob_ind in seq_along(spatial_probs)) { prob_i <- spatial_probs[prob_ind] @@ -4868,12 +5051,14 @@ run_spatial_sim_tests_multi <- function(gobject, if (verbose) message("start with ", prob_i) rep_list <- list() - for (rep_i in 1:reps) { + for (rep_i in seq_len(reps)) { if (verbose) message("repetition = ", rep_i) - plot_name <- paste0("plot_", gene_name, "_prob", - prob_i, "_rep", rep_i) + plot_name <- paste0( + "plot_", gene_name, "_prob", + prob_i, "_rep", rep_i + ) rep_res <- run_spatial_sim_tests_one_rep(gobject, @@ -4946,34 +5131,43 @@ run_spatial_sim_tests_multi <- function(gobject, #' @returns data.table with results #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' runPatternSimulation(gobject = g, pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", -#' "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -#' spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2")) +#' +#' runPatternSimulation( +#' gobject = g, pattern_cell_ids = c( +#' "AAAGGGATGTAGCAAG-1", +#' "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" +#' ), +#' spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2") +#' ) #' @export -runPatternSimulation <- function(gobject, - pattern_name = "pattern", - pattern_colors = c("in" = "green", "out" = "red"), - pattern_cell_ids = NULL, - gene_names = NULL, - spatial_probs = c(0.5, 1), - reps = 2, - spatial_network_name = "kNN_network", - spat_methods = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - scalefactor = 6000, - save_plot = TRUE, - save_raw = TRUE, - save_norm = TRUE, - save_dir = "~", - max_col = 4, - height = 7, - width = 7, - run_simulations = TRUE, - ...) { +runPatternSimulation <- function( + gobject, + pattern_name = "pattern", + pattern_colors = c("in" = "green", "out" = "red"), + pattern_cell_ids = NULL, + gene_names = NULL, + spatial_probs = c(0.5, 1), + reps = 2, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + scalefactor = 6000, + save_plot = TRUE, + save_raw = TRUE, + save_norm = TRUE, + save_dir = "~", + max_col = 4, + height = 7, + width = 7, + run_simulations = TRUE, + ...) { # data.table variables prob <- method <- adj.p.value <- time <- NULL @@ -4984,8 +5178,10 @@ runPatternSimulation <- function(gobject, pattern_cell_ids = pattern_cell_ids, gene_name = gene_names[1], spatial_prob = 1, - normalization_params = list(scalefactor = scalefactor, - verbose = TRUE) + normalization_params = list( + scalefactor = scalefactor, + verbose = TRUE + ) ) spatPlot2D(example_patch, @@ -5053,13 +5249,17 @@ runPatternSimulation <- function(gobject, if (save_plot == TRUE) { subdir <- paste0(save_dir, "/", pattern_name, "/") - if (!file.exists(subdir)) dir.create( - path = subdir, recursive = TRUE) + if (!file.exists(subdir)) { + dir.create( + path = subdir, recursive = TRUE + ) + } # write results data.table::fwrite( x = generesults, file = paste0(subdir, "/", gene, "_results.txt"), - sep = "\t", quote = FALSE) + sep = "\t", quote = FALSE + ) } all_results[[gene_ind]] <- generesults @@ -5081,21 +5281,28 @@ runPatternSimulation <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_boxplot( data = results, - ggplot2::aes(x = method, y = adj.p.value, color = prob)) + ggplot2::aes(x = method, y = adj.p.value, color = prob) + ) pl <- pl + ggplot2::geom_point( data = results, ggplot2::aes(x = method, y = adj.p.value, color = prob), - size = 2, position = ggplot2::position_jitterdodge()) + size = 2, position = ggplot2::position_jitterdodge() + ) pl <- pl + ggplot2::theme_bw() + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + )) pl <- pl + ggplot2::facet_wrap(~genes, nrow = nr_rows) pl <- pl + ggplot2::geom_hline( - yintercept = 0.05, color = "red", linetype = 2) + yintercept = 0.05, color = "red", linetype = 2 + ) - grDevices::pdf(file = paste0( - save_dir, "/", pattern_name, "_boxplot_pvalues.pdf"), - width = width, height = height) + grDevices::pdf( + file = paste0( + save_dir, "/", pattern_name, "_boxplot_pvalues.pdf" + ), + width = width, height = height + ) print(pl) grDevices::dev.off() @@ -5105,19 +5312,26 @@ runPatternSimulation <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_boxplot( data = results, - ggplot2::aes(x = method, y = -log10(adj.p.value), color = prob)) + ggplot2::aes(x = method, y = -log10(adj.p.value), color = prob) + ) pl <- pl + ggplot2::geom_point( data = results, ggplot2::aes(x = method, y = -log10(adj.p.value), color = prob), - size = 2, position = ggplot2::position_jitterdodge()) + size = 2, position = ggplot2::position_jitterdodge() + ) pl <- pl + ggplot2::theme_bw() + ggplot2::theme( axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + ) + ) pl <- pl + ggplot2::facet_wrap(~genes, nrow = nr_rows) - grDevices::pdf(file = paste0( - save_dir, "/", pattern_name, "_boxplot_log10pvalues.pdf"), - width = width, height = height) + grDevices::pdf( + file = paste0( + save_dir, "/", pattern_name, "_boxplot_log10pvalues.pdf" + ), + width = width, height = height + ) print(pl) grDevices::dev.off() @@ -5126,18 +5340,25 @@ runPatternSimulation <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_boxplot( data = results, - ggplot2::aes(x = method, y = time, color = prob)) + ggplot2::aes(x = method, y = time, color = prob) + ) pl <- pl + ggplot2::geom_point( data = results, ggplot2::aes(x = method, y = time, color = prob), size = 2, - position = ggplot2::position_jitterdodge()) + position = ggplot2::position_jitterdodge() + ) pl <- pl + ggplot2::theme_bw() + ggplot2::theme( axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + ) + ) - grDevices::pdf(file = paste0( - save_dir, "/", pattern_name, "_boxplot_time.pdf"), - width = width, height = height) + grDevices::pdf( + file = paste0( + save_dir, "/", pattern_name, "_boxplot_time.pdf" + ), + width = width, height = height + ) print(pl) grDevices::dev.off() } @@ -5147,7 +5368,8 @@ runPatternSimulation <- function(gobject, data.table::fwrite( x = results, file = paste0(save_dir, "/", pattern_name, "_results.txt"), - sep = "\t", quote = FALSE) + sep = "\t", quote = FALSE + ) return(results) } else { return(NULL) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index cb9f6c5f0..a643c66d0 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -51,7 +51,7 @@ make_simulated_network <- function(gobject, spatial_network_annot$to_cell_type) middle_point <- length(all_cell_type) / 2 - for (sim in 1:number_of_simulations) { + for (sim in seq_len(number_of_simulations)) { if (set_seed == TRUE) { seed_number <- seed_number + sim set.seed(seed = seed_number) @@ -60,7 +60,7 @@ make_simulated_network <- function(gobject, reshuffled_all_cell_type <- sample( x = all_cell_type, size = length(all_cell_type), replace = FALSE) - new_from_cell_type <- reshuffled_all_cell_type[1:middle_point] + new_from_cell_type <- reshuffled_all_cell_type[seq_len(middle_point)] s1_list[[sim]] <- new_from_cell_type new_to_cell_type <- reshuffled_all_cell_type[ @@ -70,7 +70,7 @@ make_simulated_network <- function(gobject, s1_vector <- do.call("c", s1_list) s2_vector <- do.call("c", s2_list) - round_vector <- rep(x = 1:number_of_simulations, each = length_ints) + round_vector <- rep(x = seq_len(number_of_simulations), each = length_ints) round_vector <- paste0("sim", round_vector) # data.table variables @@ -80,7 +80,7 @@ make_simulated_network <- function(gobject, s1 = s1_vector, s2 = s2_vector, round = round_vector) uniq_sim_comb <- unique(sample_dt[, .(s1, s2)]) uniq_sim_comb[, unified_int := paste( - sort(c(s1, s2)), collapse = "--"), by = 1:nrow(uniq_sim_comb)] + sort(c(s1, s2)), collapse = "--"), by = seq_len(nrow(uniq_sim_comb))] sample_dt[uniq_sim_comb, unified_int := unified_int, on = c( s1 = "s1", s2 = "s2")] sample_dt[, type_int := ifelse(s1 == s2, "homo", "hetero")] @@ -190,7 +190,8 @@ cellProximityEnrichment <- function(gobject, minimum_simulations <- unique_ints[rep( seq_len(nrow(unique_ints)), number_of_simulations), ] minimum_simulations[, round := rep( - paste0("sim", 1:number_of_simulations), each = nrow(unique_ints))] + paste0("sim", seq_len(number_of_simulations)), + each = nrow(unique_ints))] minimum_simulations[, N := 0] table_sim_minimum_results <- rbind(table_sim_results, minimum_simulations) @@ -312,7 +313,7 @@ cellProximityEnrichment <- function(gobject, # order table_mean_results_dc <- table_mean_results_dc[order(-PI_value)] - table_mean_results_dc[, int_ranking := 1:.N] + table_mean_results_dc[, int_ranking := seq_len(.N)] return(list(raw_sim_table = table_results, enrichm_res = table_mean_results_dc)) @@ -698,7 +699,7 @@ NULL seed_number_list <- seed_number:(seed_number + (n - 1)) } - result <- lapply(X = 1:n, FUN = function(x) { + result <- lapply(X = seq_len(n), FUN = function(x) { seed_number <- seed_number_list[x] perm_rand <- .do_permuttest_random( @@ -1051,9 +1052,13 @@ NULL #' @title findInteractionChangedFeats #' @name findInteractionChangedFeats +#' @aliases findICF #' @description Identifies cell-to-cell Interaction Changed Features (ICF), #' i.e. features that are differentially expressed due to proximity to other -#' cell types. +#' cell types. This function is appropriate for single-cell level data. For +#' data from spot-based spatial assays or spatially binned data, see +#' [findICFSpot()], which runs on top of DWLS results or similar spot-level +#' cell-type enrichment outputs #' @param gobject giotto object #' @param feat_type feature type #' @param spat_unit spatial unit @@ -1072,33 +1077,48 @@ NULL #' @param do_parallel run calculations in parallel with mclapply #' @param set_seed set a seed for reproducibility #' @param seed_number seed number -#' @returns icfObject that contains the Interaction Changed differential +#' @returns `icfObject` that contains the Interaction Changed differential #' feature scores #' @details Function to calculate if features are differentially expressed in #' cell types when they interact (approximated by physical proximity) with -#' other cell types. The results data.table in the icfObject contains +#' other cell types. The results data.table in the `icfObject` contains #' - at least - the following columns: -#' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } -#' \item{log2fc:}{ log2 fold-change between sel and other} -#' \item{diff:}{ spatial expression difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{nr_other:}{ number of other cells of selected target cell type} -#' \item{int_nr_other:}{ number of other cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} -#' } +#' * **features:** All or selected list of tested features +#' * **sel:** average feature expression in the interacting cells from the +#' target cell type +#' * **other:** average feature expression in the NOT-interacting cells from +#' the target cell type +#' * **log2fc:** log2 fold-change between sel and other +#' * **diff:** spatial expression difference between sel and other +#' * **p.value:** associated p-value +#' * **p.adj:** adjusted p-value +#' * **cell_type:** target cell type +#' * **int_cell_type:** interacting cell type +#' * **nr_select:** number of cells for selected target cell type +#' * **int_nr_select:** number of cells for interacting cell type +#' * **nr_other:** number of other cells of selected target cell type +#' * **int_nr_other:** number of other cells for interacting cell type +#' * **unif_int:** cell-cell interaction +#' +#' @seealso [filterInteractionChangedFeats()] [findICFSpot()] +#' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' icf1 <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) +#' force(icf1) +#' force(icf1$ICFscores) +#' +#' # this is just an alias with a shorter name +#' icf2 <- findICF(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) #' @export findInteractionChangedFeats <- function(gobject, feat_type = NULL, @@ -1164,8 +1184,9 @@ findInteractionChangedFeats <- function(gobject, mean_method <- match.arg(mean_method, choices = c("arithmic", "geometric")) ## metadata - cell_metadata <- pDataDT(gobject, feat_type = feat_type) - + cell_metadata <- pDataDT( + gobject, spat_unit = spat_unit, feat_type = feat_type + ) ## annotated spatial network @@ -1178,267 +1199,108 @@ findInteractionChangedFeats <- function(gobject, all_interactions <- unique(annot_spatnetwork$unified_int) - if (do_parallel == TRUE) { - fin_result <- lapply_flex( - X = all_interactions, future.seed = TRUE, FUN = function(x) { + ## prepare function + fcp_feats_per_i <- function(x) { + .findCellProximityFeats_per_interaction( + expr_values = expr_values, + cell_metadata = cell_metadata, + annot_spatnetwork = annot_spatnetwork, + minimum_unique_cells = minimum_unique_cells, + minimum_unique_int_cells = minimum_unique_int_cells, + sel_int = x, + cluster_column = cluster_column, + exclude_selected_cells_from_test = exclude_selected_cells_from_test, + diff_test = diff_test, + mean_method = mean_method, + offset = offset, + adjust_method = adjust_method, + nr_permutations = nr_permutations, + set_seed = set_seed, + seed_number = seed_number + ) + } - tempres <- .findCellProximityFeats_per_interaction( - expr_values = expr_values, - cell_metadata = cell_metadata, - annot_spatnetwork = annot_spatnetwork, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - sel_int = x, - cluster_column = cluster_column, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - set_seed = set_seed, - seed_number = seed_number - ) - }) - } else { + if (isTRUE(do_parallel)) { # parallel + fin_result <- lapply_flex( + X = all_interactions, future.seed = TRUE, FUN = fcp_feats_per_i + ) + } else { # sequential fin_result <- list() for (i in seq_along(all_interactions)) { x <- all_interactions[i] - - - tempres <- .findCellProximityFeats_per_interaction( - expr_values = expr_values, - cell_metadata = cell_metadata, - annot_spatnetwork = annot_spatnetwork, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - sel_int = x, - cluster_column = cluster_column, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - set_seed = set_seed, - seed_number = seed_number - ) - + tempres <- fcp_feats_per_i(x) fin_result[[i]] <- tempres } } final_result <- do.call("rbind", fin_result) - - - # data.table variables + # NSE variables spec_int <- cell_type <- int_cell_type <- type_int <- NULL final_result[, spec_int := paste0(cell_type, "--", int_cell_type)] final_result[, type_int := ifelse( cell_type == int_cell_type, "homo", "hetero")] - - # return(final_result) - permutation_test <- ifelse( diff_test == "permutation", nr_permutations, "no permutations") - icfObject <- list( - ICFscores = final_result, - Giotto_info = list( - "values" = values, - "cluster" = cluster_column, - "spatial network" = spatial_network_name + icfObject <- structure( + .Data = list( + ICFscores = final_result, + Giotto_info = list( + "values" = values, + "cluster" = cluster_column, + "spatial network" = spatial_network_name + ), + test_info = list( + "test" = diff_test, + "p.adj" = adjust_method, + "min cells" = minimum_unique_cells, + "min interacting cells" = minimum_unique_int_cells, + "exclude selected cells" = exclude_selected_cells_from_test, + "perm" = permutation_test + ) ), - test_info = list( - "test" = diff_test, - "p.adj" = adjust_method, - "min cells" = minimum_unique_cells, - "min interacting cells" = minimum_unique_int_cells, - "exclude selected cells" = exclude_selected_cells_from_test, - "perm" = permutation_test - ) + class = "icfObject" ) - class(icfObject) <- append("icfObject", class(icfObject)) return(icfObject) } - - -#' @title findInteractionChangedGenes -#' @name findInteractionChangedGenes -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to interactions with other -#' cell types. -#' @param ... params to pass to \code{findInteractionChangedFeats} -#' @seealso \code{\link{findInteractionChangedFeats}} -#' @returns interaction changed genes -#' @export -findInteractionChangedGenes <- function(...) { - .Deprecated(new = "findInteractionChangedFeats") - - findInteractionChangedFeats(...) -} - - - -#' @title findCellProximityGenes -#' @name findCellProximityGenes -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to proximity to other cell -#' types. -#' @inheritDotParams findInteractionChangedFeats -#' @seealso \code{\link{findInteractionChangedFeats}} -#' @returns cell-cell interaction changed genes +#' @rdname findInteractionChangedFeats #' @export -findCellProximityGenes <- function(...) { - .Deprecated(new = "findInteractionChangedFeats") - - findInteractionChangedFeats(...) -} - +findICF <- findInteractionChangedFeats - - - -#' @title findICF -#' @name findICF -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. features that are differentially expressed due to proximity to other -#' cell types. -#' @param gobject giotto object -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param expression_values expression values to use -#' @param selected_feats subset of selected features (optional) -#' @param cluster_column name of column to use for cell types -#' @param spatial_network_name name of spatial network to use -#' @param minimum_unique_cells minimum number of target cells required -#' @param minimum_unique_int_cells minimum number of interacting cells required -#' @param diff_test which differential expression test -#' @param mean_method method to use to calculate the mean -#' @param offset offset value to use when calculating log2 ratio -#' @param adjust_method which method to adjust p-values -#' @param nr_permutations number of permutations if diff_test = permutation -#' @param exclude_selected_cells_from_test exclude interacting cells other cells -#' @param do_parallel run calculations in parallel with mclapply -#' @param set_seed set a seed for reproducibility -#' @param seed_number seed number -#' @returns `icfObject` that contains the Interaction Changed differential gene -#' scores -#' @details Function to calculate if genes are differentially expressed in -#' cell types when they interact (approximated by physical proximity) with -#' other cell types. The results data.table in the `icfObject` contains -#' - at least - the following columns: -#' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } -#' \item{log2fc:}{ log2 fold-change between sel and other} -#' \item{diff:}{ spatial expression difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{nr_other:}{ number of other cells of selected target cell type} -#' \item{int_nr_other:}{ number of other cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} -#' } -#' @seealso \code{\link{findInteractionChangedFeats}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' findICF(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' @name print.icfObject +#' @title icfObject print method +#' @param x object to print +#' @param \dots additional params to pass (none implemented) +#' @keywords internal #' @export -findICF <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = "normalized", - selected_feats = NULL, - cluster_column, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - nr_permutations = 100, - exclude_selected_cells_from_test = TRUE, - do_parallel = TRUE, - set_seed = TRUE, - seed_number = 1234) { - findInteractionChangedFeats( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - expression_values = expression_values, - selected_feats = selected_feats, - cluster_column = cluster_column, - spatial_network_name = spatial_network_name, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - do_parallel = do_parallel, - set_seed = set_seed, - seed_number = seed_number +print.icfObject <- function(x, ...) { + cat("An object of class", class(x), "\n") + info <- list( + dimensions = sprintf("%d, %d (icfs, attributes)", + nrow(x$ICFscores), ncol(x$ICFscores)) ) + print_list(info, pre = " -") + cat("\n") + print_list(x$Giotto_info, pre = " -") + cat("\n") + print_list(x$test_info, pre = " -") } -#' @title findICG -#' @name findICG -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to interaction with other -#' cell types. -#' @inheritDotParams findICF -#' @seealso \code{\link{findICF}} -#' @returns cell-cell interaction changed features -#' @export -findICG <- function(...) { - .Deprecated(new = "findICF") - - findICF(...) -} - - - -#' @title findCPG -#' @name findCPG -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to proximity to other cell -#' types. -#' @inheritDotParams findICF -#' @returns cell-to-cell Interaction Changed Genes -#' @seealso \code{\link{findICF}} -#' @export -findCPG <- function(...) { - .Deprecated(new = "findICF") - - findICF(...) -} #' @title filterInteractionChangedFeats #' @name filterInteractionChangedFeats +#' @aliases filterICF #' @description Filter Interaction Changed Feature scores. #' @param icfObject ICF (interaction changed feature) score object #' @param min_cells minimum number of source cell type @@ -1452,7 +1314,27 @@ findCPG <- function(...) { #' @param min_zscore minimum z-score change #' @param zscores_column calculate z-scores over cell types or genes #' @param direction differential expression directions to keep -#' @returns icfObject that contains the filtered differential feature scores +#' @returns `icfObject` that contains the filtered differential feature scores +#' @md +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) +#' force(icf) +#' force(icf$ICFscores) +#' +#' icf_filter1 <- filterInteractionChangedFeats(icf, min_cells = 4) +#' force(icf_filter1) +#' force(icf_filter1$ICFscores) +#' +#' # filterICF is a simple alias with a shortened name +#' icf_filter2 <- filterICF(icf, min_cells = 4) +#' force(icf_filter2) +#' #' @export filterInteractionChangedFeats <- function(icfObject, min_cells = 4, @@ -1465,7 +1347,7 @@ filterInteractionChangedFeats <- function(icfObject, min_zscore = 2, zscores_column = c("cell_type", "feats"), direction = c("both", "up", "down")) { - # data.table variables + # NSE vars nr_select <- int_nr_select <- zscores <- log2fc <- sel <- other <- p.adj <- NULL @@ -1521,123 +1403,19 @@ filterInteractionChangedFeats <- function(icfObject, return(newobj) } - -#' @title filterInteractionChangedGenes -#' @name filterInteractionChangedGenes -#' @description Filter Interaction Changed Feature scores. -#' @inheritDotParams filterInteractionChangedFeats -#' @seealso \code{\link{filterInteractionChangedFeats}} -#' @returns filtered interaction changed feature scores -#' @export -filterInteractionChangedGenes <- function(...) { - .Deprecated(new = "filterInteractionChangedFeats") - - filterInteractionChangedFeats(...) -} - - -#' @title filterCellProximityGenes -#' @name filterCellProximityGenes -#' @description Filter Interaction Changed Feature scores. -#' @inheritDotParams filterInteractionChangedFeats -#' @seealso \code{\link{filterInteractionChangedFeats}} -#' @returns proximity genes +#' @rdname filterInteractionChangedFeats #' @export -filterCellProximityGenes <- function(...) { - .Deprecated(new = "filterInteractionChangedFeats") - - filterInteractionChangedFeats(...) -} +filterICF <- filterInteractionChangedFeats -#' @title filterICF -#' @name filterICF -#' @description Filter Interaction Changed Feature scores. -#' @param icfObject ICF (interaction changed feature) score object -#' @param min_cells minimum number of source cell type -#' @param min_cells_expr minimum expression level for source cell type -#' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr minimum expression level for interacting neighbor -#' cell type -#' @param min_fdr minimum adjusted p-value -#' @param min_spat_diff minimum absolute spatial expression difference -#' @param min_log2_fc minimum log2 fold-change -#' @param min_zscore minimum z-score change -#' @param zscores_column calculate z-scores over cell types or features -#' @param direction differential expression directions to keep -#' @returns icfObject that contains the filtered differential feature scores -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -#' -#' filterICF(g_icf) -#' @export -filterICF <- function(icfObject, - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down")) { - filterInteractionChangedFeats( - icfObject = icfObject, - min_cells = min_cells, - min_cells_expr = min_cells_expr, - min_int_cells = min_int_cells, - min_int_cells_expr = min_int_cells_expr, - min_fdr = min_fdr, - min_spat_diff = min_spat_diff, - min_log2_fc = min_log2_fc, - min_zscore = min_zscore, - zscores_column = zscores_column, - direction = direction - ) -} - - - -#' @title filterICG -#' @name filterICG -#' @description Filter Interaction Changed Gene scores. -#' @inheritDotParams filterICF -#' @seealso \code{\link{filterICF}} -#' @returns filtered interaction changed gene scores -#' @export -filterICG <- function(...) { - .Deprecated(new = "filterICF") - filterICF(...) -} - - -#' @title filterCPG -#' @name filterCPG -#' @description Filter Interaction Changed Gene scores. -#' @inheritDotParams filterICF -#' @seealso \code{\link{filterICF}} -#' @returns filtered interaction changed gene scores -#' @export -filterCPG <- function(...) { - .Deprecated(new = "filterICF") - - filterICF(...) -} - - - - -# * #### -# FTF feat-to-feat (pairs of ICF) #### +# * #### +# FTF feat-to-feat (pairs of ICF) #### #' @title Combine ICF scores per interaction #' @name .combineInteractionChangedFeatures_per_interaction @@ -1981,6 +1759,7 @@ filterCPG <- function(...) { #' @title combineInteractionChangedFeats #' @name combineInteractionChangedFeats +#' @aliases combineICF #' @description Combine ICF scores in a pairwise manner. #' @param icfObject ICF (interaction changed feat) score object #' @param selected_ints subset of selected cell-cell interactions (optional) @@ -1996,14 +1775,19 @@ filterCPG <- function(...) { #' @param min_log2_fc minimum absolute log2 fold-change #' @param do_parallel run calculations in parallel with mclapply #' @param verbose verbose -#' @returns combIcfObject that contains the filtered differential feature scores +#' @returns `combIcfObject` that contains the filtered differential feature +#' scores #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) #' -#' combineInteractionChangedFeats(g_icf) +#' cicf <- combineInteractionChangedFeats(g_icf) +#' force(cicf) +#' combineICF(g_icf) # this is a shortened alias #' @export combineInteractionChangedFeats <- function(icfObject, selected_ints = NULL, @@ -2017,7 +1801,7 @@ combineInteractionChangedFeats <- function(icfObject, min_log2_fc = 0.5, do_parallel = TRUE, verbose = TRUE) { - # data.table variables + # NSE vars unif_int <- feat1_feat2 <- feats_1 <- feats_2 <- comb_logfc <- log2fc_1 <- log2fc_2 <- direction <- NULL @@ -2090,145 +1874,62 @@ combineInteractionChangedFeats <- function(icfObject, ifelse(log2fc_1 < 0 & log2fc_2 < 0, "both_down", "mixed") )] - combIcfObject <- list( - combICFscores = final_results, - Giotto_info = list( - "values" = icfObject[["Giotto_info"]][["values"]], - "cluster" = icfObject[["Giotto_info"]][["cluster"]], - "spatial network" = icfObject[["Giotto_info"]][["spatial network"]] + combIcfObject <- structure( + .Data = list( + combICFscores = final_results, + Giotto_info = list( + "values" = icfObject[["Giotto_info"]][["values"]], + "cluster" = icfObject[["Giotto_info"]][["cluster"]], + "spatial network" = + icfObject[["Giotto_info"]][["spatial network"]] + ), + test_info = list( + "test" = icfObject[["test_info"]][["test"]], + "p.adj" = icfObject[["test_info"]][["p.adj"]], + "min cells" = icfObject[["test_info"]][["min cells"]], + "min interacting cells" = icfObject[["test_info"]][[ + "min interacting cells"]], + "exclude selected cells" = icfObject[["test_info"]][[ + "exclude selected cells"]], + "perm" = icfObject[["test_info"]][["perm"]] + ) ), - test_info = list( - "test" = icfObject[["test_info"]][["test"]], - "p.adj" = icfObject[["test_info"]][["p.adj"]], - "min cells" = icfObject[["test_info"]][["min cells"]], - "min interacting cells" = icfObject[["test_info"]][[ - "min interacting cells"]], - "exclude selected cells" = icfObject[["test_info"]][[ - "exclude selected cells"]], - "perm" = icfObject[["test_info"]][["perm"]] - ) + class = "combIcfObject" ) - class(combIcfObject) <- append(class(combIcfObject), "combIcfObject") return(combIcfObject) } - -#' @title combineInteractionChangedGenes -#' @name combineInteractionChangedGenes -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineInteractionChangedFeats -#' @returns ICF scores -#' @seealso \code{\link{combineInteractionChangedFeats}} -#' @export -combineInteractionChangedGenes <- function(...) { - .Deprecated(new = "combineInteractionChangedFeats") - - combineInteractionChangedFeats(...) -} - - -#' @title combineCellProximityGenes -#' @name combineCellProximityGenes -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineInteractionChangedFeats -#' @returns ICF scores -#' @seealso \code{\link{combineInteractionChangedFeats}} +#' @rdname combineInteractionChangedFeats #' @export -combineCellProximityGenes <- function(...) { - .Deprecated(new = "combineInteractionChangedFeats") - - combineInteractionChangedFeats(...) -} +combineICF <- combineInteractionChangedFeats -#' @title combineICF -#' @name combineICF -#' @description Combine ICF scores in a pairwise manner. -#' @param icfObject ICF (interaction changed feat) score object -#' @param selected_ints subset of selected cell-cell interactions (optional) -#' @param selected_feats subset of selected Feats (optional) -#' @param specific_feats_1 specific Featset combo -#' (need to position match specific_genes_2) -#' @param specific_feats_2 specific Featset combo -#' (need to position match specific_genes_1) -#' @param min_cells minimum number of target cell type -#' @param min_int_cells minimum number of interacting cell type -#' @param min_fdr minimum adjusted p-value -#' @param min_spat_diff minimum absolute spatial expression difference -#' @param min_log2_fc minimum absolute log2 fold-change -#' @param do_parallel run calculations in parallel with mclapply -#' @param verbose verbose -#' @returns icfObject that contains the filtered differential feats scores -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' combineICF(g_icf) +#' @name print.combIcfObject +#' @title combIcfObject print method +#' @param x object to print +#' @param \dots additional params to pass (none implemented) +#' @keywords internal #' @export -combineICF <- function(icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE) { - combineInteractionChangedFeats( - icfObject = icfObject, - selected_ints = selected_ints, - selected_feats = selected_feats, - specific_feats_1 = specific_feats_1, - specific_feats_2 = specific_feats_2, - min_cells = min_cells, - min_int_cells = min_int_cells, - min_fdr = min_fdr, - min_spat_diff = min_spat_diff, - min_log2_fc = min_log2_fc, - do_parallel = do_parallel, - verbose = verbose +print.combIcfObject <- function(x, ...) { + cat("An object of class", class(x), "\n") + info <- list( + dimensions = sprintf("%d, %d (icf pairs, attributes)", + nrow(x$combICFscores), ncol(x$combICFscores)) ) + print_list(info, pre = " -") + cat("\n") + print_list(x$Giotto_info, pre = " -") + cat("\n") + print_list(x$test_info, pre = " -") } -#' @title combineICG -#' @name combineICG -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineICF -#' @returns ICF scores -#' @seealso \code{\link{combineICF}} -#' @export -combineICG <- function(...) { - .Deprecated(new = "combineICF") - - combineICF(...) -} - -#' @title combineCPG -#' @name combineCPG -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineICF -#' @returns ICF scores -#' @seealso \code{\link{combineICF}} -#' @export -combineCPG <- function(...) { - .Deprecated(new = "combineICF") - - combineICF(...) -} - - - # * #### # cell communication #### #' @title average_feat_feat_expression_in_groups -#' @name average_feat_feat_expression_in_groups +#' @name .average_feat_feat_expression_in_groups #' @description calculate average expression per cluster #' @param gobject giotto object to use #' @param spat_unit spatial unit @@ -2238,7 +1939,7 @@ combineCPG <- function(...) { #' @param feat_set_2 second specific feat set from feat pairs #' @returns data.table with average expression scores for each cluster #' @keywords internal -average_feat_feat_expression_in_groups <- function(gobject, +.average_feat_feat_expression_in_groups <- function(gobject, spat_unit = NULL, feat_type = NULL, cluster_column = "cell_types", @@ -2296,7 +1997,8 @@ average_feat_feat_expression_in_groups <- function(gobject, lig_test <- data.table::as.data.table( reshape2::melt(ligand_match, measure.vars = all_ligand_cols)) lig_test[, ligand := rep(rownames(ligand_match), ncol(ligand_match))] - lig_test[, ligand := strsplit(ligand, "\\.")[[1]][1], by = 1:nrow(lig_test)] + lig_test[, ligand := strsplit(ligand, "\\.")[[1]][1], + by = seq_len(nrow(lig_test))] lig_test[, LR_comb := rep(LR_pairs, ncol(ligand_match))] setnames(lig_test, "value", "lig_expr") setnames(lig_test, "variable", "lig_cell_type") @@ -2306,7 +2008,7 @@ average_feat_feat_expression_in_groups <- function(gobject, receptor_match, measure.vars = all_receptor_cols)) rec_test[, receptor := rep(rownames(receptor_match), ncol(receptor_match))] rec_test[, receptor := strsplit( - receptor, "\\.")[[1]][1], by = 1:nrow(rec_test)] + receptor, "\\.")[[1]][1], by = seq_len(nrow(rec_test))] rec_test[, LR_comb := rep(LR_pairs, ncol(receptor_match))] setnames(rec_test, "value", "rec_expr") setnames(rec_test, "variable", "rec_cell_type") @@ -2355,8 +2057,13 @@ average_feat_feat_expression_in_groups <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") +#' res <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik" +#' ) +#' +#' force(res) #' @export exprCellCellcom <- function(gobject, feat_type = NULL, @@ -2408,7 +2115,7 @@ exprCellCellcom <- function(gobject, names(nr_cells) <- nr_cell_types$cluster_column - comScore <- average_feat_feat_expression_in_groups( + comScore <- .average_feat_feat_expression_in_groups( gobject = gobject, feat_type = feat_type, spat_unit = spat_unit, @@ -2436,60 +2143,67 @@ exprCellCellcom <- function(gobject, # not yet available - for (sim in 1:random_iter) { - if (verbose == TRUE) cat("simulation ", sim) + progressr::with_progress({ + pb <- progressr::progressor(steps = random_iter) + for (sim in seq_len(random_iter)) { - # create temporary giotto - tempGiotto <- subsetGiotto( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit - ) + # create temporary giotto + tempGiotto <- subsetGiotto( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit + ) - # randomize annoation - cell_types <- cell_metadata[[cluster_column]] - if (set_seed == TRUE) { - seed_number <- seed_number + sim - set.seed(seed = seed_number) - } - random_cell_types <- sample(x = cell_types, size = length(cell_types)) - tempGiotto <- addCellMetadata( - gobject = tempGiotto, - feat_type = feat_type, - spat_unit = spat_unit, - new_metadata = random_cell_types, - by_column = FALSE # on purpose since values are random - ) + # randomize annoation + cell_types <- cell_metadata[[cluster_column]] + if (set_seed == TRUE) { + seed_number <- seed_number + sim + set.seed(seed = seed_number) + } + random_cell_types <- sample( + x = cell_types, size = length(cell_types) + ) + tempGiotto <- addCellMetadata( + gobject = tempGiotto, + feat_type = feat_type, + spat_unit = spat_unit, + new_metadata = random_cell_types, + by_column = FALSE # on purpose since values are random + ) - # get random communication scores - randomScore <- average_feat_feat_expression_in_groups( - gobject = tempGiotto, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = "random_cell_types", - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2 - ) + # get random communication scores + randomScore <- .average_feat_feat_expression_in_groups( + gobject = tempGiotto, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = "random_cell_types", + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2 + ) + + # average random score + total_av <- total_av + randomScore[["LR_expr"]] - # average random score - total_av <- total_av + randomScore[["LR_expr"]] + # difference between observed and random + difference <- comScore[["LR_expr"]] - randomScore[["LR_expr"]] + + # calculate total difference + if (detailed == FALSE) { + total_sum <- total_sum + difference + } else { + total_sum[, sim] <- difference + } - # difference between observed and random - difference <- comScore[["LR_expr"]] - randomScore[["LR_expr"]] + # calculate p-values + difference[difference > 0] <- 1 + difference[difference < 0] <- -1 + total_bool <- total_bool + difference - # calculate total difference - if (detailed == FALSE) { - total_sum <- total_sum + difference - } else { - total_sum[, sim] <- difference + pb(sprintf("simulation %d", sim)) } + }) - # calculate p-values - difference[difference > 0] <- 1 - difference[difference < 0] <- -1 - total_bool <- total_bool + difference - } comScore[, rand_expr := total_av / random_iter] @@ -2582,7 +2296,7 @@ exprCellCellcom <- function(gobject, set.seed(seed = seed_number) } sub_sample_ids <- possible_metadata[get(cluster_column) == uniq_type][ - sample(x = 1:.N, size = length_random)][["cell_ID"]] + sample(x = seq_len(.N), size = length_random)][["cell_ID"]] sample_ids[[i]] <- sub_sample_ids } return(unlist(sample_ids)) @@ -2591,9 +2305,9 @@ exprCellCellcom <- function(gobject, -#' @title specificCellCellcommunicationScores -#' @name specificCellCellcommunicationScores -#' @description Specific Cell-Cell communication scores based on spatial +#' @title Spatial cell cell communication scoring +#' @name spatCellCellcom +#' @description Spatial Cell-Cell communication scores based on spatial #' expression of interacting cells #' @param gobject giotto object to use #' @param feat_type feature type @@ -2602,10 +2316,8 @@ exprCellCellcom <- function(gobject, #' interacting cells #' @param cluster_column cluster column with cell type information #' @param random_iter number of iterations -#' @param cell_type_1 first cell type -#' @param cell_type_2 second cell type -#' @param feat_set_1 first specific gene set from gene pairs -#' @param feat_set_2 second specific gene set from gene pairs +#' @param feat_set_1 first specific feature set from feature pairs +#' @param feat_set_2 second specific feature set from feature pairs #' @param gene_set_1 deprecated, use feat_set_1 #' @param gene_set_2 deprecated, use feat_set_2 #' @param log2FC_addendum addendum to add when calculating log2FC @@ -2615,50 +2327,73 @@ exprCellCellcom <- function(gobject, #' (random variance and z-score) #' @param adjust_method which method to adjust p-values #' @param adjust_target adjust multiple hypotheses at the cell or feature level +#' @param do_parallel run calculations in parallel with mclapply +#' @param cores number of cores to use if do_parallel = TRUE #' @param set_seed set a seed for reproducibility #' @param seed_number seed number #' @param verbose verbose -#' @returns Cell-Cell communication scores for feature pairs based on spatial +#' @returns Cell-Cell communication scores for feature pairs based on spatial #' interaction -#' @details Statistical framework to identify if pairs of features +#' @details Statistical framework to identify if pairs of genes #' (such as ligand-receptor combinations) #' are expressed at higher levels than expected based on a reshuffled null #' distribution of feature expression values in cells that are spatially in #' proximity to each other. -#' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expression of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression } -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } -#' } +#' * **LR_comb:** Pair of ligand and receptor +#' * **lig_cell_type:** cell type to assess expression level of ligand +#' * **lig_expr:** average expression of ligand in lig_cell_type +#' * **ligand:** ligand name +#' * **rec_cell_type:** cell type to assess expression level of receptor +#' * **rec_expr:** average expression of receptor in rec_cell_type +#' * **receptor:** receptor name +#' * **LR_expr:** combined average ligand and receptor expression +#' * **lig_nr:** total number of cells from lig_cell_type that spatially +#' interact with cells from rec_cell_type +#' * **rec_nr:** total number of cells from rec_cell_type that spatially +#' interact with cells from lig_cell_type +#' * **rand_expr:** average combined ligand and receptor expression from +#' random spatial permutations +#' * **av_diff:** average difference between LR_expr and rand_expr over all +#' random spatial permutations +#' * **sd_diff:** (optional) standard deviation of the difference between +#' LR_expr and rand_expr over all random spatial permutations +#' * **z_score:** (optional) z-score +#' * **log2fc:** log2 fold-change (LR_expr/rand_expr) +#' * **pvalue:** p-value +#' * **LR_cell_comb:** cell type pair combination +#' * **p.adj:** adjusted p-value +#' * **PI:** significance score: \eqn{log2fc * -log10(p.adj)} +#' +#' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' specificCellCellcommunicationScores(g, cluster_column = "leiden_clus") +#' +#' res1 <- spatCellCellcom( +#' gobject = g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik", +#' verbose = "a lot", +#' random_iter = 10 +#' ) +#' force(res1) +#' +#' res2 <- specificCellCellcommunicationScores(g, +#' cluster_column = "leiden_clus", +#' cell_type_1 = 1, +#' cell_type_2 = 2, +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik" +#' ) +#' +#' force(res2) #' @export -specificCellCellcommunicationScores <- function(gobject, +spatCellCellcom <- function(gobject, feat_type = NULL, spat_unit = NULL, spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 100, - cell_type_1 = "astrocyte", - cell_type_2 = "endothelial", + cluster_column = NULL, + random_iter = 1000, feat_set_1, feat_set_2, gene_set_1 = NULL, @@ -2671,9 +2406,13 @@ specificCellCellcommunicationScores <- function(gobject, "BY", "none" ), adjust_target = c("feats", "cells"), - set_seed = FALSE, + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, seed_number = 1234, - verbose = TRUE) { + verbose = c("a little", "a lot", "none")) { + verbose <- match.arg(verbose, choices = c("a little", "a lot", "none")) + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2685,6 +2424,19 @@ specificCellCellcommunicationScores <- function(gobject, feat_type = feat_type ) + ## check if spatial network exists ## + spat_networks <- list_spatial_networks_names(gobject, + spat_unit = spat_unit + ) + + if (!spatial_network_name %in% spat_networks) { + stop( + spatial_network_name, " is not an existing spatial network \n", + "use showNetworks() to see the available networks \n", + "or create a new spatial network with createSpatialNetwork()" + ) + } + ## deprecated arguments if (!is.null(gene_set_1)) { feat_set_1 <- gene_set_1 @@ -2695,32 +2447,198 @@ specificCellCellcommunicationScores <- function(gobject, warning("gene_set_2 is deprecated, use feat_set_2 in the future") } + if (is.null(cluster_column)) { + stop("Name of column in cell metadata with cell type info is needed") + } - # data.table variables - from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- - rec_nr <- rand_expr <- NULL - av_diff <- log2fc <- LR_expr <- pvalue <- LR_cell_comb <- p.adj <- - LR_comb <- PI <- NULL - sd_diff <- z_score <- NULL - - # get parameters - adjust_method <- match.arg(adjust_method, choices = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - )) - adjust_target <- match.arg(adjust_target, choices = c("feats", "cells")) - # metadata - cell_metadata <- pDataDT( - gobject = gobject, + cell_metadata <- pDataDT(gobject, feat_type = feat_type, spat_unit = spat_unit ) - # get annotated spatial network - annot_network <- annotateSpatialNetwork(gobject, - feat_type = feat_type, - spat_unit = spat_unit, + ## get all combinations between cell types + all_uniq_values <- unique(cell_metadata[[cluster_column]]) + same_DT <- data.table::data.table( + V1 = all_uniq_values, V2 = all_uniq_values) + combn_DT <- data.table::as.data.table(t(combn(all_uniq_values, m = 2))) + combn_DT <- rbind(same_DT, combn_DT) + + ## parallel option ## + if (isTRUE(do_parallel)) { + savelist <- lapply_flex( + X = seq_len(nrow(combn_DT)), future.seed = TRUE, + cores = cores, fun = function(row) { + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + specific_scores <- specificCellCellcommunicationScores( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = cluster_column, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2, + spatial_network_name = spatial_network_name, + log2FC_addendum = log2FC_addendum, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number, + verbose = verbose %in% c("a lot") + ) + }) + } else { + ## for loop over all combinations ## + savelist <- list() + countdown <- nrow(combn_DT) + + for (row in seq_len(nrow(combn_DT))) { + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + if (verbose == "a little" || verbose == "a lot") + cat(sprintf("[PROCESS nr %d : %d and %d] ", + countdown, cell_type_1, cell_type_2)) + + if (verbose %in% c("a little", "none")) { + specific_verbose <- FALSE + } else { + specific_verbose <- TRUE + } + + specific_scores <- specificCellCellcommunicationScores( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = cluster_column, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2, + spatial_network_name = spatial_network_name, + log2FC_addendum = log2FC_addendum, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number, + verbose = specific_verbose + ) + savelist[[row]] <- specific_scores + countdown <- countdown - 1 + } + } + + finalDT <- do.call("rbind", savelist) + + # data.table variables + LR_comb <- LR_expr <- NULL + + data.table::setorder(finalDT, LR_comb, -LR_expr) + + return(finalDT) +} + + + + + +#' @rdname spatCellCellcom +#' @param cell_type_1 character. First cell type +#' @param cell_type_2 character. Second cell type +#' @export +specificCellCellcommunicationScores <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = NULL, + random_iter = 100, + cell_type_1 = NULL, + cell_type_2 = NULL, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = TRUE +) { + + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + ## deprecated arguments + if (!is.null(gene_set_1)) { + feat_set_1 <- gene_set_1 + warning("gene_set_1 is deprecated, use feat_set_1 in the future") + } + if (!is.null(gene_set_2)) { + feat_set_2 <- gene_set_2 + warning("gene_set_2 is deprecated, use feat_set_2 in the future") + } + + if (is.null(cluster_column)) { + stop("Name of column in cell metadata with cell type info is needed") + } + + if (is.null(cell_type_1) || is.null(cell_type_2)) { + stop(sprintf( + "`%s` and `%s` in `%s` must be given", + "cell_type_1", "cell_type_2", "cluster_column") + ) + } + + + # data.table variables + from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- + rec_nr <- rand_expr <- NULL + av_diff <- log2fc <- LR_expr <- pvalue <- LR_cell_comb <- p.adj <- + LR_comb <- PI <- NULL + sd_diff <- z_score <- NULL + + # get parameters + adjust_method <- match.arg(adjust_method, choices = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + )) + adjust_target <- match.arg(adjust_target, choices = c("feats", "cells")) + + # metadata + cell_metadata <- pDataDT( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit + ) + + # get annotated spatial network + annot_network <- annotateSpatialNetwork(gobject, + feat_type = feat_type, + spat_unit = spat_unit, spatial_network_name = spatial_network_name, cluster_column = cluster_column ) @@ -2747,8 +2665,8 @@ specificCellCellcommunicationScores <- function(gobject, # get information about number of cells temp_meta <- pDataDT(subsetGiotto, - feat_type = feat_type, - spat_unit = spat_unit + feat_type = feat_type, + spat_unit = spat_unit ) nr_cell_types <- temp_meta[cell_ID %in% subset_ids][ , .N, by = c(cluster_column)] @@ -2756,7 +2674,7 @@ specificCellCellcommunicationScores <- function(gobject, names(nr_cells) <- nr_cell_types$cell_types # get average communication scores - comScore <- average_feat_feat_expression_in_groups( + comScore <- .average_feat_feat_expression_in_groups( gobject = subsetGiotto, feat_type = feat_type, spat_unit = spat_unit, @@ -2765,8 +2683,8 @@ specificCellCellcommunicationScores <- function(gobject, feat_set_2 = feat_set_2 ) comScore <- comScore[(lig_cell_type == cell_type_1 & - rec_cell_type == cell_type_2) | - (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] + rec_cell_type == cell_type_2) | + (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] comScore[, lig_nr := nr_cells[lig_cell_type]] comScore[, rec_nr := nr_cells[rec_cell_type]] @@ -2786,11 +2704,13 @@ specificCellCellcommunicationScores <- function(gobject, subset_metadata <- cell_metadata[cell_ID %in% subset_ids] needed_cell_types <- subset_metadata[[cluster_column]] - + if (verbose) cat("simulations:") ## simulations ## - for (sim in 1:random_iter) { - if (verbose == TRUE) cat("simulation ", sim) + for (sim in seq_len(random_iter)) { + if (verbose) { + cat(sprintf(" %s ", sim)) + } # get random ids and subset if (set_seed == TRUE) { @@ -2814,7 +2734,7 @@ specificCellCellcommunicationScores <- function(gobject, ) # get random communication scores - randomScore <- average_feat_feat_expression_in_groups( + randomScore <- .average_feat_feat_expression_in_groups( gobject = tempGiotto, feat_type = feat_type, spat_unit = spat_unit, @@ -2824,7 +2744,7 @@ specificCellCellcommunicationScores <- function(gobject, ) randomScore <- randomScore[(lig_cell_type == cell_type_1 & rec_cell_type == cell_type_2) | - (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] + (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] @@ -2879,267 +2799,63 @@ specificCellCellcommunicationScores <- function(gobject, # get minimum adjusted p.value that is not zero all_p.adj <- comScore[["p.adj"]] - lowest_p.adj <- min(all_p.adj[all_p.adj != 0]) + nonzero_p.adj <- all_p.adj[all_p.adj != 0] + if (length(nonzero_p.adj) == 0L) { + warning( + call. = FALSE, + "no adjusted p.values that are not zero; returning Inf" + ) + if (verbose) cat("<- Inf returned") + lowest_p.adj <- Inf + } else { + lowest_p.adj <- min(nonzero_p.adj) + } + comScore[, PI := ifelse(p.adj == 0, log2fc * ( -log10(lowest_p.adj)), log2fc * (-log10(p.adj)))] + if (verbose) cat("\n") + return(comScore) } } -#' @title spatCellCellcom -#' @name spatCellCellcom -#' @description Spatial Cell-Cell communication scores based on spatial -#' expression of interacting cells -#' @param gobject giotto object to use -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param spatial_network_name spatial network to use for identifying -#' interacting cells -#' @param cluster_column cluster column with cell type information -#' @param random_iter number of iterations -#' @param feat_set_1 first specific feature set from feature pairs -#' @param feat_set_2 second specific feature set from feature pairs -#' @param gene_set_1 deprecated, use feat_set_1 -#' @param gene_set_2 deprecated, use feat_set_2 -#' @param log2FC_addendum addendum to add when calculating log2FC -#' @param min_observations minimum number of interactions needed to be -#' considered -#' @param detailed provide more detailed information -#' (random variance and z-score) -#' @param adjust_method which method to adjust p-values -#' @param adjust_target adjust multiple hypotheses at the cell or feature level -#' @param do_parallel run calculations in parallel with mclapply -#' @param cores number of cores to use if do_parallel = TRUE -#' @param set_seed set a seed for reproducibility -#' @param seed_number seed number -#' @param verbose verbose -#' @returns Cell-Cell communication scores for feature pairs based on spatial -#' interaction -#' @details Statistical framework to identify if pairs of genes -#' (such as ligand-receptor combinations) -#' are expressed at higher levels than expected based on a reshuffled null -#' distribution of feature expression values in cells that are spatially in -#' proximity to each other. -#' * **LR_comb:** Pair of ligand and receptor -#' * **lig_cell_type:** cell type to assess expression level of ligand -#' * **lig_expr:** average expression of ligand in lig_cell_type -#' * **ligand:** ligand name -#' * **rec_cell_type:** cell type to assess expression level of receptor -#' * **rec_expr:** average expression of receptor in rec_cell_type -#' * **receptor:** receptor name -#' * **LR_expr:** combined average ligand and receptor expression -#' * **lig_nr:** total number of cells from lig_cell_type that spatially -#' interact with cells from rec_cell_type -#' * **rec_nr:** total number of cells from rec_cell_type that spatially -#' interact with cells from lig_cell_type -#' * **rand_expr:** average combined ligand and receptor expression from -#' random spatial permutations -#' * **av_diff:** average difference between LR_expr and rand_expr over all -#' random spatial permutations -#' * **sd_diff:** (optional) standard deviation of the difference between -#' LR_expr and rand_expr over all random spatial permutations -#' * **z_score:** (optional) z-score -#' * **log2fc:** log2 fold-change (LR_expr/rand_expr) -#' * **pvalue:** p-value -#' * **LR_cell_comb:** cell type pair combination -#' * **p.adj:** adjusted p-value -#' * **PI:** significance score: log2fc * -log10(p.adj) -#' @md + + +#' @title Combine cell cell communication tables +#' @name combCCcom +#' @description Combine spatial and expression based cell-cell communication +#' data.tables +#' @param spatialCC spatial cell-cell communication scores +#' @param exprCC expression cell-cell communication scores +#' @param min_lig_nr minimum number of ligand cells +#' @param min_rec_nr minimum number of receptor cells +#' @param min_padj_value minimum adjusted p-value +#' @param min_log2fc minimum log2 fold-change +#' @param min_av_diff minimum average expression difference +#' @param detailed detailed option used with \code{\link{spatCellCellcom}} +#' (default = FALSE) +#' @returns combined data.table with spatial and expression communication data #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' spatCellCellcom( -#' gobject = g, +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik" +#' ) +#' +#' spatialCC <- spatCellCellcom(gobject = g, #' cluster_column = "leiden_clus", #' feat_set_1 = "Gm19935", #' feat_set_2 = "9630013A20Rik", #' verbose = "a lot", #' random_iter = 10 #' ) -#' @export -spatCellCellcom <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 1000, - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { - verbose <- match.arg(verbose, choices = c("a little", "a lot", "none")) - - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - ## check if spatial network exists ## - spat_networks <- list_spatial_networks_names(gobject, - spat_unit = spat_unit - ) - - if (!spatial_network_name %in% spat_networks) { - stop( - spatial_network_name, " is not an existing spatial network \n", - "use showNetworks() to see the available networks \n", - "or create a new spatial network with createSpatialNetwork()" - ) - } - - ## deprecated arguments - if (!is.null(gene_set_1)) { - feat_set_1 <- gene_set_1 - warning("gene_set_1 is deprecated, use feat_set_1 in the future") - } - if (!is.null(gene_set_2)) { - feat_set_2 <- gene_set_2 - warning("gene_set_2 is deprecated, use feat_set_2 in the future") - } - - - cell_metadata <- pDataDT(gobject, - feat_type = feat_type, - spat_unit = spat_unit - ) - - ## get all combinations between cell types - all_uniq_values <- unique(cell_metadata[[cluster_column]]) - same_DT <- data.table::data.table( - V1 = all_uniq_values, V2 = all_uniq_values) - combn_DT <- data.table::as.data.table(t(combn(all_uniq_values, m = 2))) - combn_DT <- rbind(same_DT, combn_DT) - - ## parallel option ## - if (do_parallel == TRUE) { - savelist <- lapply_flex( - X = 1:nrow(combn_DT), future.seed = TRUE, - cores = cores, fun = function(row) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - specific_scores <- specificCellCellcommunicationScores( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = cluster_column, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2, - spatial_network_name = spatial_network_name, - log2FC_addendum = log2FC_addendum, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number - ) - }) - } else { - ## for loop over all combinations ## - savelist <- list() - countdown <- nrow(combn_DT) - - for (row in 1:nrow(combn_DT)) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - if (verbose == "a little" | verbose == "a lot") - cat("PROCESS nr ", countdown, ": ", cell_type_1, " and ", - cell_type_2) - - if (verbose %in% c("a little", "none")) { - specific_verbose <- FALSE - } else { - specific_verbose <- TRUE - } - - specific_scores <- specificCellCellcommunicationScores( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = cluster_column, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2, - spatial_network_name = spatial_network_name, - log2FC_addendum = log2FC_addendum, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number, - verbose = specific_verbose - ) - savelist[[row]] <- specific_scores - countdown <- countdown - 1 - } - } - - finalDT <- do.call("rbind", savelist) - - # data.table variables - LR_comb <- LR_expr <- NULL - - data.table::setorder(finalDT, LR_comb, -LR_expr) - - return(finalDT) -} - - - - -#' @title combCCcom -#' @name combCCcom -#' @description Combine spatial and expression based cell-cell communication -#' data.tables -#' @param spatialCC spatial cell-cell communication scores -#' @param exprCC expression cell-cell communication scores -#' @param min_lig_nr minimum number of ligand cells -#' @param min_rec_nr minimum number of receptor cells -#' @param min_padj_value minimum adjusted p-value -#' @param min_log2fc minimum log2 fold-change -#' @param min_av_diff minimum average expression difference -#' @param detailed detailed option used with \code{\link{spatCellCellcom}} -#' (default = FALSE) -#' @returns combined data.table with spatial and expression communication data -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) -#' -#' combCCcom(spatialCC = spatialCC, exprCC = exprCC) +#' +#' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) +#' force(combCC) #' @export combCCcom <- function(spatialCC, exprCC, @@ -3201,3 +2917,190 @@ combCCcom <- function(spatialCC, return(merge_DT) } + + + + + + + +# DEPRECATED #### + +#' @title deprecated +#' @name findInteractionChangedGenes +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to interactions with other +#' cell types. +#' @param ... params to pass to \code{findInteractionChangedFeats} +#' @seealso \code{\link{findInteractionChangedFeats}} +#' @returns interaction changed genes +#' @export +findInteractionChangedGenes <- function(...) { + .Deprecated(new = "findInteractionChangedFeats") + + findInteractionChangedFeats(...) +} + + + +#' @title deprecated +#' @name findCellProximityGenes +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to proximity to other cell +#' types. +#' @inheritDotParams findInteractionChangedFeats +#' @seealso \code{\link{findInteractionChangedFeats}} +#' @returns cell-cell interaction changed genes +#' @export +findCellProximityGenes <- function(...) { + .Deprecated(new = "findInteractionChangedFeats") + + findInteractionChangedFeats(...) +} + + + +#' @title deprecated +#' @name findICG +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to interaction with other +#' cell types. +#' @inheritDotParams findICF +#' @seealso \code{\link{findICF}} +#' @returns cell-cell interaction changed features +#' @export +findICG <- function(...) { + .Deprecated(new = "findICF") + + findICF(...) +} + + + +#' @title deprecated +#' @name findCPG +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to proximity to other cell +#' types. +#' @inheritDotParams findICF +#' @returns cell-to-cell Interaction Changed Genes +#' @seealso \code{\link{findICF}} +#' @export +findCPG <- function(...) { + .Deprecated(new = "findICF") + + findICF(...) +} + +#' @title deprecated +#' @name filterInteractionChangedGenes +#' @description Filter Interaction Changed Feature scores. +#' @inheritDotParams filterInteractionChangedFeats +#' @seealso \code{\link{filterInteractionChangedFeats}} +#' @returns filtered interaction changed feature scores +#' @export +filterInteractionChangedGenes <- function(...) { + .Deprecated(new = "filterInteractionChangedFeats") + + filterInteractionChangedFeats(...) +} + + +#' @title deprecated +#' @name filterCellProximityGenes +#' @description Filter Interaction Changed Feature scores. +#' @inheritDotParams filterInteractionChangedFeats +#' @seealso \code{\link{filterInteractionChangedFeats}} +#' @returns proximity genes +#' @export +filterCellProximityGenes <- function(...) { + .Deprecated(new = "filterInteractionChangedFeats") + + filterInteractionChangedFeats(...) +} + + + + + +#' @title deprecated +#' @name filterICG +#' @description Filter Interaction Changed Gene scores. +#' @inheritDotParams filterICF +#' @seealso \code{\link{filterICF}} +#' @returns filtered interaction changed gene scores +#' @export +filterICG <- function(...) { + .Deprecated(new = "filterICF") + + filterICF(...) +} + + + +#' @title deprecated +#' @name filterCPG +#' @description Filter Interaction Changed Gene scores. +#' @inheritDotParams filterICF +#' @seealso \code{\link{filterICF}} +#' @returns filtered interaction changed gene scores +#' @export +filterCPG <- function(...) { + .Deprecated(new = "filterICF") + + filterICF(...) +} + + +#' @title deprecated +#' @name combineInteractionChangedGenes +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineInteractionChangedFeats +#' @returns ICF scores +#' @seealso \code{\link{combineInteractionChangedFeats}} +#' @export +combineInteractionChangedGenes <- function(...) { + .Deprecated(new = "combineInteractionChangedFeats") + + combineInteractionChangedFeats(...) +} + + +#' @title deprecated +#' @name combineCellProximityGenes +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineInteractionChangedFeats +#' @returns ICF scores +#' @seealso \code{\link{combineInteractionChangedFeats}} +#' @export +combineCellProximityGenes <- function(...) { + .Deprecated(new = "combineInteractionChangedFeats") + + combineInteractionChangedFeats(...) +} + +#' @title deprecated +#' @name combineICG +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineICF +#' @returns ICF scores +#' @seealso \code{\link{combineICF}} +#' @export +combineICG <- function(...) { + .Deprecated(new = "combineICF") + + combineICF(...) +} + +#' @title deprecated +#' @name combineCPG +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineICF +#' @returns ICF scores +#' @seealso \code{\link{combineICF}} +#' @export +combineCPG <- function(...) { + .Deprecated(new = "combineICF") + + combineICF(...) +} diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index 8360f8da0..ab8ed82d4 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -8,7 +8,7 @@ #' @name cell_proximity_spots #' @description Compute cell-cell interactions observed value for internal and #' external spots -#' @param dwls_values data.table of cell type enrichment in each spot and +#' @param dwls_values data.table of cell type enrichment in each spot and #' multiply by cell number in each spot #' @returns List of cell proximity observed value in data.table format. Columns: #' unified_int, type_int, V1, external, internal. @@ -16,12 +16,13 @@ NULL -#' @describeIn cell_proximity_spots Compute cell-cell interactions observed +#' @describeIn cell_proximity_spots Compute cell-cell interactions observed #' value inner each spot #' @param cell_IDs cell_IDs #' @keywords internal -.cell_proximity_spots_internal <- function(cell_IDs, - dwls_values) { +.cell_proximity_spots_internal <- function( + cell_IDs, + dwls_values) { # data.table variables value <- unified_int <- Var1 <- Var2 <- internal <- NULL @@ -41,7 +42,8 @@ NULL unified_int_same <- names(same_ct) unified_int_same <- paste0(unified_int_same, "--", unified_int_same) same_ct <- data.table::data.table( - "unified_int" = unified_int_same, "internal" = same_ct) + "unified_int" = unified_int_same, "internal" = same_ct + ) } # calculate proximity of different cell type (A==B) @@ -55,13 +57,17 @@ NULL diff_ct <- data.table::as.data.table(reshape2::melt(diff_ct)) diff_ct <- diff_ct[value != "NA"] diff_ct[, c("Var1", "Var2") := lapply( - .SD, as.character), .SDcols = c("Var1", "Var2")] + .SD, as.character + ), .SDcols = c("Var1", "Var2")] diff_ct[, unified_int := ifelse( - Var1 < Var2, paste0(Var1, "--", Var2), - paste0(Var2, "--", Var1))] + Var1 < Var2, paste0(Var1, "--", Var2), + paste0(Var2, "--", Var1) + )] diff_ct <- diff_ct[, c("unified_int", "value")] data.table::setnames( - diff_ct, old = c("value"), new = c("internal")) + diff_ct, + old = c("value"), new = c("internal") + ) } # merge spot proximity to proximity data.table @@ -76,25 +82,27 @@ NULL } -#' @describeIn cell_proximity_spots Compute cell-cell interactions observed +#' @describeIn cell_proximity_spots Compute cell-cell interactions observed #' value for interacted spots #' @param pairs data.table of paired spots. Format: cell_ID1, cell_ID2, N #' @keywords internal -.cell_proximity_spots_external <- function( - pairs, - dwls_values) { +.cell_proximity_spots_external <- function(pairs, + dwls_values) { cell_IDs <- unique(c(pairs$from, pairs$to)) pairs <- pairs[, .N, by = c("from", "to")] # add internal pairs to make full matrix pairs_spots <- data.table::data.table(from = cell_IDs, to = cell_IDs, N = 0) pairs_balance <- data.table::data.table( - from = pairs$to, to = pairs$from, N = pairs$N) + from = pairs$to, to = pairs$from, N = pairs$N + ) pairs_for_mat <- rbind(pairs_spots, pairs, pairs_balance) pairs_for_mat <- pairs_for_mat[, .N, by = c("from", "to")] # make square matrix of interaction between spots pairs_mat <- reshape2::acast( - pairs_for_mat, from ~ to, value.var = "N", fill = 0) + pairs_for_mat, from ~ to, + value.var = "N", fill = 0 + ) pairs_mat <- pairs_mat[cell_IDs, cell_IDs] # calculate cell-type/cell-type interactions @@ -133,9 +141,10 @@ NULL #' @param pairs_external data.table of paired spots. Format: cell_ID1, cell_ID2, #' N. Passes to `.cell_proximity_spots_external` `pairs` param #' @keywords internal -.cell_proximity_spots <- function(cell_IDs, - pairs_external, - dwls_values) { +.cell_proximity_spots <- function( + cell_IDs, + pairs_external, + dwls_values) { # data.table variables V1 <- internal <- external <- s1 <- s2 <- unified_int <- type_int <- NULL @@ -156,7 +165,9 @@ NULL if (length(cell_IDs) > 0) { proximity_dt <- merge( - proximity_ex, proximity_in, by = "unified_int", all = TRUE) + proximity_ex, proximity_in, + by = "unified_int", all = TRUE + ) } else { proximity_dt <- proximity_ex[, "internal" := 0] } @@ -164,19 +175,22 @@ NULL proximity_dt[, V1 := internal + external] proximity_dt[, s1 := strsplit(as.character( - unified_int), split = "--")[[1]][1], by = 1:nrow(proximity_dt)] + unified_int + ), split = "--")[[1]][1], by = seq_len(nrow(proximity_dt))] proximity_dt[, s2 := strsplit(as.character( - unified_int), split = "--")[[1]][2], by = 1:nrow(proximity_dt)] + unified_int + ), split = "--")[[1]][2], by = seq_len(nrow(proximity_dt))] proximity_dt[, type_int := ifelse(s1 == s2, "homo", "hetero")] proximity_dt <- proximity_dt[ - , c("unified_int", "type_int", "V1", "external", "internal")] + , c("unified_int", "type_int", "V1", "external", "internal") + ] return(proximity_dt) } #' @title cellProximityEnrichmentSpots #' @name cellProximityEnrichmentSpots -#' @description Compute cell-cell interaction enrichment for spots +#' @description Compute cell-cell interaction enrichment for spots #' (observed vs expected) #' #' @param gobject giotto object @@ -185,55 +199,58 @@ NULL #' @param spatial_network_name name of spatial network to use #' @param cluster_column name of column to use for clusters #' @param cells_in_spot cell number in each spot -#' @param number_of_simulations number of simulations to create expected +#' @param number_of_simulations number of simulations to create expected #' observations -#' @param adjust_method method to adjust p.values +#' @param adjust_method method to adjust p.values #' (e.g. "none", "fdr", "bonferroni","BH","holm", "hochberg", "hommel","BY") #' @param set_seed use of seed. Default = TRUE #' @param seed_number seed number to use. Default = 1234 #' @param verbose be verbose #' -#' @returns List of cell Proximity scores (CPscores) in data.table format. +#' @returns List of cell Proximity scores (CPscores) in data.table format. #' The first -#' data.table (raw_sim_table) shows the raw observations of both the original -#' and simulated networks. The second data.table (enrichm_res) shows the +#' data.table (raw_sim_table) shows the raw observations of both the original +#' and simulated networks. The second data.table (enrichm_res) shows the #' enrichment results. -#' @details Spatial proximity enrichment or depletion between pairs of cell +#' @details Spatial proximity enrichment or depletion between pairs of cell #' types is calculated by calculating the observed over the expected frequency -#' of cell-cell proximity interactions. The expected frequency is the average -#' frequency calculated from a number of spatial network simulations. Each -#' individual simulation is obtained by reshuffling the cell type labels of +#' of cell-cell proximity interactions. The expected frequency is the average +#' frequency calculated from a number of spatial network simulations. Each +#' individual simulation is obtained by reshuffling the cell type labels of #' each node (spot) in the spatial network. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) -#' +#' #' cellProximityEnrichmentSpots(gobject = g) #' @export -cellProximityEnrichmentSpots <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "spatial_network", - cluster_column = "cell_ID", - cells_in_spot = 1, - number_of_simulations = 100, - adjust_method = c( - "none", "fdr", "bonferroni", "BH", - "holm", "hochberg", "hommel", - "BY" - ), - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +cellProximityEnrichmentSpots <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "spatial_network", + cluster_column = "cell_ID", + cells_in_spot = 1, + number_of_simulations = 100, + adjust_method = c( + "none", "fdr", "bonferroni", "BH", + "holm", "hochberg", "hommel", + "BY" + ), + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # p.adj test sel_adjust_method <- match.arg(adjust_method, choices = c( "none", "fdr", "bonferroni", "BH", @@ -250,11 +267,12 @@ cellProximityEnrichmentSpots <- function(gobject, ) # data.table variables - orig <- from <- to <- unified_int <- unified_cells <- type_int <- N <- + orig <- from <- to <- unified_int <- unified_cells <- type_int <- N <- V1 <- original <- enrichm <- simulations <- NULL spatial_network_annot <- dt_sort_combine_two_columns( - spatial_network_annot, "to", "from", "unified_cells") + spatial_network_annot, "to", "from", "unified_cells" + ) spatial_network_annot <- spatial_network_annot[!duplicated(unified_cells)] # exact spatial_enrichment matrix @@ -300,7 +318,7 @@ cellProximityEnrichmentSpots <- function(gobject, # method for get simulation cell-type/cell-type interaction for each round data.table::setnames(sample_dt, old = c("s1", "s2"), new = c("from", "to")) table_sim_results <- NULL - for (sim in 1:number_of_simulations) { + for (sim in seq_len(number_of_simulations)) { r <- paste0("sim", sim) sim_pairs <- sample_dt[round == r, c("from", "to")] @@ -325,21 +343,26 @@ cellProximityEnrichmentSpots <- function(gobject, # add missing combinations from original or simulations # probably not needed anymore all_sim_ints <- as.character(unique(table_results[ - orig == "simulations"]$unified_int)) + orig == "simulations" + ]$unified_int)) all_orig_ints <- as.character(unique(table_results[ - orig == "original"]$unified_int)) + orig == "original" + ]$unified_int)) missing_in_orig <- all_sim_ints[!all_sim_ints %in% all_orig_ints] missing_in_sim <- all_orig_ints[!all_orig_ints %in% all_sim_ints] create_missing_for_orig <- table_results[unified_int %in% missing_in_orig] create_missing_for_orig <- unique(create_missing_for_orig[ - , c("orig", "V1") := list("original", 0)]) + , c("orig", "V1") := list("original", 0) + ]) create_missing_for_sim <- table_results[unified_int %in% missing_in_sim] create_missing_for_sim <- unique(create_missing_for_sim[ - , c("orig", "V1") := list("simulations", 0)]) + , c("orig", "V1") := list("simulations", 0) + ]) table_results <- do.call( - "rbind", - list(table_results, create_missing_for_orig, create_missing_for_sim)) + "rbind", + list(table_results, create_missing_for_orig, create_missing_for_sim) + ) ## p-values if (verbose) message("3/5 Calculating p-values") @@ -358,40 +381,46 @@ cellProximityEnrichmentSpots <- function(gobject, length_simulations <- length(sim_values) if (length_simulations != number_of_simulations) { - additional_length_needed <- number_of_simulations - + additional_length_needed <- number_of_simulations - length_simulations sim_values <- c(sim_values, rep(0, additional_length_needed)) } - p_orig_higher <- 1 - (sum((orig_value + 1) > (sim_values + 1)) / - number_of_simulations) - p_orig_lower <- 1 - (sum((orig_value + 1) < (sim_values + 1)) / - number_of_simulations) + p_orig_higher <- 1 - (sum((orig_value + 1) > (sim_values + 1)) / + number_of_simulations) + p_orig_lower <- 1 - (sum((orig_value + 1) < (sim_values + 1)) / + number_of_simulations) combo_list[[int_combo]] <- this_combo p_high[[int_combo]] <- p_orig_higher p_low[[int_combo]] <- p_orig_lower } res_pvalue_DT <- data.table::data.table( - unified_int = as.vector(combo_list), - p_higher_orig = p_high, - p_lower_orig = p_low) + unified_int = as.vector(combo_list), + p_higher_orig = p_high, + p_lower_orig = p_low + ) # depletion or enrichment in barplot format if (verbose) message("4/5 Depletion or enrichment in barplot format") table_mean_results <- table_results[ - , .(mean(V1)), by = c("orig", "unified_int", "type_int")] + , .(mean(V1)), + by = c("orig", "unified_int", "type_int") + ] table_mean_results_dc <- data.table::dcast.data.table( - data = table_mean_results, - formula = type_int + unified_int ~ orig, value.var = "V1") + data = table_mean_results, + formula = type_int + unified_int ~ orig, value.var = "V1" + ) table_mean_results_dc[, original := ifelse(is.na(original), 0, original)] table_mean_results_dc[, enrichm := log2((original + 1) / (simulations + 1))] table_mean_results_dc <- merge( - table_mean_results_dc, res_pvalue_DT, by = "unified_int") + table_mean_results_dc, res_pvalue_DT, + by = "unified_int" + ) data.table::setorder(table_mean_results_dc, enrichm) table_mean_results_dc[, unified_int := factor(unified_int, unified_int)] @@ -400,13 +429,17 @@ cellProximityEnrichmentSpots <- function(gobject, if (verbose) message("5/5 Calculating adjust p-values for mht") # data.table variables - p.adj_higher <- p.adj_lower <- p_lower_orig <- p_higher_orig <- + p.adj_higher <- p.adj_lower <- p_lower_orig <- p_higher_orig <- PI_value <- int_ranking <- NULL table_mean_results_dc[, p.adj_higher := stats::p.adjust( - p_higher_orig, method = sel_adjust_method)] + p_higher_orig, + method = sel_adjust_method + )] table_mean_results_dc[, p.adj_lower := stats::p.adjust( - p_lower_orig, method = sel_adjust_method)] + p_lower_orig, + method = sel_adjust_method + )] table_mean_results_dc[, PI_value := ifelse(p.adj_higher <= p.adj_lower, @@ -417,10 +450,11 @@ cellProximityEnrichmentSpots <- function(gobject, # order table_mean_results_dc <- table_mean_results_dc[order(-PI_value)] - table_mean_results_dc[, int_ranking := 1:.N] + table_mean_results_dc[, int_ranking := seq_len(.N)] return(list( - raw_sim_table = table_results, enrichm_res = table_mean_results_dc)) + raw_sim_table = table_results, enrichm_res = table_mean_results_dc + )) } @@ -432,7 +466,7 @@ cellProximityEnrichmentSpots <- function(gobject, #' @title featExpDWLS #' @name featExpDWLS -#' @description Compute predicted feature expression value by spatialDWSL +#' @description Compute predicted feature expression value by spatialDWSL #' results and average feature expression for cell type #' #' @param gobject giotto object @@ -442,10 +476,11 @@ cellProximityEnrichmentSpots <- function(gobject, #' #' @returns matrix #' @export -featExpDWLS <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - ave_celltype_exp) { +featExpDWLS <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + ave_celltype_exp) { # exact spatial_enrichment matrix dwls_values <- getSpatialEnrichment(gobject, spat_unit = spat_unit, @@ -456,7 +491,7 @@ featExpDWLS <- function(gobject, # 1. check if cell_type_vector and matrix are compatible if (ncol(ave_celltype_exp) != ncol(dwls_values) - 1) { - stop("ncol(ave_celltype_exp) needs to be the same as + stop("ncol(ave_celltype_exp) needs to be the same as ncol(dwls_values) - 1") } @@ -472,7 +507,7 @@ featExpDWLS <- function(gobject, ) average_exp <- as.matrix(ave_celltype_exp) - for (spot_i in 1:nrow(dwls_values)) { + for (spot_i in seq_len(nrow(dwls_values))) { spot <- dwls_values[spot_i, 1] spot_dwls <- dwls_values[spot_i, -1] data.table::setDF(spot_dwls) @@ -491,28 +526,32 @@ featExpDWLS <- function(gobject, #' @title Calculate feature expression residual #' @name .cal_expr_residual -#' @description Calculate feature expression residual +#' @description Calculate feature expression residual #' (observed_exp - DWLS_predicted) #' #' @param gobject giotto object #' @param spat_unit spatial unit (e.g. 'cell') #' @param feat_type feature type (e.g. 'rna') -#' @param expression_values expression values to use +#' @param expression_values expression values to use #' (e.g. 'normalized', 'scaled', 'custom') #' @param ave_celltype_exp average expression matrix in cell types #' @returns matrix #' @keywords internal -.cal_expr_residual <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - ave_celltype_exp) { +.cal_expr_residual <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + ave_celltype_exp) { # expression data values <- match.arg( - expression_values, choices = c("normalized", "scaled", "custom")) - + expression_values, + choices = c("normalized", "scaled", "custom") + ) + expr_observed <- slot(gobject@expression[[spat_unit]][[ - feat_type]][[values]], "exprMat") + feat_type + ]][[values]], "exprMat") # Compute predicted feature expression value expr_predicted <- featExpDWLS( @@ -522,12 +561,14 @@ featExpDWLS <- function(gobject, ave_celltype_exp = ave_celltype_exp ) - # Get the difference expression matrix between observed and predicted + # Get the difference expression matrix between observed and predicted # expression intersect_feature <- intersect( - rownames(expr_predicted), rownames(expr_observed)) + rownames(expr_predicted), rownames(expr_observed) + ) expr_residual <- expr_observed[intersect_feature, ] - expr_predicted[ - intersect_feature, ] + intersect_feature, + ] expr_residual <- as.matrix(expr_residual) return(expr_residual) @@ -545,28 +586,31 @@ featExpDWLS <- function(gobject, #' @param spatial_network_name name of spatial network to use #' @param cluster_column name of column to use for clusters #' -#' @returns matrix that rownames are cell-cell interaction pairs and colnames +#' @returns matrix that rownames are cell-cell interaction pairs and colnames #' are cell_IDs #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) -#' +#' #' cellProximityEnrichmentEachSpot(gobject = g) #' @export -cellProximityEnrichmentEachSpot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "spatial_network", - cluster_column = "cell_ID") { +cellProximityEnrichmentEachSpot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "spatial_network", + cluster_column = "cell_ID") { spatial_network_annot <- annotateSpatialNetwork( gobject = gobject, spat_unit = spat_unit, @@ -576,11 +620,12 @@ cellProximityEnrichmentEachSpot <- function(gobject, ) # data.table variables - V1 <- V2 <- from <- to <- int_cell_IDS <- Var1 <- Var2 <- + V1 <- V2 <- from <- to <- int_cell_IDS <- Var1 <- Var2 <- unified_cells <- type_int <- N <- NULL spatial_network_annot <- dt_sort_combine_two_columns( - spatial_network_annot, "to", "from", "unified_cells") + spatial_network_annot, "to", "from", "unified_cells" + ) spatial_network_annot <- spatial_network_annot[!duplicated(unified_cells)] # exact spatial_enrichment matrix @@ -601,8 +646,11 @@ cellProximityEnrichmentEachSpot <- function(gobject, # get cell-cell types pairs cts <- colnames(dwls_values) ct_pairs <- data.table::data.table( - V1 = rep(cts, each = length(cts)), V2 = rep(cts, length(cts))) - ct_pairs[, unified_int := paste0(V1, "--", V2), by = 1:nrow(ct_pairs)] + V1 = rep(cts, each = length(cts)), V2 = rep(cts, length(cts)) + ) + ct_pairs[, unified_int := paste0(V1, "--", V2), + by = seq_len(nrow(ct_pairs)) + ] unified_int <- ct_pairs$unified_int @@ -639,7 +687,7 @@ cellProximityEnrichmentEachSpot <- function(gobject, idx2 <- which(colSums(dwls_int_cells) > 0) dwls_int_cells <- dwls_int_cells[, idx2] - # all the interacted cells dwls have same cell type with + # all the interacted cells dwls have same cell type with # proportion=1 if (length(idx2) == 1) { dwls_int_cells <- matrix(dwls_int_cells, @@ -670,7 +718,8 @@ cellProximityEnrichmentEachSpot <- function(gobject, spot_proximity <- reshape2::melt(spot_proximity) spot_proximity <- data.table::data.table(spot_proximity) spot_proximity[, c("Var1", "Var2") := lapply( - .SD, as.character), .SDcols = c("Var1", "Var2")] + .SD, as.character + ), .SDcols = c("Var1", "Var2")] spot_proximity[, unified_int := paste0(Var1, "--", Var2)] # add to proximityMat(matrix) @@ -685,12 +734,13 @@ cellProximityEnrichmentEachSpot <- function(gobject, #' cell proximity score of selected cell for spots #' @returns data.table #' @keywords internal -.cal_diff_per_interaction <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual) { +.cal_diff_per_interaction <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual) { pcc_diff <- sel <- other <- NULL # get data @@ -729,7 +779,9 @@ cellProximityEnrichmentEachSpot <- function(gobject, expr_residual_dt[, diff := sel - other] results_dt <- data.table::merge.data.table( - expr_residual_dt, pcc_dt, by = "features") + expr_residual_dt, pcc_dt, + by = "features" + ) return(results_dt) } @@ -746,13 +798,14 @@ NULL #' @describeIn do_permuttest_spot Calculate original values for spots #' @keywords internal -.do_permuttest_original_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - name = "orig", - proximityMat, - expr_residual) { +.do_permuttest_original_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + name = "orig", + proximityMat, + expr_residual) { resultsDT <- .cal_diff_per_interaction( sel_int = sel_int, other_ints = other_ints, @@ -768,15 +821,16 @@ NULL #' @describeIn do_permuttest_spot Calculate random values for spots #' @keywords internal -.do_permuttest_random_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - name = "perm_1", - proximityMat, - expr_residual, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_random_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + name = "perm_1", + proximityMat, + expr_residual, + set_seed = TRUE, + seed_number = 1234) { # data.table variables features <- NULL @@ -799,10 +853,16 @@ NULL prox <- proximityMat[random_sel_int, ] prox <- prox[prox > 0] random_select <- c(sample( - all_IDs, size = l_select_ind - 1, replace = FALSE), names(prox[1])) - random_other <- c(sample( - all_IDs, size = l_other_ind, replace = FALSE), - names(prox[length(prox)])) + all_IDs, + size = l_select_ind - 1, replace = FALSE + ), names(prox[1])) + random_other <- c( + sample( + all_IDs, + size = l_other_ind, replace = FALSE + ), + names(prox[length(prox)]) + ) resultsDT <- .cal_diff_per_interaction( sel_int = random_sel_int, @@ -821,21 +881,22 @@ NULL #' @describeIn do_permuttest_spot Calculate multiple random values for spots #' @keywords internal -.do_multi_permuttest_random_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - n = 100, - cores = NA, - set_seed = TRUE, - seed_number = 1234) { +.do_multi_permuttest_random_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + n = 100, + cores = NA, + set_seed = TRUE, + seed_number = 1234) { if (set_seed == TRUE) { seed_number_list <- seed_number:(seed_number + (n - 1)) } - result <- lapply_flex(X = 1:n, cores = cores, fun = function(x) { + result <- lapply_flex(X = seq_len(n), cores = cores, fun = function(x) { seed_number <- seed_number_list[x] perm_rand <- .do_permuttest_random_spot( @@ -855,24 +916,25 @@ NULL } -#' @describeIn do_permuttest_spot Performs permutation test on subsets of a +#' @describeIn do_permuttest_spot Performs permutation test on subsets of a #' matrix for spots #' @keywords internal -.do_permuttest_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # data.table variables log2fc_diff <- log2fc <- sel <- other <- features <- p_higher <- p_lower <- perm_sel <- NULL - perm_other <- perm_log2fc <- perm_diff <- p.value <- p.adj <- + perm_other <- perm_log2fc <- perm_diff <- p.value <- p.adj <- pcc_sel <- pcc_diff <- NULL perm_pcc_sel <- perm_pcc_diff <- pcc_other <- NULL @@ -904,9 +966,12 @@ NULL ## # random_perms[, log2fc_diff := rep(original$log2fc, n_perm) - log2fc] random_perms[, c( - "perm_sel", "perm_other", "perm_pcc_sel", "perm_pcc_diff") := list( - mean(sel), mean(other), mean(pcc_sel), mean(pcc_diff)), - by = features] + "perm_sel", "perm_other", "perm_pcc_sel", "perm_pcc_diff" + ) := list( + mean(sel), mean(other), mean(pcc_sel), mean(pcc_diff) + ), + by = features + ] ## get p-values random_perms[, p_higher := sum(pcc_diff > 0), by = features] @@ -916,20 +981,23 @@ NULL ## combine results permutation and original random_perms_res <- unique(random_perms[, .( - features, perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff, - p_higher, p_lower)]) + features, perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff, + p_higher, p_lower + )]) results_m <- data.table::merge.data.table( - random_perms_res, + random_perms_res, original[, .(features, sel, other, diff, pcc_sel, pcc_other, pcc_diff)], - by = "features") + by = "features" + ) # select lowest p-value and perform p.adj results_m[, p.value := ifelse(p_higher <= p_lower, p_higher, p_lower)] results_m[, p.adj := stats::p.adjust(p.value, method = adjust_method)] results_m <- results_m[, .( - features, sel, other, pcc_sel, pcc_other, pcc_diff, p.value, p.adj, - perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff)] + features, sel, other, pcc_sel, pcc_other, pcc_diff, p.value, p.adj, + perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff + )] setorder(results_m, p.adj, -pcc_diff) return(results_m) @@ -938,25 +1006,28 @@ NULL #' @title Cell proximity testing for spot data #' @name .do_cell_proximity_test_spot -#' @description Performs a selected differential test on subsets of a matrix +#' @description Performs a selected differential test on subsets of a matrix #' for spots #' @returns differential test on subsets of a matrix #' @keywords internal -.do_cell_proximity_test_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - diff_test, - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.do_cell_proximity_test_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + diff_test, + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # get parameters diff_test <- match.arg( - diff_test, choices = c("permutation", "limma", "t.test", "wilcox")) + diff_test, + choices = c("permutation", "limma", "t.test", "wilcox") + ) adjust_method <- match.arg(adjust_method, choices = c( "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", "fdr", "none" @@ -983,25 +1054,26 @@ NULL #' @title Find ICF per interaction for spot data #' @name .findICF_per_interaction_spot -#' @description Identifies features that are differentially expressed due to +#' @description Identifies features that are differentially expressed due to #' proximity to other cell types for spots. #' @returns data.table #' @keywords internal -.findICF_per_interaction_spot <- function(sel_int, - all_ints, - proximityMat, - expr_residual, - dwls_values, - dwls_cutoff = 0.001, - CCI_cell_score = 0.01, - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = "permutation", - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.findICF_per_interaction_spot <- function( + sel_int, + all_ints, + proximityMat, + expr_residual, + dwls_values, + dwls_cutoff = 0.001, + CCI_cell_score = 0.01, + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = "permutation", + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # data.table variables unified_int <- NULL @@ -1026,7 +1098,8 @@ NULL ## do not continue if too few cells ## if (length(spec_IDs) < minimum_unique_cells | length( - other_IDs) < minimum_unique_cells) { + other_IDs + ) < minimum_unique_cells) { result <- NULL } else { result <- .do_cell_proximity_test_spot( @@ -1064,9 +1137,10 @@ NULL #' @title findICFSpot #' @name findICFSpot -#' @description Identifies cell-to-cell Interaction Changed Features (ICF) for -#' spots, i.e. features expression residual that are different due to proximity -#' to other cell types. +#' @description Identifies cell-to-cell Interaction Changed Features (ICF) for +#' spots, i.e. features expression residual that are different due to proximity +#' to other cell types. Works using results from celltype deconvolution methods +#' such as those from [runDWLSDeconv()]. #' #' @param gobject A giotto object #' @param spat_unit spatial unit (e.g. 'cell') @@ -1075,6 +1149,7 @@ NULL #' @param ave_celltype_exp average feature expression in each cell type #' @param selected_features subset of selected features (optional) #' @param spatial_network_name name of spatial network to use +#' @param deconv_name name of deconvolution/spatial enrichment values to use #' @param minimum_unique_cells minimum number of target cells required #' @param minimum_unique_int_cells minimum number of interacting cells required #' @param CCI_cell_score cell proximity score to filter no interacted cell @@ -1088,35 +1163,61 @@ NULL #' @param seed_number seed number #' @param verbose be verbose #' -#' @returns icfObject that contains the differential feat scores -#' @details Function to calculate if features expression residual are -#' differentially expressed in cell types when they interact +#' @returns `icfObject` that contains the differential feat scores +#' @details Function to calculate if features expression residual are +#' differentially expressed in cell types when they interact #' (approximated by physical proximity) with other cell types. #' Feature expression residual calculated as: -#' (observed expression in spot - cell_type_proportion * +#' (observed expression in spot - cell_type_proportion * #' average_expressed_in_cell_type) -#' The results data.table in the icfObject contains - at least - +#' The results data.table in the `icfObject` contains - at least - #' the following columns: -#' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression residual in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression residual in the NOT-interacting cells from the target cell type } -#' \item{pcc_sel:}{ correlation between cell proximity score and expression residual in the interacting cells from the target cell type} -#' \item{pcc_other:}{ correlation between cell proximity score and expression residual in the NOT-interacting cells from the target cell type } -#' \item{pcc_diff:}{ correlation difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} -#' } +#' * **features:** All or selected list of tested features +#' * **sel:** average feature expression residual in the interacting cells +#' from the target cell type +#' * **other:** average feature expression residual in the NOT-interacting +#' cells from the target cell type +#' * **pcc_sel:** correlation between cell proximity score and expression +#' residual in the interacting cells from the target cell type +#' * **pcc_other:** correlation between cell proximity score and expression +#' residual in the NOT-interacting cells from the target cell type +#' * **pcc_diff:** correlation difference between sel and other +#' * **p.value:** associated p-value +#' * **p.adj:** adjusted p-value +#' * **cell_type:** target cell type +#' * **int_cell_type:** interacting cell type +#' * **nr_select:** number of cells for selected target cell type +#' * **int_nr_select:** number of cells for interacting cell type +#' * **unif_int:** cell-cell interaction +#' #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' g_expression <- getExpression(g, output = "matrix") +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) +#' sign_gene <- x$feats +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) +#' rownames(sign_matrix) <- sign_gene +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' +#' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) +#' ave_celltype_exp <- calculateMetaTable(g, metadata_cols = "leiden_clus") +#' ave_celltype_exp <- reshape2::dcast(ave_celltype_exp, variable~leiden_clus) +#' rownames(ave_celltype_exp) <- ave_celltype_exp$variable +#' ave_celltype_exp <- ave_celltype_exp[,-1] +#' colnames(ave_celltype_exp) <- colnames(sign_matrix) #' -#' findICFSpot(g, spat_unit = "cell", feat_type = "rna", ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") +#' findICFSpot(g, +#' spat_unit = "cell", +#' feat_type = "rna", +#' ave_celltype_exp = ave_celltype_exp, +#' spatial_network_name = "spatial_network" +#' ) +#' @seealso [findInteractionChangedFeats()] +#' @md #' @export findICFSpot <- function(gobject, spat_unit = NULL, @@ -1125,6 +1226,7 @@ findICFSpot <- function(gobject, ave_celltype_exp, selected_features = NULL, spatial_network_name = "Delaunay_network", + deconv_name = "DWLS", minimum_unique_cells = 5, minimum_unique_int_cells = 5, CCI_cell_score = 0.1, @@ -1142,9 +1244,12 @@ findICFSpot <- function(gobject, # expression data values <- match.arg( - expression_values, choices = c("normalized", "scaled", "custom")) + expression_values, + choices = c("normalized", "scaled", "custom") + ) features_overlap <- intersect( - slot(gobject, "feat_ID")[[feat_type]], rownames(ave_celltype_exp)) + slot(gobject, "feat_ID")[[feat_type]], rownames(ave_celltype_exp) + ) ave_celltype_exp_sel <- ave_celltype_exp[features_overlap, ] expr_residual <- .cal_expr_residual( gobject = gobject, @@ -1156,7 +1261,8 @@ findICFSpot <- function(gobject, ## test selected features ## if (!is.null(selected_features)) { expr_residual <- expr_residual[ - rownames(expr_residual) %in% selected_features, ] + rownames(expr_residual) %in% selected_features, + ] } # compute cell proximity for each spot @@ -1169,16 +1275,18 @@ findICFSpot <- function(gobject, # compute correlation between features and cell-types to find ICFs all_ints <- data.table::data.table(unified_int = rownames(proximityMat)) all_ints[, cell_type := strsplit( - as.character(unified_int), "--")[[1]][1], by = 1:nrow(all_ints)] + as.character(unified_int), "--" + )[[1]][1], by = seq_len(nrow(all_ints))] all_ints[, int_cell_type := strsplit( - as.character(unified_int), "--")[[1]][2], by = 1:nrow(all_ints)] + as.character(unified_int), "--" + )[[1]][2], by = seq_len(nrow(all_ints))] # exact spatial_enrichment matrix dwls_values <- getSpatialEnrichment( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - name = "DWLS", + name = deconv_name, output = "data.table" ) data.table::setDF(dwls_values) @@ -1190,23 +1298,24 @@ findICFSpot <- function(gobject, if (do_parallel == TRUE) { fin_result <- lapply_flex( X = all_ints$unified_int, cores = cores, fun = function(x) { - tempres <- .findICF_per_interaction_spot( - sel_int = x, - all_ints = all_ints, - proximityMat = proximityMat, - expr_residual = expr_residual, - dwls_values = dwls_values, - dwls_cutoff = dwls_cutoff, - CCI_cell_score = CCI_cell_score, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - n_perm = nr_permutations, - adjust_method = adjust_method, - cores = cores, - set_seed = set_seed, - seed_number = seed_number - ) - }) + tempres <- .findICF_per_interaction_spot( + sel_int = x, + all_ints = all_ints, + proximityMat = proximityMat, + expr_residual = expr_residual, + dwls_values = dwls_values, + dwls_cutoff = dwls_cutoff, + CCI_cell_score = CCI_cell_score, + minimum_unique_cells = minimum_unique_cells, + minimum_unique_int_cells = minimum_unique_int_cells, + n_perm = nr_permutations, + adjust_method = adjust_method, + cores = cores, + set_seed = set_seed, + seed_number = seed_number + ) + } + ) } else { fin_result <- list() @@ -1241,30 +1350,34 @@ findICFSpot <- function(gobject, final_result[, spec_int := paste0(cell_type, "--", int_cell_type)] final_result[, type_int := ifelse( - cell_type == int_cell_type, "homo", "hetero")] + cell_type == int_cell_type, "homo", "hetero" + )] # return(final_result) permutation_test <- ifelse( - diff_test == "permutation", nr_permutations, "no permutations") - - icfObject <- list( - ICFscores = final_result, - Giotto_info = list( - "values" = values, - "cluster" = "cell_ID", - "spatial network" = spatial_network_name + diff_test == "permutation", nr_permutations, "no permutations" + ) + + icfObject <- structure( + .Data = list( + ICFscores = final_result, + Giotto_info = list( + "values" = values, + "cluster" = "cell_ID", + "spatial network" = spatial_network_name + ), + test_info = list( + "test" = diff_test, + "p.adj" = adjust_method, + "min cells" = minimum_unique_cells, + "min interacting cells" = minimum_unique_int_cells, + "perm" = permutation_test + ) ), - test_info = list( - "test" = diff_test, - "p.adj" = adjust_method, - "min cells" = minimum_unique_cells, - "min interacting cells" = minimum_unique_int_cells, - "perm" = permutation_test - ) + class = "icfObject" ) - class(icfObject) <- append(class(icfObject), "icfObject") return(icfObject) } @@ -1275,10 +1388,10 @@ findICFSpot <- function(gobject, #' #' @param icfObject ICF (interaction changed feature) score object #' @param min_cells minimum number of source cell type -#' @param min_cells_expr_resi minimum expression residual level for source +#' @param min_cells_expr_resi minimum expression residual level for source #' cell type #' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr_resi minimum expression residual level for +#' @param min_int_cells_expr_resi minimum expression residual level for #' interacting neighbor cell type #' @param min_fdr minimum adjusted p-value #' @param min_pcc_diff minimum absolute pcc difference difference @@ -1290,31 +1403,34 @@ findICFSpot <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -#' +#' #' filterICFSpot(icfObject = icfObject) #' @export -filterICFSpot <- function(icfObject, - min_cells = 4, - min_cells_expr_resi = 0.05, - min_int_cells = 4, - min_int_cells_expr_resi = 0.05, - min_fdr = 0.5, - min_pcc_diff = 0.05, - min_zscore = 0.05, - zscores_column = c("cell_type", "features"), - direction = c("both", "up", "down")) { +filterICFSpot <- function( + icfObject, + min_cells = 4, + min_cells_expr_resi = 0.05, + min_int_cells = 4, + min_int_cells_expr_resi = 0.05, + min_fdr = 0.5, + min_pcc_diff = 0.05, + min_zscore = 0.05, + zscores_column = c("cell_type", "features"), + direction = c("both", "up", "down")) { # data.table variables - nr_select <- int_nr_select <- zscores <- perm_diff <- sel <- other <- + nr_select <- int_nr_select <- zscores <- perm_diff <- sel <- other <- p.adj <- NULL log2fc <- min_log2_fc <- NULL if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } zscores_column <- match.arg( - zscores_column, choices = c("cell_type", "features")) + zscores_column, + choices = c("cell_type", "features") + ) ICFscore <- copy(icfObject[["ICFscores"]]) @@ -1325,7 +1441,8 @@ filterICFSpot <- function(icfObject, ## sequential filter steps ## # 1. minimum number of source and target cells selection_scores <- ICFscore[ - nr_select >= min_cells & int_nr_select >= min_int_cells] + nr_select >= min_cells & int_nr_select >= min_int_cells + ] # 2. create z-scores for log2fc per cell type selection_scores[, zscores := scale(perm_diff), by = c(zscores_column)] @@ -1333,9 +1450,11 @@ filterICFSpot <- function(icfObject, # 3. filter based on z-scores and minimum levels comb_DT <- rbind( selection_scores[zscores >= min_zscore & abs( - perm_diff) >= min_pcc_diff & sel >= min_cells_expr_resi], + perm_diff + ) >= min_pcc_diff & sel >= min_cells_expr_resi], selection_scores[zscores <= -min_zscore & abs( - perm_diff) >= min_pcc_diff & other >= min_int_cells_expr_resi] + perm_diff + ) >= min_pcc_diff & other >= min_int_cells_expr_resi] ) # 4. filter based on adjusted p-value (fdr) @@ -1370,30 +1489,35 @@ filterICFSpot <- function(icfObject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotICFSpot(gobject = g, icfObject = icfObject, -#' source_type = "1", source_markers = "Ccnd2", -#' ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotICFSpot( +#' gobject = g, icfObject = icfObject, +#' source_type = "1", source_markers = "Ccnd2", +#' ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +#' ) #' @export -plotICFSpot <- function(gobject, - icfObject, - source_type, - source_markers, - ICF_features, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotICFSpot") { +plotICFSpot <- function( + gobject, + icfObject, + source_type, + source_markers, + ICF_features, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotICFSpot") { # data.table variables - cell_type <- int_cell_type <- pcc_diff <- NULL + cell_type <- int_cell_type <- pcc_diff <- feats <- perm_diff <- NULL if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } @@ -1418,16 +1542,20 @@ plotICFSpot <- function(gobject, features <- group <- NULL tempDT <- ICFscores[feats %in% all_features][ - cell_type == source_type][int_cell_type %in% neighbor_types] + cell_type == source_type + ][int_cell_type %in% neighbor_types] tempDT[, features := factor(feats, levels = detected_features)] tempDT[, group := names(ICF_features[ - ICF_features == feats]), by = 1:nrow(tempDT)] + ICF_features == feats + ]), by = seq_len(nrow(tempDT))] if (is.null(cell_color_code)) { mycolors <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = length(unique( - tempDT$int_cell_type))) + instrs = instructions(gobject) + )(n = length(unique( + tempDT$int_cell_type + ))) names(mycolors) <- unique(tempDT$int_cell_type) } else { mycolors <- cell_color_code @@ -1437,17 +1565,20 @@ plotICFSpot <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text( - size = 14, angle = 45, vjust = 1, hjust = 1), + size = 14, angle = 45, vjust = 1, hjust = 1 + ), axis.text.y = ggplot2::element_text(size = 14), axis.title = ggplot2::element_text(size = 14) ) pl <- pl + ggplot2::geom_bar( - data = tempDT, - ggplot2::aes(x = feats, y = perm_diff, fill = int_cell_type), - stat = "identity", position = ggplot2::position_dodge()) + data = tempDT, + ggplot2::aes(x = feats, y = perm_diff, fill = int_cell_type), + stat = "identity", position = ggplot2::position_dodge() + ) pl <- pl + ggplot2::scale_fill_manual(values = mycolors) pl <- pl + ggplot2::labs(x = "", title = paste0( - "fold-change z-scores in ", source_type)) + "fold-change z-scores in ", source_type + )) return(plot_output_handler( gobject = gobject, @@ -1482,47 +1613,55 @@ plotICFSpot <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -#' -#' plotCellProximityFeatSpot(gobject = g, icfObject = icfObject, -#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE, -#' min_pcc_diff = 0.01) +#' +#' plotCellProximityFeatSpot( +#' gobject = g, icfObject = icfObject, +#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE, +#' min_pcc_diff = 0.01 +#' ) #' @export -plotCellProximityFeatSpot <- function(gobject, - icfObject, - method = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot"), - min_cells = 4, - min_cells_expr_resi = 0.05, - min_int_cells = 4, - min_int_cells_expr_resi = 0.05, - min_fdr = 0.5, - min_pcc_diff = 0.05, - min_zscore = 0.05, - zscores_column = c("cell_type", "features"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCellProximityFeats") { +plotCellProximityFeatSpot <- function( + gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 4, + min_cells_expr_resi = 0.05, + min_int_cells = 4, + min_int_cells_expr_resi = 0.05, + min_fdr = 0.5, + min_pcc_diff = 0.05, + min_zscore = 0.05, + zscores_column = c("cell_type", "features"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCellProximityFeats") { if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } # print, return and save parameters show_plot <- ifelse( - is.null(show_plot), + is.null(show_plot), readGiottoInstructions(gobject, param = "show_plot"), - show_plot) + show_plot + ) save_plot <- ifelse( - is.null(save_plot), + is.null(save_plot), readGiottoInstructions(gobject, param = "save_plot"), - save_plot) + save_plot + ) return_plot <- ifelse( - is.null(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), return_plot) + is.null(return_plot), + readGiottoInstructions(gobject, param = "return_plot"), return_plot + ) ## first filter @@ -1537,16 +1676,19 @@ plotCellProximityFeatSpot <- function(gobject, zscores_column = c("cell_type", "features"), direction = c("both", "up", "down") ) - + message("filter complete") complete_part <- filter_icf[["ICFscores"]] ## other parameters method <- match.arg( - method, - choices = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot")) + method, + choices = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ) + ) # variables @@ -1557,10 +1699,12 @@ plotCellProximityFeatSpot <- function(gobject, ## volcanoplot pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_point( - data = complete_part, + data = complete_part, ggplot2::aes( - x = perm_diff, - y = ifelse(is.infinite(-log10(p.adj)), 1000, -log10(p.adj)))) + x = perm_diff, + y = ifelse(is.infinite(-log10(p.adj)), 1000, -log10(p.adj)) + ) + ) pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_vline(xintercept = 0, linetype = 2) pl <- pl + ggplot2::labs(x = "pcc diff", y = "-log10(p.adjusted)") @@ -1574,9 +1718,12 @@ plotCellProximityFeatSpot <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1591,11 +1738,13 @@ plotCellProximityFeatSpot <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( - data = complete_part, - ggplot2::aes(x = unif_int, fill = unif_int)) + data = complete_part, + ggplot2::aes(x = unif_int, fill = unif_int) + ) pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text - (angle = 90, hjust = 1, vjust = 1)) + (angle = 90, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::coord_flip() ## print plot @@ -1606,9 +1755,12 @@ plotCellProximityFeatSpot <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1624,15 +1776,18 @@ plotCellProximityFeatSpot <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( - data = complete_part, - ggplot2::aes(x = cell_type, fill = int_cell_type)) + data = complete_part, + ggplot2::aes(x = cell_type, fill = int_cell_type) + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } pl <- pl + ggplot2::theme_classic() + ggplot2::theme( - axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) ## print plot @@ -1643,9 +1798,12 @@ plotCellProximityFeatSpot <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1666,14 +1824,18 @@ plotCellProximityFeatSpot <- function(gobject, ) + ggalluvial::geom_alluvium(aes(fill = cell_type), width = 1 / 12) + ggalluvial::geom_stratum( - width = 1 / 12, fill = "black", color = "grey") + + width = 1 / 12, fill = "black", color = "grey" + ) + ggplot2::scale_x_discrete( - limits = c("cell type", "neighbours"), expand = c(.05, .05)) + + limits = c("cell type", "neighbours"), expand = c(.05, .05) + ) + ggplot2::geom_label( - tat = "stratum", label.strata = TRUE, size = 3) + + tat = "stratum", label.strata = TRUE, size = 3 + ) + ggplot2::theme_classic() + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) @@ -1689,9 +1851,12 @@ plotCellProximityFeatSpot <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1700,23 +1865,30 @@ plotCellProximityFeatSpot <- function(gobject, } } else if (method == "dotplot") { changed_features <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_features[, cell_type := factor(cell_type, unique(cell_type))] changed_features[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( - data = changed_features, - ggplot2::aes(x = cell_type, y = int_cell_type, size = N)) + data = changed_features, + ggplot2::aes(x = cell_type, y = int_cell_type, size = N) + ) pl <- pl + ggplot2::scale_size_continuous( - guide = guide_legend(title = "# of ICFs")) + guide = guide_legend(title = "# of ICFs") + ) pl <- pl + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + )) pl <- pl + ggplot2::labs( - x = "source cell type", y = "neighbor cell type") + x = "source cell type", y = "neighbor cell type" + ) ## print plot if (show_plot == TRUE) { @@ -1726,9 +1898,12 @@ plotCellProximityFeatSpot <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1737,17 +1912,21 @@ plotCellProximityFeatSpot <- function(gobject, } } else if (method == "heatmap") { changed_features <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_features[, cell_type := factor(cell_type, unique(cell_type))] changed_features[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] changed_features_d <- data.table::dcast.data.table( - changed_features, - cell_type ~ int_cell_type, - value.var = "N", - fill = 0) + changed_features, + cell_type ~ int_cell_type, + value.var = "N", + fill = 0 + ) changed_features_m <- dt_to_matrix(changed_features_d) col_fun <- GiottoVisuals::colorRamp2( @@ -1755,11 +1934,13 @@ plotCellProximityFeatSpot <- function(gobject, colors = c("white", "white", "blue", "yellow", "red") ) - heatm <- ComplexHeatmap::Heatmap(as.matrix(log2( - changed_features_m + 1)), + heatm <- ComplexHeatmap::Heatmap( + as.matrix(log2( + changed_features_m + 1 + )), col = col_fun, - row_title = "cell_type", - column_title = "int_cell_type", + row_title = "cell_type", + column_title = "int_cell_type", heatmap_legend_param = list(title = "log2(# DEGs)") ) @@ -1771,9 +1952,12 @@ plotCellProximityFeatSpot <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = heatm, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = heatm, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1790,7 +1974,7 @@ plotCellProximityFeatSpot <- function(gobject, #' @title Specific cell-cell communication scores for spot data #' @name .specific_CCCScores_spots -#' @description Specific Cell-Cell communication scores based on spatial +#' @description Specific Cell-Cell communication scores based on spatial #' expression of interacting cells at spots resolution #' #' @param gobject giotto object to use @@ -1804,9 +1988,9 @@ plotCellProximityFeatSpot <- function(gobject, #' @param cell_type_2 second cell type #' @param feature_set_1 first specific feature set from feature pairs #' @param feature_set_2 second specific feature set from feature pairs -#' @param min_observations minimum number of interactions needed to be +#' @param min_observations minimum number of interactions needed to be #' considered -#' @param detailed provide more detailed information +#' @param detailed provide more detailed information #' (random variance and z-score) #' @param adjust_method which method to adjust p-values #' @param adjust_target adjust multiple hypotheses at the cell or feature level @@ -1814,59 +1998,67 @@ plotCellProximityFeatSpot <- function(gobject, #' @param seed_number seed number #' @param verbose verbose #' -#' @returns Cell-Cell communication scores for feature pairs based on spatial +#' @returns Cell-Cell communication scores for feature pairs based on spatial #' interaction -#' @details Statistical framework to identify if pairs of features -#' (such as ligand-receptor combinations) are expressed at higher levels than -#' expected based on a reshuffled null distribution of feature expression +#' @details Statistical framework to identify if pairs of features +#' (such as ligand-receptor combinations) are expressed at higher levels than +#' expected based on a reshuffled null distribution of feature expression #' values in cells that are spatially in proximity to each other. #' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expressionresidual(observed - DWLS_predicted) of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression } -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ LR_expr - rand_expr } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } +#' * LR_comb: Pair of ligand and receptor +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression residual (observed - DWLS_predicted) of +#' ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression residual(observed - DWLS_predicted) of +#' receptor in rec_cell_type +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression residual from +#' random spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all random +#' spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optinal) z-score +#' * log2fc: LR_expr - rand_expr +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significance score: log2fc \* -log10(p.adj) #' } #' @keywords internal -.specific_CCCScores_spots <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expr_residual, - dwls_values, - proximityMat, - random_iter = 1000, - cell_type_1 = "astrocytes", - cell_type_2 = "endothelial", - feature_set_1, - feature_set_2, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", " BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("features", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = FALSE) { +.specific_CCCScores_spots <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expr_residual, + dwls_values, + proximityMat, + random_iter = 1000, + cell_type_1 = "astrocytes", + cell_type_2 = "endothelial", + feature_set_1, + feature_set_2, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", " BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("features", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = FALSE) { # data.table variables - from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- + from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- rec_nr <- rand_expr <- NULL - av_diff <- log2fc <- LR_expr <- pvalue <- LR_cell_comb <- p.adj <- + av_diff <- log2fc <- LR_expr <- pvalue <- LR_cell_comb <- p.adj <- LR_comb <- PI <- NULL sd_diff <- z_score <- ligand <- receptor <- NULL @@ -1881,8 +2073,11 @@ plotCellProximityFeatSpot <- function(gobject, cell_direction_1 <- paste0(cell_type_1, "--", cell_type_2) cell_direction_2 <- paste0(cell_type_2, "--", cell_type_1) - if (verbose) print(paste0( - "Processing specific CCC Scores: ", cell_direction_1)) + if (verbose) { + print(paste0( + "Processing specific CCC Scores: ", cell_direction_1 + )) + } proxi_1 <- proximityMat[cell_direction_1, ] proxi_2 <- proximityMat[cell_direction_2, ] @@ -1895,7 +2090,7 @@ plotCellProximityFeatSpot <- function(gobject, dwls_ct2 <- dwls_values[, cell_type_2] # make sure that there are sufficient observations - if (length(ct1_cell_ids) <= min_observations | + if (length(ct1_cell_ids) <= min_observations | length(ct2_cell_ids) <= min_observations) { return(NULL) } else { @@ -1943,7 +2138,7 @@ plotCellProximityFeatSpot <- function(gobject, all_cell_ids <- colnames(expr_residual) ## simulations ## - for (sim in 1:random_iter) { + for (sim in seq_len(random_iter)) { if (verbose == TRUE) cat("simulation ", sim, "\n") # get random ids and subset @@ -1953,9 +2148,13 @@ plotCellProximityFeatSpot <- function(gobject, } random_ids_1 <- sample( - all_cell_ids, size = length(ct1_cell_ids), replace = FALSE) + all_cell_ids, + size = length(ct1_cell_ids), replace = FALSE + ) random_ids_2 <- sample( - all_cell_ids, size = length(ct2_cell_ids), replace = FALSE) + all_cell_ids, + size = length(ct2_cell_ids), replace = FALSE + ) # get feature expression residual for ligand and receptor random_expr_res_L <- expr_residual[feature_set_1, random_ids_1] @@ -1998,7 +2197,9 @@ plotCellProximityFeatSpot <- function(gobject, if (detailed == TRUE) { av_difference_scores <- rowMeans_flex(total_sum) sd_difference_scores <- apply( - total_sum, MARGIN = 1, FUN = stats::sd) + total_sum, + MARGIN = 1, FUN = stats::sd + ) comScore[, av_diff := av_difference_scores] comScore[, sd_diff := sd_difference_scores] @@ -2014,19 +2215,24 @@ plotCellProximityFeatSpot <- function(gobject, if (adjust_target == "features") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_cell_comb)] + pvalue, + method = adjust_method + ), by = .(LR_cell_comb)] } else if (adjust_target == "cells") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_comb)] + pvalue, + method = adjust_method + ), by = .(LR_comb)] } # get minimum adjusted p.value that is not zero all_p.adj <- comScore[["p.adj"]] lowest_p.adj <- min(all_p.adj[all_p.adj != 0]) comScore[, PI := ifelse( - p.adj == 0, - log2fc * (-log10(lowest_p.adj)), - log2fc * (-log10(p.adj)))] + p.adj == 0, + log2fc * (-log10(lowest_p.adj)), + log2fc * (-log10(p.adj)) + )] return(comScore) } @@ -2036,7 +2242,7 @@ plotCellProximityFeatSpot <- function(gobject, #' @title spatCellCellcomSpots #' @name spatCellCellcomSpots -#' @description Spatial Cell-Cell communication scores based on spatial +#' @description Spatial Cell-Cell communication scores based on spatial #' expression of interacting cells at spots resolution #' #' @param gobject giotto object to use @@ -2044,18 +2250,18 @@ plotCellProximityFeatSpot <- function(gobject, #' @param feat_type feature type (e.g. 'rna') #' @param ave_celltype_exp Matrix with average expression per cell type #' @param expression_values (e.g. 'normalized', 'scaled', 'custom') -#' @param spatial_network_name spatial network to use for identifying +#' @param spatial_network_name spatial network to use for identifying #' interacting cells #' @param cluster_column cluster column with cell type information #' @param random_iter number of iterations #' @param feature_set_1 first specific feature set from feature pairs #' @param feature_set_2 second specific feature set from feature pairs -#' @param min_observations minimum number of interactions needed to be +#' @param min_observations minimum number of interactions needed to be #' considered -#' @param detailed provide more detailed information +#' @param detailed provide more detailed information #' (random variance and z-score) #' @param adjust_method which method to adjust p-values -#' @param adjust_target adjust multiple hypotheses at the cell or feature +#' @param adjust_target adjust multiple hypotheses at the cell or feature #' level #' @param do_parallel run calculations in parallel with mclapply #' @param cores number of cores to use if do_parallel = TRUE @@ -2063,56 +2269,64 @@ plotCellProximityFeatSpot <- function(gobject, #' @param seed_number seed number #' @param verbose verbose (e.g. 'a little', 'a lot', 'none') #' -#' @returns Cell-Cell communication scores for feature pairs based on spatial +#' @returns Cell-Cell communication scores for feature pairs based on spatial #' interaction -#' @details Statistical framework to identify if pairs of features -#' (such as ligand-receptor combinations) are expressed at higher levels than -#' expected based on a reshuffled null distribution of feature expression +#' @details Statistical framework to identify if pairs of features +#' (such as ligand-receptor combinations) are expressed at higher levels than +#' expected based on a reshuffled null distribution of feature expression #' values in cells that are spatially in proximity to each other. #' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expression residual(observed - DWLS_predicted) of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression residual} -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ LR_expr - rand_expr } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } +#' * LR_comb:Pair of ligand and receptor +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression residual(observed - DWLS_predicted) of +#' ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression residual(observed - DWLS_predicted) of +#' receptor in rec_cell_type +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression residual +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression residual from +#' random spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all random +#' spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optional) z-score +#' * log2fc: LR_expr - rand_expr +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significanc score: log2fc \* -log10(p.adj) #' } #' @export -spatCellCellcomSpots <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - ave_celltype_exp, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_ID", - random_iter = 1000, - feature_set_1, - feature_set_2, - min_observations = 2, - expression_values = c("normalized", "scaled", "custom"), - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("features", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { +spatCellCellcomSpots <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + ave_celltype_exp, + spatial_network_name = "Delaunay_network", + cluster_column = "cell_ID", + random_iter = 1000, + feature_set_1, + feature_set_2, + min_observations = 2, + expression_values = c("normalized", "scaled", "custom"), + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("features", "cells"), + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = c("a little", "a lot", "none")) { # data.table vars V1 <- V2 <- LR_cell_comb <- NULL @@ -2138,7 +2352,9 @@ spatCellCellcomSpots <- function(gobject, # expression data values <- match.arg( - expression_values, choices = c("normalized", "scaled", "custom")) + expression_values, + choices = c("normalized", "scaled", "custom") + ) expr_residual <- .cal_expr_residual( gobject = gobject, spat_unit = spat_unit, @@ -2156,7 +2372,8 @@ spatCellCellcomSpots <- function(gobject, # select overlapped spots intersect_cell_IDs <- intersect( - colnames(expr_residual), colnames(proximityMat)) + colnames(expr_residual), colnames(proximityMat) + ) expr_residual <- expr_residual[, intersect_cell_IDs] proximityMat <- proximityMat[, intersect_cell_IDs] @@ -2174,17 +2391,19 @@ spatCellCellcomSpots <- function(gobject, # check feature list LR_comb <- data.table::data.table( - ligand = feature_set_1, receptor = feature_set_2) + ligand = feature_set_1, receptor = feature_set_2 + ) # check LR pair not captured in giotto object LR_out <- LR_comb[!LR_comb$ligand %in% rownames( - expr_residual) | !LR_comb$receptor %in% rownames(expr_residual)] + expr_residual + ) | !LR_comb$receptor %in% rownames(expr_residual)] if (dim(LR_out)[1] > 0) { - message("Ligand or receptor were removed after computing expresion + message("Ligand or receptor were removed after computing expresion residual.") print(LR_out) - LR_comb <- LR_comb[LR_comb$ligand %in% rownames(expr_residual) & - LR_comb$receptor %in% rownames(expr_residual)] + LR_comb <- LR_comb[LR_comb$ligand %in% rownames(expr_residual) & + LR_comb$receptor %in% rownames(expr_residual)] feature_set_1 <- LR_comb$ligand feature_set_2 <- LR_comb$receptor } @@ -2192,50 +2411,56 @@ spatCellCellcomSpots <- function(gobject, ## get all combinations between cell types combn_DT <- data.table::data.table(LR_cell_comb = rownames(proximityMat)) combn_DT[, V1 := strsplit( - LR_cell_comb, "--")[[1]][1], by = 1:nrow(combn_DT)] + LR_cell_comb, "--" + )[[1]][1], by = seq_len(nrow(combn_DT))] combn_DT[, V2 := strsplit( - LR_cell_comb, "--")[[1]][2], by = 1:nrow(combn_DT)] + LR_cell_comb, "--" + )[[1]][2], by = seq_len(nrow(combn_DT))] ## parallel option ## if (do_parallel == TRUE) { savelist <- lapply_flex( - X = 1:nrow(combn_DT), cores = cores, fun = function(row) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - - specific_scores <- .specific_CCCScores_spots( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - expr_residual = expr_residual, - dwls_values = dwls_values, - proximityMat = proximityMat, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feature_set_1 = feature_set_1, - feature_set_2 = feature_set_2, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number - ) - }) + X = seq_len(nrow(combn_DT)), cores = cores, fun = function(row) { + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + + specific_scores <- .specific_CCCScores_spots( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + expr_residual = expr_residual, + dwls_values = dwls_values, + proximityMat = proximityMat, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feature_set_1 = feature_set_1, + feature_set_2 = feature_set_2, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number + ) + } + ) } else { ## for loop over all combinations ## savelist <- list() countdown <- nrow(combn_DT) - for (row in 1:nrow(combn_DT)) { + for (row in seq_len(nrow(combn_DT))) { cell_type_1 <- combn_DT[row][["V1"]] cell_type_2 <- combn_DT[row][["V2"]] - if (verbose == "a little" | verbose == "a lot") - cat("PROCESS nr ", countdown, ": ", - cell_type_1, " and ", cell_type_2) + if (verbose == "a little" | verbose == "a lot") { + cat( + "PROCESS nr ", countdown, ": ", + cell_type_1, " and ", cell_type_2 + ) + } specific_scores <- .specific_CCCScores_spots( gobject = gobject, diff --git a/R/spatial_interaction_visuals.R b/R/spatial_interaction_visuals.R index 136d12c26..15769e7d9 100644 --- a/R/spatial_interaction_visuals.R +++ b/R/spatial_interaction_visuals.R @@ -10,40 +10,46 @@ #' @returns ggplot barplot #' @details This function creates a barplot that shows the spatial proximity #' enrichment or depletion of cell type pairs. -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' cellProximityBarplot(gobject = g, -#' CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus")) +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' cellProximityBarplot( +#' gobject = g, +#' CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus") +#' ) #' @export -cellProximityBarplot <- function(gobject, - CPscore, - min_orig_ints = 5, - min_sim_ints = 5, - p_val = 0.05, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityBarplot") { +cellProximityBarplot <- function( + gobject, + CPscore, + min_orig_ints = 5, + min_sim_ints = 5, + p_val = 0.05, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityBarplot") { table_mean_results_dc <- CPscore$enrichm_res ## filter to remove low number of cell-cell proximity interactions ## # data.table variables - original <- simulations <- p_higher_orig <- p_lower_orig <- enrichm <- + original <- simulations <- p_higher_orig <- p_lower_orig <- enrichm <- type_int <- unified_int <- NULL table_mean_results_dc_filter <- table_mean_results_dc[ - original >= min_orig_ints & simulations >= min_sim_ints, ] + original >= min_orig_ints & simulations >= min_sim_ints, + ] table_mean_results_dc_filter <- table_mean_results_dc_filter[ - p_higher_orig <= p_val | p_lower_orig <= p_val, ] + p_higher_orig <= p_val | p_lower_orig <= p_val, + ] pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( - data = table_mean_results_dc_filter, - ggplot2::aes(x = unified_int, y = enrichm, fill = type_int), - stat = "identity", show.legend = FALSE) + data = table_mean_results_dc_filter, + ggplot2::aes(x = unified_int, y = enrichm, fill = type_int), + stat = "identity", show.legend = FALSE + ) pl <- pl + ggplot2::coord_flip() pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::labs(y = "enrichment/depletion") @@ -51,18 +57,22 @@ cellProximityBarplot <- function(gobject, bpl <- ggplot2::ggplot() bpl <- bpl + ggplot2::geom_bar( - data = table_mean_results_dc_filter, - ggplot2::aes(x = unified_int, y = original, fill = type_int), - stat = "identity", show.legend = TRUE) + data = table_mean_results_dc_filter, + ggplot2::aes(x = unified_int, y = original, fill = type_int), + stat = "identity", show.legend = TRUE + ) bpl <- bpl + ggplot2::coord_flip() bpl <- bpl + ggplot2::theme_bw() + ggplot2::theme( - axis.text.y = element_blank()) + axis.text.y = element_blank() + ) bpl <- bpl + ggplot2::labs(y = "# of interactions") bpl combo_plot <- cowplot::plot_grid( - pl, bpl, ncol = 2, rel_heights = c(1), - rel_widths = c(3, 1.5), align = "h") + pl, bpl, + ncol = 2, rel_heights = c(1), + rel_widths = c(3, 1.5), align = "h" + ) # output plot return(GiottoVisuals::plot_output_handler( @@ -85,7 +95,7 @@ cellProximityBarplot <- function(gobject, #' @param CPscore CPscore, output from cellProximityEnrichment() #' @param scale scale cell-cell proximity interaction scores #' @param order_cell_types order cell types based on enrichment correlation -#' @param color_breaks numerical vector of length 3 to represent min, mean +#' @param color_breaks numerical vector of length 3 to represent min, mean #' and maximum #' @param color_names character color vector of length 3 #' @returns ggplot heatmap @@ -93,50 +103,60 @@ cellProximityBarplot <- function(gobject, #' enrichment or depletion of cell type pairs. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' +#' #' cellProximityHeatmap(gobject = g, CPscore = x) #' @export -cellProximityHeatmap <- function(gobject, - CPscore, - scale = TRUE, - order_cell_types = TRUE, - color_breaks = NULL, - color_names = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityHeatmap") { +cellProximityHeatmap <- function( + gobject, + CPscore, + scale = TRUE, + order_cell_types = TRUE, + color_breaks = NULL, + color_names = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityHeatmap") { enrich_res <- CPscore$enrichm_res # data.table variables first_type <- second_type <- unified_int <- NULL enrich_res[, first_type := strsplit( - x = as.character(unified_int), split = "--")[[1]][1], - by = 1:nrow(enrich_res)] + x = as.character(unified_int), split = "--" + )[[1]][1], + by = seq_len(nrow(enrich_res)) + ] enrich_res[, second_type := strsplit( - x = as.character(unified_int), split = "--")[[1]][2], - by = 1:nrow(enrich_res)] + x = as.character(unified_int), split = "--" + )[[1]][2], + by = seq_len(nrow(enrich_res)) + ] # create matrix enrich_mat <- data.table::dcast.data.table( - data = enrich_res, - formula = first_type ~ second_type, - value.var = "enrichm") + data = enrich_res, + formula = first_type ~ second_type, + value.var = "enrichm" + ) matrix_d <- as.matrix(enrich_mat[, -1]) rownames(matrix_d) <- as.vector(enrich_mat[[1]]) t_matrix_d <- t_flex(matrix_d) # fill in NAs based on values in upper and lower matrix triangle t_matrix_d[upper.tri(t_matrix_d)][is.na(t_matrix_d[ - upper.tri(t_matrix_d)])] <- matrix_d[upper.tri(matrix_d)][ - is.na(t_matrix_d[upper.tri(t_matrix_d)])] + upper.tri(t_matrix_d) + ])] <- matrix_d[upper.tri(matrix_d)][ + is.na(t_matrix_d[upper.tri(t_matrix_d)]) + ] t_matrix_d[lower.tri(t_matrix_d)][is.na(t_matrix_d[ - lower.tri(t_matrix_d)])] <- matrix_d[lower.tri(matrix_d)][ - is.na(t_matrix_d[lower.tri(t_matrix_d)])] + lower.tri(t_matrix_d) + ])] <- matrix_d[lower.tri(matrix_d)][ + is.na(t_matrix_d[lower.tri(t_matrix_d)]) + ] t_matrix_d[is.na(t_matrix_d)] <- 0 final_matrix <- t_matrix_d @@ -145,7 +165,8 @@ cellProximityHeatmap <- function(gobject, final_matrix <- t_flex(scale(t_flex(final_matrix))) final_matrix <- t_flex(final_matrix) final_matrix[lower.tri(final_matrix)] <- t_flex(final_matrix)[ - lower.tri(final_matrix)] + lower.tri(final_matrix) + ] } # order cell types @@ -171,17 +192,19 @@ cellProximityHeatmap <- function(gobject, } heatm <- ComplexHeatmap::Heatmap( - matrix = final_matrix, - cluster_rows = FALSE, + matrix = final_matrix, + cluster_rows = FALSE, cluster_columns = FALSE, col = GiottoVisuals::colorRamp2( - breaks = color_breaks, colors = color_names) + breaks = color_breaks, colors = color_names + ) ) } else { heatm <- ComplexHeatmap::Heatmap( - matrix = final_matrix, - cluster_rows = FALSE, - cluster_columns = FALSE) + matrix = final_matrix, + cluster_rows = FALSE, + cluster_columns = FALSE + ) } return(plot_output_handler( @@ -208,9 +231,9 @@ cellProximityHeatmap <- function(gobject, #' @param color_depletion color for depleted cell-cell interactions #' @param color_enrichment color for enriched cell-cell interactions #' @param rescale_edge_weights rescale edge weights (boolean) -#' @param edge_weight_range_depletion numerical vector of length 2 to rescale +#' @param edge_weight_range_depletion numerical vector of length 2 to rescale #' depleted edge weights -#' @param edge_weight_range_enrichment numerical vector of length 2 to rescale +#' @param edge_weight_range_enrichment numerical vector of length 2 to rescale #' enriched edge weights #' @param layout layout algorithm to use to draw nodes and edges #' @param only_show_enrichment_edges show only the enriched pairwise scores @@ -223,31 +246,32 @@ cellProximityHeatmap <- function(gobject, #' enrichment or depletion of cell type pairs. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' +#' #' cellProximityNetwork(gobject = g, CPscore = x) #' @export -cellProximityNetwork <- function(gobject, - CPscore, - remove_self_edges = FALSE, - self_loop_strength = 0.1, - color_depletion = "lightgreen", - color_enrichment = "red", - rescale_edge_weights = TRUE, - edge_weight_range_depletion = c(0.1, 1), - edge_weight_range_enrichment = c(1, 5), - layout = c("Fruchterman", "DrL", "Kamada-Kawai"), - only_show_enrichment_edges = FALSE, - edge_width_range = c(0.1, 2), - node_size = 4, - node_color_code = NULL, - node_text_size = 6, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityNetwork") { +cellProximityNetwork <- function( + gobject, + CPscore, + remove_self_edges = FALSE, + self_loop_strength = 0.1, + color_depletion = "lightgreen", + color_enrichment = "red", + rescale_edge_weights = TRUE, + edge_weight_range_depletion = c(0.1, 1), + edge_weight_range_enrichment = c(1, 5), + layout = c("Fruchterman", "DrL", "Kamada-Kawai"), + only_show_enrichment_edges = FALSE, + edge_width_range = c(0.1, 2), + node_size = 4, + node_color_code = NULL, + node_text_size = 6, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityNetwork") { # extract scores # data.table variables @@ -255,17 +279,27 @@ cellProximityNetwork <- function(gobject, CPscores <- CPscore[["enrichm_res"]] CPscores[, cell_1 := strsplit( - as.character(unified_int), split = "--")[[1]][1], by = 1:nrow(CPscores)] + as.character(unified_int), + split = "--" + )[[1]][1], + by = seq_len(nrow(CPscores)) + ] CPscores[, cell_2 := strsplit( - as.character(unified_int), split = "--")[[1]][2], by = 1:nrow(CPscores)] + as.character(unified_int), + split = "--" + )[[1]][2], + by = seq_len(nrow(CPscores)) + ] # create igraph with enrichm as weight edges igd <- igraph::graph_from_data_frame( - d = CPscores[, c("cell_1", "cell_2", "enrichm")], directed = FALSE) + d = CPscores[, c("cell_1", "cell_2", "enrichm")], directed = FALSE + ) if (remove_self_edges == TRUE) { igd <- igraph::simplify( - graph = igd, remove.loops = TRUE, remove.multiple = FALSE) + graph = igd, remove.loops = TRUE, remove.multiple = FALSE + ) } edges_sizes <- igraph::get.edge.attribute(igd, "enrichm") @@ -275,9 +309,11 @@ cellProximityNetwork <- function(gobject, # rescale if wanted if (rescale_edge_weights == TRUE) { pos_edges_sizes_resc <- scales::rescale( - x = post_edges_sizes, to = edge_weight_range_enrichment) + x = post_edges_sizes, to = edge_weight_range_enrichment + ) neg_edges_sizes_resc <- scales::rescale( - x = neg_edges_sizes, to = edge_weight_range_depletion) + x = neg_edges_sizes, to = edge_weight_range_depletion + ) edges_sizes_resc <- c(pos_edges_sizes_resc, neg_edges_sizes_resc) } else { edges_sizes_resc <- c(post_edges_sizes, neg_edges_sizes) @@ -298,15 +334,18 @@ cellProximityNetwork <- function(gobject, } } else { layout <- match.arg( - arg = layout, choices = c("Fruchterman", "DrL", "Kamada-Kawai")) + arg = layout, choices = c("Fruchterman", "DrL", "Kamada-Kawai") + ) } igd <- igraph::set.edge.attribute( - graph = igd, index = igraph::E(igd), name = "color", - value = edges_colors) + graph = igd, index = igraph::E(igd), name = "color", + value = edges_colors + ) igd <- igraph::set.edge.attribute( - graph = igd, index = igraph::E(igd), name = "size", - value = as.numeric(edges_sizes_resc)) + graph = igd, index = igraph::E(igd), name = "size", + value = as.numeric(edges_sizes_resc) + ) ## only show attractive edges if (only_show_enrichment_edges == TRUE) { @@ -321,13 +360,16 @@ cellProximityNetwork <- function(gobject, ## get coordinates layouts if (layout == "Fruchterman") { coords <- igraph::layout_with_fr( - graph = igd, weights = edges_sizes_resc) + graph = igd, weights = edges_sizes_resc + ) } else if (layout == "DrL") { coords <- igraph::layout_with_drl( - graph = igd, weights = edges_sizes_resc) + graph = igd, weights = edges_sizes_resc + ) } else if (layout == "Kamada-Kawai") { coords <- igraph::layout_with_kk( - graph = igd, weights = edges_sizes_resc) + graph = igd, weights = edges_sizes_resc + ) } else { stop("Currently no other layouts have been implemented") } @@ -335,25 +377,36 @@ cellProximityNetwork <- function(gobject, ## create plot gpl <- ggraph::ggraph(graph = igd, layout = coords) gpl <- gpl + ggraph::geom_edge_link( - ggplot2::aes(color = factor(color), - edge_width = size, edge_alpha = size), - show.legend = FALSE) + ggplot2::aes( + color = factor(color), + edge_width = size, edge_alpha = size + ), + show.legend = FALSE + ) if (remove_self_edges == FALSE) { gpl <- gpl + ggraph::geom_edge_loop( - ggplot2::aes(color = factor(color), edge_width = size, - edge_alpha = size, strength = self_loop_strength), - show.legend = FALSE) + ggplot2::aes( + color = factor(color), edge_width = size, + edge_alpha = size, strength = self_loop_strength + ), + show.legend = FALSE + ) } gpl <- gpl + ggraph::scale_edge_color_manual( - values = c("enriched" = color_enrichment, "depleted" = color_depletion)) + values = c("enriched" = color_enrichment, "depleted" = color_depletion) + ) gpl <- gpl + ggraph::scale_edge_width(range = edge_width_range) gpl <- gpl + ggraph::scale_edge_alpha(range = c(0.1, 1)) gpl <- gpl + ggraph::geom_node_text( - ggplot2::aes(label = name), repel = TRUE, size = node_text_size) + ggplot2::aes(label = name), + repel = TRUE, size = node_text_size + ) gpl <- gpl + ggraph::geom_node_point( - ggplot2::aes(color = name), size = node_size) + ggplot2::aes(color = name), + size = node_size + ) if (!is.null(node_color_code)) { gpl <- gpl + ggplot2::scale_color_manual(values = node_color_code) } @@ -390,52 +443,59 @@ cellProximityNetwork <- function(gobject, NULL -#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell +#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell #' interactions according to spatial coordinates in ggplot mode #' @keywords internal -.cellProximityVisPlot_2D_ggplot <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - ...) { +.cellProximityVisPlot_2D_ggplot <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + ...) { # data.table variables - unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- + unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- x_start <- x_end <- NULL y_start <- y_end <- cell_ID <- NULL if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, run + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } - cell_locations <- getSpatialLocations(gobject = gobject, - output = "data.table") - spatial_grid <- getSpatialGrid(gobject = gobject, - name = spatial_grid_name) - cell_metadata <- getCellMetadata(gobject = gobject, - output = "data.table") + cell_locations <- getSpatialLocations( + gobject = gobject, + output = "data.table" + ) + spatial_grid <- getSpatialGrid( + gobject = gobject, + name = spatial_grid_name + ) + cell_metadata <- getCellMetadata( + gobject = gobject, + output = "data.table" + ) @@ -454,7 +514,8 @@ NULL if (show_other_cells) { CellType <- strsplit(interaction_name, "--") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -465,13 +526,15 @@ NULL cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimenion need to be defined, default is + message("first and second dimenion need to be defined, default is first 2\n") sdimx <- "sdimx" sdimy <- "sdimy" @@ -485,15 +548,19 @@ NULL if (show_other_network) { pl <- pl + ggplot2::geom_segment( data = spatial_network[!unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = "lightgrey", size = 0.5, alpha = 0.5 ) } pl <- pl + ggplot2::geom_segment( data = spatial_network[unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = network_color, size = 0.5, alpha = 0.5 ) } @@ -501,7 +568,7 @@ NULL if (!is.null(spatial_grid) & show_grid == TRUE) { if (is.null(grid_color)) grid_color <- "black" pl <- pl + ggplot2::geom_rect( - data = spatial_grid, + data = spatial_grid, aes(xmin = x_start, xmax = x_end, ymin = y_start, ymax = y_end), color = grid_color, fill = NA ) @@ -511,22 +578,22 @@ NULL if (is.null(cell_color)) { cell_color <- "lightblue" pl <- pl + ggplot2::geom_point( - data = cell_locations[!cell_ID %in% cell_IDs_to_keep], + data = cell_locations[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = "lightgrey", + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other ) pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% cell_IDs_to_keep], + data = cell_locations[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% other_cell_IDs], + data = cell_locations[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -539,24 +606,24 @@ NULL } pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), fill = "lightgrey", shape = 21, size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy, fill = cell_color), show.legend = show_legend, shape = 21, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy, fill = cell_color), - show.legend = show_legend, shape = 21, + show.legend = show_legend, shape = 21, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -569,7 +636,8 @@ NULL } else if (color_as_factor == TRUE) { number_colors <- length(unique(factor_data)) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) names(cell_color_code) <- unique(factor_data) pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } else if (color_as_factor == FALSE) { @@ -586,19 +654,20 @@ NULL } else { pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[ - !cell_ID %in% cell_IDs_to_keep], + !cell_ID %in% cell_IDs_to_keep + ], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) } @@ -622,39 +691,40 @@ NULL -#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell +#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell #' interactions according to spatial coordinates in plotly mode #' @keywords internal -.cellProximityVisPlot_2D_plotly <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 2, - point_size_other = 1, - point_alpha_other = 0.3, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - ...) { +.cellProximityVisPlot_2D_plotly <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 2, + point_size_other = 1, + point_alpha_other = 0.3, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + ...) { # data.table variables cell_ID <- unified_int <- NULL if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, run + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } @@ -665,18 +735,21 @@ NULL spatial_network <- annotateSpatialNetwork( - gobject = gobject, - spatial_network_name = spatial_network_name, - cluster_column = cluster_column) + gobject = gobject, + spatial_network_name = spatial_network_name, + cluster_column = cluster_column + ) cell_IDs_to_keep <- unique(c( - spatial_network[unified_int %in% interaction_name]$to, - spatial_network[unified_int %in% interaction_name]$from)) + spatial_network[unified_int %in% interaction_name]$to, + spatial_network[unified_int %in% interaction_name]$from + )) if (show_other_cells) { CellType <- strsplit(interaction_name, "-") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -686,7 +759,9 @@ NULL cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } @@ -694,7 +769,7 @@ NULL # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimenion need to be defined, default is + message("first and second dimenion need to be defined, default is first 2") sdimx <- "sdimx" sdimy <- "sdimy" @@ -773,12 +848,15 @@ NULL if (cell_color %in% colnames(cell_locations_metadata)) { if (is.null(cell_color_code)) { number_colors <- length(unique(cell_locations_metadata[[ - cell_color]])) + cell_color + ]])) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) } cell_locations_metadata[[cell_color]] <- as.factor( - cell_locations_metadata[[cell_color]]) + cell_locations_metadata[[cell_color]] + ) pl <- pl %>% plotly::add_trace( type = "scatter", mode = "markers", @@ -786,7 +864,8 @@ NULL data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, color = cell_locations_metadata[ - cell_ID %in% cell_IDs_to_keep][[cell_color]], + cell_ID %in% cell_IDs_to_keep + ][[cell_color]], colors = cell_color_code, marker = list(size = point_size_select) ) @@ -797,7 +876,8 @@ NULL data = cell_locations_metadata[cell_ID %in% other_cell_IDs], x = ~sdimx, y = ~sdimy, color = cell_locations_metadata[ - cell_ID %in% other_cell_IDs][[cell_color]], + cell_ID %in% other_cell_IDs + ][[cell_color]], colors = cell_color_code, opacity = point_alpha_other, marker = list(size = point_size_select * 0.7) @@ -809,9 +889,10 @@ NULL data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) } else { @@ -824,9 +905,10 @@ NULL data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, marker = list( - size = point_size_select, - color = "lightblue", - colors = "lightblue") + size = point_size_select, + color = "lightblue", + colors = "lightblue" + ) ) if (show_other_cells) { pl <- pl %>% plotly::add_trace( @@ -835,9 +917,10 @@ NULL x = ~sdimx, y = ~sdimy, name = "selected cells outside network", marker = list( - size = point_size_select * 0.7, - color = "lightblue", - colors = "lightblue"), + size = point_size_select * 0.7, + color = "lightblue", + colors = "lightblue" + ), opacity = point_alpha_other ) } @@ -847,9 +930,10 @@ NULL data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) } @@ -869,41 +953,42 @@ NULL } -#' @describeIn cellProximityVisPlot_internals Visualize 3D cell-cell +#' @describeIn cellProximityVisPlot_internals Visualize 3D cell-cell #' interactions according to spatial coordinates in plotly mode #' @keywords internal -.cellProximityVisPlot_3D_plotly <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - sdimz = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 2, - point_size_other = 1, - point_alpha_other = 0.5, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - ...) { +.cellProximityVisPlot_3D_plotly <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + sdimz = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 2, + point_size_other = 1, + point_alpha_other = 0.5, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + ...) { # data.table variables cell_ID <- unified_int <- NULL if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } @@ -914,18 +999,23 @@ NULL spatial_network <- annotateSpatialNetwork( - gobject = gobject, - spatial_network_name = spatial_network_name, - cluster_column = cluster_column) + gobject = gobject, + spatial_network_name = spatial_network_name, + cluster_column = cluster_column + ) - cell_IDs_to_keep <- unique(c(spatial_network[ - unified_int %in% interaction_name]$to, - spatial_network[unified_int %in% interaction_name]$from)) + cell_IDs_to_keep <- unique(c( + spatial_network[ + unified_int %in% interaction_name + ]$to, + spatial_network[unified_int %in% interaction_name]$from + )) if (show_other_cells) { CellType <- strsplit(interaction_name, "-") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -935,7 +1025,9 @@ NULL cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } @@ -943,7 +1035,7 @@ NULL # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimenion need to be defined, default is + message("first and second dimenion need to be defined, default is first 2") sdimx <- "sdimx" sdimy <- "sdimy" @@ -970,21 +1062,26 @@ NULL if (cell_color %in% colnames(cell_locations_metadata)) { if (is.null(cell_color_code)) { number_colors <- length(unique(cell_locations_metadata[[ - cell_color]])) + cell_color + ]])) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) } cell_locations_metadata[[cell_color]] <- as.factor( - cell_locations_metadata[[cell_color]]) + cell_locations_metadata[[cell_color]] + ) pl <- pl %>% plotly::add_trace( type = "scatter3d", mode = "markers", data = cell_locations_metadata[ - cell_ID %in% cell_IDs_to_keep], + cell_ID %in% cell_IDs_to_keep + ], x = ~sdimx, y = ~sdimy, z = ~sdimz, color = cell_locations_metadata[ - cell_ID %in% cell_IDs_to_keep][[cell_color]], + cell_ID %in% cell_IDs_to_keep + ][[cell_color]], colors = cell_color_code, marker = list(size = point_size_select) ) %>% @@ -992,12 +1089,14 @@ NULL type = "scatter3d", mode = "markers", name = "unselected cells", data = cell_locations_metadata[ - !cell_ID %in% cell_IDs_to_keep], + !cell_ID %in% cell_IDs_to_keep + ], x = ~sdimx, y = ~sdimy, z = ~sdimz, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) if (show_other_cells) { @@ -1007,7 +1106,8 @@ NULL data = cell_locations_metadata[cell_ID %in% other_cell_IDs], x = ~sdimx, y = ~sdimy, z = ~sdimz, color = cell_locations_metadata[ - cell_ID %in% other_cell_IDs][[cell_color]], + cell_ID %in% other_cell_IDs + ][[cell_color]], colors = cell_color_code, opacity = point_alpha_other, marker = list(size = point_size_select * 0.7) @@ -1024,9 +1124,10 @@ NULL data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, z = ~sdimz, marker = list( - size = point_size_select, - color = "lightblue", - colors = "lightblue") + size = point_size_select, + color = "lightblue", + colors = "lightblue" + ) ) %>% plotly::add_trace( type = "scatter3d", mode = "markers", @@ -1034,9 +1135,10 @@ NULL data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, z = ~sdimz, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) if (show_other_cells) { @@ -1046,9 +1148,10 @@ NULL x = ~sdimx, y = ~sdimy, z = ~sdimz, name = "selected cells outside network", marker = list( - size = point_size_select * 0.7, - color = "lightblue", - colors = "lightblue"), + size = point_size_select * 0.7, + color = "lightblue", + colors = "lightblue" + ), opacity = point_alpha_other ) } @@ -1060,18 +1163,18 @@ NULL unselect_network <- spatial_network[!unified_int %in% interaction_name] select_network <- spatial_network[unified_int %in% interaction_name] pl <- pl %>% plotly::add_trace( - name = "sptial network", mode = "lines", + name = "sptial network", mode = "lines", type = "scatter3d", opacity = 0.5, data = plotly_network(select_network), - x = ~x, y = ~y, z = ~z, inherit = FALSE, + x = ~x, y = ~y, z = ~z, inherit = FALSE, line = list(color = network_color) ) if (show_other_network == TRUE) { pl <- pl %>% plotly::add_trace( - name = "unselected sptial network", mode = "lines", + name = "unselected sptial network", mode = "lines", type = "scatter3d", opacity = 0.1, data = plotly_network(unselect_network), - x = ~x, y = ~y, z = ~z, inherit = FALSE, + x = ~x, y = ~y, z = ~z, inherit = FALSE, line = list(color = "lightgray") ) } @@ -1093,7 +1196,7 @@ NULL #' @title cellProximityVisPlot #' @name cellProximityVisPlot -#' @description Visualize cell-cell interactions according to spatial +#' @description Visualize cell-cell interactions according to spatial #' coordinates #' @param gobject giotto object #' @param interaction_name cell-cell interaction name @@ -1134,43 +1237,46 @@ NULL #' g <- GiottoData::loadGiottoMini("visium") #' g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' -#' cellProximityVisPlot(gobject = g, interaction_name = x, -#' cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy") +#' +#' cellProximityVisPlot( +#' gobject = g, interaction_name = x, +#' cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy" +#' ) #' @export -cellProximityVisPlot <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - sdimz = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - plot_method = c("ggplot", "plotly"), - ...) { +cellProximityVisPlot <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + sdimz = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + plot_method = c("ggplot", "plotly"), + ...) { ## decide plot method plot_method <- match.arg(plot_method, choices = c("ggplot", "plotly")) axis_scale <- match.arg(axis_scale, c("cube", "real", "custom")) @@ -1178,7 +1284,7 @@ cellProximityVisPlot <- function(gobject, if (plot_method == "ggplot") { if (is.null(sdimx) | is.null(sdimy)) { - warning("plot_method = ggplot, but spatial dimensions for sdimx + warning("plot_method = ggplot, but spatial dimensions for sdimx and sdimy for 2D plotting are not given. \n It will default to the 'sdimx' and 'sdimy'") sdimx <- "sdimx" @@ -1186,7 +1292,7 @@ cellProximityVisPlot <- function(gobject, } if (length(c(sdimx, sdimy, sdimz)) == 3) { - warning("ggplot is not able to produce 3D plot! Please choose + warning("ggplot is not able to produce 3D plot! Please choose plotly method") } result <- .cellProximityVisPlot_2D_ggplot( @@ -1309,7 +1415,7 @@ cellProximityVisPlot <- function(gobject, #' @param min_cells minimum number of source cell type #' @param min_cells_expr minimum expression level for source cell type #' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr minimum expression level for interacting neighbor +#' @param min_int_cells_expr minimum expression level for interacting neighbor #' cell type #' @param min_fdr minimum adjusted p-value #' @param min_spat_diff minimum absolute spatial expression difference @@ -1321,45 +1427,53 @@ cellProximityVisPlot <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -#' -#' plotCellProximityFeats(gobject = g, icfObject = icfObject, -#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE) +#' +#' plotCellProximityFeats( +#' gobject = g, icfObject = icfObject, +#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE +#' ) #' @export -plotCellProximityFeats <- function(gobject, - icfObject, - method = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot"), - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCellProximityFeats") { +plotCellProximityFeats <- function( + gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCellProximityFeats") { if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } # print, return and save parameters show_plot <- ifelse( - is.na(show_plot), - readGiottoInstructions(gobject, param = "show_plot"), show_plot) + is.na(show_plot), + readGiottoInstructions(gobject, param = "show_plot"), show_plot + ) save_plot <- ifelse( - is.na(save_plot), - readGiottoInstructions(gobject, param = "save_plot"), save_plot) + is.na(save_plot), + readGiottoInstructions(gobject, param = "save_plot"), save_plot + ) return_plot <- ifelse( - is.na(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), return_plot) + is.na(return_plot), + readGiottoInstructions(gobject, param = "return_plot"), return_plot + ) ## first filter @@ -1381,9 +1495,12 @@ plotCellProximityFeats <- function(gobject, ## other parameters method <- match.arg( - method, - choices = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot")) + method, + choices = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ) + ) # variables @@ -1394,14 +1511,19 @@ plotCellProximityFeats <- function(gobject, ## volcanoplot pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_point( - data = complete_part, - ggplot2::aes(x = log2fc, - y = ifelse(is.infinite(-log10(p.adj)), - 1000, -log10(p.adj)))) + data = complete_part, + ggplot2::aes( + x = log2fc, + y = ifelse(is.infinite(-log10(p.adj)), + 1000, -log10(p.adj) + ) + ) + ) pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_vline(xintercept = 0, linetype = 2) pl <- pl + ggplot2::labs( - x = "log2 fold-change", y = "-log10(p.adjusted)") + x = "log2 fold-change", y = "-log10(p.adjusted)" + ) ## print plot @@ -1412,9 +1534,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1429,11 +1554,14 @@ plotCellProximityFeats <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( - data = complete_part, - ggplot2::aes(x = unif_int, fill = unif_int)) + data = complete_part, + ggplot2::aes(x = unif_int, fill = unif_int) + ) pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text( - angle = 90, hjust = 1, vjust = 1)) + angle = 90, hjust = 1, vjust = 1 + ) + ) pl <- pl + ggplot2::coord_flip() ## print plot @@ -1444,9 +1572,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1463,14 +1594,17 @@ plotCellProximityFeats <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( data = complete_part, - ggplot2::aes(x = cell_type, fill = int_cell_type)) + ggplot2::aes(x = cell_type, fill = int_cell_type) + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } pl <- pl + ggplot2::theme_classic() + ggplot2::theme( - axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) ## print plot @@ -1481,9 +1615,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1503,14 +1640,18 @@ plotCellProximityFeats <- function(gobject, ) + ggalluvial::geom_alluvium(aes(fill = cell_type), width = 1 / 12) + ggalluvial::geom_stratum( - width = 1 / 12, fill = "black", color = "grey") + + width = 1 / 12, fill = "black", color = "grey" + ) + ggplot2::scale_x_discrete( - limits = c("cell type", "neighbours"), expand = c(.05, .05)) + + limits = c("cell type", "neighbours"), expand = c(.05, .05) + ) + ggplot2::geom_label( - stat = "stratum", label.strata = TRUE, size = 3) + + stat = "stratum", label.strata = TRUE, size = 3 + ) + ggplot2::theme_classic() + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) @@ -1526,9 +1667,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1537,23 +1681,30 @@ plotCellProximityFeats <- function(gobject, } } else if (method == "dotplot") { changed_feats <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_feats[, cell_type := factor(cell_type, unique(cell_type))] changed_feats[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( - data = changed_feats, - ggplot2::aes(x = cell_type, y = int_cell_type, size = N)) + data = changed_feats, + ggplot2::aes(x = cell_type, y = int_cell_type, size = N) + ) pl <- pl + ggplot2::scale_size_continuous( - guide = guide_legend(title = "# of ICFs")) + guide = guide_legend(title = "# of ICFs") + ) pl <- pl + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + )) pl <- pl + ggplot2::labs( - x = "source cell type", y = "neighbor cell type") + x = "source cell type", y = "neighbor cell type" + ) ## print plot if (show_plot == TRUE) { @@ -1563,9 +1714,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1574,14 +1728,19 @@ plotCellProximityFeats <- function(gobject, } } else if (method == "heatmap") { changed_feats <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_feats[, cell_type := factor(cell_type, unique(cell_type))] changed_feats[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] changed_feats_d <- data.table::dcast.data.table( - changed_feats, cell_type ~ int_cell_type, value.var = "N", fill = 0) + changed_feats, cell_type ~ int_cell_type, + value.var = "N", fill = 0 + ) changed_feats_m <- dt_to_matrix(changed_feats_d) col_fun <- GiottoVisuals::colorRamp2( @@ -1591,7 +1750,7 @@ plotCellProximityFeats <- function(gobject, heatm <- ComplexHeatmap::Heatmap(log2(changed_feats_m + 1), col = col_fun, - row_title = "cell_type", + row_title = "cell_type", column_title = "int_cell_type", heatmap_legend_param = list(title = "log2(# DEGs)") ) @@ -1604,9 +1763,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = heatm, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = heatm, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1639,7 +1801,7 @@ plotCellProximityFeats <- function(gobject, #' @param min_cells minimum number of source cell type #' @param min_cells_expr minimum expression level for source cell type #' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr minimum expression level for interacting neighbor +#' @param min_int_cells_expr minimum expression level for interacting neighbor #' cell type #' @param min_fdr minimum adjusted p-value #' @param min_spat_diff minimum absolute spatial expression difference @@ -1650,32 +1812,39 @@ plotCellProximityFeats <- function(gobject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotCPF(gobject = g, icfObject = icfObject, show_plot = TRUE, -#' save_plot = FALSE, return_plot = FALSE) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotCPF( +#' gobject = g, icfObject = icfObject, show_plot = TRUE, +#' save_plot = FALSE, return_plot = FALSE +#' ) #' @export -plotCPF <- function(gobject, - icfObject, - method = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot"), - min_cells = 5, - min_cells_expr = 1, - min_int_cells = 3, - min_int_cells_expr = 1, - min_fdr = 0.05, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCPG") { +plotCPF <- function( + gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 5, + min_cells_expr = 1, + min_int_cells = 3, + min_int_cells_expr = 1, + min_fdr = 0.05, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCPG") { plotCellProximityFeats( gobject = gobject, icfObject = icfObject, @@ -1714,30 +1883,35 @@ plotCPF <- function(gobject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotInteractionChangedFeats(gobject = g, icfObject = icfObject, -#' source_type = "1", source_markers = "Ccnd2", -#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotInteractionChangedFeats( +#' gobject = g, icfObject = icfObject, +#' source_type = "1", source_markers = "Ccnd2", +#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +#' ) #' @export -plotInteractionChangedFeats <- function(gobject, - icfObject, - source_type, - source_markers, - ICF_feats, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotInteractionChangedFeats") { +plotInteractionChangedFeats <- function( + gobject, + icfObject, + source_type, + source_markers, + ICF_feats, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotInteractionChangedFeats") { # data.table variables cell_type <- int_cell_type <- log2fc <- NULL if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } @@ -1754,22 +1928,28 @@ plotInteractionChangedFeats <- function(gobject, if (length(not_detected_feats) > 0) { cat( "These selected features are not in the icfObject: \n", - not_detected_feats) + not_detected_feats + ) } # data.table set column names feats <- group <- NULL tempDT <- ICFscores[feats %in% all_feats][cell_type == source_type][ - int_cell_type %in% neighbor_types] + int_cell_type %in% neighbor_types + ] tempDT[, feats := factor(feats, levels = detected_feats)] - tempDT[, group := names(ICF_feats[ICF_feats == feats]), by = 1:nrow(tempDT)] + tempDT[, group := names(ICF_feats[ICF_feats == feats]), + by = seq_len(nrow(tempDT)) + ] if (is.null(cell_color_code)) { mycolors <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = length(unique( - tempDT$int_cell_type))) + instrs = instructions(gobject) + )(n = length(unique( + tempDT$int_cell_type + ))) names(mycolors) <- unique(tempDT$int_cell_type) } else { mycolors <- cell_color_code @@ -1779,17 +1959,20 @@ plotInteractionChangedFeats <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text( - size = 14, angle = 45, vjust = 1, hjust = 1), + size = 14, angle = 45, vjust = 1, hjust = 1 + ), axis.text.y = ggplot2::element_text(size = 14), axis.title = ggplot2::element_text(size = 14) ) pl <- pl + ggplot2::geom_bar( - data = tempDT, - ggplot2::aes(x = feats, y = log2fc, fill = int_cell_type), - stat = "identity", position = ggplot2::position_dodge()) + data = tempDT, + ggplot2::aes(x = feats, y = log2fc, fill = int_cell_type), + stat = "identity", position = ggplot2::position_dodge() + ) pl <- pl + ggplot2::scale_fill_manual(values = mycolors) pl <- pl + ggplot2::labs(x = "", title = paste0( - "fold-change z-scores in ", source_type)) + "fold-change z-scores in ", source_type + )) # output plot return(GiottoVisuals::plot_output_handler( @@ -1823,24 +2006,29 @@ plotInteractionChangedFeats <- function(gobject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotICF(gobject = g, icfObject = icfObject, -#' source_type = "1", source_markers = "Ccnd2", -#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotICF( +#' gobject = g, icfObject = icfObject, +#' source_type = "1", source_markers = "Ccnd2", +#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +#' ) #' @export -plotICF <- function(gobject, - icfObject, - source_type, - source_markers, - ICF_feats, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotICF") { +plotICF <- function( + gobject, + icfObject, + source_type, + source_markers, + ICF_feats, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotICF") { plotInteractionChangedFeats( gobject = gobject, icfObject = icfObject, @@ -1881,58 +2069,64 @@ plotICF <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' +#' +#' g_icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' #' combIcfObject <- combineInteractionChangedFeats(g_icf) -#' -#' plotCombineInteractionChangedFeats(gobject = g, -#' combIcfObject = combIcfObject, -#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), -#' selected_interactions = "1--8") +#' +#' plotCombineInteractionChangedFeats( +#' gobject = g, +#' combIcfObject = combIcfObject, +#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), +#' selected_interactions = "1--8" +#' ) #' @export -plotCombineInteractionChangedFeats <- function(gobject, - combIcfObject, - selected_interactions = NULL, - selected_feat_to_feat = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "feats"), - facet_scales = "fixed", - facet_ncol = length(selected_feat_to_feat), - facet_nrow = length(selected_interactions), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineICF") { +plotCombineInteractionChangedFeats <- function( + gobject, + combIcfObject, + selected_interactions = NULL, + selected_feat_to_feat = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "feats"), + facet_scales = "fixed", + facet_ncol = length(selected_feat_to_feat), + facet_nrow = length(selected_interactions), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineICF") { ## check validity if (!"combIcfObject" %in% class(combIcfObject)) { - stop("combIcfObject needs to be the output from + stop("combIcfObject needs to be the output from combineInteractionChangedFeats() or combineICF()") } combIcfscore <- copy(combIcfObject[["combICFscores"]]) if (is.null(selected_interactions) | is.null(selected_feat_to_feat)) { - stop("You need to provide a selection of cell-cell interactions and + stop("You need to provide a selection of cell-cell interactions and features-features to plot") } # data.table variables - unif_feat_feat <- unif_int <- other_2 <- sel_2 <- other_1 <- sel_1 <- + unif_feat_feat <- unif_int <- other_2 <- sel_2 <- other_1 <- sel_1 <- cols <- NULL - subDT <- combIcfscore[unif_feat_feat %in% selected_feat_to_feat & - unif_int %in% selected_interactions] + subDT <- combIcfscore[unif_feat_feat %in% selected_feat_to_feat & + unif_int %in% selected_interactions] # order interactions and feat-to-feat according to input subDT[, unif_feat_feat := factor( - unif_feat_feat, levels = selected_feat_to_feat)] + unif_feat_feat, + levels = selected_feat_to_feat + )] subDT[, unif_int := factor(unif_int, levels = selected_interactions)] if (simple_plot == FALSE) { @@ -1941,31 +2135,37 @@ plotCombineInteractionChangedFeats <- function(gobject, if (detail_plot == TRUE) { pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = other_2, colour = "other cell expression"), - shape = 1) + data = subDT, + aes(x = 0, y = other_2, colour = "other cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = sel_2, colour = "selected cell expression"), - shape = 1) + data = subDT, + aes(x = 0, y = sel_2, colour = "selected cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = other_1, y = 0, colour = "other cell expression"), - shape = 1) + data = subDT, + aes(x = other_1, y = 0, colour = "other cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sel_1, y = 0, colour = "selected cell expression"), - shape = 1) + data = subDT, + aes(x = sel_1, y = 0, colour = "selected cell expression"), + shape = 1 + ) } pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = other_1, y = other_2, colour = "other cell expression"), - size = 2) + data = subDT, + aes(x = other_1, y = other_2, colour = "other cell expression"), + size = 2 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sel_1, y = sel_2, colour = "selected cell expression"), - size = 2) + data = subDT, + aes(x = sel_1, y = sel_2, colour = "selected cell expression"), + size = 2 + ) pl <- pl + ggplot2::geom_segment(data = subDT, aes( x = other_1, xend = sel_1, y = other_2, yend = sel_2 @@ -1975,14 +2175,16 @@ plotCombineInteractionChangedFeats <- function(gobject, y = paste(subDT$feats_2, subDT$cell_type_2, sep = " in ") ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~ unif_feat_feat + unif_int, nrow = facet_nrow, ncol = facet_ncol, scales = facet_scales ) } else { simple_plot_facet <- match.arg( - arg = simple_plot_facet, choices = c("interaction", "feats")) + arg = simple_plot_facet, choices = c("interaction", "feats") + ) if (simple_plot_facet == "interaction") { pl <- ggplot2::ggplot() @@ -1992,15 +2194,22 @@ plotCombineInteractionChangedFeats <- function(gobject, y = unif_feat_feat, yend = unif_feat_feat ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(other_1, other_2)), y = unif_feat_feat, - colour = "other cell expression")) + data = subDT, + aes( + x = sum(c(other_1, other_2)), y = unif_feat_feat, + colour = "other cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(sel_1, sel_2)), y = unif_feat_feat, - colour = "selected cell expression")) + data = subDT, + aes( + x = sum(c(sel_1, sel_2)), y = unif_feat_feat, + colour = "selected cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = cols) + name = "expression source", values = cols + ) pl <- pl + ggplot2::facet_wrap(~unif_int, scales = facet_scales) pl <- pl + ggplot2::labs(x = "interactions", y = "feat-feat") } else { @@ -2011,17 +2220,26 @@ plotCombineInteractionChangedFeats <- function(gobject, y = unif_int, yend = unif_int ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(other_1, other_2)), y = unif_int, - colour = "other cell expression")) + data = subDT, + aes( + x = sum(c(other_1, other_2)), y = unif_int, + colour = "other cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(sel_1, sel_2)), y = unif_int, - colour = "selected cell expression")) + data = subDT, + aes( + x = sum(c(sel_1, sel_2)), y = unif_int, + colour = "selected cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = cols) + name = "expression source", values = cols + ) pl <- pl + ggplot2::facet_wrap( - ~unif_feat_feat, scales = facet_scales) + ~unif_feat_feat, + scales = facet_scales + ) pl <- pl + ggplot2::labs(x = "feat-feat", y = "interactions") } } @@ -2063,33 +2281,37 @@ plotCombineInteractionChangedFeats <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' +#' +#' g_icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' #' combIcfObject <- combineInteractionChangedFeats(g_icf) -#' -#' plotCombineICF(gobject = g, combIcfObject = combIcfObject, -#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), -#' selected_interactions = "1--8") +#' +#' plotCombineICF( +#' gobject = g, combIcfObject = combIcfObject, +#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), +#' selected_interactions = "1--8" +#' ) #' @export -plotCombineICF <- function(gobject, - combIcfObject, - selected_interactions = NULL, - selected_feat_to_feat = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "feats"), - facet_scales = "fixed", - facet_ncol = length(selected_feat_to_feat), - facet_nrow = length(selected_interactions), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineICF") { +plotCombineICF <- function( + gobject, + combIcfObject, + selected_interactions = NULL, + selected_feat_to_feat = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "feats"), + facet_scales = "fixed", + facet_ncol = length(selected_feat_to_feat), + facet_nrow = length(selected_interactions), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineICF") { plotCombineInteractionChangedFeats( gobject = gobject, combIcfObject = combIcfObject, @@ -2126,13 +2348,13 @@ plotCombineICF <- function(gobject, #' @title plotCombineCellCellCommunication #' @name plotCombineCellCellCommunication -#' @description Create visualization for combined (pairwise) cell proximity +#' @description Create visualization for combined (pairwise) cell proximity #' gene scores #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @param combCCcom combined communcation scores, output from combCCcom() #' @param selected_LR selected ligand-receptor pair -#' @param selected_cell_LR selected cell-cell interaction pair for +#' @param selected_cell_LR selected cell-cell interaction pair for #' ligand-receptor pair #' @param detail_plot show detailed info in both interacting cell types #' @param simple_plot show a simplified plot @@ -2144,49 +2366,59 @@ plotCombineICF <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot") -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot" +#' ) +#' #' combCCcom <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' -#' plotCombineCellCellCommunication(gobject = g, combCCcom = combCCcom, -#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1")) +#' +#' plotCombineCellCellCommunication( +#' gobject = g, combCCcom = combCCcom, +#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") +#' ) #' @export -plotCombineCellCellCommunication <- function(gobject, - combCCcom, - selected_LR = NULL, - selected_cell_LR = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "genes"), - facet_scales = "fixed", - facet_ncol = length(selected_LR), - facet_nrow = length(selected_cell_LR), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineCellCellCommunication") { +plotCombineCellCellCommunication <- function( + gobject, + combCCcom, + selected_LR = NULL, + selected_cell_LR = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "genes"), + facet_scales = "fixed", + facet_ncol = length(selected_LR), + facet_nrow = length(selected_cell_LR), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineCellCellCommunication") { # data.table variables - LR_comb <- LR_cell_comb <- lig_expr <- lig_expr_spat <- rec_expr <- + LR_comb <- LR_cell_comb <- lig_expr <- lig_expr_spat <- rec_expr <- rec_expr_spat <- LR_expr <- LR_expr_spat <- NULL ## check validity if (is.null(selected_cell_LR) | is.null(selected_LR)) { - stop("You need to provide a selection of cell-cell interactions + stop("You need to provide a selection of cell-cell interactions and genes-genes to plot") } subDT <- combCCcom[ - LR_comb %in% selected_LR & LR_cell_comb %in% selected_cell_LR] + LR_comb %in% selected_LR & LR_cell_comb %in% selected_cell_LR + ] # order interactions and gene-to-gene according to input subDT[, LR_comb := factor(LR_comb, levels = selected_LR)] @@ -2198,31 +2430,43 @@ plotCombineCellCellCommunication <- function(gobject, if (detail_plot == TRUE) { pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = lig_expr, colour = "overall cell expression"), - shape = 1) + data = subDT, + aes(x = 0, y = lig_expr, colour = "overall cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = lig_expr_spat, - colour = "spatial cell expression"), shape = 1) + data = subDT, + aes( + x = 0, y = lig_expr_spat, + colour = "spatial cell expression" + ), shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = rec_expr, y = 0, colour = "overall cell expression"), - shape = 1) + data = subDT, + aes(x = rec_expr, y = 0, colour = "overall cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = rec_expr_spat, y = 0, - colour = "spatial cell expression"), shape = 1) + data = subDT, + aes( + x = rec_expr_spat, y = 0, + colour = "spatial cell expression" + ), shape = 1 + ) } pl <- pl + ggplot2::geom_point( - data = subDT, + data = subDT, aes(x = rec_expr, y = lig_expr, colour = "overall cell expression"), - size = 2) + size = 2 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = rec_expr_spat, y = lig_expr_spat, - colour = "spatial cell expression"), size = 2) + data = subDT, + aes( + x = rec_expr_spat, y = lig_expr_spat, + colour = "spatial cell expression" + ), size = 2 + ) pl <- pl + ggplot2::geom_segment(data = subDT, aes( x = rec_expr, xend = rec_expr_spat, y = lig_expr, yend = lig_expr_spat @@ -2232,14 +2476,16 @@ plotCombineCellCellCommunication <- function(gobject, y = paste(subDT$ligand, subDT$lig_cell_type, sep = " in ") ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~ LR_comb + LR_cell_comb, nrow = facet_nrow, ncol = facet_ncol, scales = facet_scales ) } else { simple_plot_facet <- match.arg( - arg = simple_plot_facet, choices = c("interaction", "genes")) + arg = simple_plot_facet, choices = c("interaction", "genes") + ) if (simple_plot_facet == "interaction") { pl <- ggplot2::ggplot() @@ -2249,15 +2495,22 @@ plotCombineCellCellCommunication <- function(gobject, y = LR_comb, yend = LR_comb ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr, y = LR_comb, - colour = "overall cell expression")) + data = subDT, + aes( + x = LR_expr, y = LR_comb, + colour = "overall cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr_spat, y = LR_comb, - colour = "spatial cell expression")) + data = subDT, + aes( + x = LR_expr_spat, y = LR_comb, + colour = "spatial cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~LR_cell_comb, scales = "fixed") pl <- pl + ggplot2::labs(x = "interactions", y = "gene-gene") pl @@ -2269,15 +2522,22 @@ plotCombineCellCellCommunication <- function(gobject, y = LR_cell_comb, yend = LR_cell_comb ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr, y = LR_cell_comb, - colour = "overall cell expression")) + data = subDT, + aes( + x = LR_expr, y = LR_cell_comb, + colour = "overall cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr_spat, y = LR_cell_comb, - colour = "spatial cell expression")) + data = subDT, + aes( + x = LR_expr_spat, y = LR_cell_comb, + colour = "spatial cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~LR_comb, scales = facet_scales) pl <- pl + ggplot2::labs(x = "gene-gene", y = "interactions") } @@ -2300,13 +2560,13 @@ plotCombineCellCellCommunication <- function(gobject, #' @title plotCombineCCcom #' @name plotCombineCCcom -#' @description Create visualization for combined (pairwise) cell proximity +#' @description Create visualization for combined (pairwise) cell proximity #' gene scores #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @param combCCcom combined communcation scores, output from combCCcom() #' @param selected_LR selected ligand-receptor pair -#' @param selected_cell_LR selected cell-cell interaction pair for +#' @param selected_cell_LR selected cell-cell interaction pair for #' ligand-receptor pair #' @param detail_plot show detailed info in both interacting cell types #' @param simple_plot show a simplified plot @@ -2318,37 +2578,46 @@ plotCombineCellCellCommunication <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot") -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot" +#' ) +#' #' combCCcom <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' -#' plotCombineCCcom(gobject = g, combCCcom = combCCcom, -#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1")) +#' +#' plotCombineCCcom( +#' gobject = g, combCCcom = combCCcom, +#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") +#' ) #' @export -plotCombineCCcom <- function(gobject, - combCCcom, - selected_LR = NULL, - selected_cell_LR = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "genes"), - facet_scales = "fixed", - facet_ncol = length(selected_LR), - facet_nrow = length(selected_cell_LR), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineCCcom") { +plotCombineCCcom <- function( + gobject, + combCCcom, + selected_LR = NULL, + selected_cell_LR = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "genes"), + facet_scales = "fixed", + facet_ncol = length(selected_LR), + facet_nrow = length(selected_cell_LR), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineCCcom") { plotCombineCellCellCommunication( gobject = gobject, combCCcom = combCCcom, @@ -2373,15 +2642,15 @@ plotCombineCCcom <- function(gobject, #' @title plotCCcomHeatmap #' @name plotCCcomHeatmap -#' @description Plots heatmap for ligand-receptor communication scores in +#' @description Plots heatmap for ligand-receptor communication scores in #' cell-cell interactions #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @inheritParams plot_params -#' @param comScores communinication scores from \code{\link{exprCellCellcom}} +#' @param comScores communinication scores from \code{\link{exprCellCellcom}} #' or \code{\link{spatCellCellcom}} #' @param selected_LR selected ligand-receptor combinations -#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor +#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor #' combinations #' @param show_LR_names show ligand-receptor names #' @param show_cell_LR_names show cell-cell names @@ -2391,33 +2660,40 @@ plotCombineCCcom <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' #' plotCCcomHeatmap(gobject = g, comScores = comScores, show_plot = TRUE) #' @export -plotCCcomHeatmap <- function(gobject, - comScores, - selected_LR = NULL, - selected_cell_LR = NULL, - show_LR_names = TRUE, - show_cell_LR_names = TRUE, - show = c("PI", "LR_expr", "log2fc"), - cor_method = c("pearson", "kendall", "spearman"), - aggl_method = c("ward.D", "ward.D2", "single", "complete", "average", - "mcquitty", "median", "centroid"), - gradient_color = NULL, - gradient_style = c("divergent", "sequential"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCCcomHeatmap") { +plotCCcomHeatmap <- function( + gobject, + comScores, + selected_LR = NULL, + selected_cell_LR = NULL, + show_LR_names = TRUE, + show_cell_LR_names = TRUE, + show = c("PI", "LR_expr", "log2fc"), + cor_method = c("pearson", "kendall", "spearman"), + aggl_method = c( + "ward.D", "ward.D2", "single", "complete", "average", + "mcquitty", "median", "centroid" + ), + gradient_color = NULL, + gradient_style = c("divergent", "sequential"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCCcomHeatmap") { # get parameters cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) aggl_method <- match.arg(aggl_method, choices = c( "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid" @@ -2429,8 +2705,8 @@ plotCCcomHeatmap <- function(gobject, # plot method if (!is.null(selected_LR) & !is.null(selected_cell_LR)) { - selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% - selected_cell_LR] + selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% + selected_cell_LR] } else if (!is.null(selected_LR)) { selDT <- comScores[LR_comb %in% selected_LR] } else if (!is.null(selected_cell_LR)) { @@ -2442,14 +2718,18 @@ plotCCcomHeatmap <- function(gobject, # creat matrix show <- match.arg(show, choices = c("PI", "LR_expr", "log2fc")) selDT_d <- data.table::dcast.data.table( - selDT, LR_cell_comb ~ LR_comb, value.var = show, fill = 0) + selDT, LR_cell_comb ~ LR_comb, + value.var = show, fill = 0 + ) selDT_m <- dt_to_matrix(selDT_d) ## cells corclus_cells_dist <- stats::as.dist( - 1 - cor_flex(x = t_flex(selDT_m), method = cor_method)) + 1 - cor_flex(x = t_flex(selDT_m), method = cor_method) + ) hclusters_cells <- stats::hclust( - d = corclus_cells_dist, method = aggl_method) + d = corclus_cells_dist, method = aggl_method + ) clus_names <- rownames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_cells$order] @@ -2457,9 +2737,11 @@ plotCCcomHeatmap <- function(gobject, ## genes corclus_genes_dist <- stats::as.dist( - 1 - cor_flex(x = selDT_m, method = cor_method)) + 1 - cor_flex(x = selDT_m, method = cor_method) + ) hclusters_genes <- stats::hclust( - d = corclus_genes_dist, method = aggl_method) + d = corclus_genes_dist, method = aggl_method + ) clus_names <- colnames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_genes$order] @@ -2513,19 +2795,19 @@ plotCCcomHeatmap <- function(gobject, #' @title plotCCcomDotplot #' @name plotCCcomDotplot -#' @description Plots dotplot for ligand-receptor communication scores in +#' @description Plots dotplot for ligand-receptor communication scores in #' cell-cell interactions #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @inheritParams plot_params -#' @param comScores communication scores from \code{\link{exprCellCellcom}} +#' @param comScores communication scores from \code{\link{exprCellCellcom}} #' or \code{\link{spatCellCellcom}} #' @param selected_LR selected ligand-receptor combinations -#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor +#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor #' combinations #' @param show_LR_names show ligand-receptor names #' @param show_cell_LR_names show cell-cell names -#' @param cluster_on values to use for clustering of cell-cell and +#' @param cluster_on values to use for clustering of cell-cell and #' ligand-receptor pairs #' @param cor_method correlation method used for clustering #' @param aggl_method agglomeration method used by hclust @@ -2534,33 +2816,40 @@ plotCCcomHeatmap <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' #' plotCCcomDotplot(gobject = g, comScores = comScores, show_plot = TRUE) #' @export -plotCCcomDotplot <- function(gobject, - comScores, - selected_LR = NULL, - selected_cell_LR = NULL, - show_LR_names = TRUE, - show_cell_LR_names = TRUE, - cluster_on = c("PI", "LR_expr", "log2fc"), - cor_method = c("pearson", "kendall", "spearman"), - aggl_method = c("ward.D", "ward.D2", "single", "complete", "average", - "mcquitty", "median", "centroid"), - dot_color_gradient = NULL, - gradient_style = c("divergent", "sequential"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCCcomDotplot") { +plotCCcomDotplot <- function( + gobject, + comScores, + selected_LR = NULL, + selected_cell_LR = NULL, + show_LR_names = TRUE, + show_cell_LR_names = TRUE, + cluster_on = c("PI", "LR_expr", "log2fc"), + cor_method = c("pearson", "kendall", "spearman"), + aggl_method = c( + "ward.D", "ward.D2", "single", "complete", "average", + "mcquitty", "median", "centroid" + ), + dot_color_gradient = NULL, + gradient_style = c("divergent", "sequential"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCCcomDotplot") { # get parameters cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) aggl_method <- match.arg(aggl_method, choices = c( "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid" @@ -2572,8 +2861,8 @@ plotCCcomDotplot <- function(gobject, # plot method if (!is.null(selected_LR) & !is.null(selected_cell_LR)) { - selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% - selected_cell_LR] + selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% + selected_cell_LR] } else if (!is.null(selected_LR)) { selDT <- comScores[LR_comb %in% selected_LR] } else if (!is.null(selected_cell_LR)) { @@ -2585,27 +2874,37 @@ plotCCcomDotplot <- function(gobject, # creat matrix cluster_on <- match.arg(cluster_on, choices = c("PI", "LR_expr", "log2fc")) selDT_d <- data.table::dcast.data.table( - selDT, LR_cell_comb ~ LR_comb, value.var = cluster_on, fill = 0) + selDT, LR_cell_comb ~ LR_comb, + value.var = cluster_on, fill = 0 + ) selDT_m <- dt_to_matrix(selDT_d) # remove zero variance sd_rows <- apply(selDT_m, 1, sd) sd_rows_zero <- names(sd_rows[sd_rows == 0]) - if (length(sd_rows_zero) > 0) selDT_m <- selDT_m[ - !rownames(selDT_m) %in% sd_rows_zero, ] + if (length(sd_rows_zero) > 0) { + selDT_m <- selDT_m[ + !rownames(selDT_m) %in% sd_rows_zero, + ] + } sd_cols <- apply(selDT_m, 2, sd) sd_cols_zero <- names(sd_cols[sd_cols == 0]) - if (length(sd_cols_zero) > 0) selDT_m <- selDT_m[ - , !colnames(selDT_m) %in% sd_cols_zero] + if (length(sd_cols_zero) > 0) { + selDT_m <- selDT_m[ + , !colnames(selDT_m) %in% sd_cols_zero + ] + } ## cells corclus_cells_dist <- stats::as.dist( - 1 - cor_flex(x = t_flex(selDT_m), method = cor_method)) + 1 - cor_flex(x = t_flex(selDT_m), method = cor_method) + ) hclusters_cells <- stats::hclust( - d = corclus_cells_dist, method = aggl_method) + d = corclus_cells_dist, method = aggl_method + ) clus_names <- rownames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_cells$order] @@ -2613,9 +2912,11 @@ plotCCcomDotplot <- function(gobject, ## genes corclus_genes_dist <- stats::as.dist( - 1 - cor_flex(x = selDT_m, method = cor_method)) + 1 - cor_flex(x = selDT_m, method = cor_method) + ) hclusters_genes <- stats::hclust( - d = corclus_genes_dist, method = aggl_method) + d = corclus_genes_dist, method = aggl_method + ) clus_names <- colnames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_genes$order] @@ -2675,7 +2976,7 @@ plotCCcomDotplot <- function(gobject, #' @title plotRankSpatvsExpr #' @name plotRankSpatvsExpr -#' @description Plots dotplot to compare ligand-receptor rankings from +#' @description Plots dotplot to compare ligand-receptor rankings from #' spatial and expression information #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -2691,39 +2992,44 @@ plotCCcomDotplot <- function(gobject, #' @param size_range size ranges of dotplot #' @param xlims x-limits, numerical vector of 2 #' @param ylims y-limits, numerical vector of 2 -#' @param selected_ranks numerical vector, will be used to print out the +#' @param selected_ranks numerical vector, will be used to print out the #' percentage of top spatial ranks are recovered #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) -#' +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", +#' random_iter = 10 +#' ) +#' #' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' +#' #' plotRankSpatvsExpr(gobject = g, combCC = combCC) #' @export -plotRankSpatvsExpr <- function(gobject, - combCC, - expr_rnk_column = "LR_expr_rnk", - spat_rnk_column = "LR_spat_rnk", - dot_color_gradient = NULL, - midpoint = deprecated(), - gradient_midpoint = 10, - gradient_style = c("divergent", "sequential"), - size_range = c(0.01, 1.5), - xlims = NULL, - ylims = NULL, - selected_ranks = c(1, 10, 20), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotRankSpatvsExpr") { +plotRankSpatvsExpr <- function( + gobject, + combCC, + expr_rnk_column = "LR_expr_rnk", + spat_rnk_column = "LR_spat_rnk", + dot_color_gradient = NULL, + midpoint = deprecated(), + gradient_midpoint = 10, + gradient_style = c("divergent", "sequential"), + size_range = c(0.01, 1.5), + xlims = NULL, + ylims = NULL, + selected_ranks = c(1, 10, 20), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotRankSpatvsExpr") { # deprecate if (GiottoUtils::is_present(midpoint)) { deprecate_warn( @@ -2742,16 +3048,20 @@ plotRankSpatvsExpr <- function(gobject, rnk_list <- list() spt_list <- list() - for (rnk in 1:total_rnks) { - mytab <- table(cut(sort(combCC[get(expr_rnk_column) == rnk][[ - spat_rnk_column]]), breaks = seq(0, total_rnks, 1), - labels = c(1:total_rnks))) + for (rnk in seq_len(total_rnks)) { + mytab <- table(cut( + sort(combCC[get(expr_rnk_column) == rnk][[ + spat_rnk_column + ]]), + breaks = seq(0, total_rnks, 1), + labels = seq_len(total_rnks) + )) rnk_list[[rnk]] <- mytab spt_list[[rnk]] <- names(mytab) } rnk_res <- data.table::as.data.table(do.call("rbind", rnk_list)) - rnk_res[, spt_rank := 1:total_rnks] + rnk_res[, spt_rank := seq_len(total_rnks)] rnk_res_m <- data.table::melt.data.table(rnk_res, id.vars = "spt_rank") rnk_res_m[, spt_rank := as.numeric(spt_rank)] @@ -2760,20 +3070,24 @@ plotRankSpatvsExpr <- function(gobject, rnk_res_m[, diff := variable - spt_rank] for (i in selected_ranks) { - perc_recovered <- 100 * (sum(rnk_res_m[abs(diff) < i]$value) / - sum(rnk_res_m$value)) - cat("for top ", i, " expression ranks, you recover ", - round(perc_recovered, 2), "% of the highest spatial rank") + perc_recovered <- 100 * (sum(rnk_res_m[abs(diff) < i]$value) / + sum(rnk_res_m$value)) + cat( + "for top ", i, " expression ranks, you recover ", + round(perc_recovered, 2), "% of the highest spatial rank" + ) } # full plot pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( - axis.text = element_blank()) + axis.text = element_blank() + ) pl <- pl + ggplot2::geom_point( - data = rnk_res_m, - ggplot2::aes(x = variable, y = spt_rank, size = value, color = value)) + data = rnk_res_m, + ggplot2::aes(x = variable, y = spt_rank, size = value, color = value) + ) pl <- pl + set_default_color_continuous_CCcom_dotplot( colors = dot_color_gradient, instrs = instructions(gobject), @@ -2783,7 +3097,8 @@ plotRankSpatvsExpr <- function(gobject, guide = guide_legend(title = "") ) pl <- pl + ggplot2::scale_size_continuous( - range = size_range, guide = "none") + range = size_range, guide = "none" + ) pl <- pl + ggplot2::labs(x = "expression rank", y = "spatial rank") if (!is.null(xlims)) { @@ -2811,23 +3126,24 @@ plotRankSpatvsExpr <- function(gobject, #' @title Create recovery plot #' @name .plotRecovery_sub -#' @description Plots recovery plot to compare ligand-receptor rankings from +#' @description Plots recovery plot to compare ligand-receptor rankings from #' spatial and expression information #' @param combCC combined communinication scores from \code{\link{combCCcom}} #' @param first_col first column to use #' @param second_col second column to use #' @returns ggplot #' @keywords internal -.plotRecovery_sub <- function(combCC, - first_col = "LR_expr_rnk", - second_col = "LR_spat_rnk") { +.plotRecovery_sub <- function( + combCC, + first_col = "LR_expr_rnk", + second_col = "LR_spat_rnk") { # data.table variables concord <- perc <- not_concord <- secondrank <- secondrank_perc <- NULL mergeDT_filt <- combCC[get(first_col) == 1] mymat <- matrix(data = NA, nrow = max(combCC[[second_col]]), ncol = 2) - for (i in 1:max(combCC[[second_col]])) { + for (i in seq_len(max(combCC[[second_col]]))) { mergeDT_filt[, concord := ifelse(get(second_col) <= i, "yes", "no")] mytable <- table(mergeDT_filt$concord) @@ -2843,7 +3159,7 @@ plotRankSpatvsExpr <- function(gobject, mymatDT <- data.table::as.data.table(mymat) colnames(mymatDT) <- c("concord", "not_concord") mymatDT[, perc := 100 * (concord / (concord + not_concord))] - mymatDT[, secondrank := 1:nrow(mymatDT)] + mymatDT[, secondrank := seq_len(nrow(mymatDT))] mymatDT[, secondrank_perc := (secondrank / max(secondrank)) * 100] # percentage explained @@ -2854,8 +3170,9 @@ plotRankSpatvsExpr <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( - data = mymatDT, - aes(x = secondrank_perc, y = perc)) + data = mymatDT, + aes(x = secondrank_perc, y = perc) + ) pl <- pl + ggplot2::scale_x_continuous(expand = c(0, 0), limits = c(0, 100)) pl <- pl + ggplot2::scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) pl <- pl + ggplot2::geom_abline(slope = 1, intercept = 0, color = "blue") @@ -2869,7 +3186,7 @@ plotRankSpatvsExpr <- function(gobject, #' @title plotRecovery #' @name plotRecovery -#' @description Plots recovery plot to compare ligand-receptor rankings from +#' @description Plots recovery plot to compare ligand-receptor rankings from #' spatial and expression information #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -2880,29 +3197,36 @@ plotRankSpatvsExpr <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) -#' +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", +#' random_iter = 10 +#' ) +#' #' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' +#' #' plotRecovery(gobject = g, combCC = combCC) #' @export -plotRecovery <- function(gobject, - combCC, - expr_rnk_column = "exprPI_rnk", - spat_rnk_column = "spatPI_rnk", - ground_truth = c("spatial", "expression"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotRecovery") { +plotRecovery <- function( + gobject, + combCC, + expr_rnk_column = "exprPI_rnk", + spat_rnk_column = "spatPI_rnk", + ground_truth = c("spatial", "expression"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotRecovery") { ground_truth <- match.arg( - ground_truth, choices = c("spatial", "expression")) + ground_truth, + choices = c("spatial", "expression") + ) if (ground_truth == "spatial") { @@ -2912,8 +3236,9 @@ plotRecovery <- function(gobject, second_col = expr_rnk_column ) pl <- pl + ggplot2::labs( - x = "% expression rank included", - y = "% highest spatial rank recovered") + x = "% expression rank included", + y = "% highest spatial rank recovered" + ) } else if (ground_truth == "expression") { pl <- .plotRecovery_sub( combCC = combCC, @@ -2921,8 +3246,9 @@ plotRecovery <- function(gobject, second_col = spat_rnk_column ) pl <- pl + ggplot2::labs( - x = "% spatial rank included", - y = "% highest expression rank recovered") + x = "% spatial rank included", + y = "% highest expression rank recovered" + ) } return(plot_output_handler( @@ -2950,7 +3276,7 @@ plotRecovery <- function(gobject, #' @title cellProximitySpatPlot2D #' @name cellProximitySpatPlot2D -#' @description Visualize 2D cell-cell interactions according to spatial +#' @description Visualize 2D cell-cell interactions according to spatial #' coordinates in ggplot mode #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -2983,45 +3309,48 @@ plotRecovery <- function(gobject, #' g <- GiottoData::loadGiottoMini("visium") #' g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' -#' cellProximitySpatPlot2D(gobject = g, cluster_column = "leiden_clus", -#' interaction_name = x) +#' +#' cellProximitySpatPlot2D( +#' gobject = g, cluster_column = "leiden_clus", +#' interaction_name = x +#' ) #' @export -cellProximitySpatPlot2D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = NULL, - interaction_name = NULL, - cluster_column = NULL, - sdimx = "sdimx", - sdimy = "sdimy", - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximitySpatPlot2D") { +cellProximitySpatPlot2D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = NULL, + interaction_name = NULL, + cluster_column = NULL, + sdimx = "sdimx", + sdimy = "sdimy", + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximitySpatPlot2D") { if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, run + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } @@ -3071,7 +3400,7 @@ cellProximitySpatPlot2D <- function(gobject, # data.table variables - unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- + unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- x_start <- x_end <- y_start <- y_end <- cell_ID <- NULL cell_IDs_to_keep <- unique(c( @@ -3082,7 +3411,8 @@ cellProximitySpatPlot2D <- function(gobject, if (show_other_cells) { CellType <- strsplit(interaction_name, "--") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -3093,13 +3423,15 @@ cellProximitySpatPlot2D <- function(gobject, cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimension need to be defined, default is + message("first and second dimension need to be defined, default is first 2") sdimx <- "sdimx" sdimy <- "sdimy" @@ -3113,15 +3445,19 @@ cellProximitySpatPlot2D <- function(gobject, if (show_other_network) { pl <- pl + ggplot2::geom_segment( data = spatial_network[!unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = "lightgrey", size = 0.5, alpha = 0.5 ) } pl <- pl + ggplot2::geom_segment( data = spatial_network[unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = network_color, size = 0.5, alpha = 0.5 ) } @@ -3129,7 +3465,7 @@ cellProximitySpatPlot2D <- function(gobject, if (!is.null(spatial_grid) & show_grid == TRUE) { if (is.null(grid_color)) grid_color <- "black" pl <- pl + ggplot2::geom_rect( - data = spatial_grid, + data = spatial_grid, aes(xmin = x_start, xmax = x_end, ymin = y_start, ymax = y_end), color = grid_color, fill = NA ) @@ -3139,22 +3475,22 @@ cellProximitySpatPlot2D <- function(gobject, if (is.null(cell_color)) { cell_color <- "lightblue" pl <- pl + ggplot2::geom_point( - data = cell_locations[!cell_ID %in% cell_IDs_to_keep], + data = cell_locations[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = "lightgrey", + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other ) pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% cell_IDs_to_keep], + data = cell_locations[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% other_cell_IDs], + data = cell_locations[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -3170,21 +3506,21 @@ cellProximitySpatPlot2D <- function(gobject, data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), fill = "lightgrey", shape = 21, size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy, fill = cell_color), show.legend = show_legend, shape = 21, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy, fill = cell_color), - show.legend = show_legend, shape = 21, + show.legend = show_legend, shape = 21, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -3197,7 +3533,8 @@ cellProximitySpatPlot2D <- function(gobject, } else if (color_as_factor == TRUE) { number_colors <- length(unique(factor_data)) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) names(cell_color_code) <- unique(factor_data) pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } else if (color_as_factor == FALSE) { @@ -3215,17 +3552,17 @@ cellProximitySpatPlot2D <- function(gobject, pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = "lightgrey", + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) } @@ -3260,14 +3597,14 @@ cellProximitySpatPlot2D <- function(gobject, #' @title cellProximitySpatPlot #' @name cellProximitySpatPlot -#' @description Visualize 2D cell-cell interactions according to spatial +#' @description Visualize 2D cell-cell interactions according to spatial #' coordinates in ggplot mode #' @param gobject giotto object #' @inheritDotParams cellProximitySpatPlot2D -gobject #' @returns ggplot #' @details Description of parameters. #' @export -#' @seealso \code{\link{cellProximitySpatPlot2D}} and +#' @seealso \code{\link{cellProximitySpatPlot2D}} and #' \code{\link{cellProximitySpatPlot3D}} for 3D cellProximitySpatPlot <- function(gobject, ...) { cellProximitySpatPlot2D(gobject = gobject, ...) @@ -3276,7 +3613,7 @@ cellProximitySpatPlot <- function(gobject, ...) { #' @title cellProximitySpatPlot3D #' @name cellProximitySpatPlot3D -#' @description Visualize 3D cell-cell interactions according to spatial +#' @description Visualize 3D cell-cell interactions according to spatial #' coordinates in plotly mode #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -3307,38 +3644,39 @@ cellProximitySpatPlot <- function(gobject, ...) { #' @returns plotly #' @details Description of parameters. #' @export -cellProximitySpatPlot3D <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = "sdimx", - sdimy = "sdimy", - sdimz = "sdimz", - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = TRUE, - show_network = TRUE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 4, - point_size_other = 2, - point_alpha_other = 0.5, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximitySpatPlot3D", - ...) { +cellProximitySpatPlot3D <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = "sdimx", + sdimy = "sdimy", + sdimz = "sdimz", + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = TRUE, + show_network = TRUE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 4, + point_size_other = 2, + point_alpha_other = 0.5, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximitySpatPlot3D", + ...) { if (is.null(sdimz)) { pl <- .cellProximityVisPlot_2D_plotly( gobject = gobject, diff --git a/R/spdep.R b/R/spdep.R index d557c0021..6abbe9cad 100644 --- a/R/spdep.R +++ b/R/spdep.R @@ -2,12 +2,12 @@ #' #' @param gobject Input a Giotto object. #' @param method Specify a method name to compute auto correlation. -#' Available methods include +#' Available methods include #' \code{"geary.test", "lee.test", "lm.morantest","moran.test"}. #' @param spat_unit spatial unit #' @param feat_type feature type #' @param expression_values expression values to use, default = normalized -#' @param spatial_network_to_use spatial network to use, +#' @param spatial_network_to_use spatial network to use, #' default = spatial_network #' @param verbose be verbose #' @param return_gobject if FALSE, results are returned as data.table. @@ -15,17 +15,18 @@ #' @returns A data table with computed values for each feature. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' spdepAutoCorr(g) #' @export -spdepAutoCorr <- function(gobject, - method = c("geary.test", "lee.test", "lm.morantest", "moran.test"), - spat_unit = NULL, - feat_type = NULL, - expression_values = "normalized", - spatial_network_to_use = "spatial_network", - return_gobject = FALSE, - verbose = FALSE) { +spdepAutoCorr <- function( + gobject, + method = c("geary.test", "lee.test", "lm.morantest", "moran.test"), + spat_unit = NULL, + feat_type = NULL, + expression_values = "normalized", + spatial_network_to_use = "spatial_network", + return_gobject = FALSE, + verbose = FALSE) { # Check and match the specified method argument method <- match.arg(method) @@ -77,8 +78,11 @@ spdepAutoCorr <- function(gobject, result_list <- list() progressr::with_progress({ - if (step_size > 1) pb <- progressr::progressor( - steps = nfeats / step_size) + if (step_size > 1) { + pb <- progressr::progressor( + steps = nfeats / step_size + ) + } result_list <- lapply_flex( seq_along(feat), future.packages = c("data.table", "spdep"), @@ -91,7 +95,8 @@ spdepAutoCorr <- function(gobject, # Extract the estimated value from the result result_value <- callSpdepVar$estimate[1] temp_dt <- data.table( - feat_ID = feat[feat_value], value = result_value) + feat_ID = feat[feat_value], value = result_value + ) # increment progress if (exists("pb")) if (feat_value %% step_size == 0) pb() return(temp_dt) @@ -141,11 +146,11 @@ callSpdep <- function(method, ...) { # Check if 'method' argument is NULL, if so, stop with an error if (is.null(method)) { - stop("The 'method' argument has not been provided. Please specify a + stop("The 'method' argument has not been provided. Please specify a valid method.") } - # Check if 'method' exists in the 'spdep' package, if not, stop with an + # Check if 'method' exists in the 'spdep' package, if not, stop with an # error method <- try(eval(get(method, envir = loadNamespace("spdep"))), silent = TRUE @@ -186,7 +191,7 @@ callSpdep <- function(method, ...) { if (all(!(names(methodparam)) %in% allArgs)) { stop("Invalid or missing parameters.") } - # A vector of specified arguments that trigger + # A vector of specified arguments that trigger # 'spW <- spweights.constants()' requiredArgs <- c("n", "n1", "n2", "n3", "nn", "S0", "S1", "S2") @@ -194,7 +199,7 @@ callSpdep <- function(method, ...) { if (any(requiredArgs %in% allArgs)) { # Obtain arguments from 'spweights.constants' spW <- spdep::spweights.constants(listw = methodparam$listw) - # Combine user-provided arguments and 'spW', checking only against + # Combine user-provided arguments and 'spW', checking only against # 'feats' value combinedParams <- append(methodparam, spW) } else { diff --git a/R/variable_genes.R b/R/variable_genes.R index 39e12568c..c65c0cb89 100644 --- a/R/variable_genes.R +++ b/R/variable_genes.R @@ -1,9 +1,10 @@ -.calc_cov_group_hvf <- function(feat_in_cells_detected, - nr_expression_groups = 20, - zscore_threshold = 1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL) { +.calc_cov_group_hvf <- function( + feat_in_cells_detected, + nr_expression_groups = 20, + zscore_threshold = 1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL) { # NSE vars cov_group_zscore <- cov <- selected <- mean_expr <- NULL @@ -11,30 +12,36 @@ prob_sequence <- seq(0, 1, steps) prob_sequence[length(prob_sequence)] <- 1 expr_group_breaks <- stats::quantile( - feat_in_cells_detected$mean_expr, probs = prob_sequence) + feat_in_cells_detected$mean_expr, + probs = prob_sequence + ) ## remove zero's from cuts if there are too many and make first group zero if (any(duplicated(expr_group_breaks))) { m_expr_vector <- feat_in_cells_detected$mean_expr expr_group_breaks <- stats::quantile( - m_expr_vector[m_expr_vector > 0], probs = prob_sequence) + m_expr_vector[m_expr_vector > 0], + probs = prob_sequence + ) expr_group_breaks[[1]] <- 0 } expr_groups <- cut( x = feat_in_cells_detected$mean_expr, breaks = expr_group_breaks, - labels = paste0("group_", 1:nr_expression_groups), + labels = paste0("group_", seq_len(nr_expression_groups)), include.lowest = TRUE ) feat_in_cells_detected[, expr_groups := expr_groups] feat_in_cells_detected[, cov_group_zscore := scale(cov), by = expr_groups] feat_in_cells_detected[, selected := ifelse( - cov_group_zscore > zscore_threshold, "yes", "no")] + cov_group_zscore > zscore_threshold, "yes", "no" + )] if (any(isTRUE(show_plot), isTRUE(return_plot), isTRUE(save_plot))) { pl <- .create_cov_group_hvf_plot( - feat_in_cells_detected, nr_expression_groups) + feat_in_cells_detected, nr_expression_groups + ) return(list(dt = feat_in_cells_detected, pl = pl)) } else { @@ -48,11 +55,12 @@ -.calc_cov_loess_hvf <- function(feat_in_cells_detected, - difference_in_cov = 0.1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL) { +.calc_cov_loess_hvf <- function( + feat_in_cells_detected, + difference_in_cov = 0.1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL) { # NSE vars cov_diff <- pred_cov_feats <- selected <- NULL @@ -61,18 +69,25 @@ var_col <- "cov" loess_model_sample <- stats::loess( - loess_formula, data = feat_in_cells_detected) + loess_formula, + data = feat_in_cells_detected + ) feat_in_cells_detected$pred_cov_feats <- stats::predict( - loess_model_sample, newdata = feat_in_cells_detected) - feat_in_cells_detected[, cov_diff := get(var_col) - pred_cov_feats, - by = 1:nrow(feat_in_cells_detected)] + loess_model_sample, + newdata = feat_in_cells_detected + ) + feat_in_cells_detected[, cov_diff := get(var_col) - pred_cov_feats, + by = seq_len(nrow(feat_in_cells_detected)) + ] data.table::setorder(feat_in_cells_detected, -cov_diff) feat_in_cells_detected[, selected := ifelse( - cov_diff > difference_in_cov, "yes", "no")] + cov_diff > difference_in_cov, "yes", "no" + )] if (any(isTRUE(show_plot), isTRUE(return_plot), isTRUE(save_plot))) { pl <- .create_cov_loess_hvf_plot( - feat_in_cells_detected, difference_in_cov, var_col) + feat_in_cells_detected, difference_in_cov, var_col + ) return(list(dt = feat_in_cells_detected, pl = pl)) } else { @@ -82,13 +97,14 @@ -.calc_var_hvf <- function(scaled_matrix, - var_threshold = 1.5, - var_number = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - use_parallel = FALSE) { +.calc_var_hvf <- function( + scaled_matrix, + var_threshold = 1.5, + var_number = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + use_parallel = FALSE) { # NSE vars var <- selected <- NULL @@ -96,7 +112,7 @@ test <- apply(X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x)) } else { test <- future.apply::future_apply( - X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x), + X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x), future.seed = TRUE ) } @@ -106,7 +122,7 @@ dt_res <- data.table::data.table(feats = names(test), var = test) if (!is.null(var_number) & is.numeric(var_number)) { - dt_res[, selected := 1:.N] + dt_res[, selected := seq_len(.N)] dt_res[, selected := ifelse(selected <= var_number, "yes", "no")] } else { dt_res[, selected := ifelse(var >= var_threshold, "yes", "no")] @@ -116,7 +132,7 @@ if (isTRUE(show_plot) || isTRUE(return_plot) || isTRUE(save_plot)) { - dt_res[, rank := 1:.N] + dt_res[, rank := seq_len(.N)] pl <- .create_calc_var_hvf_plot(dt_res) @@ -167,10 +183,9 @@ } -.calc_expr_cov_stats_parallel <- function( - expr_values, - expression_threshold, - cores = GiottoUtils::determine_cores()) { +.calc_expr_cov_stats_parallel <- function(expr_values, + expression_threshold, + cores = GiottoUtils::determine_cores()) { # NSE vars cov <- sd <- mean_expr <- NULL @@ -219,78 +234,81 @@ #' @param feat_type feature type #' @param expression_values expression values to use #' @param method method to calculate highly variable features -#' @param reverse_log_scale reverse log-scale of expression values +#' @param reverse_log_scale reverse log-scale of expression values #' (default = FALSE) #' @param logbase if `reverse_log_scale` is TRUE, which log base was used? #' @param expression_threshold expression threshold to consider a gene detected -#' @param nr_expression_groups (cov_groups) number of expression groups for +#' @param nr_expression_groups (cov_groups) number of expression groups for #' cov_groups #' @param zscore_threshold (cov_groups) zscore to select hvg for cov_groups #' @param HVFname name for highly variable features in cell metadata -#' @param difference_in_cov (cov_loess) minimum difference in coefficient of +#' @param difference_in_cov (cov_loess) minimum difference in coefficient of #' variance required -#' @param var_threshold (var_p_resid) variance threshold for features for +#' @param var_threshold (var_p_resid) variance threshold for features for #' var_p_resid method -#' @param var_number (var_p_resid) number of top variance features for +#' @param var_number (var_p_resid) number of top variance features for #' var_p_resid method -#' @param random_subset random subset to perform HVF detection on. +#' @param random_subset random subset to perform HVF detection on. #' Passing `NULL` runs HVF on all cells. #' @param set_seed logical. whether to set a seed when random_subset is used #' @param seed_number seed number to use when random_subset is used #' @param show_plot show plot #' @param return_plot return ggplot object (overridden by `return_gobject`) #' @param save_plot logical. directly save the plot -#' @param save_param list of saving parameters from +#' @param save_param list of saving parameters from #' [GiottoVisuals::all_plots_save_function()] -#' @param default_save_name default save name for saving, don't change, change +#' @param default_save_name default save name for saving, don't change, change #' save_name in save_param #' @param return_gobject boolean: return giotto object (default = TRUE) -#' @returns giotto object highly variable features appended to feature metadata +#' @param verbose be verbose +#' @returns giotto object highly variable features appended to feature metadata #' (`fDataDT()`) #' @details #' Currently we provide 2 ways to calculate highly variable genes: #' #' \strong{1. high coeff of variance (COV) within groups: } \cr -#' First genes are binned (\emph{nr_expression_groups}) into average expression -#' groups and the COV for each feature is converted into a z-score within each -#' bin. Features with a z-score higher than the threshold +#' First genes are binned (\emph{nr_expression_groups}) into average expression +#' groups and the COV for each feature is converted into a z-score within each +#' bin. Features with a z-score higher than the threshold #' (\emph{zscore_threshold}) are considered highly variable. \cr #' #' \strong{2. high COV based on loess regression prediction: } \cr -#' A predicted COV is calculated for each feature using loess regression +#' A predicted COV is calculated for each feature using loess regression #' (COV~log(mean expression)) -#' Features that show a higher than predicted COV (\emph{difference_in_cov}) +#' Features that show a higher than predicted COV (\emph{difference_in_cov}) #' are considered highly variable. \cr #' #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' calculateHVF(g) #' @export -calculateHVF <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - method = c("cov_groups", "cov_loess", "var_p_resid"), - reverse_log_scale = FALSE, - logbase = 2, - expression_threshold = 0, - nr_expression_groups = 20, - zscore_threshold = 1.5, - HVFname = "hvf", - difference_in_cov = 0.1, - var_threshold = 1.5, - var_number = NULL, - random_subset = NULL, - set_seed = TRUE, - seed_number = 1234, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "HVFplot", - return_gobject = TRUE) { +calculateHVF <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + method = c("cov_groups", "cov_loess", "var_p_resid"), + reverse_log_scale = FALSE, + logbase = 2, + expression_threshold = 0, + nr_expression_groups = 20, + zscore_threshold = 1.5, + HVFname = "hvf", + difference_in_cov = 0.1, + var_threshold = 1.5, + var_number = NULL, + random_subset = NULL, + set_seed = TRUE, + seed_number = 1234, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "HVFplot", + return_gobject = TRUE, + verbose = TRUE) { # NSE vars selected <- feats <- var <- NULL @@ -317,8 +335,9 @@ calculateHVF <- function(gobject, # expression values to be used values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -336,7 +355,9 @@ calculateHVF <- function(gobject, if (!is.null(random_subset)) { if (isTRUE(set_seed)) set.seed(seed = seed_number) - random_selection <- sort(sample(1:ncol(expr_values), random_subset)) + random_selection <- sort(sample( + seq_len(ncol(expr_values)), random_subset + )) expr_values <- expr_values[, random_selection] if (isTRUE(set_seed)) GiottoUtils::random_seed() @@ -345,20 +366,25 @@ calculateHVF <- function(gobject, # print, return and save parameters - show_plot <- ifelse(is.na(show_plot), - readGiottoInstructions(gobject, param = "show_plot"), - show_plot) - save_plot <- ifelse(is.na(save_plot), - readGiottoInstructions(gobject, param = "save_plot"), - save_plot) - return_plot <- ifelse(is.na(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), - return_plot) + show_plot <- ifelse(is.na(show_plot), + readGiottoInstructions(gobject, param = "show_plot"), + show_plot + ) + save_plot <- ifelse(is.na(save_plot), + readGiottoInstructions(gobject, param = "save_plot"), + save_plot + ) + return_plot <- ifelse(is.na(return_plot), + readGiottoInstructions(gobject, param = "return_plot"), + return_plot + ) # method to use method <- match.arg( - method, choices = c("cov_groups", "cov_loess", "var_p_resid")) + method, + choices = c("cov_groups", "cov_loess", "var_p_resid") + ) # select function to use based on whether future parallelization is planned calc_cov_fun <- ifelse( use_parallel, @@ -414,16 +440,19 @@ calculateHVF <- function(gobject, ## save plot if (isTRUE(save_plot)) { do.call( - GiottoVisuals::all_plots_save_function, - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + GiottoVisuals::all_plots_save_function, + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot if (isTRUE(return_plot)) { if (isTRUE(return_gobject)) { message("return_plot = TRUE and return_gobject = TRUE \n - plot will not be returned to object, but can still be + plot will not be returned to object, but can still be saved with save_plot = TRUE or manually") } else { return(pl) @@ -443,7 +472,10 @@ calculateHVF <- function(gobject, column_names_feat_metadata <- colnames(feat_metadata[]) if (HVFname %in% column_names_feat_metadata) { - cat(HVFname, " has already been used, will be overwritten") + vmsg( + .v = verbose, HVFname, + " has already been used, will be overwritten" + ) feat_metadata[][, eval(HVFname) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -490,8 +522,7 @@ calculateHVF <- function(gobject, # plot generation #### -.create_cov_group_hvf_plot <- function( - feat_in_cells_detected, nr_expression_groups) { +.create_cov_group_hvf_plot <- function(feat_in_cells_detected, nr_expression_groups) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( @@ -499,8 +530,9 @@ calculateHVF <- function(gobject, axis.text = ggplot2::element_text(size = 12) ) pl <- pl + ggplot2::geom_point( - data = feat_in_cells_detected, - ggplot2::aes_string(x = "mean_expr", y = "cov", color = "selected")) + data = feat_in_cells_detected, + ggplot2::aes_string(x = "mean_expr", y = "cov", color = "selected") + ) pl <- pl + ggplot2::scale_color_manual( values = c(no = "lightgrey", yes = "orange"), guide = ggplot2::guide_legend( @@ -509,7 +541,9 @@ calculateHVF <- function(gobject, ) ) pl <- pl + ggplot2::facet_wrap( - ~expr_groups, ncol = nr_expression_groups, scales = "free_x") + ~expr_groups, + ncol = nr_expression_groups, scales = "free_x" + ) pl <- pl + ggplot2::theme( axis.text.x = ggplot2::element_blank(), strip.text = ggplot2::element_text(size = 4) @@ -519,8 +553,7 @@ calculateHVF <- function(gobject, } -.create_cov_loess_hvf_plot <- function( - feat_in_cells_detected, difference_in_cov, var_col) { +.create_cov_loess_hvf_plot <- function(feat_in_cells_detected, difference_in_cov, var_col) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( @@ -528,17 +561,22 @@ calculateHVF <- function(gobject, axis.text = ggplot2::element_text(size = 12) ) pl <- pl + ggplot2::geom_point( - data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = var_col, - color = "selected")) + data = feat_in_cells_detected, + ggplot2::aes_string( + x = "log(mean_expr)", y = var_col, + color = "selected" + ) + ) pl <- pl + ggplot2::geom_line( - data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = "pred_cov_feats"), - color = "blue") + data = feat_in_cells_detected, + ggplot2::aes_string(x = "log(mean_expr)", y = "pred_cov_feats"), + color = "blue" + ) hvg_line <- paste0("pred_cov_feats+", difference_in_cov) pl <- pl + ggplot2::geom_line( - data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = hvg_line), linetype = 2) + data = feat_in_cells_detected, + ggplot2::aes_string(x = "log(mean_expr)", y = hvg_line), linetype = 2 + ) pl <- pl + ggplot2::labs(x = "log(mean expression)", y = var_col) pl <- pl + ggplot2::scale_color_manual( values = c(no = "lightgrey", yes = "orange"), @@ -554,7 +592,8 @@ calculateHVF <- function(gobject, .create_calc_var_hvf_plot <- function(dt_res) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_point( - data = dt_res, aes_string(x = "rank", y = "var", color = "selected")) + data = dt_res, aes_string(x = "rank", y = "var", color = "selected") + ) pl <- pl + ggplot2::scale_x_reverse() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.title = ggplot2::element_text(size = 14), diff --git a/R/wnn.R b/R/wnn.R index af8be2427..588d11888 100644 --- a/R/wnn.R +++ b/R/wnn.R @@ -13,22 +13,23 @@ #' @param w_name_modality_2 name for modality 2 weights #' @param verbose be verbose #' -#' @returns A Giotto object with integrated UMAP (integrated.umap) within the -#' dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the +#' @returns A Giotto object with integrated UMAP (integrated.umap) within the +#' dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the #' cellular metadata. #' @export -runWNN <- function(gobject, - spat_unit = "cell", - modality_1 = "rna", - modality_2 = "protein", - pca_name_modality_1 = "rna.pca", - pca_name_modality_2 = "protein.pca", - k = 20, - integrated_feat_type = NULL, - matrix_result_name = NULL, - w_name_modality_1 = NULL, - w_name_modality_2 = NULL, - verbose = FALSE) { +runWNN <- function( + gobject, + spat_unit = "cell", + modality_1 = "rna", + modality_2 = "protein", + pca_name_modality_1 = "rna.pca", + pca_name_modality_2 = "protein.pca", + k = 20, + integrated_feat_type = NULL, + matrix_result_name = NULL, + w_name_modality_1 = NULL, + w_name_modality_2 = NULL, + verbose = FALSE) { # validate Giotto object if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -36,9 +37,9 @@ runWNN <- function(gobject, # validate modalities if (!modality_1 %in% names( - gobject@dimension_reduction$cells[[spat_unit]]) || - !modality_2 %in% names(gobject@dimension_reduction$cells[[spat_unit]])) - { + gobject@dimension_reduction$cells[[spat_unit]] + ) || + !modality_2 %in% names(gobject@dimension_reduction$cells[[spat_unit]])) { stop(paste(modality_1, "and", modality_2, " pca must exist")) } @@ -124,18 +125,24 @@ runWNN <- function(gobject, ## modality1 modality1 - if (verbose) - message(paste("Calculating low dimensional cell-cell distances for", - modality_1)) + if (verbose) { + message(paste( + "Calculating low dimensional cell-cell distances for", + modality_1 + )) + } all_cell_distances_1_1 <- dist(pca_1) all_cell_distances_1_1 <- as.matrix(all_cell_distances_1_1) ## modality2 modality2 - if (verbose) - message(paste("Calculating low dimensional cell-cell distances for", - modality_2)) + if (verbose) { + message(paste( + "Calculating low dimensional cell-cell distances for", + modality_2 + )) + } all_cell_distances_2_2 <- dist(pca_2) @@ -234,13 +241,15 @@ runWNN <- function(gobject, if (nrow(jaccard_values == 20)) { further_cell_cell_distances <- all_cell_distances_1_1[ - cell_a, jaccard_values$to] + cell_a, jaccard_values$to + ] } else { further_cell_cell_distances <- tail(sort(all_cell_distances_1_1[ - cell_a, ]), 20) + cell_a, + ]), 20) } - modality1_sigma_i[cell_a] <- mean(further_cell_cell_distances) + modality1_sigma_i[cell_a] <- mean(further_cell_cell_distances) # cell-specific kernel bandwidth. } @@ -254,13 +263,15 @@ runWNN <- function(gobject, if (nrow(jaccard_values == 20)) { further_cell_cell_distances <- all_cell_distances_2_2[ - cell_a, jaccard_values$to] + cell_a, jaccard_values$to + ] } else { further_cell_cell_distances <- tail(sort(all_cell_distances_2_2[ - cell_a, ]), 20) + cell_a, + ]), 20) } - modality2_sigma_i[cell_a] <- mean(further_cell_cell_distances) + modality2_sigma_i[cell_a] <- mean(further_cell_cell_distances) # cell-specific kernel bandwidth. } @@ -282,13 +293,13 @@ runWNN <- function(gobject, d_modality1_i_modality1_knn1 <- sqrt(sum(( modality1_i - modality1_knn1)^2)) - difference_distances <- d_modality1_i_modality2_predicted - + difference_distances <- d_modality1_i_modality2_predicted - d_modality1_i_modality1_knn1 max_value <- max(c(difference_distances, 0)) theta_1_1[[cell_a]] <- exp(( - -max_value) / (modality1_sigma_i[cell_a] - - d_modality1_i_modality1_knn1)) + -max_value) / (modality1_sigma_i[cell_a] - + d_modality1_i_modality1_knn1)) } ## modality2 modality2 @@ -304,13 +315,13 @@ runWNN <- function(gobject, d_modality2_i_modality2_knn1 <- sqrt(sum(( modality2_i - modality2_knn1)^2)) - difference_distances <- d_modality2_i_modality2_predicted - + difference_distances <- d_modality2_i_modality2_predicted - d_modality2_i_modality2_knn1 max_value <- max(c(difference_distances, 0)) theta_modality2_modality2[[cell_a]] <- exp(( - -max_value) / (modality2_sigma_i[cell_a] - - d_modality2_i_modality2_knn1)) + -max_value) / (modality2_sigma_i[cell_a] - + d_modality2_i_modality2_knn1)) } @@ -327,13 +338,13 @@ runWNN <- function(gobject, d_modality1_i_modality1_knn1 <- sqrt(sum(( modality1_i - modality1_knn1)^2)) - difference_distances <- d_modality1_i_modality2_predicted - + difference_distances <- d_modality1_i_modality2_predicted - d_modality1_i_modality1_knn1 max_value <- max(c(difference_distances, 0)) theta_modality1_modality2[[cell_a]] <- exp(( - -max_value) / (modality1_sigma_i[cell_a] - - d_modality1_i_modality1_knn1)) + -max_value) / (modality1_sigma_i[cell_a] - + d_modality1_i_modality1_knn1)) } @@ -350,13 +361,13 @@ runWNN <- function(gobject, d_modality2_i_modality2_knn1 <- sqrt(sum(( modality2_i - modality2_knn1)^2)) - difference_distances <- d_modality2_i_modality1_predicted - + difference_distances <- d_modality2_i_modality1_predicted - d_modality2_i_modality2_knn1 max_value <- max(c(difference_distances, 0)) theta_modality2_modality1[[cell_a]] <- exp(( - -max_value) / (modality2_sigma_i[cell_a] - - d_modality2_i_modality2_knn1)) + -max_value) / (modality2_sigma_i[cell_a] - + d_modality2_i_modality2_knn1)) } @@ -370,7 +381,7 @@ runWNN <- function(gobject, ratio_modality1 <- list() for (cell_a in cell_names) { - ratio_modality1[[cell_a]] <- theta_1_1[[cell_a]] / + ratio_modality1[[cell_a]] <- theta_1_1[[cell_a]] / (theta_modality1_modality2[[cell_a]] + epsilon) } @@ -379,7 +390,7 @@ runWNN <- function(gobject, ratio_modality2 <- list() for (cell_a in cell_names) { - ratio_modality2[[cell_a]] <- theta_modality2_modality2[[cell_a]] / + ratio_modality2[[cell_a]] <- theta_modality2_modality2[[cell_a]] / (theta_modality2_modality1[[cell_a]] + epsilon) } @@ -392,7 +403,7 @@ runWNN <- function(gobject, names(w_modality1) <- cell_names for (cell_a in cell_names) { - w_modality1[cell_a] <- exp(ratio_modality1[[cell_a]]) / + w_modality1[cell_a] <- exp(ratio_modality1[[cell_a]]) / (exp(ratio_modality1[[cell_a]]) + exp(ratio_modality2[[cell_a]])) } @@ -400,7 +411,7 @@ runWNN <- function(gobject, names(w_modality2) <- cell_names for (cell_a in cell_names) { - w_modality2[cell_a] <- exp(ratio_modality2[[cell_a]]) / + w_modality2[cell_a] <- exp(ratio_modality2[[cell_a]]) / (exp(ratio_modality1[[cell_a]]) + exp(ratio_modality2[[cell_a]])) } @@ -421,15 +432,15 @@ runWNN <- function(gobject, ## theta_modality1 - theta_modality1_cella_cellb <- exp(-1 * (all_cell_distances_1_1 / - modality1_sigma_i)**kernelpower) + theta_modality1_cella_cellb <- exp(-1 * (all_cell_distances_1_1 / + modality1_sigma_i)**kernelpower) ## theta_modality2 - theta_modality2_cella_cellb <- exp(-1 * (all_cell_distances_2_2 / - modality2_sigma_i)**kernelpower) + theta_modality2_cella_cellb <- exp(-1 * (all_cell_distances_2_2 / + modality2_sigma_i)**kernelpower) ## theta_weighted - theta_weighted <- w_modality1 * theta_modality1_cella_cellb + + theta_weighted <- w_modality1 * theta_modality1_cella_cellb + w_modality2 * theta_modality2_cella_cellb @@ -511,18 +522,19 @@ runWNN <- function(gobject, #' #' @returns A Giotto object with integrated UMAP #' @export -runIntegratedUMAP <- function(gobject, - spat_unit = "cell", - modality1 = "rna", - modality2 = "protein", - integrated_feat_type = NULL, - integration_method = "WNN", - matrix_result_name = "theta_weighted_matrix", - k = 20, - spread = 5, - min_dist = 0.01, - force = FALSE, - ...) { +runIntegratedUMAP <- function( + gobject, + spat_unit = "cell", + modality1 = "rna", + modality2 = "protein", + integrated_feat_type = NULL, + integration_method = "WNN", + matrix_result_name = "theta_weighted_matrix", + k = 20, + spread = 5, + min_dist = 0.01, + force = FALSE, + ...) { if (is.null(integrated_feat_type)) { integrated_feat_type <- paste0(modality1, "_", modality2) } @@ -537,7 +549,8 @@ runIntegratedUMAP <- function(gobject, theta_weighted[is.na(theta_weighted)] <- 0 if (is.null(gobject@nn_network[[spat_unit]][[ - modality1]]$kNN$integrated_kNN) || force == TRUE) { + modality1 + ]]$kNN$integrated_kNN) || force == TRUE) { ################# Calculate integrated Nearest Neighbors ############### message("Calculating integrated Nearest Neighbors") @@ -545,11 +558,11 @@ runIntegratedUMAP <- function(gobject, cell_names <- colnames(theta_weighted) nn_network <- dbscan::kNN(x = theta_weighted, k = k, sort = TRUE) - from <- to <- weight <- distance <- from_cell_ID <- to_cell_ID <- + from <- to <- weight <- distance <- from_cell_ID <- to_cell_ID <- shared <- NULL nn_network_dt <- data.table::data.table( from = rep( - 1:nrow(nn_network$id), + seq_len(nrow(nn_network$id)), k ), to = as.vector(nn_network$id), @@ -559,7 +572,8 @@ runIntegratedUMAP <- function(gobject, nn_network_dt[, `:=`(from_cell_ID, cell_names[from])] nn_network_dt[, `:=`(to_cell_ID, cell_names[to])] all_index <- unique( - x = c(nn_network_dt$from_cell_ID, nn_network_dt$to_cell_ID)) + x = c(nn_network_dt$from_cell_ID, nn_network_dt$to_cell_ID) + ) ################################ Create igraph ######################### @@ -649,7 +663,8 @@ runIntegratedUMAP <- function(gobject, ## add umap gobject@dimension_reduction$cells[[spat_unit]][[modality1]][["umap"]][[ - "integrated.umap"]] <- list( + "integrated.umap" + ]] <- list( name = "integrated.umap", feat_type = modality1, spat_unit = spat_unit, @@ -659,7 +674,8 @@ runIntegratedUMAP <- function(gobject, ) gobject@dimension_reduction$cells[[spat_unit]][[modality2]][["umap"]][[ - "integrated.umap"]] <- list( + "integrated.umap" + ]] <- list( name = "integrated.umap", feat_type = modality2, spat_unit = spat_unit, diff --git a/R/zzz.R b/R/zzz.R index d15b5883c..9cfa6b5af 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -19,4 +19,27 @@ data.table::setDTthreads(threads = cores) options("giotto.check_core" = FALSE) } + + # options # + + # GiottoVisuals # + # ------------- # + # colors continuous + init_option("giotto.color_cd_pal", c("blue", "white", "red")) + init_option("giotto.color_cs_pal", "viridis") + init_option("giotto.color_c_rev", FALSE) + + # colors discrete + init_option("giotto.color_d_pal", "distinct") + init_option("giotto.color_d_rev", FALSE) + init_option("giotto.color_d_strategy", "interpolate") + + # image resampling + init_option("giotto.plot_img_max_sample", 5e5) + init_option("giotto.plot_img_max_crop", 1e8) + init_option("giotto.plot_img_max_resample_scale", 100) + + # GiottoUtils # + # ----------- # + init_option("giotto.verbose", TRUE) } diff --git a/man/adapt_aspect_ratio.Rd b/man/adapt_aspect_ratio.Rd index d1f7daa92..e6b170194 100644 --- a/man/adapt_aspect_ratio.Rd +++ b/man/adapt_aspect_ratio.Rd @@ -30,7 +30,7 @@ adapt_aspect_ratio( numeric } \description{ -adapt the aspact ratio after inserting cross section mesh grid +adapt the aspact ratio after inserting cross section mesh grid lines } \keyword{internal} diff --git a/man/addCellStatistics.Rd b/man/addCellStatistics.Rd index 9abfb793b..42707b57f 100644 --- a/man/addCellStatistics.Rd +++ b/man/addCellStatistics.Rd @@ -10,7 +10,8 @@ addCellStatistics( spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -25,6 +26,8 @@ addCellStatistics( \item{detection_threshold}{detection threshold to consider a gene detected} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object if return_gobject = TRUE diff --git a/man/addFeatStatistics.Rd b/man/addFeatStatistics.Rd index 84b5f4e91..c532b733b 100644 --- a/man/addFeatStatistics.Rd +++ b/man/addFeatStatistics.Rd @@ -10,7 +10,8 @@ addFeatStatistics( spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -25,6 +26,8 @@ addFeatStatistics( \item{detection_threshold}{detection threshold to consider a gene detected} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object if return_gobject = TRUE diff --git a/man/addHMRF.Rd b/man/addHMRF.Rd index 93d5961f6..2e398efcb 100644 --- a/man/addHMRF.Rd +++ b/man/addHMRF.Rd @@ -35,3 +35,31 @@ giotto object \description{ Add selected results from doHMRF to the giotto object } +\examples{ +g <- GiottoData::loadGiottoMini("visium") +spat_genes <- binSpect(g) + +output_folder <- file.path(tempdir(), "HMRF") +if (!file.exists(output_folder)) dir.create(output_folder, recursive = TRUE) + +out <- doHMRF( + g, + spatial_genes = spat_genes[seq_len(20)]$feats, + expression_values = "scaled", + spatial_network_name = "Delaunay_network", + k = 6, betas = c(0, 10, 5), + output_folder = output_folder +) + +g <- addHMRF( + gobject = g, + HMRFoutput = out, + k = 6, + betas_to_add = 20, + hmrf_name = "HMRF" +) + +spatPlot( + gobject = g, cell_color = "HMRF_k6_b.20", +) +} diff --git a/man/addHMRF_V2.Rd b/man/addHMRF_V2.Rd index 33c17f775..4c89f66aa 100644 --- a/man/addHMRF_V2.Rd +++ b/man/addHMRF_V2.Rd @@ -20,8 +20,16 @@ giotto object function to add HMRF Domain Type to cell meta data } \details{ -This function appends HMRF domain clusters to corresponding cell meta data -for all the beta values, with the given HMRF model names. For example, if -name = ‘hmrf1’ and name of result in HMRFoutput is ‘k=8 b=0.00’, the +This function appends HMRF domain clusters to corresponding cell meta data +for all the beta values, with the given HMRF model names. For example, if +name = ‘hmrf1’ and name of result in HMRFoutput is ‘k=8 b=0.00’, the appended cell meta data column will be named with ‘hmrf1 k=8 b=0.00’ } +\examples{ +g <- GiottoData::loadGiottoMini("visium") +g <- binSpect(g, return_gobject = TRUE) +HMRF_init_obj <- initHMRF_V2(gobject = g, cl.method = "km") +HMRFoutput <- doHMRF_V2(HMRF_init_obj = HMRF_init_obj, betas = c(0, 5, 2)) + +addHMRF_V2(gobject = g, HMRFoutput = HMRFoutput) +} diff --git a/man/addPolygonCells.Rd b/man/addPolygonCells.Rd index 9386b5c60..0a3708da8 100644 --- a/man/addPolygonCells.Rd +++ b/man/addPolygonCells.Rd @@ -25,15 +25,15 @@ addPolygonCells( \item{feat_type}{feature name where metadata will be added} -\item{polygons}{polygon names to plot (e.g. 'polygon_1'). If NULL, plots +\item{polygons}{polygon names to plot (e.g. 'polygon_1'). If NULL, plots all available polygons} -\item{na.label}{polygon label for cells located outside of polygons area. +\item{na.label}{polygon label for cells located outside of polygons area. Default = "no_polygon"} } \value{ A Giotto object with a modified cell_metadata slot that includes the -polygon name where each cell is located or no_polygon label if the cell is +polygon name where each cell is located or no_polygon label if the cell is not located within a polygon area } \description{ @@ -42,17 +42,24 @@ Add corresponding polygon IDs to cell metadata \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), + sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object -my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") +my_giotto_polygons <- createGiottoPolygon( + my_polygon_coords, + name = "selections" +) + g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) ) ## Add polygon IDs to cell metadata -addPolygonCells(g) +g <- addPolygonCells(g) +pDataDT(g) } diff --git a/man/addStatistics.Rd b/man/addStatistics.Rd index 95df75bda..9fee79234 100644 --- a/man/addStatistics.Rd +++ b/man/addStatistics.Rd @@ -10,7 +10,8 @@ addStatistics( spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -25,6 +26,8 @@ addStatistics( \item{detection_threshold}{detection threshold to consider a feature detected} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object if return_gobject = TRUE, else a list with results diff --git a/man/binSpect.Rd b/man/binSpect.Rd index 060f88f7b..996fb5436 100644 --- a/man/binSpect.Rd +++ b/man/binSpect.Rd @@ -208,8 +208,6 @@ separately (default)} \item{expression_matrix}{expression matrix} \item{spatial_network}{spatial network in data.table format} - -\item{subset_genes}{deprecated, use subset_feats} } \value{ data.table with results (see details) diff --git a/man/calculateHVF.Rd b/man/calculateHVF.Rd index 9c826f008..d7d3a2de6 100644 --- a/man/calculateHVF.Rd +++ b/man/calculateHVF.Rd @@ -27,7 +27,8 @@ calculateHVF( save_plot = NULL, save_param = list(), default_save_name = "HVFplot", - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -84,6 +85,8 @@ Passing \code{NULL} runs HVF on all cells.} save_name in save_param} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object highly variable features appended to feature metadata diff --git a/man/cellProximityBarplot.Rd b/man/cellProximityBarplot.Rd index aef957080..bebc29ba1 100644 --- a/man/cellProximityBarplot.Rd +++ b/man/cellProximityBarplot.Rd @@ -47,9 +47,12 @@ Create barplot from cell-cell proximity scores \details{ This function creates a barplot that shows the spatial proximity enrichment or depletion of cell type pairs. - @examples - g <- GiottoData::loadGiottoMini("visium") - - cellProximityBarplot(gobject = g, - CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus")) +} +\examples{ +g <- GiottoData::loadGiottoMini("visium") + +cellProximityBarplot( + gobject = g, + CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus") +) } diff --git a/man/cellProximityEnrichmentEachSpot.Rd b/man/cellProximityEnrichmentEachSpot.Rd index 572232bc6..0d269746b 100644 --- a/man/cellProximityEnrichmentEachSpot.Rd +++ b/man/cellProximityEnrichmentEachSpot.Rd @@ -24,7 +24,7 @@ cellProximityEnrichmentEachSpot( \item{cluster_column}{name of column to use for clusters} } \value{ -matrix that rownames are cell-cell interaction pairs and colnames +matrix that rownames are cell-cell interaction pairs and colnames are cell_IDs } \description{ @@ -33,14 +33,16 @@ interacted spots (observed) } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) diff --git a/man/cellProximityEnrichmentSpots.Rd b/man/cellProximityEnrichmentSpots.Rd index c1a7898ad..e618dbbc3 100644 --- a/man/cellProximityEnrichmentSpots.Rd +++ b/man/cellProximityEnrichmentSpots.Rd @@ -32,10 +32,10 @@ cellProximityEnrichmentSpots( \item{cells_in_spot}{cell number in each spot} -\item{number_of_simulations}{number of simulations to create expected +\item{number_of_simulations}{number of simulations to create expected observations} -\item{adjust_method}{method to adjust p.values +\item{adjust_method}{method to adjust p.values (e.g. "none", "fdr", "bonferroni","BH","holm", "hochberg", "hommel","BY")} \item{set_seed}{use of seed. Default = TRUE} @@ -45,34 +45,36 @@ observations} \item{verbose}{be verbose} } \value{ -List of cell Proximity scores (CPscores) in data.table format. +List of cell Proximity scores (CPscores) in data.table format. The first -data.table (raw_sim_table) shows the raw observations of both the original -and simulated networks. The second data.table (enrichm_res) shows the +data.table (raw_sim_table) shows the raw observations of both the original +and simulated networks. The second data.table (enrichm_res) shows the enrichment results. } \description{ -Compute cell-cell interaction enrichment for spots +Compute cell-cell interaction enrichment for spots (observed vs expected) } \details{ -Spatial proximity enrichment or depletion between pairs of cell +Spatial proximity enrichment or depletion between pairs of cell types is calculated by calculating the observed over the expected frequency -of cell-cell proximity interactions. The expected frequency is the average -frequency calculated from a number of spatial network simulations. Each -individual simulation is obtained by reshuffling the cell type labels of +of cell-cell proximity interactions. The expected frequency is the average +frequency calculated from a number of spatial network simulations. Each +individual simulation is obtained by reshuffling the cell type labels of each node (spot) in the spatial network. } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) diff --git a/man/cellProximityHeatmap.Rd b/man/cellProximityHeatmap.Rd index 9cca766d1..7939e7d52 100644 --- a/man/cellProximityHeatmap.Rd +++ b/man/cellProximityHeatmap.Rd @@ -27,7 +27,7 @@ cellProximityHeatmap( \item{order_cell_types}{order cell types based on enrichment correlation} -\item{color_breaks}{numerical vector of length 3 to represent min, mean +\item{color_breaks}{numerical vector of length 3 to represent min, mean and maximum} \item{color_names}{character color vector of length 3} diff --git a/man/cellProximityNetwork.Rd b/man/cellProximityNetwork.Rd index faf220369..1a67385bd 100644 --- a/man/cellProximityNetwork.Rd +++ b/man/cellProximityNetwork.Rd @@ -42,10 +42,10 @@ cellProximityNetwork( \item{rescale_edge_weights}{rescale edge weights (boolean)} -\item{edge_weight_range_depletion}{numerical vector of length 2 to rescale +\item{edge_weight_range_depletion}{numerical vector of length 2 to rescale depleted edge weights} -\item{edge_weight_range_enrichment}{numerical vector of length 2 to rescale +\item{edge_weight_range_enrichment}{numerical vector of length 2 to rescale enriched edge weights} \item{layout}{layout algorithm to use to draw nodes and edges} diff --git a/man/cellProximitySpatPlot.Rd b/man/cellProximitySpatPlot.Rd index 8b5875886..d6bd596a5 100644 --- a/man/cellProximitySpatPlot.Rd +++ b/man/cellProximitySpatPlot.Rd @@ -53,13 +53,13 @@ named vector of colors} ggplot } \description{ -Visualize 2D cell-cell interactions according to spatial +Visualize 2D cell-cell interactions according to spatial coordinates in ggplot mode } \details{ Description of parameters. } \seealso{ -\code{\link{cellProximitySpatPlot2D}} and +\code{\link{cellProximitySpatPlot2D}} and \code{\link{cellProximitySpatPlot3D}} for 3D } diff --git a/man/cellProximitySpatPlot2D.Rd b/man/cellProximitySpatPlot2D.Rd index c3183fa5c..d6fbf5549 100644 --- a/man/cellProximitySpatPlot2D.Rd +++ b/man/cellProximitySpatPlot2D.Rd @@ -114,7 +114,7 @@ are used when this is TRUE. continuous colors when FALSE.} ggplot } \description{ -Visualize 2D cell-cell interactions according to spatial +Visualize 2D cell-cell interactions according to spatial coordinates in ggplot mode } \details{ @@ -125,6 +125,8 @@ g <- GiottoData::loadGiottoMini("visium") g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -cellProximitySpatPlot2D(gobject = g, cluster_column = "leiden_clus", -interaction_name = x) +cellProximitySpatPlot2D( + gobject = g, cluster_column = "leiden_clus", + interaction_name = x +) } diff --git a/man/cellProximitySpatPlot3D.Rd b/man/cellProximitySpatPlot3D.Rd index 7ee4885fb..79c56c1c2 100644 --- a/man/cellProximitySpatPlot3D.Rd +++ b/man/cellProximitySpatPlot3D.Rd @@ -111,7 +111,7 @@ are used when this is TRUE. continuous colors when FALSE.} plotly } \description{ -Visualize 3D cell-cell interactions according to spatial +Visualize 3D cell-cell interactions according to spatial coordinates in plotly mode } \details{ diff --git a/man/cellProximityVisPlot.Rd b/man/cellProximityVisPlot.Rd index f095c8629..7b13edc05 100644 --- a/man/cellProximityVisPlot.Rd +++ b/man/cellProximityVisPlot.Rd @@ -111,7 +111,7 @@ cellProximityVisPlot( ggplot or plotly } \description{ -Visualize cell-cell interactions according to spatial +Visualize cell-cell interactions according to spatial coordinates } \details{ @@ -122,6 +122,8 @@ g <- GiottoData::loadGiottoMini("visium") g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -cellProximityVisPlot(gobject = g, interaction_name = x, -cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy") +cellProximityVisPlot( + gobject = g, interaction_name = x, + cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy" +) } diff --git a/man/cellProximityVisPlot_internals.Rd b/man/cellProximityVisPlot_internals.Rd index 631140393..226436e52 100644 --- a/man/cellProximityVisPlot_internals.Rd +++ b/man/cellProximityVisPlot_internals.Rd @@ -102,13 +102,13 @@ Create the plots for `cellProximityVisPlot()` } \section{Functions}{ \itemize{ -\item \code{.cellProximityVisPlot_2D_ggplot()}: Visualize 2D cell-cell +\item \code{.cellProximityVisPlot_2D_ggplot()}: Visualize 2D cell-cell interactions according to spatial coordinates in ggplot mode -\item \code{.cellProximityVisPlot_2D_plotly()}: Visualize 2D cell-cell +\item \code{.cellProximityVisPlot_2D_plotly()}: Visualize 2D cell-cell interactions according to spatial coordinates in plotly mode -\item \code{.cellProximityVisPlot_3D_plotly()}: Visualize 3D cell-cell +\item \code{.cellProximityVisPlot_3D_plotly()}: Visualize 3D cell-cell interactions according to spatial coordinates in plotly mode }} diff --git a/man/cell_proximity_spots.Rd b/man/cell_proximity_spots.Rd index aca4dbbd8..b4101159e 100644 --- a/man/cell_proximity_spots.Rd +++ b/man/cell_proximity_spots.Rd @@ -16,7 +16,7 @@ \arguments{ \item{cell_IDs}{cell_IDs} -\item{dwls_values}{data.table of cell type enrichment in each spot and +\item{dwls_values}{data.table of cell type enrichment in each spot and multiply by cell number in each spot} \item{pairs}{data.table of paired spots. Format: cell_ID1, cell_ID2, N} @@ -34,10 +34,10 @@ external spots } \section{Functions}{ \itemize{ -\item \code{.cell_proximity_spots_internal()}: Compute cell-cell interactions observed +\item \code{.cell_proximity_spots_internal()}: Compute cell-cell interactions observed value inner each spot -\item \code{.cell_proximity_spots_external()}: Compute cell-cell interactions observed +\item \code{.cell_proximity_spots_external()}: Compute cell-cell interactions observed value for interacted spots \item \code{.cell_proximity_spots()}: Wrapper function diff --git a/man/checkAndFixSpatialGenes.Rd b/man/checkAndFixSpatialGenes.Rd index 366693aa2..4e4908fc0 100644 --- a/man/checkAndFixSpatialGenes.Rd +++ b/man/checkAndFixSpatialGenes.Rd @@ -27,13 +27,13 @@ checkAndFixSpatialGenes( character } \description{ -function to check the selected test name for spatial gene set +function to check the selected test name for spatial gene set in Giotto object } \details{ -This function checks the user specified test name of spatial gene set in +This function checks the user specified test name of spatial gene set in Giotto object. -SilhouetteRank works only with score, and SilhouetteRankTest works only +SilhouetteRank works only with score, and SilhouetteRankTest works only with pval. Use parameter use_score to specify. } \keyword{internal} diff --git a/man/chooseAvailableSpatialGenes.Rd b/man/chooseAvailableSpatialGenes.Rd index 5b6787069..394498345 100644 --- a/man/chooseAvailableSpatialGenes.Rd +++ b/man/chooseAvailableSpatialGenes.Rd @@ -17,14 +17,14 @@ chooseAvailableSpatialGenes(gobject, spat_unit = NULL, feat_type = NULL) character } \description{ -function to find the test name for existing spatial gene sets +function to find the test name for existing spatial gene sets in Giotto } \details{ -This function outputs the available test name for existing spatial gene sets +This function outputs the available test name for existing spatial gene sets in Giotto, which could be used in parameter ‘name’ in `filterSpatialGenes`. -Priorities for showing the spatial gene test names are ‘binSpect’ > +Priorities for showing the spatial gene test names are ‘binSpect’ > ‘silhouetteRankTest’ > ‘silhouetteRank’. } \keyword{internal} diff --git a/man/clusterSpatialCorFeats.Rd b/man/clusterSpatialCorFeats.Rd index 7c85aeaff..467bb8c49 100644 --- a/man/clusterSpatialCorFeats.Rd +++ b/man/clusterSpatialCorFeats.Rd @@ -33,5 +33,7 @@ Cluster based on spatially correlated features g <- GiottoData::loadGiottoMini("visium") clusterSpatialCorFeats(spatCorObject = detectSpatialCorFeats( -g, method = "network")) + g, + method = "network" +)) } diff --git a/man/combCCcom.Rd b/man/combCCcom.Rd index 61d253f98..f31d5799b 100644 --- a/man/combCCcom.Rd +++ b/man/combCCcom.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combCCcom} \alias{combCCcom} -\title{combCCcom} +\title{Combine cell cell communication tables} \usage{ combCCcom( spatialCC, @@ -43,11 +43,20 @@ data.tables \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -random_iter = 10) +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik" +) + +spatialCC <- spatCellCellcom(gobject = g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik", + verbose = "a lot", + random_iter = 10 +) -combCCcom(spatialCC = spatialCC, exprCC = exprCC) +combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) +force(combCC) } diff --git a/man/combineCPG.Rd b/man/combineCPG.Rd index 0536ecbdc..f4aeb7a23 100644 --- a/man/combineCPG.Rd +++ b/man/combineCPG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineCPG} \alias{combineCPG} -\title{combineCPG} +\title{deprecated} \usage{ combineCPG(...) } @@ -12,11 +12,11 @@ combineCPG(...) \describe{ \item{\code{icfObject}}{ICF (interaction changed feat) score object} \item{\code{selected_ints}}{subset of selected cell-cell interactions (optional)} - \item{\code{selected_feats}}{subset of selected Feats (optional)} - \item{\code{specific_feats_1}}{specific Featset combo -(need to position match specific_genes_2)} - \item{\code{specific_feats_2}}{specific Featset combo -(need to position match specific_genes_1)} + \item{\code{selected_feats}}{subset of selected Features (optional)} + \item{\code{specific_feats_1}}{specific Featureset combo +(need to position match specific_feats_2)} + \item{\code{specific_feats_2}}{specific Featureset combo +(need to position match specific_feats_1)} \item{\code{min_cells}}{minimum number of target cell type} \item{\code{min_int_cells}}{minimum number of interacting cell type} \item{\code{min_fdr}}{minimum adjusted p-value} diff --git a/man/combineCellProximityGenes.Rd b/man/combineCellProximityGenes.Rd index 1451edf7c..f1b2aeb6f 100644 --- a/man/combineCellProximityGenes.Rd +++ b/man/combineCellProximityGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineCellProximityGenes} \alias{combineCellProximityGenes} -\title{combineCellProximityGenes} +\title{deprecated} \usage{ combineCellProximityGenes(...) } diff --git a/man/combineICF.Rd b/man/combineICF.Rd deleted file mode 100644 index 6cf6d1413..000000000 --- a/man/combineICF.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_interaction.R -\name{combineICF} -\alias{combineICF} -\title{combineICF} -\usage{ -combineICF( - icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{icfObject}{ICF (interaction changed feat) score object} - -\item{selected_ints}{subset of selected cell-cell interactions (optional)} - -\item{selected_feats}{subset of selected Feats (optional)} - -\item{specific_feats_1}{specific Featset combo -(need to position match specific_genes_2)} - -\item{specific_feats_2}{specific Featset combo -(need to position match specific_genes_1)} - -\item{min_cells}{minimum number of target cell type} - -\item{min_int_cells}{minimum number of interacting cell type} - -\item{min_fdr}{minimum adjusted p-value} - -\item{min_spat_diff}{minimum absolute spatial expression difference} - -\item{min_log2_fc}{minimum absolute log2 fold-change} - -\item{do_parallel}{run calculations in parallel with mclapply} - -\item{verbose}{verbose} -} -\value{ -icfObject that contains the filtered differential feats scores -} -\description{ -Combine ICF scores in a pairwise manner. -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") -g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) - -combineICF(g_icf) -} diff --git a/man/combineICG.Rd b/man/combineICG.Rd index 07a793b94..c90ca8c00 100644 --- a/man/combineICG.Rd +++ b/man/combineICG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineICG} \alias{combineICG} -\title{combineICG} +\title{deprecated} \usage{ combineICG(...) } @@ -12,11 +12,11 @@ combineICG(...) \describe{ \item{\code{icfObject}}{ICF (interaction changed feat) score object} \item{\code{selected_ints}}{subset of selected cell-cell interactions (optional)} - \item{\code{selected_feats}}{subset of selected Feats (optional)} - \item{\code{specific_feats_1}}{specific Featset combo -(need to position match specific_genes_2)} - \item{\code{specific_feats_2}}{specific Featset combo -(need to position match specific_genes_1)} + \item{\code{selected_feats}}{subset of selected Features (optional)} + \item{\code{specific_feats_1}}{specific Featureset combo +(need to position match specific_feats_2)} + \item{\code{specific_feats_2}}{specific Featureset combo +(need to position match specific_feats_1)} \item{\code{min_cells}}{minimum number of target cell type} \item{\code{min_int_cells}}{minimum number of interacting cell type} \item{\code{min_fdr}}{minimum adjusted p-value} diff --git a/man/combineInteractionChangedFeats.Rd b/man/combineInteractionChangedFeats.Rd index 506685ce2..4851333e3 100644 --- a/man/combineInteractionChangedFeats.Rd +++ b/man/combineInteractionChangedFeats.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineInteractionChangedFeats} \alias{combineInteractionChangedFeats} +\alias{combineICF} \title{combineInteractionChangedFeats} \usage{ combineInteractionChangedFeats( @@ -18,6 +19,21 @@ combineInteractionChangedFeats( do_parallel = TRUE, verbose = TRUE ) + +combineICF( + icfObject, + selected_ints = NULL, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5, + do_parallel = TRUE, + verbose = TRUE +) } \arguments{ \item{icfObject}{ICF (interaction changed feat) score object} @@ -47,7 +63,8 @@ combineInteractionChangedFeats( \item{verbose}{verbose} } \value{ -combIcfObject that contains the filtered differential feature scores +`combIcfObject` that contains the filtered differential feature +scores } \description{ Combine ICF scores in a pairwise manner. @@ -55,8 +72,12 @@ Combine ICF scores in a pairwise manner. \examples{ g <- GiottoData::loadGiottoMini("visium") g_icf <- findInteractionChangedFeats(g, -cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), + nr_permutations = 10 +) -combineInteractionChangedFeats(g_icf) +cicf <- combineInteractionChangedFeats(g_icf) +force(cicf) +combineICF(g_icf) # this is a shortened alias } diff --git a/man/combineInteractionChangedGenes.Rd b/man/combineInteractionChangedGenes.Rd index 49014ddbf..061a625c4 100644 --- a/man/combineInteractionChangedGenes.Rd +++ b/man/combineInteractionChangedGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineInteractionChangedGenes} \alias{combineInteractionChangedGenes} -\title{combineInteractionChangedGenes} +\title{deprecated} \usage{ combineInteractionChangedGenes(...) } diff --git a/man/compareCellAbundance.Rd b/man/compareCellAbundance.Rd index 992341757..0c96076de 100644 --- a/man/compareCellAbundance.Rd +++ b/man/compareCellAbundance.Rd @@ -35,12 +35,15 @@ Compare cell types percent per polygon \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object -my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") +my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, + name = "selections" +) g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) diff --git a/man/comparePolygonExpression.Rd b/man/comparePolygonExpression.Rd index cff74a382..238091577 100644 --- a/man/comparePolygonExpression.Rd +++ b/man/comparePolygonExpression.Rd @@ -26,10 +26,10 @@ comparePolygonExpression( \item{selected_feats}{vector of selected features to plot} -\item{expression_values}{gene expression values to use +\item{expression_values}{gene expression values to use ("normalized", "scaled", "custom")} -\item{method}{method to use to detect differentially expressed feats +\item{method}{method to use to detect differentially expressed feats ("scran", "gini", "mast")} \item{\dots}{Arguments passed to \link[ComplexHeatmap]{Heatmap}} @@ -43,12 +43,15 @@ Compare gene expression between polygon areas \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object -my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") +my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, + name = "selections" +) g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) diff --git a/man/convertEnsemblToGeneSymbol.Rd b/man/convertEnsemblToGeneSymbol.Rd index 88d106181..ab4148656 100644 --- a/man/convertEnsemblToGeneSymbol.Rd +++ b/man/convertEnsemblToGeneSymbol.Rd @@ -15,7 +15,7 @@ convertEnsemblToGeneSymbol(matrix, species = c("mouse", "human")) expression matrix with gene symbols as rownames } \description{ -This function convert ensembl gene IDs from a matrix to +This function convert ensembl gene IDs from a matrix to official gene symbols } \details{ diff --git a/man/createArchRProj.Rd b/man/createArchRProj.Rd index b43e48c6f..dd5971525 100644 --- a/man/createArchRProj.Rd +++ b/man/createArchRProj.Rd @@ -25,19 +25,19 @@ These files can be in one of the following formats: (i) scATAC tabix files, \item{genome}{A string indicating the default genome to be used for all ArchR functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -This value is stored as a global environment variable, not part of the +This value is stored as a global environment variable, not part of the ArchRProject. This can be overwritten on a per-function basis using the given function's geneAnnotationand genomeAnnotation parameter. For something other than one of -the currently supported, see createGeneAnnnotation() and +the currently supported, see createGeneAnnnotation() and createGenomeAnnnotation()} -\item{createArrowFiles_params}{list of parameters passed to +\item{createArrowFiles_params}{list of parameters passed to `ArchR::createArrowFiles`} \item{ArchRProject_params}{list of parameters passed to `ArchR::ArchRProject`} -\item{addIterativeLSI_params}{list of parameters passed to +\item{addIterativeLSI_params}{list of parameters passed to `ArchR::addIterativeLSI`} \item{threads}{number of threads to use. Default = `ArchR::getArchRThreads()`} @@ -47,7 +47,7 @@ createGenomeAnnnotation()} \item{verbose}{Default = TRUE} } \value{ -An ArchR project with GeneScoreMatrix, TileMatrix, and +An ArchR project with GeneScoreMatrix, TileMatrix, and TileMatrix-based LSI } \description{ diff --git a/man/createCrossSection.Rd b/man/createCrossSection.Rd index fea26d186..76df8790e 100644 --- a/man/createCrossSection.Rd +++ b/man/createCrossSection.Rd @@ -6,6 +6,7 @@ \usage{ createCrossSection( gobject, + spat_unit = NULL, spat_loc_name = "raw", name = "cross_section", spatial_network_name = "Delaunay_network", @@ -23,70 +24,75 @@ createCrossSection( planeVector1 = NULL, planeVector2 = NULL, mesh_grid_n = 20, - return_gobject = TRUE + return_gobject = TRUE, + verbose = NULL ) } \arguments{ \item{gobject}{giotto object} +\item{spat_unit}{spatial unit} + \item{spat_loc_name}{name of spatial locations} \item{name}{name of cress section object. (default = cross_sectino)} -\item{spatial_network_name}{name of spatial network object. +\item{spatial_network_name}{name of spatial network object. (default = Delaunay_network)} -\item{thickness_unit}{unit of the virtual section thickness. If "cell", -average size of the observed cells is used as length unit. If "natural", -the unit of cell location coordinates is used.(default = cell)} +\item{thickness_unit}{unit of the virtual section thickness. If "cell", +average size of the observed cells is used as length unit. If "natural", +the unit of cell location coordinates is used. (default = cell)} \item{slice_thickness}{thickness of slice. default = 2} -\item{cell_distance_estimate_method}{method to estimate average distance +\item{cell_distance_estimate_method}{method to estimate average distance between neighobring cells. (default = mean)} -\item{extend_ratio}{deciding the span of the cross section meshgrid, as a -ratio of extension compared to the borders of the vitural tissue section. +\item{extend_ratio}{deciding the span of the cross section meshgrid, as a +ratio of extension compared to the borders of the vitural tissue section. (default = 0.2)} \item{method}{method to define the cross section plane. -If equation, the plane is defined by a four element numerical vector -(equation) in the form of c(A,B,C,D), corresponding to a plane with +If equation, the plane is defined by a four element numerical vector +(equation) in the form of c(A,B,C,D), corresponding to a plane with equation Ax+By+Cz=D. -If 3 points, the plane is define by the coordinates of 3 points, as given by +If 3 points, the plane is define by the coordinates of 3 points, as given by point1, point2, and point3. -If point and norm vector, the plane is defined by the coordinates of one -point (point1) in the plane and the coordinates of one norm vector +If point and norm vector, the plane is defined by the coordinates of one +point (point1) in the plane and the coordinates of one norm vector (normVector) to the plane. -If point and two plane vector, the plane is defined by the coordinates of -one point (point1) in the plane and the coordinates of two vectors +If point and two plane vector, the plane is defined by the coordinates of +one point (point1) in the plane and the coordinates of two vectors (planeVector1, planeVector2) in the plane. (default = equation)} -\item{equation}{equation required by method "equation".equations needs to be -a numerical vector of length 4, in the form of c(A,B,C,D), which defines +\item{equation}{equation required by method "equation".equations needs to be +a numerical vector of length 4, in the form of c(A,B,C,D), which defines plane Ax+By+Cz=D.} -\item{point1}{coordinates of the first point required by method +\item{point1}{coordinates of the first point required by method "3 points","point and norm vector", and "point and two plane vectors".} \item{point2}{coordinates of the second point required by method "3 points"} \item{point3}{coordinates of the third point required by method "3 points"} -\item{normVector}{coordinates of the norm vector required by method +\item{normVector}{coordinates of the norm vector required by method "point and norm vector"} -\item{planeVector1}{coordinates of the first plane vector required by +\item{planeVector1}{coordinates of the first plane vector required by method "point and two plane vectors"} -\item{planeVector2}{coordinates of the second plane vector required by +\item{planeVector2}{coordinates of the second plane vector required by method "point and two plane vectors"} -\item{mesh_grid_n}{numer of meshgrid lines to generate along both directions +\item{mesh_grid_n}{numer of meshgrid lines to generate along both directions for the cross section plane.} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object with updated spatial network slot @@ -95,12 +101,21 @@ giotto object with updated spatial network slot Create a virtual 2D cross section. } \details{ -Creates a virtual 2D cross section object for a given spatial -network object. The users need to provide the definition of the cross +Creates a virtual 2D cross section object for a given spatial +network object. The users need to provide the definition of the cross section plane (see method). } \examples{ -g <- GiottoData::loadGiottoMini("visium") +g <- GiottoData::loadGiottoMini("starmap") + +g <- createCrossSection( + gobject = g, + method = "equation", + equation = c(0, 1, 0, 600), + extend_ratio = 0.6, + name = "new_cs", + return_gobject = TRUE +) -createCrossSection(gobject = g, spatial_network_name = "spatial_network") +crossSectionPlot(g, name = "new_cs") } diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 5343dde5a..2d0a13235 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -20,11 +20,11 @@ createGiottoCosMxObject( \item{cosmx_dir}{full path to the exported cosmx directory} \item{data_to_use}{which type(s) of expression data to build the gobject with -Default is \code{'all'} information available. \code{'subcellular'} loads -the transcript coordinates only. \code{'aggregate'} loads the provided +Default is \code{'all'} information available. \code{'subcellular'} loads +the transcript coordinates only. \code{'aggregate'} loads the provided aggregated expression matrix.} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -49,9 +49,9 @@ Given the path to a CosMx experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a cosmx output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a cosmx output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{CellComposite} (folder of images)} @@ -66,23 +66,23 @@ function matches against: [\strong{Workflows}] Workflow to use is accessed through the data_to_use param \itemize{ - \item{'all' - loads and requires subcellular information from tx_file and + \item{'all' - loads and requires subcellular information from tx_file and fov_positions_file - and also the existing aggregated information + and also the existing aggregated information (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} - \item{'subcellular' - loads and requires subcellular information from + \item{'subcellular' - loads and requires subcellular information from tx_file and fov_positions_file only.} - \item{'aggregate' - loads and requires the existing aggregate information - (expression, spatial locations, and metadata) from exprMat_file and + \item{'aggregate' - loads and requires the existing aggregate information + (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} } -[\strong{Images}] Images in the default CellComposite, CellLabels, +[\strong{Images}] Images in the default CellComposite, CellLabels, CompartmentLabels, and CellOverlay -folders will be loaded as giotto largeImage objects in all workflows as -long as they are available. Additionally, CellComposite images will be +folders will be loaded as giotto largeImage objects in all workflows as +long as they are available. Additionally, CellComposite images will be converted to giotto image objects, making plotting with these image objects more responsive when accessing them from a server. \code{\link{showGiottoImageNames}} can be used to see the available images. diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index d93a7caa5..23722c1de 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -37,10 +37,10 @@ createGiottoMerscopeObject( \arguments{ \item{merscope_dir}{full path to the exported merscope directory} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} -\item{FOVs}{which FOVs to use when building the subcellular object. +\item{FOVs}{which FOVs to use when building the subcellular object. (default is NULL) NULL loads all FOVs (very slow)} @@ -66,13 +66,13 @@ provided} a giotto object } \description{ -Given the path to a MERSCOPE experiment directory, creates a +Given the path to a MERSCOPE experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a MERSCOPE output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a MERSCOPE output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{cell_boundaries} (folder .hdf5 files)} @@ -84,10 +84,10 @@ function matches against: } \section{Functions}{ \itemize{ -\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with +\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with 'subcellular' workflow -\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' +\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' workflow }} diff --git a/man/createGiottoObjectfromArchR.Rd b/man/createGiottoObjectfromArchR.Rd index 35c8db106..1b7748a2b 100644 --- a/man/createGiottoObjectfromArchR.Rd +++ b/man/createGiottoObjectfromArchR.Rd @@ -20,10 +20,10 @@ createGiottoObjectfromArchR( \item{expression_feat}{Giotto object available features (e.g. atac, rna, ...)} -\item{spatial_locs}{data.table or data.frame with coordinates for cell +\item{spatial_locs}{data.table or data.frame with coordinates for cell centroids} -\item{sampleNames}{A character vector containing the ArchR project sample +\item{sampleNames}{A character vector containing the ArchR project sample name} \item{...}{additional arguments passed to `createGiottoObject`} diff --git a/man/createGiottoVisiumObject.Rd b/man/createGiottoVisiumObject.Rd index 6c7c17fae..3229754b9 100644 --- a/man/createGiottoVisiumObject.Rd +++ b/man/createGiottoVisiumObject.Rd @@ -39,7 +39,7 @@ createGiottoVisiumObject( \item{h5_tissue_positions_path}{path to tissue locations (.csv file)} -\item{h5_image_png_path}{path to tissue .png file (optional). Image +\item{h5_image_png_path}{path to tissue .png file (optional). Image autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{h5_json_scalefactors_path}{path to .json scalefactors (optional)} @@ -56,15 +56,15 @@ autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{ymin_adj}{deprecated} -\item{instructions}{list of instructions or output result from +\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{expression_matrix_class}{class of expression matrix to use +\item{expression_matrix_class}{class of expression matrix to use (e.g. 'dgCMatrix', 'DelayedArray')} \item{h5_file}{optional path to create an on-disk h5 file} -\item{cores}{how many cores or threads to use to read data if paths are +\item{cores}{how many cores or threads to use to read data if paths are provided} \item{verbose}{be verbose} @@ -73,7 +73,7 @@ provided} giotto object } \description{ -Create Giotto object directly from a 10X visium folder. Also +Create Giotto object directly from a 10X visium folder. Also accepts visium H5 outputs. } \details{ diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 0fddd0694..e738694d6 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -28,24 +28,24 @@ createGiottoXeniumObject( \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene expression matrix} -\item{gene_column_index}{which column from the features or genes .tsv file +\item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{key_list}{(advanced) list of grep-based keywords to split the +\item{key_list}{(advanced) list of grep-based keywords to split the subcellular feature detections by feature type. See details} \item{instructions}{list of instructions or output result @@ -60,7 +60,7 @@ provided} giotto object } \description{ -Given the path to a Xenium experiment output folder, creates a +Given the path to a Xenium experiment output folder, creates a Giotto object } \details{ @@ -68,20 +68,20 @@ Giotto object Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: \emph{blank codeword}, \emph{negative control codeword}, and -\emph{negative control probe}. These additional QC probes each occupy and -are treated as their own feature types so that they can largely remain +\emph{negative control probe}. These additional QC probes each occupy and +are treated as their own feature types so that they can largely remain independent of the gene expression information. [\strong{key_list}] Related to \code{data_to_use = 'subcellular'} workflow only: -Additional QC probe information is in the subcellular feature detections -information and must be separated from the gene expression information +Additional QC probe information is in the subcellular feature detections +information and must be separated from the gene expression information during processing. -The QC probes have prefixes that allow them to be selected from the rest of +The QC probes have prefixes that allow them to be selected from the rest of the feature IDs. -Giotto uses a named list of keywords (\code{key_list}) to select these QC -probes, with the list names being the names that will be assigned as the -feature type of these feature detections. The default list is used when +Giotto uses a named list of keywords (\code{key_list}) to select these QC +probes, with the list names being the names that will be assigned as the +feature type of these feature detections. The default list is used when \code{key_list} = NULL. Default list: diff --git a/man/createSpatialGenomicsObject.Rd b/man/createSpatialGenomicsObject.Rd index 1571bcf4b..98650b81f 100644 --- a/man/createSpatialGenomicsObject.Rd +++ b/man/createSpatialGenomicsObject.Rd @@ -9,7 +9,7 @@ createSpatialGenomicsObject(sg_dir = NULL, instructions = NULL) \arguments{ \item{sg_dir}{full path to the exported Spatial Genomics directory} -\item{instructions}{new instructions +\item{instructions}{new instructions (e.g. result from createGiottoInstructions)} } \value{ diff --git a/man/create_crossSection_object.Rd b/man/create_crossSection_object.Rd index dd6bbe9d5..495f0657e 100644 --- a/man/create_crossSection_object.Rd +++ b/man/create_crossSection_object.Rd @@ -26,38 +26,38 @@ create_crossSection_object( \item{method}{method to define the cross section plane.} -\item{thickness_unit}{unit of the virtual section thickness. If "cell", -average size of the observed cells is used as length unit. If "natural", +\item{thickness_unit}{unit of the virtual section thickness. If "cell", +average size of the observed cells is used as length unit. If "natural", the unit of cell location coordinates is used.(default = cell)} \item{slice_thickness}{thickness of slice} -\item{cell_distance_estimate_method}{method to estimate average distance +\item{cell_distance_estimate_method}{method to estimate average distance between neighboring cells. (default = mean)} -\item{extend_ratio}{deciding the span of the cross section meshgrid, as a -ratio of extension compared to the borders of the virtual tissue section. +\item{extend_ratio}{deciding the span of the cross section meshgrid, as a +ratio of extension compared to the borders of the virtual tissue section. (default = 0.2)} -\item{plane_equation}{a numerical vector of length 4, in the form of +\item{plane_equation}{a numerical vector of length 4, in the form of c(A,B,C,D), which defines plane Ax+By+Cz=D.} -\item{mesh_grid_n}{number of meshgrid lines to generate along both +\item{mesh_grid_n}{number of meshgrid lines to generate along both directions for the cross section plane.} \item{mesh_obj}{object that stores the cross section meshgrid information.} \item{cell_subset}{cells selected by the cross section} -\item{cell_subset_spatial_locations}{locations of cells selected by the +\item{cell_subset_spatial_locations}{locations of cells selected by the cross section} -\item{cell_subset_projection_locations}{3D projection coordinates of +\item{cell_subset_projection_locations}{3D projection coordinates of selected cells onto the cross section plane} \item{cell_subset_projection_PCA}{pca of projection coordinates} -\item{cell_subset_projection_coords}{2D PCA coordinates of selected cells +\item{cell_subset_projection_coords}{2D PCA coordinates of selected cells in the cross section plane} } \value{ diff --git a/man/crossSectionGenePlot.Rd b/man/crossSectionFeatPlot.Rd similarity index 74% rename from man/crossSectionGenePlot.Rd rename to man/crossSectionFeatPlot.Rd index 1352bab53..d8e939185 100644 --- a/man/crossSectionGenePlot.Rd +++ b/man/crossSectionFeatPlot.Rd @@ -1,11 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cross_section.R -\name{crossSectionGenePlot} -\alias{crossSectionGenePlot} -\title{crossSectionGenePlot} +\name{crossSectionFeatPlot} +\alias{crossSectionFeatPlot} +\title{crossSectionFeatPlot} \usage{ -crossSectionGenePlot( +crossSectionFeatPlot( gobject = NULL, + spat_unit = NULL, + feat_type = NULL, spat_loc_name = "raw", crossSection_obj = NULL, name = NULL, @@ -17,6 +19,10 @@ crossSectionGenePlot( \arguments{ \item{gobject}{giotto object} +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + \item{spat_loc_name}{name of spatial locations} \item{crossSection_obj}{crossSection object} @@ -34,8 +40,8 @@ change save_name in save_param} ggplot } \description{ -Visualize cells and gene expression in a virtual cross section -according to spatial coordinates +Visualize cells and feature expression in a virtual cross +section according to spatial coordinates } \details{ Description of parameters. diff --git a/man/crossSectionGenePlot3D.Rd b/man/crossSectionFeatPlot3D.Rd similarity index 67% rename from man/crossSectionGenePlot3D.Rd rename to man/crossSectionFeatPlot3D.Rd index 6c613f91e..8d6339194 100644 --- a/man/crossSectionGenePlot3D.Rd +++ b/man/crossSectionFeatPlot3D.Rd @@ -1,14 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cross_section.R -\name{crossSectionGenePlot3D} -\alias{crossSectionGenePlot3D} -\title{crossSectionGenePlot3D} +\name{crossSectionFeatPlot3D} +\alias{crossSectionFeatPlot3D} +\title{crossSectionFeatPlot3D} \usage{ -crossSectionGenePlot3D( +crossSectionFeatPlot3D( gobject, + spat_unit = NULL, + feat_type = NULL, crossSection_obj = NULL, name = NULL, spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, other_cell_color = alpha("lightgrey", 0), default_save_name = "crossSectionGenePlot3D", ... @@ -17,16 +20,22 @@ crossSectionGenePlot3D( \arguments{ \item{gobject}{giotto object} +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + \item{crossSection_obj}{cross section object as alternative input. default = NULL.} \item{name}{name of virtual cross section to use} \item{spatial_network_name}{name of spatial network to use} -\item{other_cell_color}{color of cells outside the cross section. +\item{show_other_cells}{logical. Default = TRUE} + +\item{other_cell_color}{color of cells outside the cross section. default = transparent.} -\item{default_save_name}{default save name for saving, don't change, change +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} \item{...}{parameters for spatGenePlot3D} @@ -35,8 +44,8 @@ save_name in save_param} ggplot } \description{ -Visualize cells and gene expression in a virtual cross section -according to spatial coordinates +Visualize cells and feature expression in a virtual cross +section according to spatial coordinates } \details{ Description of parameters. diff --git a/man/crossSectionPlot.Rd b/man/crossSectionPlot.Rd index ac4062e83..f7059e680 100644 --- a/man/crossSectionPlot.Rd +++ b/man/crossSectionPlot.Rd @@ -6,8 +6,9 @@ \usage{ crossSectionPlot( gobject, - spat_loc_name = "raw", + spat_unit = NULL, feat_type = NULL, + spat_loc_name = "raw", crossSection_obj = NULL, name = NULL, spatial_network_name = "Delaunay_network", @@ -18,18 +19,20 @@ crossSectionPlot( \arguments{ \item{gobject}{giotto object} -\item{spat_loc_name}{name of spatial locations} +\item{spat_unit}{spatial unit} \item{feat_type}{feature type} -\item{crossSection_obj}{cross section object as alternative input. +\item{spat_loc_name}{name of spatial locations} + +\item{crossSection_obj}{cross section object as alternative input. default = NULL.} \item{name}{name of virtual cross section to use} \item{spatial_network_name}{name of spatial network to use} -\item{default_save_name}{default save name for saving, don't change, +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} \item{...}{parameters for spatPlot2D} @@ -38,7 +41,7 @@ change save_name in save_param} ggplot } \description{ -Visualize cells in a virtual cross section according to +Visualize cells in a virtual cross section according to spatial coordinates } \details{ diff --git a/man/crossSectionPlot3D.Rd b/man/crossSectionPlot3D.Rd index efb03fad5..04be6daf5 100644 --- a/man/crossSectionPlot3D.Rd +++ b/man/crossSectionPlot3D.Rd @@ -6,6 +6,8 @@ \usage{ crossSectionPlot3D( gobject, + spat_unit = NULL, + feat_type = NULL, crossSection_obj = NULL, name = NULL, spatial_network_name = "Delaunay_network", @@ -18,7 +20,11 @@ crossSectionPlot3D( \arguments{ \item{gobject}{giotto object} -\item{crossSection_obj}{cross section object as alternative input. +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + +\item{crossSection_obj}{cross section object as alternative input. default = NULL.} \item{name}{name of virtual cross section to use} @@ -27,10 +33,10 @@ default = NULL.} \item{show_other_cells}{display not selected cells} -\item{other_cell_color}{color of cells outside the cross section. +\item{other_cell_color}{color of cells outside the cross section. default = transparent.} -\item{default_save_name}{default save name for saving, don't change, +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} \item{...}{parameters for spatPlot3D} @@ -39,7 +45,7 @@ change save_name in save_param} ggplot } \description{ -Visualize cells in a virtual cross section according to spatial +Visualize cells in a virtual cross section according to spatial coordinates } \details{ diff --git a/man/detectSpatialCorFeats.Rd b/man/detectSpatialCorFeats.Rd index 4cba6edaf..42b69eb39 100644 --- a/man/detectSpatialCorFeats.Rd +++ b/man/detectSpatialCorFeats.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/spatial_genes.R \name{detectSpatialCorFeats} \alias{detectSpatialCorFeats} -\title{detectSpatialCorFeats} +\alias{detectSpatialCorFeatsMatrix} +\title{Detect spatially correlated features} \usage{ detectSpatialCorFeats( gobject, @@ -18,6 +19,18 @@ detectSpatialCorFeats( min_cells_per_grid = 4, cor_method = c("pearson", "kendall", "spearman") ) + +detectSpatialCorFeatsMatrix( + expression_matrix, + method = c("grid", "network"), + spatial_network, + spatial_grid, + spatial_locs, + subset_feats = NULL, + network_smoothing = NULL, + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman") +) } \arguments{ \item{gobject}{giotto object} @@ -32,44 +45,70 @@ detectSpatialCorFeats( \item{expression_values}{gene expression values to use} -\item{subset_feats}{subset of feats to use} +\item{subset_feats}{subset of features to use} \item{spatial_network_name}{name of spatial network to use} -\item{network_smoothing}{smoothing factor beteen 0 and 1 -(default: automatic)} +\item{network_smoothing}{smoothing factor between 0 and 1 +(has automatic default, see details)} \item{spatial_grid_name}{name of spatial grid to use} \item{min_cells_per_grid}{minimum number of cells to consider a grid} \item{cor_method}{correlation method} + +\item{expression_matrix}{provided expression matrix} + +\item{spatial_network}{provided spatial network} + +\item{spatial_grid}{provided spatial grid} + +\item{spatial_locs}{provided spatial locations} } \value{ -returns a spatial correlation object: "spatCorObject" +returns a spatial correlation object: \code{spatCorObject} } \description{ -Detect features that are spatially correlated +Detect features that are spatially correlated. Functions for +starting from either a gobject (\code{detectSpatialCorFeats()}) or individual +pieces of data (\code{detectSpatialCorFeatsMatrix()}) are provided. } \details{ -For method = network, it expects a fully connected spatial network. You -can make sure to create a +For \code{method = network}, it expects a fully connected spatial network. +You can make sure to create a fully connected network by setting minimal_k > 0 in the - \code{\link{createSpatialNetwork}} function. -\itemize{ - \item{1. grid-averaging: }{average gene expression values within a predefined spatial grid} - \item{2. network-averaging: }{smoothens the gene expression matrix by averaging the expression within one cell - by using the neighbours within the predefined spatial network. b is a smoothening factor - that defaults to 1 - 1/k, where k is the median number of k-neighbors in the - selected spatial network. Setting b = 0 means no smoothing and b = 1 means no contribution - from its own expression.} +\code{\link{createSpatialNetwork}} function. +\enumerate{ +\item \strong{grid-averaging:} average gene expression values within a predefined +spatial grid +\item \strong{network-averaging:} smoothens the gene expression matrix by +averaging the expression within one cell by using the neighbours within +the predefined spatial network. \eqn{b} is a smoothening factor passed by +\code{network_smoothing} param that defaults to \eqn{1 - 1/k}, where \eqn{k} +is the median number of k-neighbors in the selected spatial network. +Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no +contribution from its own expression. } -The spatCorObject can be further explored with showSpatialCorFeats() + +The \code{spatCorObject} can be further explored with \code{showSpatialCorFeats()} } \examples{ g <- GiottoData::loadGiottoMini("visium") +# Perform with data in a gobject detectSpatialCorFeats(g, method = "network") + +# This analysis can also be performed with data outside of the gobject +detectSpatialCorFeatsMatrix( + expression_matrix = getExpression( + g, + output = "matrix" + ), + method = "network", + spatial_network = getSpatialNetwork(g, output = "networkDT") +) + } \seealso{ \code{\link{showSpatialCorFeats}} diff --git a/man/detectSpatialCorFeatsMatrix.Rd b/man/detectSpatialCorFeatsMatrix.Rd deleted file mode 100644 index 515557f80..000000000 --- a/man/detectSpatialCorFeatsMatrix.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_genes.R -\name{detectSpatialCorFeatsMatrix} -\alias{detectSpatialCorFeatsMatrix} -\title{detectSpatialCorFeatsMatrix} -\usage{ -detectSpatialCorFeatsMatrix( - expression_matrix, - method = c("grid", "network"), - spatial_network, - spatial_grid, - spatial_locs, - subset_feats = NULL, - network_smoothing = NULL, - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman") -) -} -\arguments{ -\item{expression_matrix}{provided expression matrix} - -\item{method}{method to use for spatial averaging} - -\item{spatial_network}{provided spatial network} - -\item{spatial_grid}{provided spatial grid} - -\item{spatial_locs}{provided spatial locations} - -\item{subset_feats}{subset of features to use} - -\item{network_smoothing}{smoothing factor beteen 0 and 1 -(has automatic default, see details)} - -\item{min_cells_per_grid}{minimum number of cells to consider a grid} - -\item{cor_method}{correlation method} -} -\value{ -returns a spatial correlation object: \code{spatCorObject} -} -\description{ -Detect genes that are spatially correlated -} -\details{ -For \code{method = network}, it expects a fully connected spatial network. -You can make sure to create a -fully connected network by setting minimal_k > 0 in the -\code{\link{createSpatialNetwork}} function. -\enumerate{ -\item \strong{grid-averaging:} average gene expression values within a predefined -spatial grid -\item \strong{network-averaging:} smoothens the gene expression matrix by -averaging the expression within one cell by using the neighbours within -the predefined spatial network. \eqn{b} is a smoothening factor passed by -\code{network_smoothing} param that defaults to \eqn{1 - 1/k}, where \eqn{k} -is the median number of k-neighbors in the selected spatial network. -Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no -contribution from its own expression. -} - -The \code{spatCorObject} can be further explored with \code{showSpatialCorGenes()} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -detectSpatialCorFeatsMatrix(expression_matrix = getExpression( -g, output = "matrix"), method = "network", -spatial_network = getSpatialNetwork(g, output = "networkDT")) -} -\seealso{ -\code{\link{showSpatialCorFeats}} -} diff --git a/man/detectSpatialCorGenes.Rd b/man/detectSpatialCorGenes.Rd deleted file mode 100644 index 6c5a2ce6b..000000000 --- a/man/detectSpatialCorGenes.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_genes.R -\name{detectSpatialCorGenes} -\alias{detectSpatialCorGenes} -\title{detectSpatialCorGenes} -\usage{ -detectSpatialCorGenes( - gobject, - feat_type = NULL, - spat_unit = NULL, - method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - subset_genes = NULL, - spatial_network_name = "Delaunay_network", - network_smoothing = NULL, - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman") -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{feat_type}{feature type} - -\item{spat_unit}{spatial unit} - -\item{method}{method to use for spatial averaging} - -\item{expression_values}{gene expression values to use} - -\item{subset_feats}{subset of feats to use} - -\item{subset_genes}{deprecated, use \code{subset_feats}} - -\item{spatial_network_name}{name of spatial network to use} - -\item{network_smoothing}{smoothing factor beteen 0 and 1 -(default: automatic)} - -\item{spatial_grid_name}{name of spatial grid to use} - -\item{min_cells_per_grid}{minimum number of cells to consider a grid} - -\item{cor_method}{correlation method} -} -\value{ -returns a spatial correlation object: "spatCorObject" -} -\description{ -Detect genes that are spatially correlated -} -\details{ -For method = network, it expects a fully connected spatial network. You -can make sure to create a -fully connected network by setting minimal_k > 0 in the -\code{\link{createSpatialNetwork}} function. -\itemize{ - \item{1. grid-averaging: }{average gene expression values within a - predefined spatial grid} - \item{2. network-averaging: }{smoothens the gene expression matrix by - averaging the expression within one cell - by using the neighbours within the predefined spatial network. b is a - smoothening factor that defaults to 1 - 1/k, where k is the median - number of k-neighbors in the selected spatial network. Setting b = 0 - means no smoothing and b = 1 means no contribution - from its own expression.} -} -The spatCorObject can be further explored with showSpatialCorGenes() -} -\seealso{ -\code{\link{showSpatialCorGenes}} -} diff --git a/man/detectSpatialPatterns.Rd b/man/detectSpatialPatterns.Rd index f45d0592c..0547b4b78 100644 --- a/man/detectSpatialPatterns.Rd +++ b/man/detectSpatialPatterns.Rd @@ -43,8 +43,9 @@ in a spatial grid. \details{ Steps to identify spatial patterns: \itemize{ - \item{1. average gene expression for cells within a grid, see createSpatialGrid} - \item{2. perform PCA on the average grid expression profiles} - \item{3. convert variance of principlal components (PCs) to z-scores and select PCs based on a z-score threshold} + * 1. average gene expression for cells within a grid, see createSpatialGrid + * 2. perform PCA on the average grid expression profiles + * 3. convert variance of principal components (PCs) to z-scores and + select PCs based on a z-score threshold } } diff --git a/man/doClusterProjection.Rd b/man/doClusterProjection.Rd index 6ccea3858..7bc3dd27b 100644 --- a/man/doClusterProjection.Rd +++ b/man/doClusterProjection.Rd @@ -78,7 +78,9 @@ Giotto object. \examples{ g <- GiottoData::loadGiottoMini("visium") x <- pDataDT(g) -g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID,300)) -doClusterProjection(target_gobject = g, source_gobject = g_small, -source_cluster_labels = "leiden_clus") +g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID, 300)) +doClusterProjection( + target_gobject = g, source_gobject = g_small, + source_cluster_labels = "leiden_clus" +) } diff --git a/man/doFeatureSetEnrichment.Rd b/man/doFeatureSetEnrichment.Rd index 28387127f..359b7d3e5 100644 --- a/man/doFeatureSetEnrichment.Rd +++ b/man/doFeatureSetEnrichment.Rd @@ -28,39 +28,39 @@ doFeatureSetEnrichment( \item{path_to_GSEA}{path to GSEA command line executable, e.g. gsea-XXX.jar. See details (1.) for more information.} -\item{GSEA_dataset}{path to a Human/Mouse collection from GSEA, e.g. +\item{GSEA_dataset}{path to a Human/Mouse collection from GSEA, e.g. Hallmarks C1. See details (2.) for more information.} -\item{GSEA_ranked_file}{path to .rnk file for GSEA. See details (3.) for +\item{GSEA_ranked_file}{path to .rnk file for GSEA. See details (3.) for more information} -\item{output_folder}{path to which the GSEA results will be saved. Default +\item{output_folder}{path to which the GSEA results will be saved. Default is current working directory.} -\item{name_analysis_folder}{default output subdirectory prefix to which +\item{name_analysis_folder}{default output subdirectory prefix to which results are saved. - Will live within output_folder; equivalent of + Will live within output_folder; equivalent of "Analysis Name" in GSEA Application.} -\item{collapse}{only 'false' is supported. This will use your dataset as-is, +\item{collapse}{only 'false' is supported. This will use your dataset as-is, in the original format.} -\item{mode}{option selected in Advanced Field "Collapsing Mode for +\item{mode}{option selected in Advanced Field "Collapsing Mode for Probe Sets => 1 gene"} \item{norm}{normalization mode; only meandiv is supported.} \item{nperm}{number of permutations, default 1000} -\item{scoring_scheme}{Default "weighted", equivalent of +\item{scoring_scheme}{Default "weighted", equivalent of "enrichment statistic" in GSEA Application} \item{plot_top_x}{Default 20, number of enrichment plots to produce.} -\item{set_max}{default 500, equivalent to "max size; exclude larger sets" +\item{set_max}{default 500, equivalent to "max size; exclude larger sets" in Basic Fields in GSEA Application} -\item{set_min}{default 15, equivalent to "min size; exclude smaller sets" +\item{set_min}{default 15, equivalent to "min size; exclude smaller sets" in Basic Fields in GSEA Application} } \value{ @@ -74,11 +74,11 @@ NECESSARY PREREQUISITES 1. download and install the COMMAND line (all platforms) gsea-XXX.jar https://www.gsea-msigdb.org/gsea/downloads.jsp 1.1. download zip file -1.2. unzip and move to known location +1.2. unzip and move to known location (e.g. in path/to/your/applications/gsea/GSEA_4.3.2) 2. download the Human and Mouse collections -https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder +https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder https://www.gsea-msigdb.org/gsea/downloads.jsp (all downloaded) 3. create ranked gene lists diff --git a/man/doGiottoClustree.Rd b/man/doGiottoClustree.Rd index cc605fabf..ff99d4c6a 100644 --- a/man/doGiottoClustree.Rd +++ b/man/doGiottoClustree.Rd @@ -68,8 +68,10 @@ will be returned. \examples{ g <- GiottoData::loadGiottoMini("visium") -doGiottoClustree(gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, -show_plot = FALSE, save_plot = FALSE) +doGiottoClustree( + gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, + show_plot = FALSE, save_plot = FALSE +) } \seealso{ \code{\link{doLeidenCluster}} diff --git a/man/doHMRF.Rd b/man/doHMRF.Rd index 6021d2a19..0e2975699 100644 --- a/man/doHMRF.Rd +++ b/man/doHMRF.Rd @@ -43,7 +43,7 @@ doHMRF( \item{spatial_genes}{spatial genes to use for HMRF} -\item{spatial_dimensions}{select spatial dimensions to use, default is all +\item{spatial_dimensions}{select spatial dimensions to use, default is all possible dimensions} \item{dim_reduction_to_use}{use another dimension reduction set as input} @@ -52,14 +52,14 @@ possible dimensions} \item{dimensions_to_use}{number of dimensions to use as input} -\item{seed}{seed to fix random number generator +\item{seed}{seed to fix random number generator (for creating initialization of HMRF) (-1 if no fixing)} \item{name}{name of HMRF run} \item{k}{number of HMRF domains} -\item{betas}{betas to test for. three numbers: start_beta, beta_increment, +\item{betas}{betas to test for. three numbers: start_beta, beta_increment, num_betas e.g. c(0, 2.0, 50)} \item{tolerance}{tolerance} @@ -75,7 +75,7 @@ num_betas e.g. c(0, 2.0, 50)} \item{overwrite_output}{overwrite output folder} } \value{ -Creates a directory with results that can be viewed with +Creates a directory with results that can be viewed with viewHMRFresults } \description{ @@ -88,6 +88,8 @@ Description of HMRF parameters ... g <- GiottoData::loadGiottoMini("visium") spat_genes <- binSpect(g) -doHMRF(g, spatial_genes = spat_genes[seq_len(10)]$feats, -output_folder = tempdir()) +doHMRF(g, + spatial_genes = spat_genes[seq_len(10)]$feats, + output_folder = tempdir() +) } diff --git a/man/doHMRF_V2.Rd b/man/doHMRF_V2.Rd index c147e6957..d8560a60a 100644 --- a/man/doHMRF_V2.Rd +++ b/man/doHMRF_V2.Rd @@ -7,12 +7,12 @@ doHMRF_V2(HMRF_init_obj, betas = NULL) } \arguments{ -\item{HMRF_init_obj}{initialization object list returned from initHMRF() +\item{HMRF_init_obj}{initialization object list returned from initHMRF() function} -\item{betas}{beta value of the HMRF model, controlling the smoothness of -clustering. NULL value of beta will provide default values based on feature -numbers, otherwise, a vector of three values: initial beta, beta increment, +\item{betas}{beta value of the HMRF model, controlling the smoothness of +clustering. NULL value of beta will provide default values based on feature +numbers, otherwise, a vector of three values: initial beta, beta increment, and number of betas} } \value{ @@ -22,20 +22,27 @@ HMRF model function to run HMRF model } \details{ -This function will run a HMRF model after initialization of HMRF. Of note +This function will run a HMRF model after initialization of HMRF. Of note is the beta parameter, the smoothing parameter. -If the users are interested in selecting results from different smoothness, +If the users are interested in selecting results from different smoothness, we recommend running a range of betas, -hence betas specify what this range is. For example, betas=c(0,10,5) will +hence betas specify what this range is. For example, betas=c(0,10,5) will run for the following betas: 0, 10, 20, 30, 40. -betas=c(0,5,2) will run for betas: 0, 5, 10. Setting the beta can use the +betas=c(0,5,2) will run for betas: 0, 5, 10. Setting the beta can use the following guideline: If number of features N is 10: can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } By default the number of principle components that we calculate is 100, which diff --git a/man/runPCAprojection.Rd b/man/runPCAprojection.Rd index 31d0251f6..1492e60ad 100644 --- a/man/runPCAprojection.Rd +++ b/man/runPCAprojection.Rd @@ -69,11 +69,11 @@ runPCAprojection( giotto object with updated PCA dimension recuction } \description{ -runs a Principal Component Analysis on a random +runs a Principal Component Analysis on a random subset + projection } \details{ -See \code{\link[BiocSingular]{runPCA}} and +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. This PCA implementation is similar to \code{\link{runPCA}}, except that it performs PCA on a subset of the cells or features, and predict on the others. @@ -82,7 +82,7 @@ This can significantly increase speed without sacrificing accuracy too much. \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } } diff --git a/man/runPCAprojectionBatch.Rd b/man/runPCAprojectionBatch.Rd index 926001375..518ff46c0 100644 --- a/man/runPCAprojectionBatch.Rd +++ b/man/runPCAprojectionBatch.Rd @@ -72,28 +72,30 @@ runPCAprojectionBatch( giotto object with updated PCA dimension reduction } \description{ -runs a Principal Component Analysis on multiple random +runs a Principal Component Analysis on multiple random batches + projection } \details{ -See \code{\link[BiocSingular]{runPCA}} and +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. -This PCA implementation is similar to \code{\link{runPCA}} and +This PCA implementation is similar to \code{\link{runPCA}} and \code{\link{runPCAprojection}}, -except that it performs PCA on multiple subsets (batches) of the cells or +except that it performs PCA on multiple subsets (batches) of the cells or features, -and predict on the others. This can significantly increase speed without +and predict on the others. This can significantly increase speed without sacrificing accuracy too much. \itemize{ \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } } \examples{ g <- GiottoData::loadGiottoMini("visium") -runPCAprojectionBatch(g) +# set feats_to_use to NULL since there are not many hvfs +# (only 48 in this mini dataset) +runPCAprojectionBatch(g, feats_to_use = NULL) } diff --git a/man/runPatternSimulation.Rd b/man/runPatternSimulation.Rd index 7a63a7ddd..c06ba28da 100644 --- a/man/runPatternSimulation.Rd +++ b/man/runPatternSimulation.Rd @@ -86,7 +86,11 @@ and runs the different spatial gene detection tests \examples{ g <- GiottoData::loadGiottoMini("visium") -runPatternSimulation(gobject = g, pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", -"TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2")) +runPatternSimulation( + gobject = g, pattern_cell_ids = c( + "AAAGGGATGTAGCAAG-1", + "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" + ), + spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2") +) } diff --git a/man/runRankEnrich.Rd b/man/runRankEnrich.Rd index efe7c02a8..4c1059e33 100644 --- a/man/runRankEnrich.Rd +++ b/man/runRankEnrich.Rd @@ -75,17 +75,21 @@ and the final enrichment score is then calculated as the sum of top 100 RBPs. } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) -runRankEnrich(gobject = g, sign_matrix = sign_matrix, -expression_values = "normalized") +runRankEnrich( + gobject = g, sign_matrix = sign_matrix, + expression_values = "normalized" +) } \seealso{ \code{\link{makeSignMatrixRank}} diff --git a/man/runSpatialDeconv.Rd b/man/runSpatialDeconv.Rd index 9b62f08d4..65cb4e709 100644 --- a/man/runSpatialDeconv.Rd +++ b/man/runSpatialDeconv.Rd @@ -53,14 +53,16 @@ expression data } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) runSpatialDeconv(gobject = g, sign_matrix = sign_matrix) } diff --git a/man/runSpatialEnrich.Rd b/man/runSpatialEnrich.Rd index 9946d4c02..4f1924cab 100644 --- a/man/runSpatialEnrich.Rd +++ b/man/runSpatialEnrich.Rd @@ -85,14 +85,16 @@ For details see the individual functions: } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) runSpatialEnrich(gobject = g, sign_matrix = sign_matrix) } diff --git a/man/runUMAPprojection.Rd b/man/runUMAPprojection.Rd index ad99c7cda..4bc1e2abd 100644 --- a/man/runUMAPprojection.Rd +++ b/man/runUMAPprojection.Rd @@ -84,7 +84,7 @@ giotto object with updated UMAP dimension reduction run UMAP on subset and project on the rest } \details{ -See \code{\link[uwot]{umap}} for more information about these and +See \code{\link[uwot]{umap}} for more information about these and other parameters. \itemize{ \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') diff --git a/man/runWNN.Rd b/man/runWNN.Rd index 4a542f892..f58829f3f 100644 --- a/man/runWNN.Rd +++ b/man/runWNN.Rd @@ -45,8 +45,8 @@ runWNN( \item{verbose}{be verbose} } \value{ -A Giotto object with integrated UMAP (integrated.umap) within the -dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the +A Giotto object with integrated UMAP (integrated.umap) within the +dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the cellular metadata. } \description{ diff --git a/man/runtSNE.Rd b/man/runtSNE.Rd index ac280eba2..ff8cddfee 100644 --- a/man/runtSNE.Rd +++ b/man/runtSNE.Rd @@ -72,7 +72,7 @@ giotto object with updated tSNE dimension recuction run tSNE } \details{ -See \code{\link[Rtsne]{Rtsne}} for more information about these and +See \code{\link[Rtsne]{Rtsne}} for more information about these and other parameters. \cr \itemize{ \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') diff --git a/man/sampling_sp_genes.Rd b/man/sampling_sp_genes.Rd index 6028564bf..8832de69e 100644 --- a/man/sampling_sp_genes.Rd +++ b/man/sampling_sp_genes.Rd @@ -22,11 +22,11 @@ list function to select a set of spatial genes } \details{ -This function samples a subset of spatial genes among different clusters, +This function samples a subset of spatial genes among different clusters, with size n = target. -Number of samples from each cluster denpends on the relative proportion of +Number of samples from each cluster denpends on the relative proportion of each cluster. -Changing from equal size by setting sample_rate = 1 to with exact proportion +Changing from equal size by setting sample_rate = 1 to with exact proportion of each cluster by setting sample_rate = +Inf } \keyword{internal} diff --git a/man/screePlot.Rd b/man/screePlot.Rd index d4f0a542f..2f65fb4f9 100644 --- a/man/screePlot.Rd +++ b/man/screePlot.Rd @@ -72,14 +72,14 @@ screePlot( ggplot object for scree method } \description{ -identify significant principal components (PCs) using an +identify significant principal components (PCs) using an screeplot (a.k.a. elbowplot) } \details{ Screeplot works by plotting the explained variance of each - individual PC in a barplot allowing you to identify which PC provides a + individual PC in a barplot allowing you to identify which PC provides a significant contribution (a.k.a 'elbow method'). \cr - Screeplot will use an available pca object, based on the parameter 'name', + Screeplot will use an available pca object, based on the parameter 'name', or it will create it if it's not available (see \code{\link{runPCA}}) } \examples{ diff --git a/man/showCellProportionSwitchedPie.Rd b/man/showCellProportionSwitchedPie.Rd index 723b80486..e79ddc0b4 100644 --- a/man/showCellProportionSwitchedPie.Rd +++ b/man/showCellProportionSwitchedPie.Rd @@ -24,8 +24,8 @@ ggplot showCellProportionSwitchedPie } \details{ -Creates a pie chart showing how many cells switched clusters after +Creates a pie chart showing how many cells switched clusters after annotation resizing. -The function showPolygonSizeInfluence() must have been run on the Giotto +The function showPolygonSizeInfluence() must have been run on the Giotto Object for this function to run. } diff --git a/man/showCellProportionSwitchedSanKey.Rd b/man/showCellProportionSwitchedSanKey.Rd index 69e5fa050..c350901d1 100644 --- a/man/showCellProportionSwitchedSanKey.Rd +++ b/man/showCellProportionSwitchedSanKey.Rd @@ -12,12 +12,12 @@ showCellProportionSwitchedSanKey( ) } \arguments{ -\item{gobject}{giotto object which contains metadata for both spat_unit and +\item{gobject}{giotto object which contains metadata for both spat_unit and alt_spat_unit} \item{spat_unit}{spatial unit} -\item{alt_spat_unit}{alternative spatial unit which stores data after +\item{alt_spat_unit}{alternative spatial unit which stores data after resizing annotations} \item{feat_type}{feature type} diff --git a/man/showPolygonSizeInfluence.Rd b/man/showPolygonSizeInfluence.Rd index c3562b6a8..44d7469f2 100644 --- a/man/showPolygonSizeInfluence.Rd +++ b/man/showPolygonSizeInfluence.Rd @@ -19,12 +19,12 @@ showPolygonSizeInfluence( \item{spat_unit}{spatial unit} -\item{alt_spat_unit}{alternaitve spatial unit which represents resized +\item{alt_spat_unit}{alternaitve spatial unit which represents resized polygon data} \item{feat_type}{feature type} -\item{clus_name}{name of cluster column in cell_metadata for given spat_unit +\item{clus_name}{name of cluster column in cell_metadata for given spat_unit and alt_spat_unit, i.e. "kmeans"} \item{return_plot}{logical. whether to return the plot object} @@ -43,12 +43,12 @@ Compares cell metadata from spat_unit-feat_type pairs as provided. New columns, resize_switch and cluster_interaction, will be created within cell_metadata for spat_unit-feat_type. -These new columns will describe if a given cell switched cluster number when +These new columns will describe if a given cell switched cluster number when resized. If the same amount of clusters exist for spat_unit-feat_type and alt_spat_unit-feat_type, then clusters are determined to be corresponding based on % overlap in cell_IDs in each cluster. -Otherwise, multiple clusters from the spatial unit feature type pair are +Otherwise, multiple clusters from the spatial unit feature type pair are condensed to align with the smaller number of clusters and ensure overlap. } diff --git a/man/signPCA.Rd b/man/signPCA.Rd index 6245bde35..0df0b6379 100644 --- a/man/signPCA.Rd +++ b/man/signPCA.Rd @@ -84,14 +84,14 @@ ggplot object for scree method and maxtrix of p-values for jackstraw identify significant prinicipal components (PCs) } \details{ -Two different methods can be used to assess the number of relevant +Two different methods can be used to assess the number of relevant or significant prinicipal components (PC's). \cr 1. Screeplot works by plotting the explained variance of each - individual PC in a barplot allowing you to identify which PC provides a + individual PC in a barplot allowing you to identify which PC provides a significant contribution (a.k.a. 'elbow method'). \cr - 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} - function. By systematically permuting genes it identifies robust, and thus + 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} + function. By systematically permuting genes it identifies robust, and thus significant, PCs. \cr } diff --git a/man/simulateOneGenePatternGiottoObject.Rd b/man/simulateOneGenePatternGiottoObject.Rd index 68829a014..b73185499 100644 --- a/man/simulateOneGenePatternGiottoObject.Rd +++ b/man/simulateOneGenePatternGiottoObject.Rd @@ -37,7 +37,7 @@ part of the spatial pattern} \item{normalization_params}{additional parameters for (re-)normalizing} } \value{ -Reprocessed Giotto object for which one gene has a forced +Reprocessed Giotto object for which one gene has a forced spatial pattern } \description{ @@ -46,8 +46,12 @@ Create a simulated spatial pattern for one selected gnee \examples{ g <- GiottoData::loadGiottoMini("visium") -simulateOneGenePatternGiottoObject(gobject = g, -pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", -"ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -gene_name = "Gna12") +simulateOneGenePatternGiottoObject( + gobject = g, + pattern_cell_ids = c( + "AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", + "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" + ), + gene_name = "Gna12" +) } diff --git a/man/spatCellCellcom.Rd b/man/spatCellCellcom.Rd index e73bf8ebc..5c04c1a1d 100644 --- a/man/spatCellCellcom.Rd +++ b/man/spatCellCellcom.Rd @@ -2,14 +2,15 @@ % Please edit documentation in R/spatial_interaction.R \name{spatCellCellcom} \alias{spatCellCellcom} -\title{spatCellCellcom} +\alias{specificCellCellcommunicationScores} +\title{Spatial cell cell communication scoring} \usage{ spatCellCellcom( gobject, feat_type = NULL, spat_unit = NULL, spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", + cluster_column = NULL, random_iter = 1000, feat_set_1, feat_set_2, @@ -27,6 +28,30 @@ spatCellCellcom( seed_number = 1234, verbose = c("a little", "a lot", "none") ) + +specificCellCellcommunicationScores( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = NULL, + random_iter = 100, + cell_type_1 = NULL, + cell_type_2 = NULL, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c("fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", + "none"), + adjust_target = c("feats", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = TRUE +) } \arguments{ \item{gobject}{giotto object to use} @@ -71,6 +96,10 @@ considered} \item{seed_number}{seed number} \item{verbose}{verbose} + +\item{cell_type_1}{character. First cell type} + +\item{cell_type_2}{character. Second cell type} } \value{ Cell-Cell communication scores for feature pairs based on spatial @@ -110,13 +139,13 @@ LR_expr and rand_expr over all random spatial permutations \item \strong{pvalue:} p-value \item \strong{LR_cell_comb:} cell type pair combination \item \strong{p.adj:} adjusted p-value -\item \strong{PI:} significance score: log2fc * -log10(p.adj) +\item \strong{PI:} significance score: \eqn{log2fc * -log10(p.adj)} } } \examples{ g <- GiottoData::loadGiottoMini("visium") -spatCellCellcom( +res1 <- spatCellCellcom( gobject = g, cluster_column = "leiden_clus", feat_set_1 = "Gm19935", @@ -124,4 +153,15 @@ spatCellCellcom( verbose = "a lot", random_iter = 10 ) +force(res1) + +res2 <- specificCellCellcommunicationScores(g, + cluster_column = "leiden_clus", + cell_type_1 = 1, + cell_type_2 = 2, + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik" +) + +force(res2) } diff --git a/man/spatCellCellcomSpots.Rd b/man/spatCellCellcomSpots.Rd index 6c219f697..ac6e46389 100644 --- a/man/spatCellCellcomSpots.Rd +++ b/man/spatCellCellcomSpots.Rd @@ -36,7 +36,7 @@ spatCellCellcomSpots( \item{ave_celltype_exp}{Matrix with average expression per cell type} -\item{spatial_network_name}{spatial network to use for identifying +\item{spatial_network_name}{spatial network to use for identifying interacting cells} \item{cluster_column}{cluster column with cell type information} @@ -47,17 +47,17 @@ interacting cells} \item{feature_set_2}{second specific feature set from feature pairs} -\item{min_observations}{minimum number of interactions needed to be +\item{min_observations}{minimum number of interactions needed to be considered} \item{expression_values}{(e.g. 'normalized', 'scaled', 'custom')} -\item{detailed}{provide more detailed information +\item{detailed}{provide more detailed information (random variance and z-score)} \item{adjust_method}{which method to adjust p-values} -\item{adjust_target}{adjust multiple hypotheses at the cell or feature +\item{adjust_target}{adjust multiple hypotheses at the cell or feature level} \item{do_parallel}{run calculations in parallel with mclapply} @@ -71,37 +71,44 @@ level} \item{verbose}{verbose (e.g. 'a little', 'a lot', 'none')} } \value{ -Cell-Cell communication scores for feature pairs based on spatial +Cell-Cell communication scores for feature pairs based on spatial interaction } \description{ -Spatial Cell-Cell communication scores based on spatial +Spatial Cell-Cell communication scores based on spatial expression of interacting cells at spots resolution } \details{ -Statistical framework to identify if pairs of features -(such as ligand-receptor combinations) are expressed at higher levels than -expected based on a reshuffled null distribution of feature expression +Statistical framework to identify if pairs of features +(such as ligand-receptor combinations) are expressed at higher levels than +expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ - \item{LR_comb:}{Pair of ligand and receptor} - \item{lig_cell_type:}{ cell type to assess expression level of ligand } - \item{lig_expr:}{ average expression residual(observed - DWLS_predicted) of ligand in lig_cell_type } - \item{ligand:}{ ligand name } - \item{rec_cell_type:}{ cell type to assess expression level of receptor } - \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} - \item{receptor:}{ receptor name } - \item{LR_expr:}{ combined average ligand and receptor expression residual} - \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } - \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } - \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } - \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } - \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } - \item{z_score:}{ (optinal) z-score } - \item{log2fc:}{ LR_expr - rand_expr } - \item{pvalue:}{ p-value } - \item{LR_cell_comb:}{ cell type pair combination } - \item{p.adj:}{ adjusted p-value } - \item{PI:}{ significanc score: log2fc * -log10(p.adj) } + * LR_comb:Pair of ligand and receptor + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression residual(observed - DWLS_predicted) of + ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor + * rec_expr: average expression residual(observed - DWLS_predicted) of + receptor in rec_cell_type + * receptor: receptor name + * LR_expr: combined average ligand and receptor expression residual + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression residual from + random spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all random + spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optional) z-score + * log2fc: LR_expr - rand_expr + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significanc score: log2fc \* -log10(p.adj) } } diff --git a/man/spatialAEH.Rd b/man/spatialAEH.Rd index c97cc454a..02457c49a 100644 --- a/man/spatialAEH.Rd +++ b/man/spatialAEH.Rd @@ -48,7 +48,7 @@ An updated giotto object Compute spatial variable genes with spatialDE method } \details{ -This function is a wrapper for the SpatialAEH method +This function is a wrapper for the SpatialAEH method implemented in the ... } \examples{ diff --git a/man/spatialDE.Rd b/man/spatialDE.Rd index 876a72c88..d6e414023 100644 --- a/man/spatialDE.Rd +++ b/man/spatialDE.Rd @@ -62,7 +62,7 @@ a list of data.frames with results and plot (optional) Compute spatial variable genes with spatialDE method } \details{ -This function is a wrapper for the SpatialDE method originally +This function is a wrapper for the SpatialDE method originally implemented in python. See publication \doi{10.1038/nmeth.4636} } diff --git a/man/spdepAutoCorr.Rd b/man/spdepAutoCorr.Rd index 9364ef8af..31b226470 100644 --- a/man/spdepAutoCorr.Rd +++ b/man/spdepAutoCorr.Rd @@ -19,7 +19,7 @@ spdepAutoCorr( \item{gobject}{Input a Giotto object.} \item{method}{Specify a method name to compute auto correlation. -Available methods include +Available methods include \code{"geary.test", "lee.test", "lm.morantest","moran.test"}.} \item{spat_unit}{spatial unit} @@ -28,7 +28,7 @@ Available methods include \item{expression_values}{expression values to use, default = normalized} -\item{spatial_network_to_use}{spatial network to use, +\item{spatial_network_to_use}{spatial network to use, default = spatial_network} \item{return_gobject}{if FALSE, results are returned as data.table. diff --git a/man/specificCellCellcommunicationScores.Rd b/man/specificCellCellcommunicationScores.Rd deleted file mode 100644 index ac12f78b9..000000000 --- a/man/specificCellCellcommunicationScores.Rd +++ /dev/null @@ -1,115 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_interaction.R -\name{specificCellCellcommunicationScores} -\alias{specificCellCellcommunicationScores} -\title{specificCellCellcommunicationScores} -\usage{ -specificCellCellcommunicationScores( - gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 100, - cell_type_1 = "astrocyte", - cell_type_2 = "endothelial", - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c("fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", - "none"), - adjust_target = c("feats", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object to use} - -\item{feat_type}{feature type} - -\item{spat_unit}{spatial unit} - -\item{spatial_network_name}{spatial network to use for identifying -interacting cells} - -\item{cluster_column}{cluster column with cell type information} - -\item{random_iter}{number of iterations} - -\item{cell_type_1}{first cell type} - -\item{cell_type_2}{second cell type} - -\item{feat_set_1}{first specific gene set from gene pairs} - -\item{feat_set_2}{second specific gene set from gene pairs} - -\item{gene_set_1}{deprecated, use feat_set_1} - -\item{gene_set_2}{deprecated, use feat_set_2} - -\item{log2FC_addendum}{addendum to add when calculating log2FC} - -\item{min_observations}{minimum number of interactions needed to be -considered} - -\item{detailed}{provide more detailed information -(random variance and z-score)} - -\item{adjust_method}{which method to adjust p-values} - -\item{adjust_target}{adjust multiple hypotheses at the cell or feature level} - -\item{set_seed}{set a seed for reproducibility} - -\item{seed_number}{seed number} - -\item{verbose}{verbose} -} -\value{ -Cell-Cell communication scores for feature pairs based on spatial -interaction -} -\description{ -Specific Cell-Cell communication scores based on spatial -expression of interacting cells -} -\details{ -Statistical framework to identify if pairs of features -(such as ligand-receptor combinations) -are expressed at higher levels than expected based on a reshuffled null -distribution of feature expression values in cells that are spatially in -proximity to each other. -\itemize{ - \item{LR_comb:}{Pair of ligand and receptor} - \item{lig_cell_type:}{ cell type to assess expression level of ligand } - \item{lig_expr:}{ average expression of ligand in lig_cell_type } - \item{ligand:}{ ligand name } - \item{rec_cell_type:}{ cell type to assess expression level of receptor } - \item{rec_expr:}{ average expression of receptor in rec_cell_type} - \item{receptor:}{ receptor name } - \item{LR_expr:}{ combined average ligand and receptor expression } - \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } - \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } - \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations } - \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } - \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } - \item{z_score:}{ (optinal) z-score } - \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) } - \item{pvalue:}{ p-value } - \item{LR_cell_comb:}{ cell type pair combination } - \item{p.adj:}{ adjusted p-value } - \item{PI:}{ significanc score: log2fc * -log10(p.adj) } -} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -specificCellCellcommunicationScores(g, cluster_column = "leiden_clus") -} diff --git a/man/subClusterCells.Rd b/man/subClusterCells.Rd index 4eca454f8..a595efc8a 100644 --- a/man/subClusterCells.Rd +++ b/man/subClusterCells.Rd @@ -2,7 +2,9 @@ % Please edit documentation in R/clustering.R \name{subClusterCells} \alias{subClusterCells} -\title{subClusterCells} +\alias{doLeidenSubCluster} +\alias{doLouvainSubCluster} +\title{Cell subclustering} \usage{ subClusterCells( gobject, @@ -10,12 +12,17 @@ subClusterCells( cluster_method = c("leiden", "louvain_community", "louvain_multinet"), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + hvg_param = deprecated(), + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, @@ -29,40 +36,111 @@ subClusterCells( return_gobject = TRUE, verbose = TRUE ) + +doLeidenSubCluster( + gobject, + feat_type = NULL, + name = "sub_leiden_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + "normalized"), + hvg_param = deprecated(), + hvf_min_perc_cells = 5, + hvg_min_perc_cells = deprecated(), + hvf_mean_expr_det = 1, + hvg_mean_expr_det = deprecated(), + use_all_feats_as_hvf = FALSE, + use_all_genes_as_hvg = deprecated(), + min_nr_of_hvf = 5, + min_nr_of_hvg = deprecated(), + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + n_iterations = 500, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + toplevel = 2, + verbose = TRUE +) + +doLouvainSubCluster( + gobject, + name = "sub_louvain_clus", + version = c("community", "multinet"), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + "normalized"), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE +) } \arguments{ -\item{gobject}{giotto object} +\item{gobject}{\code{giotto} object} \item{name}{name for new clustering result} -\item{cluster_method}{clustering method to use} +\item{cluster_method}{clustering method to use. Currently one of "leiden" +(default), "louvain_community", "louvain_multinet"} \item{cluster_column}{cluster column to subcluster} \item{selected_clusters}{only do subclustering on these clusters} -\item{hvg_param}{parameters for calculateHVG} +\item{hvg_param}{deprecated} -\item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} +\item{hvf_param}{list of parameters for \code{\link[=calculateHVF]{calculateHVF()}}} -\item{hvg_mean_expr_det}{threshold for mean expression level in cells with +\item{hvg_min_perc_cells}{deprecated} + +\item{hvf_min_perc_cells}{threshold for detection in min percentage of cells} + +\item{hvg_mean_expr_det}{deprecated} + +\item{hvf_mean_expr_det}{threshold for mean expression level in cells with detection} -\item{use_all_genes_as_hvg}{forces all genes to be HVG and to be used as +\item{use_all_genes_as_hvg}{deprecated} + +\item{use_all_feats_as_hvf}{forces all features to be HVF and to be used as input for PCA} -\item{min_nr_of_hvg}{minimum number of HVG, or all genes will be used as +\item{min_nr_of_hvg}{deprecated} + +\item{min_nr_of_hvf}{minimum number of HVF, or all features will be used as input for PCA} -\item{pca_param}{parameters for runPCA} +\item{pca_param}{list of parameters for \code{\link[=runPCA]{runPCA()}}} -\item{nn_param}{parameters for parameters for createNearestNetwork} +\item{nn_param}{list of parameters for \code{\link[=createNearestNetwork]{createNearestNetwork()}}} -\item{k_neighbors}{number of k for createNearestNetwork} +\item{k_neighbors}{number of k for \code{\link[=createNearestNetwork]{createNearestNetwork()}}} -\item{resolution}{resolution} +\item{resolution}{resolution for community algorithm} -\item{n_iterations}{number of interations to run the Leiden algorithm.} +\item{n_iterations}{number of iterations to run the Leiden algorithm.} \item{gamma}{gamma} @@ -74,34 +152,64 @@ input for PCA} \item{network_name}{name of NN network to use} -\item{return_gobject}{boolean: return giotto object (default = TRUE)} +\item{return_gobject}{logical. return \code{giotto} object (default = TRUE)} \item{verbose}{verbose} + +\item{feat_type}{feature type} + +\item{toplevel}{do not use} + +\item{version}{version of Louvain algorithm to use. One of "community" or +"multinet", with the default being "community"} } \value{ -giotto object with new subclusters appended to cell metadata +\code{giotto} object with new subclusters appended to cell metadata } \description{ -subcluster cells +Perform cell subclustering by taking an annotated group of +cells and performing another round of clustering on just that subset. +Several methods are implemented. \code{subClusterCells()} is the main wrapper +function. \code{doLeidenSubCluster()} and \code{doLouvainSubCluster()} are more +specific implementations. } \details{ This function performs subclustering on selected clusters. The systematic steps are: -\itemize{ - \item{1. subset Giotto object} - \item{2. identify highly variable genes} - \item{3. run PCA} - \item{4. create nearest neighbouring network} - \item{5. do clustering} +\enumerate{ +\item subset Giotto object +\item identify highly variable genes +\item run PCA +\item create nearest neighbouring network +\item do clustering } } +\section{Functions}{ +\itemize{ +\item \code{doLeidenSubCluster()}: Further subcluster cells using a NN-network and +the Leiden algorithm + +\item \code{doLouvainSubCluster()}: subcluster cells using a NN-network and the +Louvain algorithm + +}} \examples{ g <- GiottoData::loadGiottoMini("visium") +# Run some subclusterings based on "leiden_clus" annotations that already +# exist in the visium mini object + +# default method is leiden subclustering subClusterCells(g, cluster_column = "leiden_clus") -} -\seealso{ -\code{\link{.doLouvainCluster_multinet}}, -\code{\link{.doLouvainCluster_community}} -and @seealso \code{\link{doLeidenCluster}} + +# use louvain instead +subClusterCells(g, + cluster_column = "leiden_clus", + cluster_method = "louvain_community" +) + +# directly call the more specific functions +doLeidenSubCluster(g, cluster_column = "leiden_clus") + +doLouvainSubCluster(g, cluster_column = "leiden_clus") } diff --git a/man/trendSceek.Rd b/man/trendSceek.Rd index fdcc03bf4..72bea81f6 100644 --- a/man/trendSceek.Rd +++ b/man/trendSceek.Rd @@ -45,7 +45,7 @@ data.frame with trendsceek spatial genes results Compute spatial variable genes with trendsceek method } \details{ -This function is a wrapper for the trendsceek_test method +This function is a wrapper for the trendsceek_test method implemented in the trendsceek package Publication: \doi{10.1038/nmeth.4634} } diff --git a/man/viewHMRFresults2D.Rd b/man/viewHMRFresults2D.Rd index aeae8bd19..8d0c87a38 100644 --- a/man/viewHMRFresults2D.Rd +++ b/man/viewHMRFresults2D.Rd @@ -4,11 +4,23 @@ \alias{viewHMRFresults2D} \title{viewHMRFresults2D} \usage{ -viewHMRFresults2D(gobject, HMRFoutput, k = NULL, betas_to_view = NULL, ...) +viewHMRFresults2D( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ... +) } \arguments{ \item{gobject}{giotto object} +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + \item{HMRFoutput}{HMRF output from doHMRF} \item{k}{number of HMRF domains} diff --git a/man/viewHMRFresults3D.Rd b/man/viewHMRFresults3D.Rd index 0eb7e2e3f..3158bd2fe 100644 --- a/man/viewHMRFresults3D.Rd +++ b/man/viewHMRFresults3D.Rd @@ -4,11 +4,23 @@ \alias{viewHMRFresults3D} \title{viewHMRFresults3D} \usage{ -viewHMRFresults3D(gobject, HMRFoutput, k = NULL, betas_to_view = NULL, ...) +viewHMRFresults3D( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ... +) } \arguments{ \item{gobject}{giotto object} +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + \item{HMRFoutput}{HMRF output from doHMRF} \item{k}{number of HMRF domains} diff --git a/man/viewHMRFresults_V2.Rd b/man/viewHMRFresults_V2.Rd index 67a05c64a..c8baf3d77 100644 --- a/man/viewHMRFresults_V2.Rd +++ b/man/viewHMRFresults_V2.Rd @@ -66,8 +66,8 @@ spatial plots with HMRF domains function to view HMRF results with multiple betas } \details{ -This function plots spatial map of HMRF domain clusters for multiple beta +This function plots spatial map of HMRF domain clusters for multiple beta with the name (hmrf_name), -matching the first part of the cell meta column names with HMRF clusters +matching the first part of the cell meta column names with HMRF clusters (for example name of ‘hmrf1 k=8 b=0.00’ is ‘hmrf1’) } diff --git a/man/visium_micron_scalefactor.Rd b/man/visium_micron_scalefactor.Rd index 9c9f93949..96eb9e3ea 100644 --- a/man/visium_micron_scalefactor.Rd +++ b/man/visium_micron_scalefactor.Rd @@ -8,7 +8,7 @@ .visium_micron_scale(json_scalefactors) } \arguments{ -\item{json_scalefactors}{list of scalefactors from +\item{json_scalefactors}{list of scalefactors from .visium_read_scalefactors()} } \value{ diff --git a/man/write_giotto_viewer_annotation.Rd b/man/write_giotto_viewer_annotation.Rd index ef5c40f51..9e2c2334b 100644 --- a/man/write_giotto_viewer_annotation.Rd +++ b/man/write_giotto_viewer_annotation.Rd @@ -21,7 +21,7 @@ write_giotto_viewer_annotation( write a .txt and .annot file for the selection annotation } \description{ -write out factor-like annotation data from a giotto object for +write out factor-like annotation data from a giotto object for the Viewer } \keyword{internal} diff --git a/vignettes/intro_to_giotto.Rmd b/vignettes/intro_to_giotto.Rmd index a446de3aa..3f5ce35ec 100644 --- a/vignettes/intro_to_giotto.Rmd +++ b/vignettes/intro_to_giotto.Rmd @@ -9,8 +9,8 @@ vignette: > ```{r, include = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" + collapse = TRUE, + comment = "#>" ) ``` @@ -63,8 +63,9 @@ g <- runPCA(g) Plot PCA ```{r} -plotPCA(g, - cell_color = "leiden_clus") +plotPCA(g, + cell_color = "leiden_clus" +) ``` Run UMAP @@ -77,7 +78,8 @@ Plot UMAP ```{r} plotUMAP(g, - cell_color = "leiden_clus") + cell_color = "leiden_clus" +) ``` Run tSNE @@ -90,7 +92,8 @@ Plot tSNE ```{r} plotTSNE(g, - cell_color = "leiden_clus") + cell_color = "leiden_clus" +) ``` Do clustering @@ -103,7 +106,8 @@ Spatial plot with clusters ```{r} spatPlot2D(g, - cell_color = "leiden_clus") + cell_color = "leiden_clus" +) ``` Session info