Skip to content

Commit

Permalink
Merge pull request #799 from jiajic/suite_modular
Browse files Browse the repository at this point in the history
Add: `cell_ids` param to `calculateHVF()`
jiajic authored Nov 18, 2023
2 parents 599f504 + 32307eb commit 299d556
Showing 8 changed files with 47 additions and 41 deletions.
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -8,7 +8,8 @@
- New function `plotCellTypesFromEnrichment()` within `spatial_enrichment_visuals.R` that generates a bar plot of cell types vs frequency based on a provided enrichment
- New function `pieCellTypesFromEnrichment()` within `spatial_enrichment_visuals.R` that generates a pie chart of cell types based on a provided enrichment
- New function `addVisiumPolygons()` within `convenience.R` (along with its requisite internal functions) that adds circular polygons centered at the spatial locations of a Giotto Object made with Visium data. Takes a Giotto Object and a path to the Visium output file `scalefactors_json.json` as input arguments.
- Added `addVisiumPolygons()` to `createGiottoVisiumObject()` workflow.
- Add `addVisiumPolygons()` to `createGiottoVisiumObject()` workflow.
- Add `cell_ids` param to `calculateHVF()` to allow calculation of HVFs on a subset of cells

## Changes
- Improved performance of `gefToGiotto()`
26 changes: 8 additions & 18 deletions R/clustering.R
Original file line number Diff line number Diff line change
@@ -1035,7 +1035,6 @@ doSNNCluster <- function(gobject,
#' @param spat_unit spatial unit (e.g. "rna", "dna", "protein")
#' @param expression_values expression values to use (e.g. "normalized", "scaled", "custom")
#' @param feats_to_use subset of features to use
#' @param genes_to_use deprecated use feats_to_use
#' @param dim_reduction_to_use dimension reduction to use (e.g. "cells", "pca", "umap", "tsne")
#' @param dim_reduction_name dimension reduction name, defaults to "pca"
#' @param dimensions_to_use dimensions to use, default = 1:10
@@ -1057,7 +1056,6 @@ doKmeans <- function(gobject,
spat_unit = NULL,
expression_values = c('normalized', 'scaled', 'custom'),
feats_to_use = NULL,
genes_to_use = NULL,
dim_reduction_to_use = c('cells', 'pca', 'umap', 'tsne'),
dim_reduction_name = 'pca',
dimensions_to_use = 1:10,
@@ -1073,14 +1071,6 @@ doKmeans <- function(gobject,
set_seed = TRUE,
seed_number = 1234) {



## deprecated arguments
if(!is.null(genes_to_use)) {
feats_to_use = genes_to_use
warning('genes_to_use is deprecated, use feats_to_use in the future \n')
}

# Set feat_type and spat_unit
spat_unit = set_default_spat_unit(gobject = gobject,
spat_unit = spat_unit)
@@ -1225,7 +1215,7 @@ doKmeans <- function(gobject,
#' @param spat_unit spatial unit
#' @param feat_type feature type
#' @param expression_values expression values to use
#' @param genes_to_use subset of genes to use
#' @param feats_to_use subset of features to use
#' @param dim_reduction_to_use dimension reduction to use
#' @param dim_reduction_name dimensions reduction name
#' @param dimensions_to_use dimensions to use
@@ -1245,7 +1235,7 @@ doHclust <- function(gobject,
spat_unit = NULL,
feat_type = NULL,
expression_values = c('normalized', 'scaled', 'custom'),
genes_to_use = NULL,
feats_to_use = NULL,
dim_reduction_to_use = c('cells', 'pca', 'umap', 'tsne'),
dim_reduction_name = 'pca',
dimensions_to_use = 1:10,
@@ -1308,8 +1298,8 @@ doHclust <- function(gobject,
values = values)

# subset expression matrix
if(!is.null(genes_to_use)) {
expr_values = expr_values[rownames(expr_values) %in% genes_to_use, ]
if(!is.null(feats_to_use)) {
expr_values = expr_values[rownames(expr_values) %in% feats_to_use, ]
}

# features as columns
@@ -1424,7 +1414,7 @@ doHclust <- function(gobject,
#' @param sNNclust_minPts SNNclust: min points
#' @param borderPoints SNNclust: border points
#' @param expression_values expression values to use
#' @param genes_to_use = NULL,
#' @param feats_to_use features to use in clustering
#' @param dim_reduction_to_use dimension reduction to use
#' @param dim_reduction_name name of the dimension reduction to use, default 'pca'
#' @param dimensions_to_use dimensions to use
@@ -1480,7 +1470,7 @@ clusterCells <- function(gobject,
borderPoints = TRUE,

expression_values = c('normalized', 'scaled', 'custom'),
genes_to_use = NULL,
feats_to_use = NULL,
dim_reduction_to_use = c('cells', 'pca', 'umap', 'tsne'),
dim_reduction_name = 'pca',
dimensions_to_use = 1:10,
@@ -1584,7 +1574,7 @@ clusterCells <- function(gobject,
result = doKmeans(gobject = gobject,
name = name,
expression_values = expression_values,
genes_to_use = genes_to_use,
feats_to_use = feats_to_use,
dim_reduction_to_use = dim_reduction_to_use,
dim_reduction_name = dim_reduction_name,
dimensions_to_use = dimensions_to_use,
@@ -1602,7 +1592,7 @@ clusterCells <- function(gobject,
result = doHclust(gobject = gobject,
name = name,
expression_values = expression_values,
genes_to_use = genes_to_use,
feats_to_use = feats_to_use,
dim_reduction_to_use = dim_reduction_to_use,
dim_reduction_name = dim_reduction_name,
dimensions_to_use = dimensions_to_use,
42 changes: 28 additions & 14 deletions R/variable_genes.R
Original file line number Diff line number Diff line change
@@ -97,7 +97,7 @@ calc_var_HVF = function(scaled_matrix,
selected = NULL

test = apply(X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x))
test = sort(test, decreasing = T)
test = sort(test, decreasing = TRUE)

dt_res = data.table::data.table(feats = names(test), var = test)

@@ -109,7 +109,9 @@ calc_var_HVF = function(scaled_matrix,
}


if(show_plot == TRUE | return_plot == TRUE | save_plot == TRUE) {
if(isTRUE(show_plot) ||
isTRUE(return_plot) ||
isTRUE(save_plot)) {

dt_res[, rank := 1:.N]
pl <- create_calc_var_HVF_plot(dt_res)
@@ -144,6 +146,8 @@ calc_var_HVF = function(scaled_matrix,
#' @param difference_in_cov [cov_loess] minimum difference in coefficient of variation required
#' @param var_threshold [var_p_resid] variance threshold for features for var_p_resid method
#' @param var_number [var_p_resid] number of top variance features for var_p_resid method
#' @param cell_ids Specific cell_IDs (`spatIDs()`) that the HVF detection
#' should be performed on. Passing NULL (default) runs HVF on all cells.
#' @param show_plot show plot
#' @param return_plot return ggplot object (overridden by `return_gobject`)
#' @param save_plot directly save the plot [boolean]
@@ -178,6 +182,7 @@ calculateHVF <- function(gobject,
difference_in_cov = 0.1,
var_threshold = 1.5,
var_number = NULL,
cell_ids = NULL,
show_plot = NA,
return_plot = NA,
save_plot = NA,
@@ -204,10 +209,15 @@ calculateHVF <- function(gobject,
output = 'matrix')

# not advised
if(reverse_log_scale == TRUE) {
if(isTRUE(reverse_log_scale)) {
expr_values = (logbase^expr_values)-1
}

# id subset
if (!is.null(cell_ids)) {
subset_bool <- colnames(expr_values) %in% cell_ids
expr_values <- expr_values[, subset_bool]
}

# method to use
method = match.arg(method, choices = c('cov_groups', 'cov_loess', 'var_p_resid'))
@@ -222,24 +232,28 @@ calculateHVF <- function(gobject,

if(method == 'var_p_resid') {

results = calc_var_HVF(scaled_matrix = expr_values,
var_threshold = var_threshold,
var_number = var_number,
show_plot = show_plot,
return_plot = return_plot,
save_plot = save_plot)
results = calc_var_HVF(
scaled_matrix = expr_values,
var_threshold = var_threshold,
var_number = var_number,
show_plot = show_plot,
return_plot = return_plot,
save_plot = save_plot
)

feat_in_cells_detected = results[['dt']]
pl = results[['pl']]

} else {

## create data.table with relevant statistics ##
feat_in_cells_detected <- data.table::data.table(feats = rownames(expr_values),
nr_cells = rowSums_flex(expr_values > expression_threshold),
total_expr = rowSums_flex(expr_values),
mean_expr = rowMeans_flex(expr_values),
sd = unlist(apply(expr_values, 1, sd)))
feat_in_cells_detected <- data.table::data.table(
feats = rownames(expr_values),
nr_cells = rowSums_flex(expr_values > expression_threshold),
total_expr = rowSums_flex(expr_values),
mean_expr = rowMeans_flex(expr_values),
sd = unlist(apply(expr_values, 1, sd))
)
feat_in_cells_detected[, cov := (sd/mean_expr)]
gini_level <- unlist(apply(expr_values, MARGIN = 1, mygini_fun))
feat_in_cells_detected[, gini := gini_level]
4 changes: 4 additions & 0 deletions man/calculateHVF.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/clusterCells.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/doHclust.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions man/doKmeans.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/reexports.Rd

Large diffs are not rendered by default.

0 comments on commit 299d556

Please sign in to comment.