diff --git a/DESCRIPTION b/DESCRIPTION index 340b4e716..cdd15a0da 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -124,7 +124,8 @@ Suggests: tiff, trendsceek, testthat (>= 3.0.0), - qs + qs, + rmarkdown Remotes: drieslab/GiottoUtils, drieslab/GiottoClass, diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 82a65fd15..de16b2c08 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -47,10 +47,13 @@ # mymatrix = log(mymatrix + offset)/log(base) } else if (methods::is(mymatrix, "dgCMatrix")) { mymatrix@x <- log(mymatrix@x + offset) / log(base) - # replace with sparseMatrixStats + # replace with sparseMatrixStats } else if (methods::is(mymatrix, "Matrix")) { mymatrix@x <- log(mymatrix@x + offset) / log(base) - } else { + } else if(methods::is(mymatrix, 'dbMatrix')) { + mymatrix[] <- dplyr::mutate(mymatrix[], x = x + offset) # workaround for lack of @x slot + mymatrix <- log(mymatrix)/log(base) + } else { mymatrix <- log(as.matrix(mymatrix) + offset) / log(base) } @@ -732,7 +735,9 @@ filterGiotto <- function( description = "_filter" ) - return(newGiottoObject) + return(newGiottoObject) + + } @@ -740,6 +745,46 @@ filterGiotto <- function( ### normalization #### +#' @title compute_dbMatrix +#' @description saves dbMatrix to db if global option is set +#' @details +#' Set \code{options(giotto.dbmatrix_compute = FALSE)} if saving dbMatrix +#' after each step of normalization workflow is not desired. 
+#' @keywords internal +.compute_dbMatrix <- function(dbMatrix, name, verbose = TRUE) { + # input validation: matrix must be a dbMatrix, name must be a single table name + if(!inherits(dbMatrix, 'dbMatrix')) { + stop('dbMatrix must be of class dbMatrix') + } + + if(!is.character(name) || length(name) != 1L) { + stop('name must be a character') + } + + # TODO: update with dbData generic + con <- dbMatrix:::get_con(dbMatrix) + + # overwrite table by default: drop any existing table called `name` first + if(name %in% DBI::dbListTables(con)) { + DBI::dbRemoveTable(con, name) + } + + if(verbose){ + msg <- glue::glue('Computing {name} expression matrix on disk...') + cat(msg) + } + + dbMatrix[] |> + dplyr::compute(temporary = FALSE, name = name) + + # TODO: update below with proper setters from dbMatrix + dbMatrix[] <- dplyr::tbl(con, name) # reassign to computed mat + dbMatrix@name <- name + + if(verbose) cat('done \n') + + return(dbMatrix) +} #' @title RNA standard normalization #' @name .rna_standard_normalization @@ -776,9 +821,6 @@ filterGiotto <- function( feat_names <- rownames(raw_expr[]) col_names <- colnames(raw_expr[]) - - - ## 1. library size normalize if (library_size_norm == TRUE) { norm_expr <- .lib_norm_giotto( @@ -853,6 +895,16 @@ filterGiotto <- function( } ## 5. 
create and set exprObj + # Save dbMatrix to db (in-memory matrices are skipped: .compute_dbMatrix() stops on non-dbMatrix input) + compute_mat <- getOption("giotto.dbmatrix_compute", default = FALSE) + if(compute_mat && inherits(norm_expr, 'dbMatrix')){ + norm_expr <- .compute_dbMatrix( + dbMatrix = norm_expr, + name = 'normalized', + verbose = verbose + ) + } + norm_expr <- create_expr_obj( name = "normalized", exprMat = norm_expr, @@ -861,6 +913,15 @@ filterGiotto <- function( provenance = provenance, misc = NULL ) + + # Save dbMatrix to db (skipped when the scaled matrix is NULL or not a dbMatrix) + if(compute_mat && inherits(norm_scaled_expr, 'dbMatrix')){ + norm_scaled_expr <- .compute_dbMatrix( + dbMatrix = norm_scaled_expr, + name = 'scaled', + verbose = verbose + ) + } norm_scaled_expr <- create_expr_obj( name = "scaled", diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R new file mode 100644 index 000000000..29979f555 --- /dev/null +++ b/tests/testthat/test-dbMatrix_filterGiotto.R @@ -0,0 +1,45 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + +dbsm = dbMatrix::dbMatrix(value = dgc, + con = con, + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# Get filtered matrix +dgc_visium = 
getExpression(visium_filtered, output = "matrix") +mat_db = getExpression(gobject_db_filtered, output = "matrix") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test filterGiotto() equivalence between dbMatrix and dgCMatrix + +test_that("dbMatrix equivalent to dgCMatrix after filterGiotto()", { + expect_equal(dgc_visium, dgc_db) +}) diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R new file mode 100644 index 000000000..be575f17b --- /dev/null +++ b/tests/testthat/test-dbMatrix_libNorm.R @@ -0,0 +1,64 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + +dbsm = dbMatrix::dbMatrix(value = dgc, + con = con, + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# ---------------------------------------------------------------------------- # +# Perform library normalization and scaling +visium_filtered = normalizeGiotto(gobject = visium_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = TRUE, + log_norm = FALSE, + 
scale_feats = FALSE, + scale_cells = FALSE) + + +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = TRUE, + log_norm = FALSE, + scale_feats = FALSE, + scale_cells = FALSE) +# Get normalized matrix +dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") +mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix +test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(library_size_norm = TRUE)", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R new file mode 100644 index 000000000..1731e634f --- /dev/null +++ b/tests/testthat/test-dbMatrix_logNorm.R @@ -0,0 +1,64 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + +dbsm = dbMatrix::dbMatrix(value = dgc, + con = con, + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + 
+gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# ---------------------------------------------------------------------------- # +# Perform library normalization and scaling +visium_filtered = normalizeGiotto(gobject = visium_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = TRUE, + scale_feats = FALSE, + scale_cells = FALSE) + + +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = TRUE, + scale_feats = FALSE, + scale_cells = FALSE) +# Get normalized matrix +dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") +mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix +test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(log_norm=TRUE)", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R new file mode 100644 index 000000000..b28504d35 --- /dev/null +++ b/tests/testthat/test-dbMatrix_scale.R @@ -0,0 +1,64 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + +dbsm = dbMatrix::dbMatrix(value = dgc, + con = con, + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = 
createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# ---------------------------------------------------------------------------- # +# Perform feature and cell scaling only (library-size and log normalization disabled) +visium_filtered = normalizeGiotto(gobject = visium_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE) + + +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE) +# Get scaled matrix (in-memory result densified so both sides compare as base matrices) +dgc_visium = getExpression(visium_filtered, output = "matrix", values = "scaled") |> as.matrix() +mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "scaled") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix +test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(scale_feats = TRUE, scale_cells = TRUE)", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file diff --git a/vignettes/dbMatrix.Rmd b/vignettes/dbMatrix.Rmd new file mode 100644 index 000000000..588737d97 --- /dev/null +++ b/vignettes/dbMatrix.Rmd @@ -0,0 +1,93 @@ +--- +title: "Using dbMatrix with Giotto" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using dbMatrix with 
Giotto} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Introduction +This vignette demonstrates how to use a [`dbMatrix`](https://github.com/drieslab/dbMatrix) within a Giotto Object. The `dbMatrix` is a database-backed matrix that can be used to store large matrices in a database. This allows for efficient storage and retrieval of large matrices and enables efficiently working with larger-than-memory cell count matrices. + +# 1. Set up Giotto + +```{r, eval=FALSE} +# Ensure Giotto Suite is installed. +if(!"Giotto" %in% installed.packages()) { + devtools::install_github("drieslab/Giotto@suite") +} + +# Ensure GiottoData, a small, helper module for tutorials, is installed. +if(!"GiottoData" %in% installed.packages()) { + devtools::install_github("drieslab/GiottoData") +} + +library(Giotto) +library(GiottoData) + +# Ensure the Python environment for Giotto has been installed. +genv_exists = checkGiottoEnvironment() +if(!genv_exists){ + # The following command need only be run once to install the Giotto environment. + installGiottoEnvironment() +} +``` + +# 2. 
Create Giotto object with `dbMatrix` + +```{r} +# Get test dataset from Giotto Data package +visium = GiottoData::loadGiottoMini(dataset = "visium") + +# Extract the cell expression matrix as a test dataset +dgc = getExpression(visium, output = "matrix") + +# Create a DBI connection object (in-memory DuckDB database) +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + +# Create a dbSparseMatrix using the dbMatrix constructor function +dbsm = dbMatrix::dbMatrix(value = dgc, + con = con, + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create Giotto exprObj with the dbMatrix +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create the Giotto object consisting of only the cell count matrix +gobject_db = createGiottoObject(expression = expObj_db) +``` + +# 3. Preprocess Giotto object with `dbMatrix` +```{r} +# Perform filtering +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# Perform feature and cell scaling (library-size and log normalization disabled) +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE) +``` + + +```{r} +sessionInfo() +``` \ No newline at end of file